# Source code for autogluon.timeseries.models.gluonts.mx.models

import logging
import re
from typing import Callable, List, Type

import gluonts
import mxnet as mx

from autogluon.core.utils import warning_filter
from autogluon.timeseries.dataset.ts_dataframe import TimeSeriesDataFrame
from autogluon.timeseries.models.abstract.abstract_timeseries_model import AbstractTimeSeriesModelFactory
from autogluon.timeseries.models.gluonts.abstract_gluonts import AbstractGluonTSModel

with warning_filter():
    from gluonts.model.estimator import Estimator as GluonTSEstimator
    from gluonts.dataset.field_names import FieldName
    from gluonts.mx.context import get_mxnet_context
    from gluonts.mx.model.deepar import DeepAREstimator
    from gluonts.mx.model.simple_feedforward import SimpleFeedForwardEstimator
    from gluonts.mx.model.transformer import TransformerEstimator
    from gluonts.mx.model.tft import TemporalFusionTransformerEstimator
    from gluonts.mx.model.seq2seq import MQCNNEstimator, MQRNNEstimator

from .callback import GluonTSEarlyStoppingCallback, TimeLimitCallback

logger = logging.getLogger(__name__)
gts_logger = logging.getLogger(gluonts.__name__)


class AbstractGluonTSMXNetModel(AbstractGluonTSModel):
    def _get_callbacks(self, time_limit: int, *args, **kwargs) -> List[Callable]:
        callbacks = [TimeLimitCallback(time_limit)]

        early_stopping_patience = self._get_model_params().get("early_stopping_patience", None)
        if early_stopping_patience:
            callbacks.append(GluonTSEarlyStoppingCallback(early_stopping_patience))

        return callbacks


class DeepARMXNetModel(AbstractGluonTSMXNetModel):
    """DeepAR model from GluonTS based on the MXNet backend.

    The model consists of an RNN encoder (LSTM or GRU) and a decoder that outputs the
    distribution of the next target value. Close to the model described in [Salinas2020]_.

    Based on `gluonts.mx.model.deepar.DeepAREstimator <https://ts.gluon.ai/stable/api/gluonts/gluonts.mx.model.deepar.html>`_.
    See GluonTS documentation for additional hyperparameters.

    References
    ----------
    .. [Salinas2020] Salinas, David, et al.
        "DeepAR: Probabilistic forecasting with autoregressive recurrent networks."
        International Journal of Forecasting. 2020.

    Other Parameters
    ----------------
    context_length : int, optional
        Number of steps to unroll the RNN for before computing predictions
        (default: None, in which case context_length = prediction_length)
    disable_static_features : bool, default = False
        If True, static features won't be used by the model even if they are present in the dataset.
        If False, static features will be used by the model if they are present in the dataset.
    disable_known_covariates : bool, default = False
        If True, known covariates won't be used by the model even if they are present in the dataset.
        If False, known covariates will be used by the model if they are present in the dataset.
    num_layers : int, default = 2
        Number of RNN layers
    num_cells : int, default = 40
        Number of RNN cells for each layer
    cell_type : str, default = "lstm"
        Type of recurrent cells to use (available: 'lstm' or 'gru')
    dropoutcell_type : str, default = 'ZoneoutCell'
        Type of dropout cells to use
        (available: 'ZoneoutCell', 'RNNZoneoutCell', 'VariationalDropoutCell' or 'VariationalZoneoutCell')
    dropout_rate : float, default = 0.1
        Dropout regularization parameter
    embedding_dimension : int, optional
        Dimension of the embeddings for categorical features
        (if None, defaults to [min(50, (cat+1)//2) for cat in cardinality])
    distr_output : gluonts.mx.DistributionOutput, default = StudentTOutput()
        Distribution to use to evaluate observations and sample predictions
    scaling : bool, default = True
        Whether to automatically scale the target values
    epochs : int, default = 100
        Number of epochs the model will be trained for
    batch_size : int, default = 64
        Size of batches used during training
    num_batches_per_epoch : int, default = 50
        Number of batches processed every epoch
    learning_rate : float, default = 1e-3
        Learning rate used during training
    """

    gluonts_estimator_class: Type[GluonTSEstimator] = DeepAREstimator
    default_num_samples: int = 250

    def _get_estimator_init_args(self) -> dict:
        init_kwargs = super()._get_estimator_init_args()
        # Our API hides these model kwargs from the user. They can only be controlled
        # through disable_static_features and disable_known_covariates
        init_kwargs["use_feat_static_cat"] = self.num_feat_static_cat > 0
        init_kwargs["use_feat_static_real"] = self.num_feat_static_real > 0
        init_kwargs["cardinality"] = self.feat_static_cat_cardinality
        init_kwargs["use_feat_dynamic_real"] = self.num_feat_dynamic_real > 0
        return init_kwargs
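
# Illustrative usage sketch (a hypothetical helper, never called by this module):
# AutoGluon users normally reach this model through TimeSeriesPredictor rather than
# instantiating it directly. The "DeepARMXNet" hyperparameter key and the exact
# TimeSeriesPredictor signature below are assumptions that may differ between
# AutoGluon versions.
def _example_fit_deepar_mxnet(train_data: TimeSeriesDataFrame):
    from autogluon.timeseries import TimeSeriesPredictor

    predictor = TimeSeriesPredictor(prediction_length=24)
    # early_stopping_patience is consumed by AbstractGluonTSMXNetModel._get_callbacks above
    predictor.fit(
        train_data,
        hyperparameters={"DeepARMXNet": {"epochs": 20, "num_layers": 3, "early_stopping_patience": 5}},
    )
    return predictor
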
class AbstractGluonTSSeq2SeqModel(AbstractGluonTSMXNetModel):
    """Abstract class for MQCNN and MQRNN which require hybridization to be turned off
    when fitting on the GPU.
    """

    gluonts_estimator_class: Type[GluonTSEstimator] = None

    def _get_estimator_init_args(self):
        init_kwargs = super()._get_estimator_init_args()
        if get_mxnet_context() != mx.context.cpu():
            init_kwargs["hybridize"] = False
        return init_kwargs
class MQCNNMXNetModel(AbstractGluonTSSeq2SeqModel):
    """MQCNN model from GluonTS.

    The model consists of a CNN encoder and a decoder that directly predicts the
    quantiles of the future target values' distribution. As described in [Wen2017]_.

    Based on `gluonts.mx.model.seq2seq.MQCNNEstimator <https://ts.gluon.ai/stable/api/gluonts/gluonts.mx.model.seq2seq.html#gluonts.mx.model.seq2seq.MQCNNEstimator>`_.
    See GluonTS documentation for additional hyperparameters.

    References
    ----------
    .. [Wen2017] Wen, Ruofeng, et al.
        "A multi-horizon quantile recurrent forecaster."
        arXiv preprint arXiv:1711.11053 (2017)

    Other Parameters
    ----------------
    context_length : int, optional
        Number of steps to unroll the RNN for before computing predictions
        (default: None, in which case context_length = prediction_length)
    disable_static_features : bool, default = False
        If True, static features won't be used by the model even if they are present in the dataset.
        If False, static features will be used by the model if they are present in the dataset.
    disable_known_covariates : bool, default = False
        If True, known covariates won't be used by the model even if they are present in the dataset.
        If False, known covariates will be used by the model if they are present in the dataset.
    embedding_dimension : int, optional
        Dimension of the embeddings for categorical features.
        (default: [min(50, (cat+1)//2) for cat in cardinality])
    add_time_feature : bool, default = True
        Adds a set of time features.
    add_age_feature : bool, default = False
        Adds an age feature. The age feature starts with a small value at the start of
        the time series and grows over time.
    decoder_mlp_dim_seq : List[int], default = [30]
        The dimensionalities of the Multi Layer Perceptron layers of the decoder.
    channels_seq : List[int], default = [30, 30, 30]
        The number of channels (i.e. filters or convolutions) for each layer of the
        HierarchicalCausalConv1DEncoder. More channels usually correspond to better
        performance and larger network size.
    dilation_seq : List[int], default = [1, 3, 5]
        The dilation of the convolutions in each layer of the HierarchicalCausalConv1DEncoder.
        Greater numbers correspond to a greater receptive field of the network, which is
        usually better with longer context_length. (Same length as channels_seq)
    kernel_size_seq : List[int], default = [7, 3, 3]
        The kernel sizes (i.e. window size) of the convolutions in each layer of the
        HierarchicalCausalConv1DEncoder. (Same length as channels_seq)
    use_residual : bool, default = True
        Whether the hierarchical encoder should additionally pass the unaltered past
        target to the decoder.
    quantiles : List[float], default = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
        The list of quantiles that will be optimized for, and predicted by, the model.
        Optimizing for more quantiles than are of direct interest to you can result in
        improved performance due to a regularizing effect.
    distr_output : gluonts.mx.DistributionOutput, optional
        DistributionOutput to use. Only one between `quantile` and `distr_output` can be set.
    scaling : bool, optional
        Whether to automatically scale the target values.
        (default: False if quantile_output is used, True otherwise)
    epochs : int, default = 100
        Number of epochs the model will be trained for
    batch_size : int, default = 64
        Size of batches used during training
    num_batches_per_epoch : int, default = 50
        Number of batches processed every epoch
    learning_rate : float, default = 1e-3
        Learning rate used during training
    """

    gluonts_estimator_class: Type[GluonTSEstimator] = MQCNNEstimator

    def _get_estimator_init_args(self) -> dict:
        init_kwargs = super()._get_estimator_init_args()
        init_kwargs["use_feat_static_cat"] = self.num_feat_static_cat > 0
        init_kwargs["use_feat_static_real"] = self.num_feat_static_real > 0
        init_kwargs["cardinality"] = self.feat_static_cat_cardinality
        init_kwargs["use_feat_dynamic_real"] = self.num_feat_dynamic_real > 0
        return init_kwargs
class MQRNNMXNetModel(AbstractGluonTSSeq2SeqModel):
    """MQRNN model from GluonTS.

    The model consists of an RNN encoder and a decoder that directly predicts the
    quantiles of the future target values' distribution. As described in [Wen2017]_.

    Based on `gluonts.mx.model.seq2seq.MQRNNEstimator <https://ts.gluon.ai/stable/api/gluonts/gluonts.mx.model.seq2seq.html#gluonts.mx.model.seq2seq.MQRNNEstimator>`_.
    See GluonTS documentation for additional hyperparameters.

    References
    ----------
    .. [Wen2017] Wen, Ruofeng, et al.
        "A multi-horizon quantile recurrent forecaster."
        arXiv preprint arXiv:1711.11053 (2017)

    Other Parameters
    ----------------
    context_length : int, optional
        Number of steps to unroll the RNN for before computing predictions
        (default: None, in which case context_length = prediction_length)
    embedding_dimension : int, optional
        Dimension of the embeddings for categorical features.
        (default: [min(50, (cat+1)//2) for cat in cardinality])
    decoder_mlp_dim_seq : List[int], default = [30]
        The dimensionalities of the Multi Layer Perceptron layers of the decoder.
    quantiles : List[float], default = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
        The list of quantiles that will be optimized for, and predicted by, the model.
        Optimizing for more quantiles than are of direct interest to you can result in
        improved performance due to a regularizing effect.
    distr_output : gluonts.mx.DistributionOutput, optional
        DistributionOutput to use. Only one between `quantile` and `distr_output` can be set.
    scaling : bool, optional
        Whether to automatically scale the target values.
        (default: False if quantile_output is used, True otherwise)
    epochs : int, default = 100
        Number of epochs the model will be trained for
    batch_size : int, default = 64
        Size of batches used during training
    num_batches_per_epoch : int, default = 50
        Number of batches processed every epoch
    learning_rate : float, default = 1e-3
        Learning rate used during training
    """

    gluonts_estimator_class: Type[GluonTSEstimator] = MQRNNEstimator
class SimpleFeedForwardMXNetModel(AbstractGluonTSMXNetModel):
    """SimpleFeedForward model from GluonTS based on the MXNet backend.

    The model consists of a multilayer perceptron (MLP) that predicts the distribution
    of the next target value.

    Based on `gluonts.mx.model.simple_feedforward.SimpleFeedForwardEstimator <https://ts.gluon.ai/stable/api/gluonts/gluonts.mx.model.simple_feedforward.html>`_.
    See GluonTS documentation for additional hyperparameters.

    Note that AutoGluon uses hyperparameters ``hidden_dim`` and ``num_layers`` instead of
    ``num_hidden_dimensions`` used in GluonTS. This is done to ensure compatibility with Ray Tune.

    Other Parameters
    ----------------
    context_length : int, optional
        Number of time units that condition the predictions
        (default: None, in which case context_length = prediction_length)
    hidden_dim : int, default = 40
        Number of hidden units in each layer of the MLP
    num_layers : int, default = 2
        Number of hidden layers in the MLP
    distr_output : gluonts.mx.DistributionOutput, default = StudentTOutput()
        Distribution to fit
    batch_normalization : bool, default = False
        Whether to use batch normalization
    mean_scaling : bool, default = True
        Scale the network input by the data mean and the network output by its inverse
    epochs : int, default = 100
        Number of epochs the model will be trained for
    batch_size : int, default = 64
        Size of batches used during training
    num_batches_per_epoch : int, default = 50
        Number of batches processed every epoch
    learning_rate : float, default = 1e-3
        Learning rate used during training
    """

    gluonts_estimator_class: Type[GluonTSEstimator] = SimpleFeedForwardEstimator

    def _get_estimator_init_args(self):
        init_kwargs = super()._get_estimator_init_args()
        # Workaround: Ray Tune doesn't support lists as hyperparameters, so we build
        # `num_hidden_dimensions` from `hidden_dim` and `num_layers`
        if "num_hidden_dimensions" in init_kwargs:
            logger.warning(
                f"Hyperparameter 'num_hidden_dimensions' is ignored by {self.name}. "
                f"Please use hyperparameters 'hidden_dim' and 'num_layers' instead."
            )
        hidden_dim = init_kwargs.pop("hidden_dim", 40)
        num_layers = init_kwargs.pop("num_layers", 2)
        init_kwargs["num_hidden_dimensions"] = [hidden_dim] * num_layers
        return init_kwargs
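
# Minimal sketch of the Ray Tune workaround above: the flat hyperparameters
# `hidden_dim` and `num_layers` are expanded into the `num_hidden_dimensions` list
# that the GluonTS estimator expects. This helper is hypothetical and exists only
# to document the mapping.
def _example_num_hidden_dimensions(hidden_dim: int = 40, num_layers: int = 2) -> List[int]:
    # e.g. hidden_dim=40, num_layers=2 -> [40, 40], i.e. two MLP layers of width 40
    return [hidden_dim] * num_layers
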
class TemporalFusionTransformerMXNetModel(AbstractGluonTSMXNetModel):
    """TemporalFusionTransformer model from GluonTS.

    The model combines an LSTM encoder, a transformer decoder, and directly predicts
    the quantiles of future target values. As described in [Lim2021]_.

    Based on `gluonts.mx.model.tft.TemporalFusionTransformerEstimator <https://ts.gluon.ai/stable/api/gluonts/gluonts.mx.model.tft.html>`_.
    See GluonTS documentation for additional hyperparameters.

    References
    ----------
    .. [Lim2021] Lim, Bryan, et al.
        "Temporal Fusion Transformers for Interpretable Multi-horizon Time Series Forecasting."
        International Journal of Forecasting. 2021.

    Other Parameters
    ----------------
    context_length : int or None, default = None
        Number of past values used for prediction.
        (default: None, in which case context_length = prediction_length)
    hidden_dim : int, default = 32
        Size of the hidden layer.
    num_heads : int, default = 4
        Number of attention heads in multi-head attention.
    dropout_rate : float, default = 0.1
        Dropout regularization parameter
    epochs : int, default = 100
        Number of epochs the model will be trained for
    batch_size : int, default = 64
        Size of batches used during training
    num_batches_per_epoch : int, default = 50
        Number of batches processed every epoch
    learning_rate : float, default = 1e-3
        Learning rate used during training
    """

    gluonts_estimator_class: Type[GluonTSEstimator] = TemporalFusionTransformerEstimator
    supported_quantiles: set = set([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])

    def _get_estimator_init_args(self) -> dict:
        init_kwargs = super()._get_estimator_init_args()
        if self.num_feat_static_real > 0:
            init_kwargs["static_feature_dims"] = {FieldName.FEAT_STATIC_REAL: self.num_feat_static_real}
        if self.num_feat_dynamic_real > 0:
            init_kwargs["dynamic_feature_dims"] = {FieldName.FEAT_DYNAMIC_REAL: self.num_feat_dynamic_real}
        # Turning off hybridization prevents MXNet errors when training on GPU
        init_kwargs["hybridize"] = False
        # TFT cannot handle arbitrary quantiles, this is a workaround
        init_kwargs["num_outputs"] = 9
        if not set(self.quantile_levels).issubset(self.supported_quantiles):
            raise ValueError(
                f"{self.name} requires that quantile_levels are a subset of "
                f"{self.supported_quantiles} (received quantile_levels = {self.quantile_levels})"
            )
        return init_kwargs

    def predict(self, data: TimeSeriesDataFrame, quantile_levels: List[float] = None, **kwargs) -> TimeSeriesDataFrame:
        if quantile_levels is not None and not set(quantile_levels).issubset(self.supported_quantiles):
            raise ValueError(
                f"{self.name} requires that quantile_levels are a subset of "
                f"{self.supported_quantiles} (received quantile_levels = {quantile_levels})"
            )
        return super().predict(data=data, quantile_levels=quantile_levels, **kwargs)
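
# Minimal sketch (hypothetical helper, not called anywhere): the TFT wrapper only
# supports the nine decile quantiles, so any requested quantile_levels must form a
# subset of them, mirroring the checks in the class above.
def _example_tft_quantiles_supported(quantile_levels: List[float]) -> bool:
    supported = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9}
    # [0.1, 0.5, 0.9] -> True; [0.05, 0.95] -> False (the model would raise ValueError)
    return set(quantile_levels).issubset(supported)
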
class TransformerMXNetModel(AbstractGluonTSMXNetModel):
    """Autoregressive transformer forecasting model from GluonTS.

    The model consists of a Transformer encoder and a decoder that outputs the
    distribution of the next target value. The transformer architecture is close to
    the one described in [Vaswani2017]_.

    Based on `gluonts.mx.model.transformer.TransformerEstimator <https://ts.gluon.ai/stable/api/gluonts/gluonts.mx.model.transformer.html>`_.
    See GluonTS documentation for additional hyperparameters.

    References
    ----------
    .. [Vaswani2017] Vaswani, Ashish, et al.
        "Attention is all you need."
        Advances in neural information processing systems. 2017.

    Other Parameters
    ----------------
    context_length : int, optional
        Number of past time steps seen by the model before computing predictions
        (default: None, in which case context_length = prediction_length)
    model_dim : int, default = 32
        Dimension of the transformer network, i.e., embedding dimension of the input
    dropout_rate : float, default = 0.1
        Dropout regularization parameter
    distr_output : gluonts.mx.DistributionOutput, default = StudentTOutput()
        Distribution to use to evaluate observations and sample predictions
    inner_ff_dim_scale : int, default = 4
        Dimension scale of the inner hidden layer of the transformer's feedforward network
    pre_seq : str, default = "dn"
        Sequence that defines operations of the processing block before the main transformer network.
        Available operations: 'd' for dropout, 'r' for residual connections and 'n' for normalization
    post_seq : str, default = "drn"
        Sequence that defines operations of the processing block in and after the main transformer network.
        Available operations: 'd' for dropout, 'r' for residual connections and 'n' for normalization
    epochs : int, default = 100
        Number of epochs the model will be trained for
    batch_size : int, default = 64
        Size of batches used during training
    num_batches_per_epoch : int, default = 50
        Number of batches processed every epoch
    learning_rate : float, default = 1e-3
        Learning rate used during training
    """

    # TODO: Enable static and dynamic features
    gluonts_estimator_class: Type[GluonTSEstimator] = TransformerEstimator
class GenericGluonTSMXNetModel(AbstractGluonTSMXNetModel):
    """Generic wrapper model class for GluonTS models (called Estimators in GluonTS terminology).

    While this class is meant to generally enable fast use of GluonTS models in autogluon,
    specific GluonTS models accessed through this wrapper may not have been tested and
    should be used at the user's own risk. Please refer to each GluonTS estimator's
    individual documentation for initialization parameters of each model.

    Parameters
    ----------
    gluonts_estimator_class : Type[gluonts.model.estimator.Estimator]
        The class object of the GluonTS estimator to be used.
    """

    def __init__(self, gluonts_estimator_class: Type[GluonTSEstimator], **kwargs):
        self.gluonts_estimator_class = gluonts_estimator_class
        gluonts_model_name = re.sub(r"Estimator$", "", self.gluonts_estimator_class.__name__)
        super().__init__(name=kwargs.pop("name", gluonts_model_name), **kwargs)

    def get_params(self) -> dict:
        params_dict = super().get_params()
        params_dict["gluonts_estimator_class"] = self.gluonts_estimator_class
        return params_dict

    def _get_estimator_init_args(self):
        init_kwargs = super()._get_estimator_init_args()
        if get_mxnet_context() != mx.context.cpu():
            init_kwargs["hybridize"] = False
        return init_kwargs


class GenericGluonTSMXNetModelFactory(AbstractTimeSeriesModelFactory):
    """Factory class for GenericGluonTSMXNetModel for convenience of use."""

    def __init__(self, gluonts_estimator_class: Type[GluonTSEstimator], **kwargs):
        self.gluonts_estimator_class = gluonts_estimator_class
        self.init_kwargs = kwargs

    def __call__(self, **kwargs):
        model_init_kwargs = self.init_kwargs.copy()
        model_init_kwargs.update(kwargs)
        return GenericGluonTSMXNetModel(
            gluonts_estimator_class=self.gluonts_estimator_class,
            **model_init_kwargs,
        )
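
# Illustrative usage sketch for the factory (a hypothetical helper, not executed on
# import): wrapping a GluonTS MXNet estimator that has no dedicated AutoGluon class.
# Assumes NBEATSEstimator is importable from gluonts.mx.model.n_beats in the installed
# GluonTS version; the keyword arguments shown are illustrative.
def _example_generic_model_factory():
    from gluonts.mx.model.n_beats import NBEATSEstimator

    model_factory = GenericGluonTSMXNetModelFactory(NBEATSEstimator, epochs=5)
    # Calling the factory builds a GenericGluonTSMXNetModel named "NBEATS"
    # (the trailing "Estimator" is stripped from the estimator class name).
    return model_factory(prediction_length=24, freq="H")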