# Source code for autogluon.timeseries.models.gluonts.mx.models
import logging
import re
from typing import Callable, List, Optional, Type
import gluonts
import mxnet as mx
from autogluon.core.utils import warning_filter
from autogluon.timeseries.dataset.ts_dataframe import TimeSeriesDataFrame
from autogluon.timeseries.models.abstract.abstract_timeseries_model import AbstractTimeSeriesModelFactory
from autogluon.timeseries.models.gluonts.abstract_gluonts import AbstractGluonTSModel
with warning_filter():
from gluonts.model.estimator import Estimator as GluonTSEstimator
from gluonts.dataset.field_names import FieldName
from gluonts.mx.context import get_mxnet_context
from gluonts.mx.model.deepar import DeepAREstimator
from gluonts.mx.model.simple_feedforward import SimpleFeedForwardEstimator
from gluonts.mx.model.transformer import TransformerEstimator
from gluonts.mx.model.tft import TemporalFusionTransformerEstimator
from gluonts.mx.model.seq2seq import MQCNNEstimator, MQRNNEstimator
from .callback import GluonTSEarlyStoppingCallback, TimeLimitCallback
logger = logging.getLogger(__name__)
gts_logger = logging.getLogger(gluonts.__name__)
class AbstractGluonTSMXNetModel(AbstractGluonTSModel):
def _get_callbacks(self, time_limit: int, *args, **kwargs) -> List[Callable]:
callbacks = [TimeLimitCallback(time_limit)]
early_stopping_patience = self._get_model_params().get("early_stopping_patience", None)
if early_stopping_patience:
callbacks.append(GluonTSEarlyStoppingCallback(early_stopping_patience))
return callbacks
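
# Early stopping is opt-in via model hyperparameters. A minimal sketch (the patience
# value is illustrative; any concrete subclass below works, e.g. DeepARMXNetModel;
# GluonTSEarlyStoppingCallback is assumed to stop training once the validation loss
# has not improved for the given number of epochs):
#
#     model = DeepARMXNetModel(
#         freq="H",
#         prediction_length=24,
#         hyperparameters={"epochs": 100, "early_stopping_patience": 10},
#     )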
class DeepARMXNetModel(AbstractGluonTSMXNetModel):
"""DeepAR model from GluonTS based on the MXNet backend.
The model consists of an RNN encoder (LSTM or GRU) and a decoder that outputs the
distribution of the next target value. Close to the model described in [Salinas2020]_.
Based on `gluonts.mx.model.deepar.DeepAREstimator <https://ts.gluon.ai/stable/api/gluonts/gluonts.mx.model.deepar.html>`_.
See GluonTS documentation for additional hyperparameters.
References
----------
.. [Salinas2020] Salinas, David, et al.
"DeepAR: Probabilistic forecasting with autoregressive recurrent networks."
International Journal of Forecasting. 2020.
Other Parameters
----------------
context_length : int, optional
Number of steps to unroll the RNN for before computing predictions
(default: None, in which case context_length = prediction_length)
disable_static_features : bool, default = False
If True, static features won't be used by the model even if they are present in the dataset.
If False, static features will be used by the model if they are present in the dataset.
disable_known_covariates : bool, default = False
If True, known covariates won't be used by the model even if they are present in the dataset.
If False, known covariates will be used by the model if they are present in the dataset.
num_layers : int, default = 2
Number of RNN layers
num_cells : int, default = 40
Number of RNN cells for each layer
cell_type : str, default = "lstm"
Type of recurrent cells to use (available: 'lstm' or 'gru')
dropoutcell_type : str, default = 'ZoneoutCell'
Type of dropout cells to use
(available: 'ZoneoutCell', 'RNNZoneoutCell', 'VariationalDropoutCell' or
'VariationalZoneoutCell')
dropout_rate : float, default = 0.1
Dropout regularization parameter
embedding_dimension : int, optional
Dimension of the embeddings for categorical features
(if None, defaults to [min(50, (cat+1)//2) for cat in cardinality])
distr_output : gluonts.mx.DistributionOutput, default = StudentTOutput()
Distribution to use to evaluate observations and sample predictions
    scaling : bool, default = True
Whether to automatically scale the target values
epochs : int, default = 100
Number of epochs the model will be trained for
batch_size : int, default = 64
Size of batches used during training
num_batches_per_epoch : int, default = 50
Number of batches processed every epoch
    learning_rate : float, default = 1e-3
Learning rate used during training
"""
gluonts_estimator_class: Type[GluonTSEstimator] = DeepAREstimator
default_num_samples: int = 250
def _get_estimator_init_args(self) -> dict:
init_kwargs = super()._get_estimator_init_args()
# Our API hides these model kwargs from the user. They can only be controlled through disable_static_features
# and disable_known_covariates
init_kwargs["use_feat_static_cat"] = self.num_feat_static_cat > 0
init_kwargs["use_feat_static_real"] = self.num_feat_static_real > 0
init_kwargs["cardinality"] = self.feat_static_cat_cardinality
init_kwargs["use_feat_dynamic_real"] = self.num_feat_dynamic_real > 0
return init_kwargs
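
# A minimal usage sketch (assuming the shared AbstractTimeSeriesModel interface,
# i.e. fit(train_data=...) / predict(data); `train_data` is a TimeSeriesDataFrame
# and the hyperparameter values are illustrative):
#
#     model = DeepARMXNetModel(
#         freq="H",
#         prediction_length=24,
#         hyperparameters={"epochs": 5, "num_layers": 2, "cell_type": "gru"},
#     )
#     model.fit(train_data=train_data, time_limit=600)
#     predictions = model.predict(train_data)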
class AbstractGluonTSSeq2SeqModel(AbstractGluonTSMXNetModel):
"""Abstract class for MQCNN and MQRNN which require hybridization to be turned off
when fitting on the GPU.
"""
    gluonts_estimator_class: Optional[Type[GluonTSEstimator]] = None
def _get_estimator_init_args(self):
init_kwargs = super()._get_estimator_init_args()
if get_mxnet_context() != mx.context.cpu():
init_kwargs["hybridize"] = False
return init_kwargs
class MQCNNMXNetModel(AbstractGluonTSSeq2SeqModel):
"""MQCNN model from GluonTS.
The model consists of a CNN encoder and a decoder that directly predicts the
quantiles of the future target values' distribution. As described in [Wen2017]_.
Based on `gluonts.mx.model.seq2seq.MQCNNEstimator <https://ts.gluon.ai/stable/api/gluonts/gluonts.mx.model.seq2seq.html#gluonts.mx.model.seq2seq.MQCNNEstimator>`_.
See GluonTS documentation for additional hyperparameters.
References
----------
.. [Wen2017] Wen, Ruofeng, et al.
"A multi-horizon quantile recurrent forecaster."
arXiv preprint arXiv:1711.11053 (2017)
Other Parameters
----------------
context_length : int, optional
        Number of past time steps used as context when computing predictions
(default: None, in which case context_length = prediction_length)
disable_static_features : bool, default = False
If True, static features won't be used by the model even if they are present in the dataset.
If False, static features will be used by the model if they are present in the dataset.
disable_known_covariates : bool, default = False
If True, known covariates won't be used by the model even if they are present in the dataset.
If False, known covariates will be used by the model if they are present in the dataset.
embedding_dimension : int, optional
Dimension of the embeddings for categorical features. (default: [min(50, (cat+1)//2) for cat in cardinality])
add_time_feature : bool, default = True
Adds a set of time features.
add_age_feature : bool, default = False
Adds an age feature.
The age feature starts with a small value at the start of the time series and grows over time.
decoder_mlp_dim_seq : List[int], default = [30]
The dimensionalities of the Multi Layer Perceptron layers of the decoder.
channels_seq : List[int], default = [30, 30, 30]
The number of channels (i.e. filters or convolutions) for each layer of the HierarchicalCausalConv1DEncoder.
More channels usually correspond to better performance and larger network size.
dilation_seq : List[int], default = [1, 3, 5]
The dilation of the convolutions in each layer of the HierarchicalCausalConv1DEncoder.
Greater numbers correspond to a greater receptive field of the network, which is usually
better with longer context_length. (Same length as channels_seq)
kernel_size_seq : List[int], default = [7, 3, 3]
The kernel sizes (i.e. window size) of the convolutions in each layer of the HierarchicalCausalConv1DEncoder.
(Same length as channels_seq)
use_residual : bool, default = True
Whether the hierarchical encoder should additionally pass the unaltered
past target to the decoder.
quantiles : List[float], default = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
The list of quantiles that will be optimized for, and predicted by, the model.
Optimizing for more quantiles than are of direct interest to you can result
in improved performance due to a regularizing effect.
distr_output : gluonts.mx.DistributionOutput, optional
        DistributionOutput to use. Only one of `quantiles` and `distr_output`
can be set.
scaling : bool, optional
Whether to automatically scale the target values. (default: False if quantile_output is used,
True otherwise)
epochs : int, default = 100
Number of epochs the model will be trained for
batch_size : int, default = 64
Size of batches used during training
num_batches_per_epoch : int, default = 50
Number of batches processed every epoch
    learning_rate : float, default = 1e-3
Learning rate used during training
"""
gluonts_estimator_class: Type[GluonTSEstimator] = MQCNNEstimator
def _get_estimator_init_args(self) -> dict:
init_kwargs = super()._get_estimator_init_args()
init_kwargs["use_feat_static_cat"] = self.num_feat_static_cat > 0
init_kwargs["use_feat_static_real"] = self.num_feat_static_real > 0
init_kwargs["cardinality"] = self.feat_static_cat_cardinality
init_kwargs["use_feat_dynamic_real"] = self.num_feat_dynamic_real > 0
return init_kwargs
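
# A minimal sketch of configuring MQCNN's quantile outputs (constructor usage as in
# the DeepARMXNetModel sketch above; values are illustrative):
#
#     model = MQCNNMXNetModel(
#         freq="D",
#         prediction_length=7,
#         hyperparameters={"quantiles": [0.1, 0.5, 0.9], "channels_seq": [20, 20, 20]},
#     )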
class MQRNNMXNetModel(AbstractGluonTSSeq2SeqModel):
"""MQRNN model from GluonTS.
The model consists of an RNN encoder and a decoder that directly predicts the
quantiles of the future target values' distribution. As described in [Wen2017]_.
Based on `gluonts.mx.model.seq2seq.MQRNNEstimator <https://ts.gluon.ai/stable/api/gluonts/gluonts.mx.model.seq2seq.html#gluonts.mx.model.seq2seq.MQRNNEstimator>`_.
See GluonTS documentation for additional hyperparameters.
References
----------
.. [Wen2017] Wen, Ruofeng, et al.
"A multi-horizon quantile recurrent forecaster."
arXiv preprint arXiv:1711.11053 (2017)
Other Parameters
----------------
context_length : int, optional
Number of steps to unroll the RNN for before computing predictions
(default: None, in which case context_length = prediction_length)
embedding_dimension : int, optional
Dimension of the embeddings for categorical features. (default: [min(50, (cat+1)//2) for cat in cardinality])
decoder_mlp_dim_seq : List[int], default = [30]
The dimensionalities of the Multi Layer Perceptron layers of the decoder.
quantiles : List[float], default = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
The list of quantiles that will be optimized for, and predicted by, the model.
Optimizing for more quantiles than are of direct interest to you can result
in improved performance due to a regularizing effect.
distr_output : gluonts.mx.DistributionOutput, optional
        DistributionOutput to use. Only one of `quantiles` and `distr_output`
can be set.
scaling : bool, optional
Whether to automatically scale the target values. (default: False if quantile_output is used,
True otherwise)
epochs : int, default = 100
Number of epochs the model will be trained for
batch_size : int, default = 64
Size of batches used during training
num_batches_per_epoch : int, default = 50
Number of batches processed every epoch
    learning_rate : float, default = 1e-3
Learning rate used during training
"""
gluonts_estimator_class: Type[GluonTSEstimator] = MQRNNEstimator
class SimpleFeedForwardMXNetModel(AbstractGluonTSMXNetModel):
"""SimpleFeedForward model from GluonTS based on the MXNet backend.
The model consists of a multilayer perceptron (MLP) that predicts the distribution
of the next target value.
Based on `gluonts.mx.model.simple_feedforward.SimpleFeedForwardEstimator <https://ts.gluon.ai/stable/api/gluonts/gluonts.mx.model.simple_feedforward.html>`_.
See GluonTS documentation for additional hyperparameters.
Note that AutoGluon uses hyperparameters ``hidden_dim`` and ``num_layers`` instead of ``num_hidden_dimensions``
used in GluonTS. This is done to ensure compatibility with Ray Tune.
Other Parameters
----------------
context_length : int, optional
Number of time units that condition the predictions
(default: None, in which case context_length = prediction_length)
    hidden_dim : int, default = 40
Number of hidden units in each layer of the MLP
num_layers : int, default = 2
Number of hidden layers in the MLP
distr_output : gluonts.mx.DistributionOutput, default = StudentTOutput()
Distribution to fit
batch_normalization : bool, default = False
Whether to use batch normalization
mean_scaling : bool, default = True
Scale the network input by the data mean and the network output by
its inverse
epochs : int, default = 100
Number of epochs the model will be trained for
batch_size : int, default = 64
Size of batches used during training
num_batches_per_epoch : int, default = 50
Number of batches processed every epoch
    learning_rate : float, default = 1e-3
Learning rate used during training
"""
gluonts_estimator_class: Type[GluonTSEstimator] = SimpleFeedForwardEstimator
def _get_estimator_init_args(self):
init_kwargs = super()._get_estimator_init_args()
# Workaround: Ray Tune doesn't support lists as hyperparameters, so we build `num_hidden_dimensions`
# from `hidden_dim` and `num_layers`
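        # (e.g. hidden_dim=40, num_layers=2 yields num_hidden_dimensions=[40, 40])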
if "num_hidden_dimensions" in init_kwargs:
logger.warning(
f"Hyperparameter 'num_hidden_dimensions' is ignored by {self.name}. "
f"Please use hyperparameters 'hidden_dim' and 'num_layers' instead."
)
hidden_dim = init_kwargs.pop("hidden_dim", 40)
num_layers = init_kwargs.pop("num_layers", 2)
init_kwargs["num_hidden_dimensions"] = [hidden_dim] * num_layers
return init_kwargs
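
# A minimal sketch of the AutoGluon-specific MLP sizing (values illustrative):
#
#     model = SimpleFeedForwardMXNetModel(
#         freq="D",
#         prediction_length=7,
#         # -> num_hidden_dimensions=[64, 64, 64]
#         hyperparameters={"hidden_dim": 64, "num_layers": 3},
#     )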
class TemporalFusionTransformerMXNetModel(AbstractGluonTSMXNetModel):
"""TemporalFusionTransformer model from GluonTS.
    The model combines an LSTM encoder with a transformer decoder and directly
    predicts the quantiles of future target values. As described in [Lim2021]_.
Based on `gluonts.mx.model.tft.TemporalFusionTransformerEstimator <https://ts.gluon.ai/stable/api/gluonts/gluonts.mx.model.tft.html>`_.
See GluonTS documentation for additional hyperparameters.
References
----------
.. [Lim2021] Lim, Bryan, et al.
"Temporal Fusion Transformers for Interpretable Multi-horizon Time Series Forecasting."
International Journal of Forecasting. 2021.
Other Parameters
----------------
context_length : int or None, default = None
Number of past values used for prediction.
(default: None, in which case context_length = prediction_length)
hidden_dim : int, default = 32
Size of the hidden layer.
num_heads : int, default = 4
Number of attention heads in multi-head attention.
dropout_rate : float, default = 0.1
Dropout regularization parameter
epochs : int, default = 100
Number of epochs the model will be trained for
batch_size : int, default = 64
Size of batches used during training
num_batches_per_epoch : int, default = 50
Number of batches processed every epoch
    learning_rate : float, default = 1e-3
Learning rate used during training
"""
gluonts_estimator_class: Type[GluonTSEstimator] = TemporalFusionTransformerEstimator
    supported_quantiles: set = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9}
def _get_estimator_init_args(self) -> dict:
init_kwargs = super()._get_estimator_init_args()
if self.num_feat_static_real > 0:
init_kwargs["static_feature_dims"] = {FieldName.FEAT_STATIC_REAL: self.num_feat_static_real}
if self.num_feat_dynamic_real > 0:
init_kwargs["dynamic_feature_dims"] = {FieldName.FEAT_DYNAMIC_REAL: self.num_feat_dynamic_real}
# Turning off hybridization prevents MXNet errors when training on GPU
init_kwargs["hybridize"] = False
        # TFT cannot handle arbitrary quantiles; as a workaround, always predict all
        # 9 deciles supported by the model and validate the requested levels below
        init_kwargs["num_outputs"] = 9
if not set(self.quantile_levels).issubset(self.supported_quantiles):
raise ValueError(
f"{self.name} requires that quantile_levels are a subset of "
f"{self.supported_quantiles} (received quantile_levels = {self.quantile_levels})"
)
return init_kwargs
    def predict(self, data: TimeSeriesDataFrame, quantile_levels: Optional[List[float]] = None, **kwargs) -> TimeSeriesDataFrame:
if quantile_levels is not None and not set(quantile_levels).issubset(self.supported_quantiles):
raise ValueError(
f"{self.name} requires that quantile_levels are a subset of "
f"{self.supported_quantiles} (received quantile_levels = {self.quantile_levels})"
)
return super().predict(data=data, quantile_levels=quantile_levels, **kwargs)
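
# A minimal sketch of the quantile restriction (assuming the base model constructor
# accepts `quantile_levels`, as the use of `self.quantile_levels` above suggests;
# values are illustrative):
#
#     model = TemporalFusionTransformerMXNetModel(
#         freq="H", prediction_length=24, quantile_levels=[0.1, 0.5, 0.9]
#     )  # OK: a subset of the supported deciles
#
# whereas quantile_levels=[0.05, 0.95] would raise the ValueError above.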
class TransformerMXNetModel(AbstractGluonTSMXNetModel):
"""Autoregressive transformer forecasting model from GluonTS.
    The model consists of a Transformer encoder and a decoder that outputs the
distribution of the next target value. The transformer architecture is close to the
one described in [Vaswani2017]_.
Based on `gluonts.mx.model.transformer.TransformerEstimator <https://ts.gluon.ai/stable/api/gluonts/gluonts.mx.model.transformer.html>`_.
See GluonTS documentation for additional hyperparameters.
References
----------
.. [Vaswani2017] Vaswani, Ashish, et al. "Attention is all you need."
Advances in neural information processing systems. 2017.
Other Parameters
----------------
context_length : int, optional
        Number of past time steps the model conditions on when computing predictions
(default: None, in which case context_length = prediction_length)
model_dim : int, default = 32
Dimension of the transformer network, i.e., embedding dimension of the
input
dropout_rate : float, default = 0.1
Dropout regularization parameter
distr_output : gluonts.mx.DistributionOutput, default = StudentTOutput()
Distribution to use to evaluate observations and sample predictions
inner_ff_dim_scale : int, default = 4
Dimension scale of the inner hidden layer of the transformer's
feedforward network
pre_seq : str, default = "dn"
        Sequence that defines the operations of the processing block applied before the
main transformer network. Available operations: 'd' for dropout, 'r'
for residual connections and 'n' for normalization
post_seq : str, default = "drn"
        Sequence that defines the operations of the processing block applied in and after
the main transformer network. Available operations: 'd' for
dropout, 'r' for residual connections and 'n' for normalization
epochs : int, default = 100
Number of epochs the model will be trained for
batch_size : int, default = 64
Size of batches used during training
num_batches_per_epoch : int, default = 50
Number of batches processed every epoch
    learning_rate : float, default = 1e-3
Learning rate used during training
"""
# TODO: Enable static and dynamic features
gluonts_estimator_class: Type[GluonTSEstimator] = TransformerEstimator
class GenericGluonTSMXNetModel(AbstractGluonTSMXNetModel):
"""Generic wrapper model class for GluonTS models (in GluonTS terminology---
Estimators). While this class is meant to generally enable fast use of GluonTS
models in autogluon, specific GluonTS models accessed through this wrapper may
not have been tested and should be used at the user's own risk.
Please refer to each GluonTS estimator's individual documentation for
initialization parameters of each model.
Parameters
----------
gluonts_estimator_class : Type[gluonts.model.estimator.Estimator]
The class object of the GluonTS estimator to be used.
"""
def __init__(self, gluonts_estimator_class: Type[GluonTSEstimator], **kwargs):
self.gluonts_estimator_class = gluonts_estimator_class
gluonts_model_name = re.sub(r"Estimator$", "", self.gluonts_estimator_class.__name__)
super().__init__(name=kwargs.pop("name", gluonts_model_name), **kwargs)
def get_params(self) -> dict:
params_dict = super().get_params()
params_dict["gluonts_estimator_class"] = self.gluonts_estimator_class
return params_dict
def _get_estimator_init_args(self):
init_kwargs = super()._get_estimator_init_args()
if get_mxnet_context() != mx.context.cpu():
init_kwargs["hybridize"] = False
return init_kwargs
class GenericGluonTSMXNetModelFactory(AbstractTimeSeriesModelFactory):
"""Factory class for GenericGluonTSModel for convenience of use"""
def __init__(self, gluonts_estimator_class: Type[GluonTSEstimator], **kwargs):
self.gluonts_estimator_class = gluonts_estimator_class
self.init_kwargs = kwargs
def __call__(self, **kwargs):
model_init_kwargs = self.init_kwargs.copy()
model_init_kwargs.update(kwargs)
return GenericGluonTSMXNetModel(
gluonts_estimator_class=self.gluonts_estimator_class,
**model_init_kwargs,
)
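
# A minimal usage sketch for wrapping an arbitrary GluonTS MXNet estimator
# (NBEATSEstimator is chosen purely for illustration; any gluonts.mx estimator
# class should work, subject to the caveats in the GenericGluonTSMXNetModel
# docstring):
#
#     from gluonts.mx.model.n_beats import NBEATSEstimator
#
#     model_factory = GenericGluonTSMXNetModelFactory(NBEATSEstimator)
#     model = model_factory(freq="H", prediction_length=24)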