Source code for autogluon.timeseries.models.statsmodels.models

import logging
from typing import List

import numpy as np
import pandas as pd
from statsmodels.tsa.exponential_smoothing.ets import ETSModel as StatsmodelsETS
from statsmodels.tsa.forecasting.theta import ThetaModel as StatsmodelsTheta
from statsmodels.tsa.statespace.sarimax import SARIMAX as StatsmodelsSARIMAX

from autogluon.timeseries.dataset.ts_dataframe import TimeSeriesDataFrame
from autogluon.timeseries.utils.seasonality import get_seasonality
from autogluon.timeseries.utils.warning_filters import statsmodels_warning_filter

from .abstract_statsmodels import AbstractStatsmodelsModel, FittedLocalModel

logger = logging.getLogger(__name__)


class ETSModel(AbstractStatsmodelsModel):
    """Exponential smoothing with trend and seasonality.

    Based on `statsmodels.tsa.exponential_smoothing.ets.ETSModel <https://www.statsmodels.org/stable/generated/statsmodels.tsa.exponential_smoothing.ets.ETSModel.html>`_.

    Our implementation contains several improvements over the Statsmodels version, such
    as multi-CPU training and reducing the disk usage when saving models.


    Other Parameters
    ----------------
    error : {"add", "mul"}, default = "add"
        Error model. Allowed values are "add" (additive) and "mul" (multiplicative).
        Note that "mul" is only applicable to time series with positive values.
    trend : {"add", "mul", None}, default = "add"
        Trend component model. Allowed values are "add" (additive), "mul" (multiplicative) and None (disabled).
        Note that "mul" is only applicable to time series with positive values.
    damped_trend : bool, default = False
        Whether or not the included trend component is damped.
    seasonal : {"add", "mul", None}, default = "add"
        Seasonal component model. Allowed values are "add" (additive), "mul" (multiplicative) and None (disabled).
        Note that "mul" is only applicable to time series with positive values.
    seasonal_period : int or None, default = None
        Number of time steps in a complete seasonal cycle for seasonal models. For example, 7 for daily data with a
        weekly cycle or 12 for monthly data with an annual cycle.
        When set to None, seasonal_period will be inferred from the frequency of the training data. Can also be
        specified manually by providing an integer > 1.
        If seasonal_period (inferred or provided) is equal to 1, seasonality will be disabled.
        Seasonality will also be disabled, if the length of the time series is < 2 * seasonal_period.
    maxiter : int, default = 1000
        Number of iterations during optimization.
    n_jobs : int or float, default = 0.5
        Number of CPU cores used to fit the models in parallel.
        When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
        When set to a positive integer, that many cores are used.
        When set to -1, all CPU cores are used.
    """

    quantile_method_name = "pred_int"
    statsmodels_allowed_init_args = ["error", "trend", "damped_trend", "seasonal", "seasonal_period"]
    statsmodels_allowed_fit_args = ["maxiter"]

    def _update_sm_model_init_args(self, sm_model_init_args: dict, data: TimeSeriesDataFrame) -> dict:
        """Return a copy of the constructor arguments with defaults and seasonality settings filled in.

        The input dictionary is not modified. The returned dictionary uses the statsmodels
        key ``seasonal_periods`` in place of ``seasonal_period``.
        """
        init_args = dict(sm_model_init_args)
        init_args["freq"] = data.freq
        init_args.setdefault("trend", "add")

        # Fall back to the seasonal period implied by the data frequency when none was provided
        seasonal_period = init_args.pop("seasonal_period", None)
        if seasonal_period is None:
            seasonal_period = get_seasonality(data.freq)
        init_args["seasonal_periods"] = seasonal_period

        seasonal = init_args.setdefault("seasonal", "add")
        # A seasonal component is meaningless for a cycle of length <= 1, so switch it off
        if seasonal is not None and seasonal_period <= 1:
            logger.warning(
                f"{self.name} with seasonal = {seasonal} requires seasonal_period > 1 "
                f"(received seasonal_period = {seasonal_period}). Disabling seasonality."
            )
            init_args.update(seasonal=None, seasonal_periods=1)
        return init_args

    def _fit_local_model(
        self, timeseries: pd.Series, sm_model_init_args: dict, sm_model_fit_args: dict
    ) -> FittedLocalModel:
        """Fit ETS to a single time series and keep only the estimated parameters."""
        if sm_model_init_args["seasonal"] is not None:
            shortest_seasonal_length = 2 * sm_model_init_args["seasonal_periods"]
            if len(timeseries) < shortest_seasonal_length:
                # Series covers fewer than two full cycles: fit without seasonality (on a copy of the args)
                sm_model_init_args = {**sm_model_init_args, "seasonal": None}

        with statsmodels_warning_filter():
            ets = StatsmodelsETS(endog=timeseries, **sm_model_init_args)
            fit_result = ets.fit(full_output=False, disp=False, **sm_model_fit_args)

        # Persist the parameter values only; the statsmodels object itself is discarded
        parameters = dict(zip(fit_result.param_names, fit_result.params))
        return FittedLocalModel(
            model_name=self.name,
            sm_model_init_args=sm_model_init_args,
            parameters=parameters,
        )

    def _predict_with_local_model(
        self, timeseries: pd.Series, fitted_model: FittedLocalModel, quantile_levels: List[float]
    ) -> pd.DataFrame:
        """Rebuild the ETS model from the stored parameters and forecast from the end of ``timeseries``."""
        assert fitted_model.model_name == self.name

        init_args = fitted_model.sm_model_init_args
        with statsmodels_warning_filter():
            base_model = StatsmodelsETS(endog=timeseries, **init_args)
            parameters = np.array(list(fitted_model.parameters.values()))
            # Hack: "fitting" with zero iterations starting from the stored estimates
            # sets the parameters to those values and initializes the model
            sm_model = base_model.fit(start_params=parameters, maxiter=0, disp=False)

        return self._get_predictions_from_statsmodels_model(
            sm_model=sm_model,
            cutoff=timeseries.index.max(),
            quantile_levels=quantile_levels,
            freq=init_args["freq"],
        )
class ARIMAModel(AbstractStatsmodelsModel):
    """Autoregressive Integrated Moving Average (ARIMA) model.

    Based on `statsmodels.tsa.statespace.sarimax.SARIMAX <https://www.statsmodels.org/stable/generated/statsmodels.tsa.statespace.sarimax.SARIMAX.html>`_.

    Our implementation contains several improvements over the Statsmodels version, such
    as multi-CPU training and reducing the disk usage when saving models.


    Other Parameters
    ----------------
    order: Tuple[int, int, int], default = (1, 1, 1)
        The (p, d, q) order of the model for the number of AR parameters, differences, and MA parameters to use.
    seasonal_order: Tuple[int, int, int], default = (0, 0, 0)
        The (P, D, Q) parameters of the seasonal ARIMA model. Setting to (0, 0, 0) disables seasonality.
    seasonal_period : int or None, default = None
        Number of time steps in a complete seasonal cycle for seasonal models. For example, 7 for daily data with a
        weekly cycle or 12 for monthly data with an annual cycle.
        When set to None, seasonal_period will be inferred from the frequency of the training data. Can also be
        specified manually by providing an integer > 1.
        If seasonal_period (inferred or provided) is equal to 1, seasonality will be disabled.
    enforce_stationarity : bool, default = True
        Whether to transform the AR parameters to enforce stationarity in the autoregressive component of the model.
        If ARIMA crashes during fitting with an LU decomposition error, you can either set enforce_stationarity to
        False or increase the differencing parameter ``d`` in ``order``.
    enforce_invertibility : bool, default = True
        Whether to transform the MA parameters to enforce invertibility in the moving average component of the model.
    maxiter : int, default = 50
        Number of iterations during optimization.
    n_jobs : int or float, default = 0.5
        Number of CPU cores used to fit the models in parallel.
        When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
        When set to a positive integer, that many cores are used.
        When set to -1, all CPU cores are used.
    """

    quantile_method_name = "conf_int"
    statsmodels_allowed_init_args = [
        "order",
        "seasonal_order",
        "seasonal_period",
        "enforce_stationarity",
        "enforce_invertibility",
    ]
    statsmodels_allowed_fit_args = [
        "maxiter",
    ]

    def _update_sm_model_init_args(self, sm_model_init_args: dict, data: TimeSeriesDataFrame) -> dict:
        """Return a copy of the SARIMAX constructor arguments with defaults and seasonality filled in.

        The input dictionary is not modified. ``seasonal_period`` and the 3-element
        ``seasonal_order`` are merged into the 4-element ``seasonal_order`` (P, D, Q, s)
        that is passed to SARIMAX.

        Raises
        ------
        ValueError
            If ``seasonal_order`` is not a sequence of exactly 3 nonnegative integers.
        """
        sm_model_init_args = sm_model_init_args.copy()
        sm_model_init_args["freq"] = data.freq
        sm_model_init_args["trend"] = "c"
        sm_model_init_args.setdefault("enforce_stationarity", True)
        sm_model_init_args.setdefault("order", (1, 1, 1))
        # NOTE: the default for `maxiter` is intentionally NOT set here. `maxiter` is an argument of
        # `SARIMAX.fit`, not of the SARIMAX constructor, so its default is applied in `_fit_local_model`.

        # Infer seasonal_period if seasonal_period is not given / is set to None
        seasonal_period = sm_model_init_args.pop("seasonal_period", None)
        if seasonal_period is None:
            seasonal_period = get_seasonality(data.freq)

        seasonal_order = sm_model_init_args.pop("seasonal_order", (0, 0, 0))
        # The nonnegativity check keeps the validation in line with the error message below
        seasonal_order_is_valid = len(seasonal_order) == 3 and all(
            isinstance(p, int) and p >= 0 for p in seasonal_order
        )
        if not seasonal_order_is_valid:
            raise ValueError(
                f"{self.name} can't interpret received seasonal_order {seasonal_order} as a "
                "tuple with 3 nonnegative integers (P, D, Q)."
            )

        # Disable seasonality if seasonal_period is too short
        if seasonal_period <= 1:
            sm_model_init_args["seasonal_order"] = (0, 0, 0, 0)
        else:
            sm_model_init_args["seasonal_order"] = tuple(seasonal_order) + (seasonal_period,)
        return sm_model_init_args

    def _fit_local_model(
        self, timeseries: pd.Series, sm_model_init_args: dict, sm_model_fit_args: dict
    ) -> FittedLocalModel:
        """Fit SARIMAX to a single time series and keep only the estimated parameters."""
        # Apply the documented default number of iterations; a user-provided `maxiter` takes precedence
        fit_args = {"maxiter": 50, **sm_model_fit_args}
        with statsmodels_warning_filter():
            model = StatsmodelsSARIMAX(endog=timeseries, **sm_model_init_args)
            fit_result = model.fit(disp=False, **fit_args)
        # Only save the parameters of the trained model, not the model itself.
        # `Series.items` is used because `Series.iteritems` was removed in pandas 2.0.
        parameters = dict(fit_result.params.items())
        return FittedLocalModel(model_name=self.name, sm_model_init_args=sm_model_init_args, parameters=parameters)

    def _predict_with_local_model(
        self, timeseries: pd.Series, fitted_model: FittedLocalModel, quantile_levels: List[float]
    ) -> pd.DataFrame:
        """Rebuild the SARIMAX model from the stored parameters and forecast from the end of ``timeseries``."""
        assert fitted_model.model_name == self.name
        parameters = np.array(list(fitted_model.parameters.values()))
        with statsmodels_warning_filter():
            base_model = StatsmodelsSARIMAX(endog=timeseries, **fitted_model.sm_model_init_args)
            # This is a hack that allows us to set the parameters to their estimated values & initialize the model
            sm_model = base_model.fit(start_params=parameters, maxiter=0, disp=False)
        return self._get_predictions_from_statsmodels_model(
            sm_model=sm_model,
            cutoff=timeseries.index.max(),
            quantile_levels=quantile_levels,
            freq=fitted_model.sm_model_init_args["freq"],
        )
class ThetaModel(AbstractStatsmodelsModel):
    """The Theta forecasting model of Assimakopoulos and Nikolopoulos (2000).

    Based on `statsmodels.tsa.forecasting.theta.ThetaModel <https://www.statsmodels.org/stable/generated/statsmodels.tsa.forecasting.theta.ThetaModel.html>`_.

    Our implementation contains several improvements over the Statsmodels version, such
    as multi-CPU training and reducing the disk usage when saving models.


    Other Parameters
    ----------------
    deseasonalize : bool, default = True
        Whether to deseasonalize the data. If True and use_test is True, the data is only deseasonalized if the null
        hypothesis of no seasonality is rejected.
    seasonal_period : int or None, default = None
        Number of time steps in a complete seasonal cycle for seasonal models. For example, 7 for daily data with a
        weekly cycle or 12 for monthly data with an annual cycle.
        When set to None, seasonal_period will be inferred from the frequency of the training data. Can also be
        specified manually by providing an integer > 1.
        If seasonal_period (inferred or provided) is equal to 1, seasonality will be disabled.
        Seasonality will also be disabled, if the length of the time series is < 2 * seasonal_period.
    use_test : bool, default = True
        Whether to use a statistical test for determining if the seasonality is present.
    method : {"auto", "additive", "multiplicative"}, default = "auto"
        The model used for the seasonal decomposition. "auto" uses multiplicative if the time series is non-negative
        and all estimated seasonal components are positive. If either of these conditions is False, then it uses an
        additive decomposition.
    difference : bool, default = False
        Whether to difference the data before testing for seasonality.
    n_jobs : int or float, default = 0.5
        Number of CPU cores used to fit the models in parallel.
        When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
        When set to a positive integer, that many cores are used.
        When set to -1, all CPU cores are used.
    """

    statsmodels_allowed_init_args = ["deseasonalize", "seasonal_period", "use_test", "method", "difference"]
    statsmodels_allowed_fit_args = []

    def _update_sm_model_init_args(self, sm_model_init_args: dict, data: TimeSeriesDataFrame) -> dict:
        """Return a copy of the constructor arguments with defaults and the seasonal period filled in.

        The input dictionary is not modified. The returned dictionary uses the statsmodels
        key ``period`` in place of ``seasonal_period``.
        """
        init_args = dict(sm_model_init_args)
        init_args.setdefault("deseasonalize", True)

        # Fall back to the seasonal period implied by the data frequency when none was provided
        period = init_args.pop("seasonal_period", None)
        if period is None:
            period = get_seasonality(data.freq)
        init_args["period"] = period
        return init_args

    def _fit_local_model(
        self, timeseries: pd.Series, sm_model_init_args: dict, sm_model_fit_args: dict
    ) -> FittedLocalModel:
        """Record the arguments to use for this series; the actual training is deferred to prediction time.

        ThetaModel in statsmodels doesn't provide a way to initialize the model with trained
        parameters, so training is delegated to `_predict_with_local_model`.
        """
        if sm_model_init_args["deseasonalize"]:
            shortest_seasonal_length = 2 * sm_model_init_args["period"]
            if len(timeseries) < shortest_seasonal_length:
                # Series covers fewer than two full cycles: skip deseasonalization (on a copy of the args)
                sm_model_init_args = {**sm_model_init_args, "deseasonalize": False}
        return FittedLocalModel(
            model_name=self.name,
            sm_model_init_args=sm_model_init_args,
            parameters=None,
        )

    def _predict_with_local_model(
        self, timeseries: pd.Series, fitted_model: FittedLocalModel, quantile_levels: List[float]
    ) -> pd.DataFrame:
        """Fit the Theta model on ``timeseries`` and return the mean forecast plus one column per quantile.

        Note that the ``freq`` attribute of ``timeseries.index`` is overwritten with ``self.freq``.
        """
        assert fitted_model.model_name == self.name
        timeseries.index.freq = self.freq

        with statsmodels_warning_filter():
            sm_model = StatsmodelsTheta(endog=timeseries, **fitted_model.sm_model_init_args).fit(disp=False)

        # The API is inconsistent with ETS/ARIMA, so the predictions are assembled by hand here
        horizon = self.prediction_length
        forecast_columns = [sm_model.forecast(horizon).rename("mean")]
        for q in quantile_levels:
            # Quantile q is a bound of a two-sided interval with significance level `alpha`:
            # the lower bound (column 0) if q < 0.5, the upper bound (column 1) otherwise
            alpha, bound_column = (2 * q, 0) if q < 0.5 else (2 * (1 - q), 1)
            interval = sm_model.prediction_intervals(steps=horizon, alpha=alpha)
            forecast_columns.append(interval.iloc[:, bound_column].rename(str(q)))
        return pd.concat(forecast_columns, axis=1)