--- title: Introduction keywords: fastai sidebar: home_sidebar nb_path: "examples/Getting_Started_with_Auto_Arima_and_ETS.ipynb" ---
%%capture
!pip install -U numba
!pip install -U statsmodels
!pip install statsforecast
import numpy as np
import pandas as pd
from IPython.display import display, Markdown
import matplotlib.pyplot as plt
from statsforecast import StatsForecast
from statsforecast.models import auto_arima, ets
from statsforecast.utils import AirPassengers
#from statsforecast.models import __all__
#__all__
# Assemble the long-format input frame: a series identifier, month-end
# timestamps starting in 1949, and the observed passenger counts.
# The identifier is later moved to the index, which makes training a lot faster.
n_obs = len(AirPassengers)
month_ends = pd.date_range(start='1949-01-01', periods=n_obs, freq='M')
Y_df = pd.DataFrame({
    'unique_id': np.ones(n_obs),
    'ds': month_ends,
    'y': AirPassengers,
})
Y_df.tail(13)
ETS: The exponential smoothing (ETS) algorithm is especially suited for data with seasonality and trend. ETS computes a weighted average over all observations in the input time series dataset as its prediction. In contrast to moving average methods with constant weights, ETS weights exponentially decrease over time, capturing long-term dependencies while prioritizing new observations.
AutoARIMA: The autoregressive integrated moving average (ARIMA) model combines differencing steps, lag regression and moving averages into a single method capable of modeling non-stationary time series. This method complements ETS and is based on describing the autocorrelations in the data.
# Everything up to and including the cutoff date is used for training;
# the observations after it are held out for evaluation.
cutoff = '1959-12-31'
Y_train_df = Y_df[Y_df['ds'] <= cutoff]
Y_test_df = Y_df[Y_df['ds'] > cutoff]
print('len(Y_train_df)', len(Y_train_df))
print('len(Y_test_df)', len(Y_test_df))
season_length = 12
horizon = len(Y_test_df)

# Each entry in `models` is either a bare model function or a tuple of
# (model function, extra arguments...). The parameters (X, h, future_xreg)
# are passed automatically for every model and must not be declared here.
# For ets we pass 'ZMZ', which stands for: error selected optimally,
# multiplicative trend, and seasonality selected optimally.
# Further models could be included in this list, such as: ses, adida,
# historic_average, croston_classic, croston_sba, croston_optimized,
# seasonal_window_average, seasonal_naive, imapa, naive,
# random_walk_with_drift, window_average, seasonal_exponential_smoothing
# and tsb.
forecast_models = [
    (auto_arima, season_length),
    (ets, season_length, 'ZMZ'),
]
model = StatsForecast(
    Y_train_df.set_index('unique_id'),
    models=forecast_models,
    freq='M',
    n_jobs=-1,
)

# Point forecasts only. Some models, like ARIMA, can also return confidence
# intervals; for the moment confidence intervals for ETS are unavailable.
Y_hat_df = model.forecast(horizon)
Y_hat_df = Y_hat_df.reset_index()
Y_hat_df.head()
# Plot the training history followed by both point forecasts on one axis.
plot_columns = [
    'y',
    'auto_arima_season_length-12',
    'ets_season_length-12_model-ZMZ',
]
fig, ax = plt.subplots(1, 1, figsize=(20, 7))
plot_df = pd.concat([Y_train_df, Y_hat_df]).set_index('ds')
plot_df[plot_columns].plot(ax=ax, linewidth=2)

ax.set_title('AirPassengers Forecast', fontsize=22)
ax.set_xlabel('Timestamp [t]', fontsize=20)
ax.set_ylabel('Monthly Passengers', fontsize=20)
ax.legend(prop={'size': 15})
ax.grid()
Finally, we evaluate the accuracy of the predictions using the Mean Absolute Error:
$\qquad MAE = \frac{1}{Horizon} \sum_{\tau} |y_{\tau} - \hat{y}_{\tau}|\qquad$
def mae(y_hat, y_true):
    """Return the Mean Absolute Error between forecasts and observations.

    Both inputs are converted to NumPy arrays first, so plain lists and
    tuples work, and pandas Series are compared position by position
    rather than being aligned on their index labels.

    Args:
        y_hat: Array-like of predicted values.
        y_true: Array-like of observed values, broadcastable against y_hat.

    Returns:
        The mean of |y_hat - y_true| as a NumPy float.
    """
    y_hat = np.asarray(y_hat)
    y_true = np.asarray(y_true)
    return np.mean(np.abs(y_hat - y_true))
# Score each model's point forecast against the held-out observations.
y_true = Y_test_df['y'].to_numpy()
ets_preds = Y_hat_df['ets_season_length-12_model-ZMZ'].to_numpy()
arima_preds = Y_hat_df['auto_arima_season_length-12'].to_numpy()

ets_mae = mae(ets_preds, y_true)
arima_mae = mae(arima_preds, y_true)
print('ETS MAE: %0.3f' % ets_mae)
print('ARIMA MAE: %0.3f' % arima_mae)
# Prediction intervals are requested by passing the desired confidence
# levels through `level`, as follows. The same `levels` tuple drives both the
# forecast call and the shaded bands, so the two cannot drift apart.
levels = (80, 95)
# Opacity per level: the narrower band is drawn darker than the wider one.
band_alphas = (.35, .2)

# Reuse the horizon defined with the test split instead of a hard-coded 12.
Y_hat_df_intervals = model.forecast(h=horizon, level=levels)

fig, ax = plt.subplots(1, 1, figsize=(20, 7))
df_plot = pd.concat([Y_train_df, Y_hat_df_intervals]).set_index('ds')
# With `level` set, the auto_arima point forecast column carries a `_mean`
# suffix; the ETS column keeps its plain name (no intervals for ETS here).
df_plot[['y',
         'auto_arima_season_length-12_mean',
         'ets_season_length-12_model-ZMZ']].plot(ax=ax, linewidth=2)

for level, alpha in zip(levels, band_alphas):
    ax.fill_between(df_plot.index,
                    df_plot[f'auto_arima_season_length-12_lo-{level}'],
                    df_plot[f'auto_arima_season_length-12_hi-{level}'],
                    alpha=alpha,
                    color='orange',
                    label=f'auto_arima_level_{level}')

ax.set_title('AirPassengers Forecast', fontsize=22)
ax.set_ylabel('Monthly Passengers', fontsize=20)
ax.set_xlabel('Timestamp [t]', fontsize=20)
ax.legend(prop={'size': 15})
ax.grid()
for label in (ax.get_xticklabels() + ax.get_yticklabels()):
    label.set_fontsize(20)
# Add the exogenous regressors for training: a linear trend (1..n), a
# constant intercept, and month-of-year dummies (January is the dropped
# baseline because of drop_first=True).
# `assign` builds a new frame instead of writing columns into Y_train_df in
# place; Y_train_df is a filtered slice of Y_df, and in-place column
# assignment on such a slice triggers pandas' SettingWithCopyWarning.
Y_train_df = Y_train_df.assign(
    trend=np.arange(1, len(Y_train_df) + 1),
    intercept=np.ones(len(Y_train_df)),
    month=Y_train_df['ds'].dt.month,
)
Y_train_df = pd.get_dummies(Y_train_df, columns=['month'], drop_first=True)
Y_train_df
# Build the future exogenous regressors for the forecast window, mirroring
# the columns created for the training frame (trend, intercept, month dummies).
n_train = len(Y_train_df)
n_future = len(Y_hat_df)

xreg_test = pd.DataFrame({
    'unique_id': 1,
    'ds': pd.date_range(start='1960-01-01', periods=n_future, freq='M')
})
# The training trend runs 1..n_train, so the future trend continues at
# n_train + 1 (133 for this dataset) rather than relying on a magic number.
xreg_test['trend'] = np.arange(n_train + 1, n_train + n_future + 1)
xreg_test['intercept'] = np.ones(n_future)
xreg_test['month'] = xreg_test['ds'].dt.month
xreg_test = pd.get_dummies(xreg_test, columns=['month'], drop_first=True)
xreg_test
season_length = 12

# Refit on the training frame that now carries the exogenous columns.
# Note: for all models the parameters (X, h, future_xreg) are passed
# automatically and don't need to be declared.
xreg_models = [
    (auto_arima, season_length),
    (ets, season_length, 'ZMZ'),
]
model = StatsForecast(
    Y_train_df.set_index('unique_id'),
    models=xreg_models,
    freq='M',
    n_jobs=-1,
)

# The future regressors must be indexed by unique_id, like the training data.
future_xreg = xreg_test.set_index('unique_id')
Y_hat_df_xreg = model.forecast(horizon, xreg=future_xreg).reset_index()
# Plot the training history followed by the forecasts produced with
# external regressors.
xreg_plot_columns = [
    'y',
    'auto_arima_season_length-12',
    'ets_season_length-12_model-ZMZ',
]
fig, ax = plt.subplots(1, 1, figsize=(20, 7))
df_plot = pd.concat([Y_train_df, Y_hat_df_xreg]).set_index('ds')
df_plot[xreg_plot_columns].plot(ax=ax, linewidth=2)

ax.set_title('AirPassengers Forecast (with AutoArima external regressors)', fontsize=22)
ax.set_xlabel('Timestamp [t]', fontsize=20)
ax.set_ylabel('Monthly Passengers', fontsize=20)
ax.legend(prop={'size': 15})
ax.grid()
# Enlarge the tick labels on both axes.
for label in [*ax.get_xticklabels(), *ax.get_yticklabels()]:
    label.set_fontsize(20)
from statsforecast.models import seasonal_naive, naive

# Define the parameters that you want to use in your models.
season_length = 12

# Add two simple baselines next to auto_arima and ets. A model that takes
# no extra arguments (naive) is listed as a bare function, not a tuple.
# Note: for all models the parameters (X, h, future_xreg) are passed
# automatically and don't need to be declared.
bench_models = [
    (auto_arima, season_length),
    (ets, season_length, 'ZMZ'),
    (seasonal_naive, season_length),
    naive,
]
model = StatsForecast(
    Y_train_df.set_index('unique_id'),
    models=bench_models,
    freq='M',
    n_jobs=-1,
)

bench_xreg = xreg_test.set_index('unique_id')
Y_hat_df_bench = model.forecast(horizon, xreg=bench_xreg).reset_index()
Y_hat_df_bench
# Plot the training history followed by all four forecasts, baselines included.
bench_plot_columns = [
    'y',
    'auto_arima_season_length-12',
    'ets_season_length-12_model-ZMZ',
    'seasonal_naive_season_length-12',
    'naive',
]
fig, ax = plt.subplots(1, 1, figsize=(20, 7))
df_plot = pd.concat([Y_train_df, Y_hat_df_bench]).set_index('ds')
df_plot[bench_plot_columns].plot(ax=ax, linewidth=2)

ax.set_title('AirPassengers Forecast (with AutoArima external regressors)', fontsize=22)
ax.set_xlabel('Timestamp [t]', fontsize=20)
ax.set_ylabel('Monthly Passengers', fontsize=20)
ax.legend(prop={'size': 15})
ax.grid()
# Enlarge the tick labels on both axes.
for label in [*ax.get_xticklabels(), *ax.get_yticklabels()]:
    label.set_fontsize(20)