---
title: Core
keywords: fastai
sidebar: home_sidebar
nb_path: "nbs/core.ipynb"
---
{% raw %}
{% endraw %} {% raw %}
{% endraw %} {% raw %}
from fastcore.test import test_eq

from statsforecast.models import (
    adida,
    croston_classic,
    historic_average,
    naive,
    seasonal_naive,
    seasonal_window_average,
    ses,
    auto_arima
)
from statsforecast.utils import generate_series
{% endraw %} {% raw %}

class StatsForecast[source]

StatsForecast(df, models, freq, n_jobs=1)

{% endraw %} {% raw %}
{% endraw %}

Daily data

{% raw %}
# Fit four baseline models on the daily series. A (model, arg) tuple runs
# the model with that argument: (ses, 0.1) yields the ses_alpha-0.1 column.
fcst = StatsForecast(
    df=series,
    models=[adida, (ses, 0.1), historic_average, croston_classic],
    freq='D',
)
# Forecast 14 steps ahead for every series.
res = fcst.forecast(14)
res
ds adida ses_alpha-0.1 historic_average croston_classic
unique_id
0 2000-08-10 157.559219 157.559219 161.040253 157.559219
0 2000-08-11 157.559219 157.559219 161.040253 157.559219
0 2000-08-12 157.559219 157.559219 161.040253 157.559219
0 2000-08-13 157.559219 157.559219 161.040253 157.559219
0 2000-08-14 157.559219 157.559219 161.040253 157.559219
... ... ... ... ... ...
9999 2000-06-27 87.646744 87.646744 78.274399 87.646744
9999 2000-06-28 87.646744 87.646744 78.274399 87.646744
9999 2000-06-29 87.646744 87.646744 78.274399 87.646744
9999 2000-06-30 87.646744 87.646744 78.274399 87.646744
9999 2000-07-01 87.646744 87.646744 78.274399 87.646744

140000 rows × 5 columns

{% endraw %}

Monthly data

{% raw %}
# Synthetic monthly panel with 10,000 series.
# NOTE(review): min_length/max_length presumably bound the number of
# observations per series, and equal_ends presumably makes every series
# end on the same date - confirm against generate_series.
monthly_series = generate_series(
    10_000,
    freq='M',
    min_length=10,
    max_length=20,
    equal_ends=True,
)
monthly_series
ds y
unique_id
0 2000-06-30 0.317078
0 2000-07-31 1.183993
0 2000-08-31 2.458650
0 2000-09-30 3.396637
0 2000-10-31 4.160418
... ... ...
9999 2001-04-30 7.087452
9999 2001-05-31 8.106541
9999 2001-06-30 9.162334
9999 2001-07-31 10.052648
9999 2001-08-31 11.327798

150180 rows × 2 columns

{% endraw %} {% raw %}
fcst = StatsForecast(
    monthly_series,
    [adida, (ses, 0.1), historic_average, croston_classic],
    freq='M',
)
%time monthly_res = fcst.forecast(4)
monthly_res
CPU times: user 5.29 s, sys: 4.27 ms, total: 5.29 s
Wall time: 5.29 s
ds adida ses_alpha-0.1 historic_average croston_classic
unique_id
0 2001-09-30 4.069533 4.101110 4.895469 4.101110
0 2001-10-31 4.069533 4.101110 4.895469 4.101110
0 2001-11-30 4.069533 4.101110 4.895469 4.101110
0 2001-12-31 4.069533 4.101110 4.895469 4.101110
1 2001-09-30 6.407427 5.631709 6.588876 5.631709
... ... ... ... ... ...
9998 2001-12-31 4.352780 5.783700 5.083481 5.783700
9999 2001-09-30 7.166113 7.166113 6.789791 7.166113
9999 2001-10-31 7.166113 7.166113 6.789791 7.166113
9999 2001-11-30 7.166113 7.166113 6.789791 7.166113
9999 2001-12-31 7.166113 7.166113 6.789791 7.166113

40000 rows × 5 columns

{% endraw %}

Integer datestamp

{% raw %}
from statsforecast.utils import AirPassengers as ap
{% endraw %} {% raw %}
# Single-series frame whose 'ds' column holds the integers 1..len(ap)
# instead of dates, indexed by a constant unique_id.
int_ds_df = (
    pd.DataFrame({'ds': np.arange(1, len(ap) + 1), 'y': ap})
    .assign(unique_id='AirPassengers')
    .set_index('unique_id')
)
int_ds_df.head()
ds y
unique_id
AirPassengers 1 112.0
AirPassengers 2 118.0
AirPassengers 3 132.0
AirPassengers 4 129.0
AirPassengers 5 121.0
{% endraw %} {% raw %}
# Last rows of the frame; the integer datestamp ends at len(ap).
int_ds_df.tail()
ds y
unique_id
AirPassengers 140 606.0
AirPassengers 141 508.0
AirPassengers 142 461.0
AirPassengers 143 390.0
AirPassengers 144 432.0
{% endraw %} {% raw %}
# With an integer 'ds' column the forecast datestamps keep counting from
# the last observed value (144 -> 145, 146, ...).
horizon = 7
fcst = StatsForecast(
    df=int_ds_df,
    models=[historic_average],
    freq='D',
)
forecast = fcst.forecast(horizon)
forecast.head()
ds historic_average
unique_id
AirPassengers 145 280.298615
AirPassengers 146 280.298615
AirPassengers 147 280.298615
AirPassengers 148 280.298615
AirPassengers 149 280.298615
{% endraw %} {% raw %}
# The forecast datestamps must be exactly the `horizon` integers that
# follow the last observed one.
last_date = int_ds_df['ds'].max()
expected_ds = np.arange(last_date + 1, last_date + 1 + horizon)
test_eq(forecast['ds'].values, expected_ds)
{% endraw %}

External regressors

Every column after `y` is treated as an external regressor and is passed to the models that accept them. If you use external regressors, you must also supply their future values to the `forecast` method through the `xreg` argument.

{% raw %}
def linear_regression(X, h, future_xreg):
    """Forecast with an ordinary least-squares fit on external regressors.

    Parameters
    ----------
    X : 2-D array
        First column is the target; the remaining columns are the
        regressors observed alongside it.
    h : int
        Not used here; the number of predictions is the number of rows in
        ``future_xreg``.
    future_xreg : 2-D array
        Regressor values for the periods to predict, with the same column
        layout as ``X[:, 1:]``.

    Returns
    -------
    1-D array with one prediction per row of ``future_xreg``.
    """
    target, features = X[:, 0], X[:, 1:]
    # lstsq returns (solution, residuals, rank, singular values); only the
    # solution is needed. No intercept is added: pass a constant column.
    weights = np.linalg.lstsq(features, target, rcond=None)[0]
    return future_xreg @ weights
{% endraw %} {% raw %}
# Build the regressor frame as a new object. Binding `series_xreg` and
# `series` to the same DataFrame and then adding columns would silently
# mutate `series` as well; `assign` returns a fresh frame instead.
series = generate_series(10_000, equal_ends=True)
series_xreg = series.assign(
    intercept=1,  # constant column so the regression can fit an intercept
    dayofweek=series['ds'].dt.dayofweek,
)
# One-hot encode the day of week; drop_first=True drops the first level so
# the dummy columns are not collinear with the intercept column.
series_xreg = pd.get_dummies(series_xreg, columns=['dayofweek'], drop_first=True)
series_xreg
ds y intercept dayofweek_1 dayofweek_2 dayofweek_3 dayofweek_4 dayofweek_5 dayofweek_6
unique_id
0 2000-10-05 0.123838 1 0 0 1 0 0 0
0 2000-10-06 1.460113 1 0 0 0 1 0 0
0 2000-10-07 2.418577 1 0 0 0 0 1 0
0 2000-10-08 3.372232 1 0 0 0 0 0 1
0 2000-10-09 4.241948 1 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ...
9999 2001-05-10 2.338823 1 0 0 1 0 0 0
9999 2001-05-11 3.402687 1 0 0 0 1 0 0
9999 2001-05-12 4.235569 1 0 0 0 0 1 0
9999 2001-05-13 5.210396 1 0 0 0 0 0 1
9999 2001-05-14 6.238846 1 0 0 0 0 0 0

2769354 rows × 9 columns

{% endraw %} {% raw %}
# Hold out the last 14 distinct dates as the validation window.
dates = sorted(series_xreg['ds'].unique())
valid_start = dates[-14]
train_mask = series_xreg['ds'] < valid_start
series_train = series_xreg[train_mask]
series_valid = series_xreg[~train_mask]
# Future regressor values: every validation column except the target.
X_valid = series_valid.drop(columns=['y'])
fcst = StatsForecast(
    series_train,
    [linear_regression],
    freq='D',
)
# The horizon (14) matches the number of held-out dates above.
%time xreg_res = fcst.forecast(14, xreg=X_valid)
# NOTE(review): this positional assignment assumes the forecast rows come
# back in the same order as series_valid - confirm.
xreg_res['y'] = series_valid['y'].values
CPU times: user 1.21 s, sys: 40 ms, total: 1.25 s
Wall time: 1.21 s
{% endraw %} {% raw %}
# Average the forecast and the actual values across series for each date
# and plot both lines; the trailing ';' suppresses the cell's text output.
xreg_res.groupby('ds').mean().plot();
{% endraw %}

Confidence intervals

{% raw %}
# AirPassengers as a single series (unique_id 0) with an integer datestamp.
ap_df = pd.DataFrame(
    {'ds': np.arange(ap.size), 'y': ap},
    index=pd.Index([0] * ap.size, name='unique_id'),
)
# NOTE(review): the 12 passed with each model is presumably the season
# length (monthly data) - confirm against the model signatures.
fcst = StatsForecast(
    df=ap_df,
    models=[(seasonal_naive, 12), (auto_arima, 12)],
    freq='M',
)
# Forecast 12 steps ahead, requesting the 80 and 95 levels.
ap_ci = fcst.forecast(12, level=(80, 95))
ap_ci.set_index('ds').plot(marker='.', figsize=(10, 6));
{% endraw %}