---
title: Core
keywords: fastai
sidebar: home_sidebar
nb_path: "nbs/core.ipynb"
---
from fastcore.test import test_eq, test_fail
from statsforecast.models import (
adida,
auto_arima,
croston_classic,
croston_optimized,
croston_sba,
ets,
historic_average,
imapa,
naive,
random_walk_with_drift,
seasonal_exponential_smoothing,
seasonal_naive,
seasonal_window_average,
ses,
tsb,
window_average,
)
from statsforecast.utils import generate_series
# Forecast 14 steps ahead (freq='D') with the full line-up of baseline models.
# Tuple entries presumably pair a model function with its extra arguments.
# Every model is listed exactly once so that none is fitted twice.
fcst = StatsForecast(
    series,
    [adida, croston_classic, croston_optimized,
     croston_sba, historic_average, imapa, naive,
     random_walk_with_drift, (seasonal_exponential_smoothing, 7, 0.1),
     (seasonal_naive, 7), (seasonal_window_average, 7, 4),
     (ses, 0.1), (tsb, 0.1, 0.3), (window_average, 4)],
    freq='D',
)
res = fcst.forecast(14)
res
def test_cv_fitted(n_jobs=1):
    """Check the cross-validation fitted values against the input series.

    For every series id and every cutoff reported by
    ``cross_validation_fitted_values``, the ``ds``/``y`` columns must equal
    the rows of ``series_cv`` for that id with ``ds`` up to the cutoff.

    Parameters
    ----------
    n_jobs : int, default 1
        Forwarded unchanged to ``StatsForecast``.
    """
    model = StatsForecast(series_cv, [sum_ahead, naive], freq='D', n_jobs=n_jobs)
    # The return value is not needed here; the call is made with fitted=True
    # (presumably so the fitted values are kept for the lookup below).
    model.cross_validation(h=2, n_windows=4, fitted=True)
    fitted = model.cross_validation_fitted_values()

    columns = ['ds', 'y']
    for uid in fitted.index.unique():
        per_series = fitted.loc[uid]
        # `uid` and `cutoff` are referenced by name inside the query strings.
        for cutoff in per_series['cutoff'].unique():
            actual = per_series.query('cutoff == @cutoff')[columns]
            expected = series_cv.query('ds <= @cutoff & unique_id == @uid')[columns]
            pd.testing.assert_frame_equal(actual, expected, check_dtype=False)
# Run the fitted-values check with its default n_jobs.
test_cv_fitted()

# Repeat the multi-model forecast on data generated with equal_ends=True.
series_eq_ends = generate_series(10_000, equal_ends=True)
fcst = StatsForecast(
    series_eq_ends,
    models=[
        adida,
        croston_classic,
        croston_optimized,
        croston_sba,
        historic_average,
        imapa,
        naive,
        random_walk_with_drift,
        (seasonal_exponential_smoothing, 7, 0.1),
        (seasonal_naive, 7),
        (seasonal_window_average, 7, 4),
        (ses, 0.1),
        (tsb, 0.1, 0.3),
        (window_average, 4),
    ],
    freq='D',
)
res = fcst.forecast(14)
res
# IN_NOTEBOOK gates the script-only blocks in this file. When nbdev (or the
# name itself) cannot be imported, fall back to False. Only ImportError is
# handled so that unrelated failures and interrupts are not swallowed.
try:
    from nbdev.imports import IN_NOTEBOOK
except ImportError:
    IN_NOTEBOOK = False
if __name__ == "__main__" and not IN_NOTEBOOK:
    # Script-only checks, skipped when running inside a notebook.
    # NOTE(review): the indentation of this section was lost; every statement
    # down to the step_size check is assumed to belong under this guard.
    fcst = StatsForecast(
        series,
        [adida, (ses, 0.1), historic_average, croston_classic],
        freq='D',
        n_jobs=2
    )
    res = fcst.forecast(14)
    res_cv = fcst.cross_validation(h=3, test_size=10, n_windows=None)
    print(res)
    print(res_cv)

    fcst = StatsForecast(
        series_cv,
        [sum_ahead],
        freq='D',
    )
    res_cv = fcst.cross_validation(h=2, test_size=5, n_windows=None)
    # The mean difference between y and the sum_ahead column must be exactly 0.
    test_eq(0., np.mean(res_cv['y'] - res_cv['sum_ahead']))

    test_fcst_fitted(n_jobs=2)
    test_cv_fitted(n_jobs=2)

    # check n_windows argument
    # Keep the result of this run so the error check below inspects it
    # instead of re-testing the earlier test_size-based result.
    res_cv = fcst.cross_validation(h=2, n_windows=2)
    n_windows = res_cv.groupby('unique_id').size().unique()
    test_eq(n_windows, 2 * 2)
    test_eq(0., np.mean(res_cv['y'] - res_cv['sum_ahead']))

    # check step_size argument
    res_cv = fcst.cross_validation(h=3, n_windows=3, step_size=3)
    n_windows = res_cv.groupby('unique_id').size().unique()
    test_eq(n_windows, 3 * 3)
    test_eq(0., np.mean(res_cv['y'] - res_cv['sum_ahead']))
monthly_series = generate_series(10_000, freq='M', min_length=10, max_length=20, equal_ends=True)
monthly_series
fcst = StatsForecast(
monthly_series,
[adida, (ses, 0.1), historic_average, croston_classic],
freq='M'
)
%time monthly_res = fcst.forecast(4)
monthly_res
# Integer time index example: AirPassengers with ds = 1..len(ap).
from statsforecast.utils import AirPassengers as ap

int_ds_df = pd.DataFrame({'ds': np.arange(1, len(ap) + 1), 'y': ap})
int_ds_df.insert(0, 'unique_id', 'AirPassengers')
int_ds_df = int_ds_df.set_index('unique_id')
int_ds_df.head()
int_ds_df.tail()

fcst = StatsForecast(int_ds_df, models=[historic_average], freq='D')
horizon = 7
forecast = fcst.forecast(horizon)
forecast.head()

# The forecast ds values must continue the integer sequence right after the
# last observed ds.
last_date = int_ds_df['ds'].max()
test_eq(forecast['ds'].values, np.arange(last_date + 1, last_date + horizon + 1))

int_ds_cv = fcst.cross_validation(h=horizon, test_size=8, n_windows=None)
int_ds_cv
Every column after `y` is considered an external regressor and will be passed to the models that support them. If you use external regressors, you must supply their future values to the `forecast` method.
def linear_regression(X, h, future_xreg, residuals):
    """Least-squares regression of the first column of ``X`` on the others.

    Parameters
    ----------
    X : 2-D array
        Column 0 is used as the target; the remaining columns are the
        regressors.
    h : unused
        Not read by this function; kept as part of the signature.
    future_xreg : array
        Regressor values to predict for; multiplied by the fitted
        coefficients.
    residuals : unused
        Not read by this function; kept as part of the signature.

    Returns
    -------
    dict
        A single entry, ``'mean'``, holding ``future_xreg @ coefficients``.
    """
    target, regressors = X[:, 0], X[:, 1:]
    # lstsq returns (solution, residuals, rank, singular values); only the
    # solution vector is needed.
    solution = np.linalg.lstsq(regressors, target, rcond=None)
    weights = solution[0]
    return {'mean': future_xreg @ weights}
series_xreg = series = generate_series(10_000, equal_ends=True)
series_xreg['intercept'] = 1
series_xreg['dayofweek'] = series_xreg['ds'].dt.dayofweek
series_xreg = pd.get_dummies(series_xreg, columns=['dayofweek'], drop_first=True)
series_xreg
dates = sorted(series_xreg['ds'].unique())
valid_start = dates[-14]
train_mask = series_xreg['ds'] < valid_start
series_train = series_xreg[train_mask]
series_valid = series_xreg[~train_mask]
X_valid = series_valid.drop(columns=['y'])
fcst = StatsForecast(
series_train,
[linear_regression],
freq='D',
)
%time xreg_res = fcst.forecast(14, xreg=X_valid)
xreg_res['y'] = series_valid['y'].values
xreg_res.groupby('ds').mean().plot()
xreg_res_cv = fcst.cross_validation(h=3, test_size=5, n_windows=None)
# AirPassengers as a single series (unique_id 0) with an integer ds column.
ap_df = pd.DataFrame(
    {'ds': np.arange(ap.size), 'y': ap},
    index=pd.Index([0] * ap.size, name='unique_id'),
)
fcst = StatsForecast(
    ap_df,
    models=[(seasonal_naive, 12), (auto_arima, 12)],
    freq='M',
)
# Forecast 12 steps ahead, passing level=(80, 95).
ap_ci = fcst.forecast(12, level=(80, 95))
ap_ci.set_index('ds').plot(marker='.', figsize=(10, 6));

# The same data stacked twice, under unique_id 0 and unique_id 1.
ap_df_2 = pd.DataFrame(
    {'ds': np.tile(np.arange(ap.size), 2), 'y': np.tile(ap, 2)},
    index=pd.Index([0] * ap.size + [1] * ap.size, name='unique_id'),
)
if __name__ == "__main__" and not IN_NOTEBOOK:
    # Script-only run of the AirPassengers level example with n_jobs=101.
    # NOTE(review): the indentation of this section was lost; all statements
    # below are assumed to belong under this guard.
    ap_df = pd.DataFrame(
        {'ds': np.arange(ap.size), 'y': ap},
        index=pd.Index([0] * ap.size, name='unique_id'),
    )
    fcst = StatsForecast(
        ap_df,
        models=[(seasonal_naive, 12), (auto_arima, 12)],
        freq='M',
        n_jobs=101,
    )
    ap_ci = fcst.forecast(12, level=(80, 95))
    ap_ci.set_index('ds').plot(marker='.', figsize=(10, 6))