--- title: Examples keywords: fastai sidebar: home_sidebar nb_path: "nbs/auto.ipynb" ---
The forecasting task we selected is to predict the number of patients with influenza-like illnesses from the US CDC dataset, the dataset contains 7 target variables, and has 966 weeks of history.
We will be creating point forecasts with N-BEATS, N-HiTS and RNN models. The predictive features will be the autoregressive features. More information on the dataset can be found in the N-HiTS paper.
Table of Contents
#!pip install neuralforecast
#!pip install matplotlib
import matplotlib.pyplot as plt
import neuralforecast as nf
from neuralforecast.data.datasets.long_horizon import LongHorizon
Y_df, _, _ = LongHorizon.load(directory='./', group='ILI')
Y_df.head()
n_series = len(Y_df.unique_id.unique())
n_time = len(Y_df.ds.unique()) # dataset is balanced
n_ts_test = 193
n_ts_val = 97
print('n_time', n_time)
print('n_series', n_series)
print('n_ts_test', n_ts_test)
print('n_ts_val', n_ts_val)
# 'AGE 5-24', 'ILITOTAL', 'NUM. OF PROVIDERS', 'OT']
y_plot = Y_df[Y_df.unique_id=='% WEIGHTED ILI'].y.values
x_plot = pd.to_datetime(Y_df[Y_df.unique_id=='% WEIGHTED ILI'].ds).values
plt.plot(x_plot, y_plot)
plt.axvline(x_plot[n_time-n_ts_val-n_ts_test], color='black', linestyle='-.')
plt.axvline(x_plot[n_time-n_ts_test], color='black', linestyle='-.')
plt.ylabel('Weighted ILI [ratio]')
plt.xlabel('Date')
plt.grid()
plt.show()
plt.close()
auto_nhits = NHITS(n_time_out=24)
auto_nhits.space['max_steps'] = hp.choice('max_steps', [1]) # Override max_steps for faster example
auto_nhits.fit(Y_df=Y_df, X_df=None, S_df=None, hyperopt_steps=2,
n_ts_val=n_ts_val,
n_ts_test=n_ts_test,
results_dir='./results/autonhits',
save_trials=True,
loss_function_val=nf.losses.numpy.mae,
loss_functions_test={'mae':nf.losses.numpy.mae,
'mse':nf.losses.numpy.mse},
return_test_forecast=True,
verbose=False)
forecasts = auto_nhits.forecast(Y_df=Y_df)
forecasts
auto_nbeats = NBEATS(n_time_out=24)
auto_nbeats.space['max_steps'] = hp.choice('max_steps', [1]) # Override max_steps for faster example
auto_nbeats.fit(Y_df=Y_df, X_df=None, S_df=None, hyperopt_steps=2,
n_ts_val=n_ts_val,
n_ts_test=n_ts_test,
results_dir='./results/autonbeats',
save_trials=True,
loss_function_val=nf.losses.numpy.mae,
loss_functions_test={'mae':nf.losses.numpy.mae,
'mse':nf.losses.numpy.mse},
return_test_forecast=True,
verbose=False)
forecasts = auto_nbeats.forecast(Y_df=Y_df)
forecasts
auto_rnn = RNN(n_time_out=24)
auto_rnn.space['max_steps'] = hp.choice('max_steps', [1]) # Override max_steps for faster example
auto_rnn.fit(Y_df=Y_df, X_df=None, S_df=None, hyperopt_steps=2,
n_ts_val=n_ts_val,
n_ts_test=n_ts_test,
results_dir='./results/autornn',
save_trials=True,
loss_function_val=nf.losses.numpy.mae,
loss_functions_test={'mae':nf.losses.numpy.mae,
'mse':nf.losses.numpy.mse},
return_test_forecast=True,
verbose=False)
forecasts = auto_rnn.forecast(Y_df=Y_df)
forecasts
A temporal train-evaluation split procedure allows us to estimate the model’s generalization performance on future data unseen by the model. We use the train set to optimize the model parameters, and the validation and test sets to evaluate the accuracy of the model’s predictions.
In this case we set the space to None
, that implicitly uses the predefined model space, but the space can be specified as a dictionary following the conventions of the Hyperopt package.
forecast_horizon = 24
nhits_space_dict = nhits_space(n_time_out=forecast_horizon)
nhits_space_dict['max_steps'] = hp.choice('max_steps', [10])
nhits_space_dict['max_epochs'] = hp.choice('max_epochs', [None])
nbeats_space_dict = nbeats_space(n_time_out=forecast_horizon)
nbeats_space_dict['max_steps'] = hp.choice('max_steps', [10])
nbeats_space_dict['max_epochs'] = hp.choice('max_epochs', [None])
rnn_space_dict = rnn_space(n_time_out=forecast_horizon)
rnn_space_dict['max_steps'] = hp.choice('max_steps', [10])
rnn_space_dict['max_epochs'] = hp.choice('max_epochs', [None])
config_dict = {'nbeats': dict(space=nhits_space_dict, hyperopt_steps=3), # Use space=None for default dict
'nhits': dict(space=nbeats_space_dict, hyperopt_steps=3), # Use space=None for default dict
'rnn': dict(space=rnn_space_dict, hyperopt_steps=3) # Use space=None for default dict
}
A temporal train-validation-test (676,97,193) split procedure allows us to estimate the model’s generalization performance on future data unseen by the model. We use the train set to optimize the model parameters, and the validation and test sets to evaluate the accuracy of the model’s predictions.
forecast_horizon = 24
model = AutoNF(config_dict=config_dict, n_time_out=forecast_horizon)
model.fit(Y_df=Y_df, X_df=None, S_df=None,
loss_function_val=nf.losses.numpy.mae,
loss_functions_test={'mae':nf.losses.numpy.mae,
'mse':nf.losses.numpy.mse},
n_ts_val=n_ts_val,
n_ts_test=n_ts_test,
results_dir='./results/auto',
return_forecasts=True,
verbose=False)
print('MAE')
print('NHITS:\t', model.results_dict['nhits']['best_val_loss'])
print('NBEATS:\t', model.results_dict['nbeats']['best_val_loss'])
print('RNN:\t', model.results_dict['rnn']['best_val_loss'])
time = model.results_dict['nbeats']['optimization_times']
losses = model.results_dict['nbeats']['optimization_losses']
plt.plot(time, losses)
plt.xlabel('segs')
plt.ylabel('val loss')
plt.grid()
plt.show()
Here we wrangle the numpy predictions to evaluate and plot the predictions.
assert((model.results_dict['nbeats']['y_true'] == model.results_dict['rnn']['y_true']).mean() == 1)
y_hat_nhits = model.results_dict['nhits']['y_hat']
y_hat_nbeats = model.results_dict['nbeats']['y_hat']
y_hat_rnn = model.results_dict['rnn']['y_hat']
y_true = model.results_dict['nbeats']['y_true']
print('\n Shapes')
print('1. y_hat_nhits.shape (N,T,H) \t', y_hat_nhits.shape)
print('1. y_hat_nbeats.shape (N,T,H)\t', y_hat_nbeats.shape)
print('1. y_hat_rnn.shape (N,T,H)\t', y_hat_rnn.shape)
print('1. y_true.shape (N,T,H)\t\t', y_true.shape)
w_idx = 0
u_idx = 0
plt.plot(y_true[u_idx,w_idx,:], label='True Signal')
plt.plot(y_hat_nbeats[u_idx,w_idx,:], label='N-BEATS')
plt.plot(y_hat_nhits[u_idx,w_idx,:], label='N-HiTS')
plt.plot(y_hat_rnn[u_idx,w_idx,:], label='RNN')
plt.legend()
plt.xlabel('Time')
plt.ylabel('Weighted ILI [ratio]')
plt.grid()
plt.show()
model.best_model