---
title: WRAPPER
keywords: fastai
sidebar: home_sidebar
nb_path: "nbs/models_rnn__rnn.ipynb"
---
{% raw %}

class RNN[source]

RNN(input_size:int, output_size:int, n_x:int=0, n_s:int=0, sample_freq:int=1, cell_type:str='LSTM', state_hsize:int=50, dilations:List[List[int]]=[[1, 2], [4, 8]], add_nl_layer:bool=False, learning_rate:float=0.001, lr_scheduler_step_size:int=1000, lr_decay:float=0.9, gradient_eps:float=1e-08, gradient_clipping_threshold:float=20.0, weight_decay:float=0.0, noise_std:float=0.001, loss_train:str='MAE', loss_valid:str='MAE', loss_hypar:float=0.0, frequency:str='D', random_seed:int=1) :: LightningModule

Wrapper class exposing the dilated RNN forecasting model as a LightningModule.
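Only input_size and output_size are required; every other hyperparameter falls back to the defaults shown in the signature above. A minimal instantiation sketch (the full, tuned example is further down this page):

model = RNN(input_size=24, output_size=24)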

{% endraw %} {% raw %}

RNN.forecast[source]

RNN.forecast(Y_df, X_df=None, S_df=None, batch_size=1, trainer=None)

Method for forecasting self.output_size periods after the last timestamp of Y_df.

Parameters

Y_df: pd.DataFrame
    Dataframe with the target time series; needs 'unique_id', 'ds' and 'y' columns.
X_df: pd.DataFrame
    Dataframe with exogenous time series; needs 'unique_id' and 'ds' columns. Note that 'unique_id' and 'ds' must match Y_df plus the forecasting horizon.
S_df: pd.DataFrame
    Dataframe with static data; needs a 'unique_id' column.
batch_size: int
    Batch size for forecasting.

Returns

forecast_df: pd.DataFrame Dataframe with forecasts.
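Because the exogenous regressors must be known over the forecast horizon, X_df has to extend output_size timestamps beyond the last timestamp of Y_df; the Forecast section below builds such dataframes explicitly. A minimal sketch of a call:

forecast_df = model.forecast(Y_df=Y_df, X_df=X_df, S_df=S_df, batch_size=2)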

{% endraw %}

RNN Example

{% raw %}
import torch as t
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping

from neuralforecast.data.datasets.epf import EPF
from neuralforecast.data.tsdataset import TimeSeriesDataset
from neuralforecast.data.tsloader import TimeSeriesLoader
{% endraw %} {% raw %}
import pandas as pd
import matplotlib.pyplot as plt

plt.style.use('seaborn-whitegrid')
plt.rcParams['font.family'] = 'serif'

FONTSIZE = 19

# Load the French (FR) and Nord Pool (NP) day-ahead electricity price data
Y_df, X_df, S_df = EPF.load_groups(directory='./data', groups=['FR'])
Y_df_2, X_df_2, S_df_2 = EPF.load_groups(directory='./data', groups=['NP'])

# Align the NP timestamps with the FR timestamps
Y_df_2['ds'] = Y_df['ds']
X_df_2['ds'] = X_df['ds']

# Stack both markets into single panel dataframes
# (DataFrame.append is deprecated in pandas; pd.concat is the modern equivalent)
Y_df = pd.concat([Y_df, Y_df_2]).reset_index(drop=True)
X_df = pd.concat([X_df, X_df_2]).reset_index(drop=True)
S_df = pd.concat([S_df, S_df_2]).reset_index(drop=True)
{% endraw %} {% raw %}
X_df = X_df[['unique_id', 'ds', 'week_day']]

# Trim the series to avoid slow backprop through time:
# keep the last 60 days for training plus 7 days for validation
Y_df = Y_df.groupby('unique_id').tail(60*24 + 7*24).reset_index(drop=True)
X_df = X_df.groupby('unique_id').tail(60*24 + 7*24).reset_index(drop=True)

# Simple max-scaling of the target
Y_df['y'] = Y_df['y']/Y_df['y'].max()

# ds_in_test holds out the last 7 days; is_test toggles whether the
# dataset exposes the holdout (False for training, True for validation)
train_dataset = TimeSeriesDataset(Y_df=Y_df, X_df=X_df,
                                  ds_in_test=7*24,
                                  is_test=False,
                                  input_size=1*24,
                                  output_size=24,
                                  verbose=True)

valid_dataset = TimeSeriesDataset(Y_df=Y_df, X_df=X_df,
                                  ds_in_test=7*24,
                                  is_test=True,
                                  input_size=1*24,
                                  output_size=24,
                                  verbose=True)

train_loader = TimeSeriesLoader(dataset=train_dataset,
                                batch_size=2,
                                shuffle=True)

valid_loader = TimeSeriesLoader(dataset=valid_dataset,
                                batch_size=2,
                                shuffle=False)
INFO:root:Train Validation splits

INFO:root:                              ds                    
                             min                 max
unique_id sample_mask                               
FR        0           2016-12-25 2016-12-31 23:00:00
          1           2016-10-26 2016-12-24 23:00:00
NP        0           2016-12-25 2016-12-31 23:00:00
          1           2016-10-26 2016-12-24 23:00:00
INFO:root:
Total data 			3216 time stamps 
Available percentage=100.0, 	3216 time stamps 
Insample  percentage=89.55, 	2880 time stamps 
Outsample percentage=10.45, 	336 time stamps 

INFO:root:Train Validation splits

INFO:root:                              ds                    
                             min                 max
unique_id sample_mask                               
FR        0           2016-10-26 2016-12-24 23:00:00
          1           2016-12-25 2016-12-31 23:00:00
NP        0           2016-10-26 2016-12-24 23:00:00
          1           2016-12-25 2016-12-31 23:00:00
INFO:root:
Total data 			3216 time stamps 
Available percentage=100.0, 	3216 time stamps 
Insample  percentage=10.45, 	336 time stamps 
Outsample percentage=89.55, 	2880 time stamps 
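The split percentages follow directly from the trimming above (the validation dataset simply inverts the training mask); a quick sanity check against the logged numbers:

n_series = 2                         # FR and NP
total = n_series * (60*24 + 7*24)    # 3216 timestamps in total
test = n_series * 7*24               # 336 timestamps in the holdout week
print(round(100 * test / total, 2))  # 10.45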

{% endraw %} {% raw %}
model = RNN(# Architecture parameters
    n_s=train_dataset.n_s,
    n_x=train_dataset.n_x,
    input_size=3*train_dataset.input_size,   # 3 days of hourly context
    output_size=train_dataset.output_size,   # 24-hour forecast horizon
    sample_freq=train_dataset.output_size,
    cell_type='LSTM',
    state_hsize=50,
    dilations=[[1, 2, 4, 8]],                # one block of four dilated cells
    add_nl_layer=False,
    # Regularization and optimization parameters
    learning_rate=1e-2,
    lr_scheduler_step_size=333,
    lr_decay=0.8,
    gradient_eps=1e-8,
    gradient_clipping_threshold=10,
    weight_decay=0,
    noise_std=0.0001,
    loss_train='MAE',
    loss_valid='MAE',
    frequency='H',
    random_seed=1
)
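For intuition, lr_scheduler_step_size=333 with lr_decay=0.8 cuts the learning rate to 80% every 333 optimization steps; a quick sketch of the implied schedule, assuming standard step-decay semantics (note that in this short demo run, with one batch per epoch and 10 epochs, the decay never actually fires):

# Implied learning-rate schedule (assumption: standard step decay)
for step in (0, 333, 666, 999):
    print(step, 1e-2 * 0.8 ** (step // 333))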
{% endraw %} {% raw %}
early_stopping = EarlyStopping(monitor="val_loss", 
                               min_delta=1e-4, 
                               patience=3, verbose=True, 
                               mode="min")

trainer = pl.Trainer(max_epochs=10, progress_bar_refresh_rate=1, 
                     log_every_n_steps=100, check_val_every_n_epoch=100,
                     callbacks=[early_stopping])
trainer.fit(model, train_loader, valid_loader)

  | Name  | Type | Params
-------------------------------
0 | model | _RNN | 106 K 
-------------------------------
106 K     Trainable params
0         Non-trainable params
106 K     Total params
0.426     Total estimated model params size (MB)
Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 28.42it/s, v_num=293, train_loss_step=0.0192, train_loss_epoch=0.0192]
{% endraw %} {% raw %}
outputs = trainer.predict(model, valid_loader)

y_true, y_hat, sample_mask = zip(*outputs)
y_true = t.cat(y_true).cpu()
y_hat = t.cat(y_hat).cpu()
sample_mask = t.cat(sample_mask).cpu()

print("Original")
print("y_true.shape", y_true.shape)
print("y_hat.shape", y_hat.shape)

y_true = y_true.flatten(1,2)
y_hat = y_hat.flatten(1,2)
sample_mask = sample_mask.flatten(1,2)

print("\nFlatten")
print("y_true.shape", y_true.shape)
print("y_hat.shape", y_hat.shape)
print("sample_mask.shape", sample_mask.shape)
Predicting: 100%|██████████| 1/1 [00:00<?, ?it/s]
Original
y_true.shape torch.Size([2, 7, 24])
y_hat.shape torch.Size([2, 7, 24])

Flatten
y_true.shape torch.Size([2, 168])
y_hat.shape torch.Size([2, 168])
sample_mask.shape torch.Size([2, 168])
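The original shape [2, 7, 24] corresponds to 2 series, 7 rolling daily windows and a 24-hour horizon; flattening the last two axes recovers the full 7*24 = 168-hour validation week per series.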
{% endraw %} {% raw %}
start = 0
end = 7 * 24

fig = plt.figure(figsize=(15, 6))

plt.plot(y_true[0][start:end], color='#628793', linewidth=1, label='true')
plt.plot(y_hat[0][start:end], color='peru', linewidth=1, label='forecast')
plt.ylabel('Price (max-scaled)', fontsize=15)
plt.xlabel('Hour', fontsize=15)
plt.legend()
plt.grid()
plt.show()
{% endraw %}

Forecast

{% raw %}
# Target history ends on 2016-11-25; the model will forecast the next 24 hours
Y_forecast_df = Y_df[Y_df['ds']<'2016-11-26'].reset_index(drop=True)
Y_forecast_df.tail()
     unique_id                  ds         y
1483        NP 2016-11-25 19:00:00  0.056452
1484        NP 2016-11-25 20:00:00  0.053729
1485        NP 2016-11-25 21:00:00  0.052803
1486        NP 2016-11-25 22:00:00  0.053169
1487        NP 2016-11-25 23:00:00  0.050526
{% endraw %} {% raw %}
# Exogenous variables must also cover the 24-hour horizon (through 2016-11-26)
X_forecast_df = X_df[X_df['ds']<'2016-11-27'].reset_index(drop=True)
X_forecast_df.tail()
     unique_id                  ds  week_day
1531        NP 2016-11-26 19:00:00         0
1532        NP 2016-11-26 20:00:00         0
1533        NP 2016-11-26 21:00:00         0
1534        NP 2016-11-26 22:00:00         0
1535        NP 2016-11-26 23:00:00         0
{% endraw %} {% raw %}
forecast_df = model.forecast(Y_df=Y_forecast_df, X_df=X_forecast_df, S_df=S_df, batch_size=2)
INFO:root:Train Validation splits

INFO:root:                              ds                    
                             min                 max
unique_id sample_mask                               
FR        0           2016-10-26 2016-11-25 23:00:00
          1           2016-11-26 2016-11-26 23:00:00
NP        0           2016-10-26 2016-11-25 23:00:00
          1           2016-11-26 2016-11-26 23:00:00
INFO:root:
Total data 			1536 time stamps 
Available percentage=100.0, 	1536 time stamps 
Insample  percentage=3.12, 	48 time stamps 
Outsample percentage=96.88, 	1488 time stamps 

Predicting: 100%|██████████| 1/1 [00:00<00:00, 179.47it/s]
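forecast_df stacks the 24 hourly forecasts per series, FR first and then NP (alphabetical by unique_id), which is why the plots below take the first and last 24 values respectively.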
{% endraw %} {% raw %}
plt.plot(Y_df[Y_df['unique_id']=='FR']['y'][-24:].values, label='last observed day (FR)')
plt.plot(forecast_df['y'].values[:24], label='forecast')
plt.legend()
plt.show()
{% endraw %} {% raw %}
plt.plot(Y_df[Y_df['unique_id']=='NP']['y'][-24:].values, label='last observed day (NP)')
plt.plot(forecast_df['y'].values[24:], label='forecast')
plt.legend()
plt.show()
{% endraw %}