--- title: WRAPPER keywords: fastai sidebar: home_sidebar nb_path: "nbs/models_rnn__rnn.ipynb" ---
{% raw %}
{% endraw %} {% raw %}
{% endraw %} {% raw %}
{% endraw %} {% raw %}
{% endraw %} {% raw %}

class RNN[source]

RNN(input_size:int, output_size:int, n_x:int=0, n_s:int=0, sample_freq:int=1, cell_type:str='LSTM', state_hsize:int=50, dilations:List[List[int]]=[[1, 2], [4, 8]], add_nl_layer:bool=False, learning_rate:float=0.001, lr_scheduler_step_size:int=1000, lr_decay:float=0.9, gradient_eps:float=1e-08, gradient_clipping_threshold:float=20.0, weight_decay:float=0.0, noise_std:float=0.001, loss_train:str='MAE', loss_valid:str='MAE', loss_hypar:float=0.0, frequency:str='D', random_seed:int=1) :: LightningModule

Hooks to be used in LightningModule.

{% endraw %} {% raw %}
{% endraw %} {% raw %}

RNN.forecast[source]

RNN.forecast(Y_df, X_df=None, S_df=None, batch_size=1, trainer=None)

Method for forecasting self.output_size periods after last timestamp of Y_df.

Parameters

Y_df: pd.DataFrame Dataframe with target time-series data, needs 'unique_id', 'ds' and 'y' columns. X_df: pd.DataFrame Dataframe with exogenous time-series data, needs 'unique_id' and 'ds' columns. Note that 'unique_id' and 'ds' must match Y_df plus the forecasting horizon. S_df: pd.DataFrame Dataframe with static data, needs 'unique_id' column. batch_size: int Batch size for forecasting.

Returns

forecast_df: pd.DataFrame Dataframe with forecasts.

{% endraw %} {% raw %}
{% endraw %}

RNN Example

{% raw %}
import matplotlib.pyplot as plt

from neuralforecast.data.datasets.epf import EPF, EPFInfo

import torch as t
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping

from neuralforecast.data.tsdataset import TimeSeriesDataset
from neuralforecast.data.tsloader import TimeSeriesLoader
from neuralforecast.losses.utils import LossFunction
{% endraw %} {% raw %}
import pandas as pd
from neuralforecast.data.datasets.epf import EPF
from neuralforecast.data.tsloader import TimeSeriesLoader

import pylab as plt
from pylab import rcParams
plt.style.use('seaborn-whitegrid')
plt.rcParams['font.family'] = 'serif'

FONTSIZE = 19

# Load the French (FR) and Nord Pool (NP) electricity-price groups and
# stack them into single long-format frames with a shared calendar.
Y_df, X_df, S_df = EPF.load_groups(directory='./data', groups=['FR'])
Y_df_2, X_df_2, S_df_2 = EPF.load_groups(directory='./data', groups=['NP'])
# Overwrite the NP timestamps with the FR ones so both groups align.
# NOTE(review): this assumes both groups have identical row counts — confirm.
Y_df_2['ds'] = Y_df['ds']
X_df_2['ds'] = X_df['ds']
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported equivalent and yields the same result here.
Y_df = pd.concat([Y_df, Y_df_2]).reset_index(drop=True)
X_df = pd.concat([X_df, X_df_2]).reset_index(drop=True)
S_df = pd.concat([S_df, S_df_2]).reset_index(drop=True)
{% endraw %} {% raw %}
# Keep only the day-of-week exogenous feature.
X_df = X_df[['unique_id', 'ds', 'week_day']]

# Trim each series to its last 60 days of history plus a 7-day test
# window (hourly data) to avoid slow backprop through time.
n_test_hours = 7 * 24
n_tail_hours = 60 * 24 + n_test_hours
Y_df = Y_df.groupby('unique_id').tail(n_tail_hours).reset_index(drop=True)
X_df = X_df.groupby('unique_id').tail(n_tail_hours).reset_index(drop=True)

# Scale the target into [0, 1] by its global maximum.
Y_df['y'] = Y_df['y'] / Y_df['y'].max()

# Train split: last 7 days are held out (is_test=False hides them).
train_dataset = TimeSeriesDataset(Y_df=Y_df, X_df=X_df,
                                  ds_in_test=n_test_hours,
                                  is_test=False,
                                  input_size=1 * 24,
                                  output_size=24,
                                  verbose=True)

# Validation split: same data, the held-out 7 days are now visible.
valid_dataset = TimeSeriesDataset(Y_df=Y_df, X_df=X_df,
                                  ds_in_test=n_test_hours,
                                  is_test=True,
                                  input_size=1 * 24,
                                  output_size=24,
                                  verbose=True)

train_loader = TimeSeriesLoader(dataset=train_dataset,
                                batch_size=2,
                                shuffle=True)

valid_loader = TimeSeriesLoader(dataset=valid_dataset,
                                batch_size=2,
                                shuffle=False)
{% endraw %} {% raw %}
# Instantiate the RNN with a single four-layer dilated LSTM stack.
model = RNN(
    # --- architecture ---
    input_size=3 * train_dataset.input_size,
    output_size=train_dataset.output_size,
    n_x=train_dataset.n_x,
    n_s=train_dataset.n_s,
    sample_freq=train_dataset.output_size,
    cell_type='LSTM',
    state_hsize=50,
    dilations=[[1, 2, 4, 8]],
    add_nl_layer=False,
    # --- regularization and optimization ---
    learning_rate=0.01,
    lr_scheduler_step_size=333,
    lr_decay=0.8,
    gradient_eps=1e-8,
    gradient_clipping_threshold=10,
    weight_decay=0,
    noise_std=0.0001,
    loss_train='MAE',
    loss_valid='MAE',
    frequency='H',
    random_seed=1,
)
{% endraw %} {% raw %}
# Halt training once the validation loss stops improving by >= 1e-4
# for 3 consecutive validation checks.
stop_callback = EarlyStopping(
    monitor="val_loss",
    min_delta=1e-4,
    patience=3,
    verbose=True,
    mode="min",
)

trainer = pl.Trainer(
    max_epochs=10,
    progress_bar_refresh_rate=1,
    log_every_n_steps=100,
    check_val_every_n_epoch=100,
    callbacks=[stop_callback],
)
trainer.fit(model, train_loader, valid_loader)
{% endraw %} {% raw %}
# Run inference over the validation loader; each batch yields a
# (y_true, y_hat, sample_mask) triple, which we stack column-wise.
outputs = trainer.predict(model, valid_loader)

y_true, y_hat, sample_mask = (
    t.cat(batch_tensors).cpu() for batch_tensors in zip(*outputs)
)

print("Original")
print("y_true.shape", y_true.shape)
print("y_hat.shape", y_hat.shape)
{% endraw %} {% raw %}
# Plot the first forecast window against the observed series.
# The original draft also computed `start = 0` / `end = 7 * 24` window
# bounds, but never used them — removed as dead code.
fig = plt.figure(figsize=(15, 6))

plt.plot(y_true[0], color='#628793', linewidth=1, label='true')
plt.plot(y_hat[0], color='peru', linewidth=1, label='forecast')
# NOTE(review): y was scaled to [0, 1] earlier, so EUR/MWh is nominal here.
plt.ylabel('Price [EUR/MWh]', fontsize=15)
plt.xlabel('Date', fontsize=15)
plt.legend()
plt.grid()
plt.show()
{% endraw %}

Forecast

{% raw %}
# Keep only history strictly before the forecast origin.
history_mask = Y_df['ds'] < '2016-11-26'
Y_forecast_df = Y_df[history_mask].reset_index(drop=True)
Y_forecast_df.tail()
{% endraw %} {% raw %}
# Exogenous features extend one extra day past Y_forecast_df so they
# cover the forecasting horizon.
exog_mask = X_df['ds'] < '2016-11-27'
X_forecast_df = X_df[exog_mask].reset_index(drop=True)
X_forecast_df.tail()
{% endraw %} {% raw %}
# Forecast output_size periods past the last timestamp of Y_forecast_df.
forecast_df = model.forecast(Y_df=Y_forecast_df,
                             X_df=X_forecast_df,
                             S_df=S_df,
                             batch_size=2)
{% endraw %} {% raw %}
# Last observed day of FR prices vs. its 24-hour forecast.
fr_actuals = Y_df[Y_df['unique_id']=='FR']['y'][-24:].values
plt.plot(fr_actuals)
plt.plot(forecast_df['y'].values[:24])
{% endraw %} {% raw %}
# Last observed day of NP prices vs. its 24-hour forecast.
np_actuals = Y_df[Y_df['unique_id']=='NP']['y'][-24:].values
plt.plot(np_actuals)
plt.plot(forecast_df['y'].values[24:])
{% endraw %}