# Source code for tsfeast.splitter

"""
Time Series Windows Module.

*Note*: These classes split data into n equal-length, sliding training/test windows.  This differs
from Scikit-Learn's TimeSeriesSplit implementation, where windows are accumulated:

Scikit-Learn
-------------
Win 0   |----|
Win 1   |--------|
Win 2   |------------|

TimeSeriesWindows
-----------------
Win 0   |----|--------
Win 1   -|----|-------
Win 2   --|----|------
"""
from typing import List, Optional, Tuple

import pandas as pd

# pylint: disable=missing-docstring


class TimeSeriesWindows:
    """Split feature/target data into fixed-length sliding train/test windows.

    Every window is laid out as ``train_length`` training rows, then
    ``gap_length`` skipped rows, then ``test_length`` test rows.  Successive
    windows start one row later than the previous one.
    """

    def __init__(self, train_length: int, test_length: int, gap_length: int = 0) -> None:
        """Store the window geometry.

        Parameters
        ----------
        train_length
            Number of rows in each training slice.
        test_length
            Number of rows in each test slice.
        gap_length
            Number of rows skipped between the end of the training slice and
            the start of the test slice.
        """
        self.train_length = train_length
        self.test_length = test_length
        self.gap_length = gap_length

    def split(
        self, y: pd.DataFrame, x: pd.DataFrame
    ) -> List[Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]]:
        """Return every sliding window that fits completely inside ``x``.

        Parameters
        ----------
        y
            Target data; sliced with the same row positions as ``x``.
        x
            Feature data; its length determines how many windows are produced.

        Returns
        -------
        list of tuple
            One ``(x_train, x_test, y_train, y_test)`` tuple per window, in
            order of increasing start position.  The list is empty when no
            complete window fits.
        """
        n_rows = len(x)
        span = self.train_length + self.gap_length + self.test_length
        # A window starting at row ``s`` fits when ``s + span <= n_rows``.
        # Start positions are additionally capped at ``n_rows`` so that a
        # degenerate (zero-length) span never yields a start beyond the data.
        n_windows = min(n_rows, n_rows - span + 1)

        windows = []
        for train_start in range(n_windows):
            train_end = train_start + self.train_length
            test_start = train_end + self.gap_length
            test_end = test_start + self.test_length
            windows.append(
                (
                    x.iloc[train_start:train_end],
                    x.iloc[test_start:test_end],
                    y.iloc[train_start:train_end],
                    y.iloc[test_start:test_end],
                )
            )
        return windows
class EndogSeriesWindows(TimeSeriesWindows):
    """Split a single (endogenous) dataset into train/test windows.

    The training slice ends one row later for each successive window.  When
    ``max_train_length`` is ``None`` the training slice always starts at row 0;
    otherwise it keeps at most the ``max_train_length`` rows immediately
    before its end.
    """

    def __init__(
        self,
        min_train_length: int,
        test_length: int,
        max_train_length: Optional[int] = None,
        gap_length: int = 0,
    ) -> None:
        """Store the window geometry.

        Parameters
        ----------
        min_train_length
            Row position at which the first training slice ends; also passed
            to the parent class as its ``train_length``.
        test_length
            Number of rows in each test slice.
        max_train_length
            Upper bound on the number of training rows, or ``None`` for no
            bound.
        gap_length
            Number of rows skipped between the training and test slices.
        """
        super().__init__(min_train_length, test_length, gap_length)
        self.min_train_length = min_train_length
        self.max_train_length = max_train_length

    def split(self, y: pd.DataFrame, x=None) -> List[Tuple[pd.DataFrame, pd.DataFrame]]:
        """Return every train/test window whose test slice fits inside ``y``.

        Parameters
        ----------
        y
            Data to split.
        x
            Unused by this method.

        Returns
        -------
        list of tuple
            One ``(y_train, y_test)`` tuple per window, in order of increasing
            training end position.
        """
        n_rows = len(y)
        gap = self.gap_length if self.gap_length is not None else 0
        # The gap must be counted when checking whether the test slice fits;
        # otherwise trailing windows silently get truncated or empty test data.
        # The end position is also capped below ``n_rows``.
        stop = min(n_rows, n_rows - gap - self.test_length + 1)

        windows = []
        for train_end in range(self.min_train_length, stop):
            if self.max_train_length is not None:
                # Clamp at 0: a negative start would make ``iloc`` count from
                # the end of the data and return the wrong (usually empty) rows.
                train_start = max(0, train_end - self.max_train_length)
            else:
                train_start = 0
            test_start = train_end + gap
            test_end = test_start + self.test_length
            windows.append((y.iloc[train_start:train_end], y.iloc[test_start:test_end]))
        return windows