"""Time series feature generators as Scikit-Learn compatible transformers."""fromitertoolsimportcombinationsfromtypingimportList,Optionalimportnumpyasnpimportpandasaspdfromsklearn.baseimportBaseEstimator,TransformerMixinfromsklearn.preprocessingimportPolynomialFeatures,StandardScalerfromsklearn.utils.validationimportcheck_is_fittedfromtsfeast.funcsimport(get_change_features,get_datetime_features,get_difference_features,get_ewma_features,get_lag_features,get_rolling_features,)fromtsfeast.utilsimportData,array_to_dataframe
def transform(self, X: Data, y=None) -> Data:
    """
    Transform fitted data.

    Parameters
    ----------
    X: array of shape [n_samples, n_features]
        The input samples.
    y: None
        Not used; included for compatibility, only.

    Returns
    -------
    Data
        Array-like object of transformed data.

    Notes
    -----
    Scikit-Learn Pipelines only call the `.transform()` method during the
    `.predict()` method, which is appropriate to prevent data leakage in
    predictions. However, most of the transformers in this module take a
    set of features and generate new features; there's no inherent method
    to transform some timeseries features given a fitted estimator.

    For time series lags, changes, etc., we have access to past data for
    feature generation without risk of data leakage; certain features
    (e.g. lags) require this to avoid NaNs or zeros.

    On the first call the input is stored as `input_features_`. On every
    later call the new X is appended to those stored features, the whole
    set is transformed, and only the rows belonging to the new X are kept.
    Appropriate for time series transformations, only.
    """
    if isinstance(X, np.ndarray):
        X = array_to_dataframe(X)

    if hasattr(self, 'input_features_'):
        rows = X.shape[0]
        X = pd.concat([self.input_features_, X])  # pylint: disable=E0203
        transformed = self._transform(X, y)
        # Slice from an explicit start position: `.iloc[-rows:]` would return
        # *every* row when `rows` is 0, leaking the stored history.
        start = max(len(transformed) - rows, 0)
        self.output_features_ = transformed.iloc[start:, :]
    else:
        # First call: remember the raw inputs so later calls have history.
        self.input_features_: pd.DataFrame = X
        # Number of features is the column count (1 for one-dimensional input),
        # not the number of samples.
        self.n_features_in_ = X.shape[1] if X.ndim > 1 else 1
        self.output_features_ = self._transform(X, y)
        self.feature_names_ = self.output_features_.columns

    if self.fillna:
        return self.output_features_.fillna(0)
    return self.output_features_
def get_feature_names(self) -> List[str]:
    """
    Get list of feature names.

    Returns
    -------
    List[str]
        The entries of `feature_names_` as a plain list. `check_is_fitted`
        is called first, so an unfitted transformer raises instead.
    """
    check_is_fitted(self)
    names = [*self.feature_names_]
    return names
def _transform(self, X: pd.DataFrame, y=None) -> pd.DataFrame:
    """
    Transform input data (hook to be overridden by subclasses).

    Parameters
    ----------
    X: pd.DataFrame
        The input samples.
    y: None
        Not used; included for compatibility, only.

    Returns
    -------
    Data
        Transformed features.

    Raises
    ------
    NotImplementedError
        Always; this base implementation provides no transformation.
    """
    raise NotImplementedError()
def fit(self, X: Data, y=None) -> "BaseTransformer":
    """
    Fit transformer object to data.

    This implementation learns nothing from the data; it only returns the
    transformer itself so calls can be chained.

    Parameters
    ----------
    X: array of shape [n_samples, n_features]
        The input samples (ignored).
    y: None
        Not used; included for compatibility, only.

    Returns
    -------
    BaseTransformer
        Self.
    """
    del X, y  # deliberately unused
    return self
class OriginalFeatures(BaseTransformer):
    """Return original features."""

    def _transform(self, X: pd.DataFrame, y=None) -> Data:
        """
        Pass the input through untouched.

        Parameters
        ----------
        X: pd.DataFrame
            The input samples.
        y: None
            Not used; included for compatibility, only.

        Returns
        -------
        Data
            The very same object that was passed in as `X`.
        """
        unchanged = X
        return unchanged
class Scaler(BaseTransformer):
    """Wrap StandardScaler to maintain column names."""

    # NOTE(review): every method below uses `self.scaler`; it is presumably
    # created by an initializer that is not visible in this section -- confirm.

    def fit(self, X: pd.DataFrame, y=None) -> "Scaler":
        """
        Fit the wrapped scaler to data.

        Parameters
        ----------
        X: pd.DataFrame
            The input samples.
        y: None
            Not used; included for compatibility, only.

        Returns
        -------
        Scaler
            Self.
        """
        self.scaler.fit(X)
        # Record the column labels at fit time so that `inverse_transform`
        # can be used right after `fit`, without a prior `transform` call.
        if hasattr(X, 'columns'):
            self.feature_names_ = X.columns
        return self

    def transform(self, X: pd.DataFrame, y=None) -> Data:
        """
        Scale data with the wrapped scaler, keeping column names and index.

        Parameters
        ----------
        X: pd.DataFrame
            The input samples.
        y: None
            Not used; included for compatibility, only.

        Returns
        -------
        Data
            Scaled features, labelled with the columns and index of `X`.
        """
        self.feature_names_ = X.columns
        scaled = self.scaler.transform(X)
        return pd.DataFrame(scaled, columns=X.columns, index=X.index)

    def inverse_transform(self, X: pd.DataFrame, copy: bool = True) -> pd.DataFrame:
        """
        Transform scaled data into original feature space.

        Parameters
        ----------
        X: pd.DataFrame
            The input samples.
        copy: bool
            Default True; if False, try to avoid a copy and do inplace scaling
            instead.

        Returns
        -------
        Data
            Data in original feature space, labelled with the stored feature
            names and the index of `X`.
        """
        restored = self.scaler.inverse_transform(X, copy=copy)
        return pd.DataFrame(restored, columns=self.feature_names_, index=X.index)
def __init__(
    self,
    date_col: Optional[str] = None,
    dt_format: Optional[str] = None,
    freq: Optional[str] = None,
):
    """
    Instantiate transformer object.

    Parameters
    ----------
    date_col: Optional[str]
        Column name containing date/timestamp.
    dt_format: Optional[str]
        Date/timestamp format, e.g. `%Y-%m-%d` for `2020-01-31`.
    freq: Optional[str]
        Frequency of the date/timestamp values; stored as given.
    """
    super().__init__()
    self.freq = freq
    self.dt_format = dt_format
    self.date_col = date_col
def fit(self, X: Data, y=None) -> "DateTimeFeatures":
    """
    Fit transformer object to data by inferring the date frequency.

    Parameters
    ----------
    X: pd.DataFrame or pd.Series
        The input samples. For a DataFrame the dates are read from the
        `date_col` column; a Series is used as the dates directly.
    y: None
        Not used; included for compatibility, only.

    Returns
    -------
    DateTimeFeatures
        Self.

    Raises
    ------
    ValueError
        If `X` is neither a DataFrame nor a Series.
    """
    _ = y
    if not isinstance(X, (pd.DataFrame, pd.Series)):
        raise ValueError('`data` must be a DataFrame or Series.')
    dates = X[self.date_col] if isinstance(X, pd.DataFrame) else X

    # A frequency that was already provided is kept as is.
    if self.freq:
        return self

    parsed = pd.to_datetime(dates, format=self.dt_format)
    self.freq = pd.infer_freq(pd.DatetimeIndex(parsed))
    return self
def _transform(self, X: pd.DataFrame, y=None) -> Data:
    """
    Generate datetime features from the input data.

    Delegates to `get_datetime_features`, passing the configured date
    column, date format and frequency.

    Parameters
    ----------
    X: pd.DataFrame
        The input samples.
    y: None
        Not used; included for compatibility, only.

    Returns
    -------
    Data
        Transformed features.
    """
    options = {'dt_format': self.dt_format, 'freq': self.freq}
    return get_datetime_features(X, self.date_col, **options)
class LagFeatures(BaseTransformer):
    """Generate lag features."""

    def __init__(self, n_lags: int, fillna: bool = True):
        """
        Instantiate transformer object.

        Parameters
        ----------
        n_lags: int
            Number of lags to generate.
        fillna: bool
            Forwarded to the base class initializer. Presumably controls
            whether missing values in the output are filled -- confirm
            against the base class.
        """
        super().__init__(fillna=fillna)
        self.n_lags = n_lags

    def _transform(self, X: pd.DataFrame, y=None) -> Data:
        """
        Generate lag features from the input data.

        Parameters
        ----------
        X: pd.DataFrame
            The input samples.
        y: None
            Not used; included for compatibility, only.

        Returns
        -------
        Data
            Transformed features, as returned by `get_lag_features`.
        """
        lagged = get_lag_features(X, n_lags=self.n_lags)
        return lagged
class RollingFeatures(BaseTransformer):
    """Generate rolling features."""

    def __init__(self, window_lengths: List[int], fillna: bool = True):
        """
        Instantiate transformer object.

        Parameters
        ----------
        window_lengths: List[int]
            Length of window(s) to create.
        fillna: bool
            Forwarded to the base class initializer. Presumably controls
            whether missing values in the output are filled -- confirm
            against the base class.
        """
        super().__init__(fillna=fillna)
        self.window_lengths = window_lengths

    def _transform(self, X: pd.DataFrame, y=None) -> Data:
        """
        Generate rolling features from the input data.

        Parameters
        ----------
        X: pd.DataFrame
            The input samples.
        y: None
            Not used; included for compatibility, only.

        Returns
        -------
        Data
            Transformed features, as returned by `get_rolling_features`.
        """
        rolled = get_rolling_features(X, window_lengths=self.window_lengths)
        return rolled
def __init__(self, window_lengths: List[int], fillna: bool = True):
    """
    Instantiate transformer object.

    Parameters
    ----------
    window_lengths: List[int]
        Length of window(s) to create.
    fillna: bool
        Forwarded to the base class initializer. Presumably controls
        whether missing values in the output are filled -- confirm against
        the base class.
    """
    super().__init__(fillna=fillna)
    self.window_lengths = window_lengths
def _transform(self, X: pd.DataFrame, y=None) -> Data:
    """
    Generate features via `get_ewma_features` from the input data.

    Parameters
    ----------
    X: pd.DataFrame
        The input samples.
    y: None
        Not used; included for compatibility, only.

    Returns
    -------
    Data
        Transformed features, as returned by `get_ewma_features` for the
        configured window lengths.
    """
    smoothed = get_ewma_features(X, window_lengths=self.window_lengths)
    return smoothed
class ChangeFeatures(BaseTransformer):
    """Generate period change features."""

    def __init__(self, period_lengths: List[int], fillna: bool = True):
        """
        Instantiate transformer object.

        Parameters
        ----------
        period_lengths: List[int]
            Length of period[s] to generate change features.
        fillna: bool
            Forwarded to the base class initializer. Presumably controls
            whether missing values in the output are filled -- confirm
            against the base class.
        """
        super().__init__(fillna=fillna)
        self.period_lengths = period_lengths

    def _transform(self, X: pd.DataFrame, y=None) -> Data:
        """
        Generate period change features from the input data.

        Parameters
        ----------
        X: pd.DataFrame
            The input samples.
        y: None
            Not used; included for compatibility, only.

        Returns
        -------
        Data
            Transformed features, as returned by `get_change_features`.
        """
        changes = get_change_features(X, period_lengths=self.period_lengths)
        return changes
def __init__(self, n_diffs: int, fillna: bool = True):
    """
    Instantiate transformer object.

    Parameters
    ----------
    n_diffs: int
        Number of differences to calculate.
    fillna: bool
        Forwarded to the base class initializer. Presumably controls
        whether missing values in the output are filled -- confirm against
        the base class.
    """
    super().__init__(fillna=fillna)
    self.n_diffs = n_diffs
def _transform(self, X: pd.DataFrame, y=None) -> Data:
    """
    Generate difference features from the input data.

    Parameters
    ----------
    X: pd.DataFrame
        The input samples.
    y: None
        Not used; included for compatibility, only.

    Returns
    -------
    Data
        Transformed features, as returned by `get_difference_features` for
        the configured number of differences.
    """
    differenced = get_difference_features(X, n_diffs=self.n_diffs)
    return differenced
def __init__(self, degree=2):
    """
    Instantiate transformer object.

    Parameters
    ----------
    degree: int
        Degree of polynomial to use; defaults to 2.
    """
    super().__init__()
    self.degree = degree
def _transform(self, X: pd.DataFrame, y=None) -> pd.DataFrame:
    """
    Raise every input column to each power from 2 up to `self.degree`.

    Parameters
    ----------
    X: pd.DataFrame
        The input samples.
    y: None
        Not used; included for compatibility, only.

    Returns
    -------
    pd.DataFrame
        One column per (input column, power) pair, named `<column>^<power>`,
        ordered by power and then by input column, on the index of `X`.
        When `self.degree` is below 2 there are no powers to generate and an
        empty frame (no columns) on the index of `X` is returned.
    """
    powers = [
        pd.DataFrame(
            X.values ** exponent,
            columns=[f'{col}^{exponent}' for col in X.columns],
            index=X.index,
        )
        for exponent in range(2, self.degree + 1)
    ]
    # `pd.concat` raises on an empty list, which happens for degree < 2.
    if not powers:
        return pd.DataFrame(index=X.index)
    return pd.concat(powers, axis=1)
class InteractionFeatures(BaseTransformer):
    """Wrap PolynomialFeatures to extract interactions and keep column names."""

    def _transform(self, X: pd.DataFrame, y=None) -> pd.DataFrame:
        """
        Generate pairwise interaction features from the input data.

        Parameters
        ----------
        X: pd.DataFrame
            The input samples. Missing values are replaced with 0 before the
            interactions are computed.
        y: None
            Not used; included for compatibility, only.

        Returns
        -------
        pd.DataFrame
            One column per pair of input columns, named `<left>:<right>`, on
            the index of `X`. The original columns are not included.
        """
        # Import locally under an alias so this always resolves to
        # scikit-learn's class, even if the module-level name
        # `PolynomialFeatures` is rebound (e.g. by a same-named transformer
        # class defined in this module).
        from sklearn.preprocessing import (  # pylint: disable=C0415
            PolynomialFeatures as SklearnPolynomialFeatures,
        )

        transformer = SklearnPolynomialFeatures(interaction_only=True, include_bias=False)
        interactions = transformer.fit_transform(X.fillna(0))
        # Labels are passed through `str` so non-string column labels
        # (e.g. integers) do not make `join` raise a TypeError.
        cols = [':'.join(map(str, pair)) for pair in combinations(X.columns, r=2)]
        return pd.DataFrame(
            interactions[:, X.shape[1]:],  # drop original values
            columns=cols,
            index=X.index,
        )