Module mogptk.dataset
import copy

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from .data import *
def LoadCSV(filename, x_col=0, y_col=1, name=None, formats=None, **kwargs):
    """
    LoadCSV loads a dataset from a given CSV file. It loads in x_col as the names of the input
    dimension columns, and y_col the names of the output columns. Setting a formatter for a
    column will enable parsing for example date fields such as '2019-03-01'.

    Args:
        filename (str): CSV filename.
        x_col (int, str, list of int or str): Names or indices of X column(s) in CSV.
        y_col (int, str, list of int or str): Names or indices of Y column(s) in CSV.
        name (str, list, optional): Name or names of data channels.
        formats (dict, optional): Dictionary with x_col values as keys containing
            FormatNumber (default), FormatDate, FormatDateTime, ...
        **kwargs: Additional keyword arguments passed to pandas.read_csv (e.g. sep, quotechar).

    Returns:
        mogptk.data.Data or mogptk.dataset.DataSet

    Examples:
        >>> LoadCSV('gold.csv', 'Date', 'Price', name='Gold', formats={'Date': FormatDate})
        <mogptk.dataset.DataSet at ...>

        >>> LoadCSV('gold.csv', 'Date', 'Price', sep=' ', quotechar='|')
        <mogptk.dataset.DataSet at ...>
    """
    # Use None as the default instead of a mutable {}: LoadDataFrame fills the dict in with
    # auto-detected formatters, so a shared default dict would leak state across calls.
    if formats is None:
        formats = {}
    df = pd.read_csv(filename, **kwargs)
    return LoadDataFrame(df, x_col, y_col, name, formats)
def LoadDataFrame(df, x_col=0, y_col=1, name=None, formats=None):
    """
    LoadDataFrame loads a DataFrame from Pandas. It loads in x_col as the names of the input
    dimension columns, and y_col the names of the output columns. Setting a formatter for a
    column will enable parsing for example date fields such as '2019-03-01'.

    Args:
        df (pandas.DataFrame): The Pandas DataFrame.
        x_col (int, str, list of int or str): Names or indices of X column(s) in DataFrame.
        y_col (int, str, list of int or str): Names or indices of Y column(s) in DataFrame.
        name (str, list of str, optional): Name or names of data channels.
        formats (dict, optional): Dictionary with x_col values as keys containing
            FormatNumber (default), FormatDate, FormatDateTime, ... Entries for columns not
            present in the dictionary are auto-detected and added to the passed dictionary.

    Returns:
        mogptk.data.Data or mogptk.dataset.DataSet

    Raises:
        ValueError: If x_col/y_col have invalid types, the DataFrame is empty, name and y_col
            lengths differ, or a column's format cannot be detected.

    Examples:
        >>> df = pd.DataFrame(...)
        >>> LoadDataFrame(df, 'Date', 'Price', name='Gold')
        <mogptk.dataset.DataSet at ...>
    """
    def _valid_col(col):
        # A column selector is an int, a str, or a homogeneous list of ints or of strs.
        if isinstance(col, (int, str)):
            return True
        return isinstance(col, list) and (all(isinstance(item, int) for item in col)
                                          or all(isinstance(item, str) for item in col))

    if not _valid_col(x_col):
        raise ValueError("x_col must be integer, string or list of integers or strings")
    if not _valid_col(y_col):
        raise ValueError("y_col must be integer, string or list of integers or strings")
    if not isinstance(x_col, list):
        x_col = [x_col]
    if not isinstance(y_col, list):
        y_col = [y_col]
    if name is None:
        name = [None] * len(y_col)
    else:
        if not isinstance(name, list):
            name = [name]
        if len(y_col) != len(name):
            raise ValueError("y_col and name must be of the same length")
    # Use None as the default instead of a mutable {}: this function mutates the dict below,
    # so a shared default dict would leak detected formatters across calls.
    if formats is None:
        formats = {}

    # if columns are indices, convert to column names
    if all(isinstance(item, int) for item in x_col):
        x_col = [df.columns[item] for item in x_col]
    if all(isinstance(item, int) for item in y_col):
        y_col = [df.columns[item] for item in y_col]

    df = df[x_col + y_col]
    if len(df.index) == 0:
        raise ValueError("dataframe cannot be empty")

    x_labels = [str(item) for item in x_col]

    # set formatters automatically if not already set, try and see if we can parse datetime values
    for col in df.columns:
        if col not in formats:
            dtype = df.dtypes[col]
            if np.issubdtype(dtype, np.number):
                formats[col] = FormatNumber()
            elif np.issubdtype(dtype, np.datetime64):
                formats[col] = FormatDateTime()
            elif np.issubdtype(dtype, np.object_):
                # Object column: probe the first value as a number, then as a datetime.
                first = df[col].iloc[0]
                try:
                    float(first)
                    formats[col] = FormatNumber()
                except (TypeError, ValueError):
                    try:
                        dateutil.parser.parse(first)
                        formats[col] = FormatDateTime()
                    except (TypeError, ValueError, OverflowError):
                        raise ValueError("unknown format for column %s, must be a number type or datetime" % (col,))

    dataset = DataSet()
    for i in range(len(y_col)):
        # Drop rows that have NaN in either the X columns or this Y column, per channel.
        channel = df[x_col + [y_col[i]]].dropna()
        dataset.append(Data(
            channel[x_col].values,
            channel[y_col[i]].values,
            name=name[i],
            formats=formats,
            x_labels=x_labels,
            y_label=str(y_col[i]),
        ))
    # A single channel is returned as a plain Data object rather than a DataSet.
    if dataset.get_output_dims() == 1:
        return dataset[0]
    return dataset
################################################################
################################################################
################################################################
class DataSet:
    """
    DataSet is a class that holds multiple Data objects as channels.

    Args:
        *args (mogptk.data.Data, mogptk.dataset.DataSet, list, dict): Accepts multiple arguments,
            each of which should be either a DataSet or Data object, a list of
            Data objects or a dictionary of Data objects. Each Data object will be added to the
            list of channels. In case of a dictionary, the key will set the name of the Data object.
            If a DataSet is passed, its channels will be added.

    Examples:
        >>> dataset = mogptk.DataSet(channel_a, channel_b, channel_c)
    """
    def __init__(self, *args):
        self.channels = []
        for arg in args:
            self.append(arg)

    def __iter__(self):
        return self.channels.__iter__()

    def __len__(self):
        return len(self.channels)

    def __getitem__(self, key):
        # Allow indexing by channel name as well as by integer index or slice.
        if isinstance(key, str):
            return self.channels[self.get_names().index(key)]
        return self.channels[key]

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        s = ''
        for channel in self.channels:
            s += channel.__repr__() + "\n"
        return s

    def append(self, arg):
        """
        Append channel(s) to DataSet.

        Args:
            arg (mogptk.data.Data, mogptk.dataset.DataSet, list, dict): Argument can be either a
                DataSet or Data object, a list of Data objects or a dictionary of Data objects.
                Each Data object will be added to the list of channels. In case of a dictionary,
                the key will set the name of the Data object. If a DataSet is passed, its channels
                will be added.

        Returns:
            mogptk.dataset.DataSet: Self, to allow chaining.

        Raises:
            TypeError: If arg is not one of the accepted types.

        Examples:
            >>> dataset.append(mogptk.LoadFunction(lambda x: np.sin(5*x[:,0]), n=200, start=0.0, end=4.0, name='A'))
        """
        if isinstance(arg, Data):
            self.channels.append(arg)
        elif isinstance(arg, DataSet):
            for val in arg.channels:
                self.channels.append(val)
        elif isinstance(arg, list) and all(isinstance(val, Data) for val in arg):
            for val in arg:
                self.channels.append(val)
        elif isinstance(arg, dict) and all(isinstance(val, Data) for val in arg.values()):
            for key, val in arg.items():
                val.name = key
                self.channels.append(val)
        else:
            # TypeError is more precise than a bare Exception and is still caught by
            # callers that catch Exception.
            raise TypeError("unknown data type %s in append to DataSet" % (type(arg),))
        return self

    def get_input_dims(self):
        """
        Return the input dimensions per channel.

        Returns:
            list: List of input dimensions per channel.

        Examples:
            >>> dataset.get_input_dims()
            [2, 1]
        """
        return [channel.get_input_dims() for channel in self.channels]

    def get_output_dims(self):
        """
        Return the output dimensions of the dataset, i.e. the number of channels.

        Returns:
            int: Output dimensions.

        Examples:
            >>> dataset.get_output_dims()
            4
        """
        return len(self.channels)

    def get_names(self):
        """
        Return the names of the channels. Unnamed channels get a placeholder '#<index+1>'.

        Returns:
            list: List of names.

        Examples:
            >>> dataset.get_names()
            ['A', 'B', 'C']
        """
        return [channel.get_name() if channel.get_name() != "" else "#"+str(i+1) for i, channel in enumerate(self.channels)]

    def get(self, index):
        """
        Return Data object given a channel index or name.

        Args:
            index (int, str): Index or name of the channel.

        Returns:
            mogptk.data.Data: Channel data.

        Raises:
            ValueError: If no channel matches the given index or name.

        Examples:
            >>> channel = dataset.get('A')
        """
        if isinstance(index, int):
            if index < len(self.channels):
                return self.channels[index]
        elif isinstance(index, str):
            for channel in self.channels:
                if channel.name == index:
                    return channel
        # %s instead of %d: index may be a string name, for which %d would itself raise TypeError.
        raise ValueError("channel '%s' does not exist in DataSet" % (index,))

    def get_train_data(self):
        """
        Returns observations used for training.

        Returns:
            list: X data of shape (n,input_dims) per channel.
            list: Y data of shape (n,) per channel.

        Examples:
            >>> x, y = dataset.get_train_data()
        """
        # Fetch once per channel instead of twice.
        data = [channel.get_train_data() for channel in self.channels]
        return [x for x, _ in data], [y for _, y in data]

    def get_data(self):
        """
        Returns all observations, train and test.

        Returns:
            list: X data of shape (n,input_dims) per channel.
            list: Y data of shape (n,) per channel.

        Examples:
            >>> x, y = dataset.get_data()
        """
        # Fetch once per channel instead of twice.
        data = [channel.get_data() for channel in self.channels]
        return [x for x, _ in data], [y for _, y in data]

    def get_test_data(self):
        """
        Returns the observations used for testing.

        Returns:
            list: X data of shape (n,input_dims) per channel.
            list: Y data of shape (n,) per channel.

        Examples:
            >>> x, y = dataset.get_test_data()
        """
        # Fetch once per channel instead of twice.
        data = [channel.get_test_data() for channel in self.channels]
        return [x for x, _ in data], [y for _, y in data]

    def get_prediction(self, name, sigma=2):
        """
        Returns the prediction of a given name with a normal variance of sigma.

        Args:
            name (str): Name of the prediction, equals the name of the model that made the prediction.
            sigma (float): The uncertainty interval calculated at mean-sigma*var and mean+sigma*var.
                Defaults to 2.

        Returns:
            list: X prediction of shape (n,input_dims) per channel.
            list: Y mean prediction of shape (n,) per channel.
            list: Y lower prediction of uncertainty interval of shape (n,) per channel.
            list: Y upper prediction of uncertainty interval of shape (n,) per channel.

        Examples:
            >>> x, y_mean, y_var_lower, y_var_upper = dataset.get_prediction('MOSM', sigma=1)
        """
        x = []
        mu = []
        lower = []
        upper = []
        for channel in self.channels:
            channel_x, channel_mu, channel_lower, channel_upper = channel.get_prediction(name, sigma)
            x.append(channel_x)
            mu.append(channel_mu)
            lower.append(channel_lower)
            upper.append(channel_upper)
        return x, mu, lower, upper

    def set_prediction_x(self, x):
        """
        Set the prediction range per channel.

        Args:
            x (list, dict): Array of shape (n,) or (n,input_dims) per channel with prediction X
                values. If a dictionary is passed, the key is the channel index or name. Any other
                value is applied to every channel.

        Examples:
            >>> dataset.set_prediction_x([[5.0, 5.5, 6.0, 6.5, 7.0], [0.1, 0.2, 0.3]])
            >>> dataset.set_prediction_x({'A': [5.0, 5.5, 6.0, 6.5, 7.0], 'B': [0.1, 0.2, 0.3]})
        """
        if isinstance(x, list):
            if len(x) != len(self.channels):
                raise ValueError("prediction x expected to be a list of shape (output_dims,n)")
            for i, channel in enumerate(self.channels):
                channel.set_prediction_x(x[i])
        elif isinstance(x, dict):
            for name in x:
                self.get(name).set_prediction_x(x[name])
        else:
            # Scalar/array broadcast: same prediction X for every channel.
            for i, channel in enumerate(self.channels):
                channel.set_prediction_x(x)

    def set_prediction_range(self, start, end, n=None, step=None):
        """
        Set the prediction range per channel. Inputs should be lists of shape (input_dims,) for
        each channel or dicts where the keys are the channel names; scalars are broadcast to all
        channels.

        Args:
            start (list, dict): Start values for prediction range per channel.
            end (list, dict): End values for prediction range per channel.
            n (list, dict, optional): Number of points for prediction range per channel.
            step (list, dict, optional): Step size for prediction range per channel.

        Examples:
            >>> dataset.set_prediction_range([2, 3], [5, 6], [4, None], [None, 0.5])
            >>> dataset.set_prediction_range(0.0, 5.0, n=200) # the same for each channel
        """
        if not isinstance(start, (list, dict)):
            start = [start] * self.get_output_dims()
        elif isinstance(start, dict):
            start = [start[name] for name in self.get_names()]
        if not isinstance(end, (list, dict)):
            end = [end] * self.get_output_dims()
        elif isinstance(end, dict):
            end = [end[name] for name in self.get_names()]
        if n is None:
            n = [None] * self.get_output_dims()
        elif not isinstance(n, (list, dict)):
            n = [n] * self.get_output_dims()
        elif isinstance(n, dict):
            n = [n[name] for name in self.get_names()]
        if step is None:
            step = [None] * self.get_output_dims()
        elif not isinstance(step, (list, dict)):
            step = [step] * self.get_output_dims()
        elif isinstance(step, dict):
            step = [step[name] for name in self.get_names()]
        if len(start) != len(self.channels) or len(end) != len(self.channels) or len(n) != len(self.channels) or len(step) != len(self.channels):
            raise ValueError("start, end, n, and/or step must be lists of shape (output_dims,n)")
        for i, channel in enumerate(self.channels):
            channel.set_prediction_range(start[i], end[i], n[i], step[i])

    def get_nyquist_estimation(self):
        """
        Estimate nyquist frequency by taking 0.5/(minimum distance of points).

        Returns:
            list: Nyquist frequency array of shape (input_dims) per channel.

        Examples:
            >>> freqs = dataset.get_nyquist_estimation()
        """
        return [channel.get_nyquist_estimation() for channel in self.channels]

    def get_bnse_estimation(self, Q, n=5000):
        """
        Peaks estimation using BNSE (Bayesian Non-parametric Spectral Estimation).

        Args:
            Q (int): Number of peaks to find.
            n (int): Number of points of the grid to evaluate frequencies, defaults to 5000.

        Returns:
            list: Amplitude array of shape (input_dims,Q) per channel.
            list: Frequency array of shape (input_dims,Q) per channel.
            list: Variance array of shape (input_dims,Q) per channel.

        Examples:
            >>> amplitudes, means, variances = dataset.get_bnse_estimation()
        """
        amplitudes = []
        means = []
        variances = []
        for channel in self.channels:
            channel_amplitudes, channel_means, channel_variances = channel.get_bnse_estimation(Q, n)
            amplitudes.append(channel_amplitudes)
            means.append(channel_means)
            variances.append(channel_variances)
        return amplitudes, means, variances

    def get_lombscargle_estimation(self, Q, n=50000):
        """
        Peaks estimation using Lomb Scargle.

        Args:
            Q (int): Number of peaks to find.
            n (int): Number of points of the grid to evaluate frequencies, defaults to 50000.

        Returns:
            list: Amplitude array of shape (input_dims,Q) per channel.
            list: Frequency array of shape (input_dims,Q) per channel.
            list: Variance array of shape (input_dims,Q) per channel.

        Examples:
            >>> amplitudes, means, variances = dataset.get_lombscargle_estimation()
        """
        amplitudes = []
        means = []
        variances = []
        for channel in self.channels:
            channel_amplitudes, channel_means, channel_variances = channel.get_lombscargle_estimation(Q, n)
            amplitudes.append(channel_amplitudes)
            means.append(channel_means)
            variances.append(channel_variances)
        return amplitudes, means, variances

    def rescale_x(self):
        """
        Rescale the X axes of all channels jointly so that, per formatter category, X values are
        offset to start at the common minimum and scaled to a 0-1000 range.

        NOTE(review): formatters[:-1] suggests the last formatter belongs to the Y column and is
        deliberately skipped — confirm against the Data class.
        """
        # First pass: gather the global min/max per formatter category across all channels.
        xmin = {}
        xmax = {}
        for channel in self.channels:
            for i, formatter in enumerate(channel.formatters[:-1]):
                if not hasattr(formatter, 'category'):
                    formatter.category = 'none'
                x = channel.get_data()[0]
                if formatter.category not in xmin:
                    xmin[formatter.category] = np.min(x[:,i])
                    xmax[formatter.category] = np.max(x[:,i])
                else:
                    xmin[formatter.category] = min(xmin[formatter.category], np.min(x[:,i]))
                    xmax[formatter.category] = max(xmax[formatter.category], np.max(x[:,i]))
        # Second pass: apply the shared offsets and scales to every channel.
        for channel in self.channels:
            offsets = []
            scales = []
            for i, formatter in enumerate(channel.formatters[:-1]):
                offsets.append(xmin[formatter.category])
                scales.append(1000.0 / (xmax[formatter.category] - xmin[formatter.category]))
            channel.set_x_scaling(offsets, scales)

    def _to_kernel(self):
        """
        Return the data vectors in the format as used by the kernels.

        Returns:
            numpy.ndarray: X data of shape (n,2) where X[:,0] contains the channel indices and
                X[:,1] the X values.
            numpy.ndarray: Y data of shape (n,1).

        Examples:
            >>> x, y = dataset._to_kernel()
        """
        x = [channel.X[channel.mask] for channel in self.channels]
        y = [channel.Y[channel.mask] for channel in self.channels]
        # Prefix each X row with its channel index in column 0.
        chan = [i * np.ones(len(x[i])) for i in range(len(x))]
        chan = np.concatenate(chan).reshape(-1, 1)
        x = np.concatenate(x)
        x = np.concatenate((chan, x), axis=1)
        if y is None:
            return x
        y = np.concatenate(y).reshape(-1, 1)
        return x, y

    def _to_kernel_prediction(self):
        """
        Return the prediction input vectors in the format as used by the kernels.

        Returns:
            numpy.ndarray: X data of shape (n,2) where X[:,0] contains the channel indices and
                X[:,1] the X values. Empty array if there are no prediction points.

        Examples:
            >>> x = dataset._to_kernel_prediction()
        """
        x = [channel.X_pred for channel in self.channels]
        chan = [i * np.ones(len(x[i])) for i in range(len(x))]
        # Guard before np.concatenate: concatenating an empty list of arrays raises ValueError.
        if sum(len(c) for c in chan) == 0:
            return np.array([]).reshape(-1, 1)
        chan = np.concatenate(chan).reshape(-1, 1)
        x = np.concatenate(x)
        x = np.concatenate((chan, x), axis=1)
        return x

    def _from_kernel_prediction(self, name, mu, var):
        """
        Returns the predictions from the format as used by the kernels. The prediction is stored
        in the Data class by the given name.

        Args:
            name (str): Name to store the prediction under.
            mu (numpy.ndarray): Y mean prediction of shape (m*n(m)), i.e. a flat array of n(m)
                data points per channel m.
            var (numpy.ndarray): Y variance prediction of shape (m*n(m)), i.e. a flat array of
                n(m) data points per channel m.

        Examples:
            >>> x = np.array([0.0, 1.0, 2.0, 3.0, 4.0])
            >>> mu, var = model.model.predict_f(x)
            >>> dataset._from_kernel_prediction('MOSM', mu, var)
        """
        N = [len(channel.X_pred) for channel in self.channels]
        if len(mu) != len(var) or sum(N) != len(mu):
            raise ValueError("prediction mu or var different length from prediction x")
        # Split the flat arrays back into per-channel segments.
        i = 0
        for idx in range(len(self.channels)):
            self.channels[idx].Y_mu_pred[name] = np.squeeze(mu[i:i+N[idx]])
            self.channels[idx].Y_var_pred[name] = np.squeeze(var[i:i+N[idx]])
            i += N[idx]

    def copy(self):
        """
        Make a deep copy of DataSet.

        Returns:
            mogptk.dataset.DataSet

        Examples:
            >>> other = dataset.copy()
        """
        return copy.deepcopy(self)

    def plot(self, title=None, figsize=None):
        """
        Plot each Data channel in its own subplot.

        Args:
            title (str, optional): Set the title of the plot.
            figsize (tuple, optional): Figure size (width, height) in inches; defaults to
                (12, 2.5*channels).

        Returns:
            matplotlib.figure.Figure: The figure.
            list of matplotlib.axes.Axes: List of axes.

        Examples:
            >>> fig, axes = dataset.plot('Title')
        """
        if figsize is None:
            figsize = (12, 2.5 * len(self))
        fig, axes = plt.subplots(self.get_output_dims(), 1, constrained_layout=True, squeeze=False, figsize=figsize)
        if title is not None:
            fig.suptitle(title)
        for idx in range(self.get_output_dims()):
            # Only the first subplot carries the legend to avoid repetition.
            if idx == 0:
                self.channels[idx].plot(ax=axes[idx,0], plot_legend=True)
            else:
                self.channels[idx].plot(ax=axes[idx,0])
        return fig, axes
Functions
def LoadCSV(filename, x_col=0, y_col=1, name=None, formats={}, **kwargs)
-
LoadCSV loads a dataset from a given CSV file. It loads in x_cols as the names of the input dimension columns, and y_cols the name of the output columns. Setting a formatter for a column will enable parsing for example date fields such as '2019-03-01'. A filter can be set to filter out data from the CSV, such as ensuring that another column has a certain value.
Args
filename
:str
- CSV filename.
x_col
:int
,str
,list
ofint
orstr
- Names or indices of X column(s) in CSV.
y_col
:int
,str
,list
ofint
orstr
- Names or indices of Y column(s) in CSV.
name
:str
,list
, optional- Name or names of data channels.
formats
:dict
, optional- Dictionary with x_col values as keys containing FormatNumber (default), FormatDate, FormatDateTime, …
**kwargs
- Additional keyword arguments for csv.DictReader.
Returns
Data
orDataSet
Examples
>>> LoadCSV('gold.csv', 'Date', 'Price', name='Gold', formats={'Date': FormatDate}) <mogptk.dataset.DataSet at ...> >>> LoadCSV('gold.csv', 'Date', 'Price', sep=' ', quotechar='|') <mogptk.dataset.DataSet at ...>
Expand source code Browse git
def LoadCSV(filename, x_col=0, y_col=1, name=None, formats={}, **kwargs): """ LoadCSV loads a dataset from a given CSV file. It loads in x_cols as the names of the input dimension columns, and y_cols the name of the output columns. Setting a formatter for a column will enable parsing for example date fields such as '2019-03-01'. A filter can be set to filter out data from the CSV, such as ensuring that another column has a certain value. Args: filename (str): CSV filename. x_col (int, str, list of int or str): Names or indices of X column(s) in CSV. y_col (int, str, list of int or str): Names or indices of Y column(s) in CSV. name (str, list, optional): Name or names of data channels. formats (dict, optional): Dictionary with x_col values as keys containing FormatNumber (default), FormatDate, FormetDateTime, ... **kwargs: Additional keyword arguments for csv.DictReader. Returns: mogptk.data.Data or mogptk.dataset.DataSet Examples: >>> LoadCSV('gold.csv', 'Date', 'Price', name='Gold', formats={'Date': FormatDate}) <mogptk.dataset.DataSet at ...> >>> LoadCSV('gold.csv', 'Date', 'Price', sep=' ', quotechar='|') <mogptk.dataset.DataSet at ...> """ df = pd.read_csv(filename, **kwargs) return LoadDataFrame(df, x_col, y_col, name, formats)
def LoadDataFrame(df, x_col=0, y_col=1, name=None, formats={})
-
LoadDataFrame loads a DataFrame from Pandas. It loads in x_cols as the names of the input dimension columns, and y_cols the names of the output columns. Setting a formatter for a column will enable parsing for example date fields such as '2019-03-01'. A filter can be set to filter out data from the CSV, such as ensuring that another column has a certain value.
Args
df
:pandas.DataFrame
- The Pandas DataFrame.
x_col
:int
,str
,list
ofint
orstr
- Names or indices of X column(s) in DataFrame.
y_col
:int
,str
,list
ofint
orstr
- Names or indices of Y column(s) in DataFrame.
name
:str
,list
ofstr
, optional- Name or names of data channels.
formats
:dict
, optional- Dictionary with x_col values as keys containing FormatNumber (default), FormatDate, FormatDateTime, …
Returns
Data
orDataSet
Examples
>>> df = pd.DataFrame(...) >>> LoadDataFrame(df, 'Date', 'Price', name='Gold') <mogptk.dataset.DataSet at ...>
Expand source code Browse git
def LoadDataFrame(df, x_col=0, y_col=1, name=None, formats={}): """ LoadDataFrame loads a DataFrame from Pandas. It loads in x_cols as the names of the input dimension columns, and y_cols the names of the output columns. Setting a formatter for a column will enable parsing for example date fields such as '2019-03-01'. A filter can be set to filter out data from the CSV, such as ensuring that another column has a certain value. Args: df (pandas.DataFrame): The Pandas DataFrame. x_col (int, str, list of int or str): Names or indices of X column(s) in DataFrame. y_col (int, str, list of int or str): Names or indices of Y column(s) in DataFrame. name (str, list of str, optional): Name or names of data channels. formats (dict, optional): Dictionary with x_col values as keys containing FormatNumber (default), FormatDate, FormetDateTime, ... Returns: mogptk.data.Data or mogptk.dataset.DataSet Examples: >>> df = pd.DataFrame(...) >>> LoadDataFrame(df, 'Date', 'Price', name='Gold') <mogptk.dataset.DataSet at ...> """ if (not isinstance(x_col, list) or not all(isinstance(item, int) for item in x_col) and not all(isinstance(item, str) for item in x_col)) and not isinstance(x_col, int) and not isinstance(x_col, str): raise ValueError("x_col must be integer, string or list of integers or strings") if (not isinstance(y_col, list) or not all(isinstance(item, int) for item in y_col) and not all(isinstance(item, str) for item in y_col)) and not isinstance(y_col, int) and not isinstance(y_col, str): raise ValueError("y_col must be integer, string or list of integers or strings") if not isinstance(x_col, list): x_col = [x_col] if not isinstance(y_col, list): y_col = [y_col] if name == None: name = [None] * len(y_col) else: if not isinstance(name, list): name = [name] if len(y_col) != len(name): raise ValueError("y_col and name must be of the same length") # if columns are indices, convert to column names if all(isinstance(item, int) for item in x_col): x_col = [df.columns[item] for 
item in x_col] if all(isinstance(item, int) for item in y_col): y_col = [df.columns[item] for item in y_col] df = df[x_col + y_col] if len(df.index) == 0: raise ValueError("dataframe cannot be empty") #df = df.dropna() #if len(df.index) == 0: # raise ValueError("dataframe has NaN values for every row, consider selecting X and Y columns that have valid values by setting x_col and y_col") input_dims = len(x_col) x_data = df[x_col] x_labels = [str(item) for item in x_col] # set formatters automatically if not already set, try and see if we can parse datetime values for col in df.columns: if col not in formats: dtype = df.dtypes[col] if np.issubdtype(dtype, np.number): formats[col] = FormatNumber() elif np.issubdtype(dtype, np.datetime64): formats[col] = FormatDateTime() elif np.issubdtype(dtype, np.object_): first = df[col].iloc[0] try: _ = float(first) formats[col] = FormatNumber() except: try: _ = dateutil.parser.parse(first) formats[col] = FormatDateTime() except: raise ValueError("unknown format for column %s, must be a number type or datetime" % (col,)) dataset = DataSet() for i in range(len(y_col)): channel = df[x_col + [y_col[i]]].dropna() dataset.append(Data( channel[x_col].values, channel[y_col[i]].values, name=name[i], formats=formats, x_labels=x_labels, y_label=str(y_col[i]), )) if dataset.get_output_dims() == 1: return dataset[0] return dataset
Classes
class DataSet (*args)
-
DataSet is a class that holds multiple Data objects as channels.
Args
*args
:Data
,DataSet
,list
,dict
- Accepts multiple arguments, each of which should be either a DataSet or Data object, a list of Data objects or a dictionary of Data objects. Each Data object will be added to the list of channels. In case of a dictionary, the key will set the name of the Data object. If a DataSet is passed, its channels will be added.
Examples
>>> dataset = mogptk.DataSet(channel_a, channel_b, channel_c)
Expand source code Browse git
class DataSet:
    """
    DataSet is a class that holds multiple Data objects as channels.

    Args:
        *args (mogptk.data.Data, mogptk.dataset.DataSet, list, dict): Accepts multiple
            arguments, each of which should be either a DataSet or Data object, a list
            of Data objects or a dictionary of Data objects. Each Data object will be
            added to the list of channels. In case of a dictionary, the key will set
            the name of the Data object. If a DataSet is passed, its channels will be
            added.

    Examples:
        >>> dataset = mogptk.DataSet(channel_a, channel_b, channel_c)
    """
    def __init__(self, *args):
        self.channels = []  # list of mogptk.data.Data, one per output channel
        for arg in args:
            self.append(arg)

    def __iter__(self):
        return self.channels.__iter__()

    def __len__(self):
        return len(self.channels)

    def __getitem__(self, key):
        # Support lookup by channel name as well as by position.
        if isinstance(key, str):
            return self.channels[self.get_names().index(key)]
        return self.channels[key]

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        s = ''
        for channel in self.channels:
            s += channel.__repr__() + "\n"
        return s

    def append(self, arg):
        """
        Append channel(s) to DataSet.

        Args:
            arg (mogptk.data.Data, mogptk.dataset.DataSet, list, dict): A DataSet or
                Data object, a list of Data objects or a dictionary of Data objects.
                In case of a dictionary, the key will set the name of the Data
                object. If a DataSet is passed, its channels will be added.

        Examples:
            >>> dataset.append(mogptk.LoadFunction(lambda x: np.sin(5*x[:,0]), n=200, start=0.0, end=4.0, name='A'))
        """
        if isinstance(arg, Data):
            self.channels.append(arg)
        elif isinstance(arg, DataSet):
            for val in arg.channels:
                self.channels.append(val)
        elif isinstance(arg, list) and all(isinstance(val, Data) for val in arg):
            for val in arg:
                self.channels.append(val)
        elif isinstance(arg, dict) and all(isinstance(val, Data) for val in arg.values()):
            for key, val in arg.items():
                val.name = key
                self.channels.append(val)
        else:
            raise Exception("unknown data type %s in append to DataSet" % (type(arg)))
        return self

    def get_input_dims(self):
        """
        Return the input dimensions per channel.

        Returns:
            list: List of input dimensions per channel.
        """
        return [channel.get_input_dims() for channel in self.channels]

    def get_output_dims(self):
        """
        Return the output dimensions of the dataset, i.e. the number of channels.

        Returns:
            int: Output dimensions.
        """
        return len(self.channels)

    def get_names(self):
        """
        Return the names of the channels; unnamed channels are shown as "#<position>".

        Returns:
            list: List of names.
        """
        return [channel.get_name() if channel.get_name() != "" else "#"+str(i+1) for i, channel in enumerate(self.channels)]

    def get(self, index):
        """
        Return Data object given a channel index or name.

        Args:
            index (int, str): Index or name of the channel.

        Returns:
            mogptk.data.Data: Channel data.

        Raises:
            ValueError: If no channel matches the given index or name.

        Examples:
            >>> channel = dataset.get('A')
        """
        if isinstance(index, int):
            if index < len(self.channels):
                return self.channels[index]
        elif isinstance(index, str):
            for channel in self.channels:
                if channel.name == index:
                    return channel
        # Bug fix: '%d' raised a TypeError (not the intended ValueError) when
        # `index` was a string; '%s' handles both ints and strings.
        raise ValueError("channel '%s' does not exist in DataSet" % (index,))

    def get_train_data(self):
        """
        Returns observations used for training.

        Returns:
            list: X data of shape (n,input_dims) per channel.
            list: Y data of shape (n,) per channel.
        """
        return [channel.get_train_data()[0] for channel in self.channels], [channel.get_train_data()[1] for channel in self.channels]

    def get_data(self):
        """
        Returns all observations, train and test.

        Returns:
            list: X data of shape (n,input_dims) per channel.
            list: Y data of shape (n,) per channel.
        """
        return [channel.get_data()[0] for channel in self.channels], [channel.get_data()[1] for channel in self.channels]

    def get_test_data(self):
        """
        Returns the observations used for testing.

        Returns:
            list: X data of shape (n,input_dims) per channel.
            list: Y data of shape (n,) per channel.
        """
        return [channel.get_test_data()[0] for channel in self.channels], [channel.get_test_data()[1] for channel in self.channels]

    def get_prediction(self, name, sigma=2):
        """
        Returns the prediction of a given name with a normal variance of sigma.

        Args:
            name (str): Name of the prediction, equals the name of the model that made the prediction.
            sigma (float): The uncertainty interval calculated at mean-sigma*var and mean+sigma*var. Defaults to 2.

        Returns:
            list: X prediction of shape (n,input_dims) per channel.
            list: Y mean prediction of shape (n,) per channel.
            list: Y lower prediction of uncertainty interval of shape (n,) per channel.
            list: Y upper prediction of uncertainty interval of shape (n,) per channel.
        """
        x = []
        mu = []
        lower = []
        upper = []
        for channel in self.channels:
            channel_x, channel_mu, channel_lower, channel_upper = channel.get_prediction(name, sigma)
            x.append(channel_x)
            mu.append(channel_mu)
            lower.append(channel_lower)
            upper.append(channel_upper)
        return x, mu, lower, upper

    def set_prediction_x(self, x):
        """
        Set the prediction X values per channel.

        Args:
            x (list, dict): Array of shape (n,) or (n,input_dims) per channel with
                prediction X values. If a dictionary is passed, the key is the
                channel index or name; any other value is applied to every channel.
        """
        if isinstance(x, list):
            if len(x) != len(self.channels):
                raise ValueError("prediction x expected to be a list of shape (output_dims,n)")
            for i, channel in enumerate(self.channels):
                channel.set_prediction_x(x[i])
        elif isinstance(x, dict):
            for name in x:
                self.get(name).set_prediction_x(x[name])
        else:
            for i, channel in enumerate(self.channels):
                channel.set_prediction_x(x)

    def set_prediction_range(self, start, end, n=None, step=None):
        """
        Set the prediction range per channel. Inputs should be lists of shape
        (input_dims,) per channel, dicts keyed by channel name, or scalars that
        are broadcast to every channel.

        Args:
            start (list, dict): Start values for prediction range per channel.
            end (list, dict): End values for prediction range per channel.
            n (list, dict, optional): Number of points for prediction range per channel.
            step (list, dict, optional): Step size for prediction range per channel.
        """
        if not isinstance(start, (list, dict)):
            start = [start] * self.get_output_dims()
        elif isinstance(start, dict):
            start = [start[name] for name in self.get_names()]
        if not isinstance(end, (list, dict)):
            end = [end] * self.get_output_dims()
        elif isinstance(end, dict):
            end = [end[name] for name in self.get_names()]
        # Bug fix: `n == None` / `step == None` break with an "ambiguous truth
        # value" error when numpy arrays are passed; use identity checks.
        if n is None:
            n = [None] * self.get_output_dims()
        elif not isinstance(n, (list, dict)):
            n = [n] * self.get_output_dims()
        elif isinstance(n, dict):
            n = [n[name] for name in self.get_names()]
        if step is None:
            step = [None] * self.get_output_dims()
        elif not isinstance(step, (list, dict)):
            step = [step] * self.get_output_dims()
        elif isinstance(step, dict):
            step = [step[name] for name in self.get_names()]
        if len(start) != len(self.channels) or len(end) != len(self.channels) or len(n) != len(self.channels) or len(step) != len(self.channels):
            raise ValueError("start, end, n, and/or step must be lists of shape (output_dims,n)")
        for i, channel in enumerate(self.channels):
            channel.set_prediction_range(start[i], end[i], n[i], step[i])

    def get_nyquist_estimation(self):
        """
        Estimate nyquist frequency by taking 0.5/(minimum distance of points).

        Returns:
            list: Nyquist frequency array of shape (input_dims) per channel.
        """
        return [channel.get_nyquist_estimation() for channel in self.channels]

    def get_bnse_estimation(self, Q, n=5000):
        """
        Peaks estimation using BNSE (Bayesian Non-parametric Spectral Estimation).

        Args:
            Q (int): Number of peaks to find.
            n (int): Number of points of the grid to evaluate frequencies, defaults to 5000.

        Returns:
            list: Amplitude array of shape (input_dims,Q) per channel.
            list: Frequency array of shape (input_dims,Q) per channel.
            list: Variance array of shape (input_dims,Q) per channel.
        """
        amplitudes = []
        means = []
        variances = []
        for channel in self.channels:
            channel_amplitudes, channel_means, channel_variances = channel.get_bnse_estimation(Q, n)
            amplitudes.append(channel_amplitudes)
            means.append(channel_means)
            variances.append(channel_variances)
        return amplitudes, means, variances

    def get_lombscargle_estimation(self, Q, n=50000):
        """
        Peaks estimation using Lomb Scargle.

        Args:
            Q (int): Number of peaks to find.
            n (int): Number of points of the grid to evaluate frequencies, defaults to 50000.

        Returns:
            list: Amplitude array of shape (input_dims,Q) per channel.
            list: Frequency array of shape (input_dims,Q) per channel.
            list: Variance array of shape (input_dims,Q) per channel.
        """
        amplitudes = []
        means = []
        variances = []
        for channel in self.channels:
            channel_amplitudes, channel_means, channel_variances = channel.get_lombscargle_estimation(Q, n)
            amplitudes.append(channel_amplitudes)
            means.append(channel_means)
            variances.append(channel_variances)
        return amplitudes, means, variances

    def rescale_x(self):
        """
        Rescale the X axes of all channels per formatter category so that channels
        sharing a category (e.g. dates) end up on the same [0, 1000] scale.
        """
        xmin = {}
        xmax = {}
        # First pass: global min/max per formatter category across channels and
        # input dimensions. The last formatter is skipped (presumably it belongs
        # to the Y column — TODO confirm).
        for channel in self.channels:
            for i, formatter in enumerate(channel.formatters[:-1]):
                if not hasattr(formatter, 'category'):
                    formatter.category = 'none'
                x = channel.get_data()[0]
                if formatter.category not in xmin:
                    xmin[formatter.category] = np.min(x[:,i])
                    xmax[formatter.category] = np.max(x[:,i])
                else:
                    xmin[formatter.category] = min(xmin[formatter.category], np.min(x[:,i]))
                    xmax[formatter.category] = max(xmax[formatter.category], np.max(x[:,i]))
        # Second pass: shift by the category minimum, scale the span to 1000.
        # NOTE(review): a category with constant X values divides by zero here —
        # confirm upstream guarantees a non-zero span.
        for channel in self.channels:
            offsets = []
            scales = []
            for i, formatter in enumerate(channel.formatters[:-1]):
                offsets.append(xmin[formatter.category])
                scales.append(1000.0 / (xmax[formatter.category] - xmin[formatter.category]))
            channel.set_x_scaling(offsets, scales)

    def _to_kernel(self):
        """
        Return the data vectors in the format as used by the kernels.

        Returns:
            numpy.ndarray: X data of shape (n,2) where X[:,0] contains the channel
                indices and X[:,1] the X values.
            numpy.ndarray: Y data of shape (n,1).
        """
        x = [channel.X[channel.mask] for channel in self.channels]
        y = [channel.Y[channel.mask] for channel in self.channels]
        chan = [i * np.ones(len(x[i])) for i in range(len(x))]
        chan = np.concatenate(chan).reshape(-1, 1)
        x = np.concatenate(x)
        x = np.concatenate((chan, x), axis=1)
        # Bug fix: the original tested `if y == None: return x`, which is always
        # False since `y` is a list; the dead single-value return is removed.
        y = np.concatenate(y).reshape(-1, 1)
        return x, y

    def _to_kernel_prediction(self):
        """
        Return the prediction input vectors in the format as used by the kernels.

        Returns:
            numpy.ndarray: X data of shape (n,2) where X[:,0] contains the channel
                indices and X[:,1] the X values.
        """
        x = [channel.X_pred for channel in self.channels]
        chan = [i * np.ones(len(x[i])) for i in range(len(x))]
        chan = np.concatenate(chan).reshape(-1, 1)
        if len(chan) == 0:
            return np.array([]).reshape(-1, 1)
        x = np.concatenate(x)
        x = np.concatenate((chan, x), axis=1)
        return x

    def _from_kernel_prediction(self, name, mu, var):
        """
        Store predictions (in kernel format) back into each channel under `name`.

        Args:
            name (str): Name to store the prediction under.
            mu (numpy.ndarray): Y mean prediction, flat array of n(m) points per channel m.
            var (numpy.ndarray): Y variance prediction, flat array of n(m) points per channel m.
        """
        N = [len(channel.X_pred) for channel in self.channels]
        if len(mu) != len(var) or sum(N) != len(mu):
            raise ValueError("prediction mu or var different length from prediction x")
        i = 0
        for idx in range(len(self.channels)):
            self.channels[idx].Y_mu_pred[name] = np.squeeze(mu[i:i+N[idx]])
            self.channels[idx].Y_var_pred[name] = np.squeeze(var[i:i+N[idx]])
            i += N[idx]

    def copy(self):
        """
        Make a deep copy of DataSet.

        Returns:
            mogptk.dataset.DataSet
        """
        return copy.deepcopy(self)

    def plot(self, title=None, figsize=None):
        """
        Plot each Data channel in its own subplot.

        Args:
            title (str, optional): Set the title of the plot.
            figsize (tuple, optional): Figure size; defaults to (12, 2.5*num_channels).

        Returns:
            matplotlib.figure.Figure: The figure.
            list of matplotlib.axes.Axes: List of axes.
        """
        if figsize is None:
            figsize = (12, 2.5 * len(self))
        fig, axes = plt.subplots(self.get_output_dims(), 1, constrained_layout=True, squeeze=False, figsize=figsize)
        if title is not None:
            fig.suptitle(title)
        for channel in range(self.get_output_dims()):
            # Only the first subplot carries the legend.
            if channel == 0:
                self.channels[channel].plot(ax=axes[channel,0], plot_legend=True)
            else:
                self.channels[channel].plot(ax=axes[channel,0])
        return fig, axes
Methods
def append(self, arg)
-
Append channel(s) to DataSet.
Args
arg
:Data
,DataSet
,list
,dict
- Argument can be either a DataSet or Data object, a list of Data objects or a dictionary of Data objects. Each Data object will be added to the list of channels. In case of a dictionary, the key will set the name of the Data object. If a DataSet is passed, its channels will be added.
Examples
>>> dataset.append(mogptk.LoadFunction(lambda x: np.sin(5*x[:,0]), n=200, start=0.0, end=4.0, name='A'))
Expand source code Browse git
def append(self, arg):
    """
    Append channel(s) to this DataSet.

    Args:
        arg (mogptk.data.Data, mogptk.dataset.DataSet, list, dict): A Data or
            DataSet object, a list of Data objects, or a dictionary mapping names
            to Data objects. Dictionary keys overwrite the name of their Data
            object; a DataSet contributes all of its channels.

    Examples:
        >>> dataset.append(mogptk.LoadFunction(lambda x: np.sin(5*x[:,0]), n=200, start=0.0, end=4.0, name='A'))
    """
    if isinstance(arg, Data):
        self.channels.append(arg)
        return self
    if isinstance(arg, DataSet):
        self.channels.extend(arg.channels)
        return self
    if isinstance(arg, list) and all(isinstance(item, Data) for item in arg):
        self.channels.extend(arg)
        return self
    if isinstance(arg, dict) and all(isinstance(item, Data) for item in arg.values()):
        for channel_name, channel in arg.items():
            channel.name = channel_name
            self.channels.append(channel)
        return self
    raise Exception("unknown data type %s in append to DataSet" % (type(arg)))
def copy(self)
-
Make a deep copy of DataSet.
Returns
DataSet
Examples
>>> other = dataset.copy()
Expand source code Browse git
def copy(self):
    """
    Make a deep copy of DataSet.

    Returns:
        mogptk.dataset.DataSet

    Examples:
        >>> other = dataset.copy()
    """
    # Deep copy so that the channels (and their underlying arrays) are
    # duplicated rather than shared with the original DataSet.
    return copy.deepcopy(self)
def get(self, index)
-
Return Data object given a channel index or name.
Args
index
:int
,str
- Index or name of the channel.
Returns
mogptk.data.Data: Channel data.
Examples
>>> channel = dataset.get('A')
Expand source code Browse git
def get(self, index):
    """
    Return the Data object for a channel, addressed by index or by name.

    Args:
        index (int, str): Index or name of the channel.

    Returns:
        mogptk.data.Data: Channel data.

    Raises:
        ValueError: If no channel matches the given index or name.

    Examples:
        >>> channel = dataset.get('A')
    """
    if isinstance(index, int):
        if index < len(self.channels):
            return self.channels[index]
    elif isinstance(index, str):
        for channel in self.channels:
            if channel.name == index:
                return channel
    # Bug fix: the original used the '%d' format specifier, which raises a
    # TypeError instead of the intended ValueError when `index` is a string.
    raise ValueError("channel '%s' does not exist in DataSet" % (index,))
def get_bnse_estimation(self, Q, n=5000)
-
Peaks estimation using BNSE (Bayesian Non-parametric Spectral Estimation).
Args
Q
:int
- Number of peaks to find, defaults to 1.
n
:int
- Number of points of the grid to evaluate frequencies, defaults to 5000.
Returns
list
- Amplitude array of shape (input_dims,Q) per channel.
list
- Frequency array of shape (input_dims,Q) per channel.
list
- Variance array of shape (input_dims,Q) per channel.
Examples
>>> amplitudes, means, variances = dataset.get_bnse_estimation()
Expand source code Browse git
def get_bnse_estimation(self, Q, n=5000):
    """
    Peak estimation using BNSE (Bayesian Non-parametric Spectral Estimation).

    Args:
        Q (int): Number of peaks to find.
        n (int): Number of grid points at which frequencies are evaluated, defaults to 5000.

    Returns:
        list: Amplitude array of shape (input_dims,Q) per channel.
        list: Frequency array of shape (input_dims,Q) per channel.
        list: Variance array of shape (input_dims,Q) per channel.

    Examples:
        >>> amplitudes, means, variances = dataset.get_bnse_estimation()
    """
    per_channel = [channel.get_bnse_estimation(Q, n) for channel in self.channels]
    amplitudes = [result[0] for result in per_channel]
    means = [result[1] for result in per_channel]
    variances = [result[2] for result in per_channel]
    return amplitudes, means, variances
def get_data(self)
-
Returns all observations, train and test.
Returns
list
- X data of shape (n,input_dims) per channel.
list
- Y data of shape (n,) per channel.
Examples
>>> x, y = dataset.get_data()
Expand source code Browse git
def get_data(self):
    """
    Return all observations, both train and test.

    Returns:
        list: X data of shape (n,input_dims) per channel.
        list: Y data of shape (n,) per channel.

    Examples:
        >>> x, y = dataset.get_data()
    """
    x = []
    y = []
    for channel in self.channels:
        pair = channel.get_data()
        x.append(pair[0])
        y.append(pair[1])
    return x, y
def get_input_dims(self)
-
Return the input dimensions per channel.
Returns
list
- List of input dimensions per channel.
Examples
>>> dataset.get_input_dims() [2, 1]
Expand source code Browse git
def get_input_dims(self):
    """
    Return the input dimensions per channel.

    Returns:
        list: List of input dimensions per channel.

    Examples:
        >>> dataset.get_input_dims()
        [2, 1]
    """
    dims = []
    for channel in self.channels:
        dims.append(channel.get_input_dims())
    return dims
def get_lombscargle_estimation(self, Q, n=50000)
-
Peaks estimation using Lomb Scargle.
Args
Q
:int
- Number of peaks to find, defaults to 1.
n
:int
- Number of points of the grid to evaluate frequencies, defaults to 50000.
Returns
list
- Amplitude array of shape (input_dims,Q) per channel.
list
- Frequency array of shape (input_dims,Q) per channel.
list
- Variance array of shape (input_dims,Q) per channel.
Examples
>>> amplitudes, means, variances = dataset.get_lombscargle_estimation()
Expand source code Browse git
def get_lombscargle_estimation(self, Q, n=50000):
    """
    Peak estimation using Lomb-Scargle.

    Args:
        Q (int): Number of peaks to find.
        n (int): Number of grid points at which frequencies are evaluated, defaults to 50000.

    Returns:
        list: Amplitude array of shape (input_dims,Q) per channel.
        list: Frequency array of shape (input_dims,Q) per channel.
        list: Variance array of shape (input_dims,Q) per channel.

    Examples:
        >>> amplitudes, means, variances = dataset.get_lombscargle_estimation()
    """
    per_channel = [channel.get_lombscargle_estimation(Q, n) for channel in self.channels]
    amplitudes = [result[0] for result in per_channel]
    means = [result[1] for result in per_channel]
    variances = [result[2] for result in per_channel]
    return amplitudes, means, variances
def get_names(self)
-
Return the names of the channels.
Returns
list
- List of names.
Examples
>>> dataset.get_names() ['A', 'B', 'C']
Expand source code Browse git
def get_names(self):
    """
    Return the names of the channels; unnamed channels are rendered as "#<position>".

    Returns:
        list: List of names.

    Examples:
        >>> dataset.get_names()
        ['A', 'B', 'C']
    """
    names = []
    for position, channel in enumerate(self.channels):
        label = channel.get_name()
        if label == "":
            label = "#" + str(position + 1)
        names.append(label)
    return names
def get_nyquist_estimation(self)
-
Estimate nyquist frequency by taking 0.5/(minimum distance of points).
Returns
list
- Nyquist frequency array of shape (input_dims) per channel.
Examples
>>> freqs = dataset.get_nyquist_estimation()
Expand source code Browse git
def get_nyquist_estimation(self):
    """
    Estimate the Nyquist frequency per channel as 0.5/(minimum distance of points).

    Returns:
        list: Nyquist frequency array of shape (input_dims) per channel.

    Examples:
        >>> freqs = dataset.get_nyquist_estimation()
    """
    estimates = []
    for channel in self.channels:
        estimates.append(channel.get_nyquist_estimation())
    return estimates
def get_output_dims(self)
-
Return the output dimensions of the dataset, i.e. the number of channels.
Returns
int
- Output dimensions.
Examples
>>> dataset.get_output_dims() 4
Expand source code Browse git
def get_output_dims(self):
    """
    Return the number of channels, i.e. the output dimensionality of the dataset.

    Returns:
        int: Output dimensions.

    Examples:
        >>> dataset.get_output_dims()
        4
    """
    channel_count = len(self.channels)
    return channel_count
def get_prediction(self, name, sigma=2)
-
Returns the prediction of a given name with a normal variance of sigma.
Args
name
:str
- Name of the prediction, equals the name of the model that made the prediction.
sigma
:float
- The uncertainty interval calculated at mean - sigma*var and mean + sigma*var. Defaults to 2.
Returns
list
- X prediction of shape (n,input_dims) per channel.
list
- Y mean prediction of shape (n,) per channel.
list
- Y lower prediction of uncertainty interval of shape (n,) per channel.
list
- Y upper prediction of uncertainty interval of shape (n,) per channel.
Examples
>>> x, y_mean, y_var_lower, y_var_upper = dataset.get_prediction('MOSM', sigma=1)
Expand source code Browse git
def get_prediction(self, name, sigma=2):
    """
    Return the prediction stored under `name`, with uncertainty bounds at sigma.

    Args:
        name (str): Name of the prediction, equal to the name of the model that made it.
        sigma (float): The uncertainty interval is calculated at mean-sigma*var and mean+sigma*var. Defaults to 2.

    Returns:
        list: X prediction of shape (n,input_dims) per channel.
        list: Y mean prediction of shape (n,) per channel.
        list: Y lower prediction of uncertainty interval of shape (n,) per channel.
        list: Y upper prediction of uncertainty interval of shape (n,) per channel.

    Examples:
        >>> x, y_mean, y_var_lower, y_var_upper = dataset.get_prediction('MOSM', sigma=1)
    """
    per_channel = [channel.get_prediction(name, sigma) for channel in self.channels]
    x = [result[0] for result in per_channel]
    mu = [result[1] for result in per_channel]
    lower = [result[2] for result in per_channel]
    upper = [result[3] for result in per_channel]
    return x, mu, lower, upper
def get_test_data(self)
-
Returns the observations used for testing.
Returns
list
- X data of shape (n,input_dims) per channel.
list
- Y data of shape (n,) per channel.
Examples
>>> x, y = dataset.get_test_data()
Expand source code Browse git
def get_test_data(self):
    """
    Return the observations used for testing.

    Returns:
        list: X data of shape (n,input_dims) per channel.
        list: Y data of shape (n,) per channel.

    Examples:
        >>> x, y = dataset.get_test_data()
    """
    x = []
    y = []
    for channel in self.channels:
        pair = channel.get_test_data()
        x.append(pair[0])
        y.append(pair[1])
    return x, y
def get_train_data(self)
-
Returns observations used for training.
Returns
list
- X data of shape (n,input_dims) per channel.
list
- Y data of shape (n,) per channel.
Examples
>>> x, y = dataset.get_train_data()
Expand source code Browse git
def get_train_data(self):
    """
    Return the observations used for training.

    Returns:
        list: X data of shape (n,input_dims) per channel.
        list: Y data of shape (n,) per channel.

    Examples:
        >>> x, y = dataset.get_train_data()
    """
    x = []
    y = []
    for channel in self.channels:
        pair = channel.get_train_data()
        x.append(pair[0])
        y.append(pair[1])
    return x, y
def plot(self, title=None, figsize=None)
-
Plot each Data channel.
Args
title
:str
, optional- Set the title of the plot.
Returns
matplotlib.figure.Figure: The figure. list of matplotlib.axes.Axes: List of axes.
Examples
>>> fig, axes = dataset.plot('Title')
Expand source code Browse git
def plot(self, title=None, figsize=None):
    """
    Plot each Data channel in its own subplot.

    Args:
        title (str, optional): Set the title of the plot.
        figsize (tuple, optional): Figure size; defaults to (12, 2.5*num_channels).

    Returns:
        matplotlib.figure.Figure: The figure.
        list of matplotlib.axes.Axes: List of axes.

    Examples:
        >>> fig, axes = dataset.plot('Title')
    """
    if figsize is None:
        figsize = (12, 2.5 * len(self))
    fig, axes = plt.subplots(self.get_output_dims(), 1, constrained_layout=True, squeeze=False, figsize=figsize)
    if title is not None:
        fig.suptitle(title)
    for row, channel in enumerate(self.channels):
        # Only the first subplot carries the legend.
        if row == 0:
            channel.plot(ax=axes[row, 0], plot_legend=True)
        else:
            channel.plot(ax=axes[row, 0])
    return fig, axes
def rescale_x(self)
-
Expand source code Browse git
def rescale_x(self):
    # Rescale the X axes of all channels to a common [0, 1000] range per
    # formatter category, so that channels sharing a category (e.g. dates)
    # end up on the same scale.
    xmin = {}
    xmax = {}
    # First pass: find the global min/max per formatter category across all
    # channels and input dimensions. The last formatter is skipped
    # (presumably it belongs to the Y column — TODO confirm).
    for channel in self.channels:
        for i, formatter in enumerate(channel.formatters[:-1]):
            if not hasattr(formatter, 'category'):
                formatter.category = 'none'
            x = channel.get_data()[0]
            if formatter.category not in xmin:
                xmin[formatter.category] = np.min(x[:,i])
                xmax[formatter.category] = np.max(x[:,i])
            else:
                xmin[formatter.category] = min(xmin[formatter.category], np.min(x[:,i]))
                xmax[formatter.category] = max(xmax[formatter.category], np.max(x[:,i]))
    # Second pass: shift each dimension by the category minimum and scale its
    # span to 1000. NOTE(review): a category with constant X values divides by
    # zero here — confirm upstream guarantees a non-zero span.
    for channel in self.channels:
        offsets = []
        scales = []
        for i, formatter in enumerate(channel.formatters[:-1]):
            offsets.append(xmin[formatter.category])
            scales.append(1000.0 / (xmax[formatter.category] - xmin[formatter.category]))
        channel.set_x_scaling(offsets, scales)
def set_prediction_range(self, start, end, n=None, step=None)
-
Set the prediction range per channel. Inputs should be lists of shape (input_dims,) for each channel or dicts where the keys are the channel indices.
Args
start
:list
,dict
- Start values for prediction range per channel.
end
:list
,dict
- End values for prediction range per channel.
n
:list
,dict
, optional- Number of points for prediction range per channel.
step
:list
,dict
, optional- Step size for prediction range per channel.
Examples
>>> dataset.set_prediction_range([2, 3], [5, 6], [4, None], [None, 0.5]) >>> dataset.set_prediction_range(0.0, 5.0, n=200) # the same for each channel
Expand source code Browse git
def set_prediction_range(self, start, end, n=None, step=None):
    """
    Set the prediction range per channel. Inputs should be lists of shape
    (input_dims,) per channel, dicts keyed by channel name, or scalars that are
    broadcast to every channel.

    Args:
        start (list, dict): Start values for prediction range per channel.
        end (list, dict): End values for prediction range per channel.
        n (list, dict, optional): Number of points for prediction range per channel.
        step (list, dict, optional): Step size for prediction range per channel.

    Examples:
        >>> dataset.set_prediction_range([2, 3], [5, 6], [4, None], [None, 0.5])
        >>> dataset.set_prediction_range(0.0, 5.0, n=200) # the same for each channel
    """
    def _per_channel(value, allow_none):
        # Normalize scalar / dict / list input to one list entry per channel.
        # Bug fix: the original compared `n == None` / `step == None`, which
        # raises an "ambiguous truth value" error when numpy arrays are passed;
        # identity comparison is used instead.
        if allow_none and value is None:
            return [None] * self.get_output_dims()
        if isinstance(value, dict):
            return [value[name] for name in self.get_names()]
        if not isinstance(value, list):
            return [value] * self.get_output_dims()
        return value

    start = _per_channel(start, False)
    end = _per_channel(end, False)
    n = _per_channel(n, True)
    step = _per_channel(step, True)
    if len(start) != len(self.channels) or len(end) != len(self.channels) or len(n) != len(self.channels) or len(step) != len(self.channels):
        raise ValueError("start, end, n, and/or step must be lists of shape (output_dims,n)")
    for i, channel in enumerate(self.channels):
        channel.set_prediction_range(start[i], end[i], n[i], step[i])
def set_prediction_x(self, x)
-
Set the prediction range per channel.
Args
x
:list
,dict
- Array of shape (n,) or (n,input_dims) per channel with prediction X values. If a dictionary is passed, the index is the channel index or name.
Examples
>>> dataset.set_prediction_x([[5.0, 5.5, 6.0, 6.5, 7.0], [0.1, 0.2, 0.3]]) >>> dataset.set_prediction_x({'A': [5.0, 5.5, 6.0, 6.5, 7.0], 'B': [0.1, 0.2, 0.3]})
Expand source code Browse git
def set_prediction_x(self, x):
    """
    Set the prediction X values per channel.

    Args:
        x (list, dict): Array of shape (n,) or (n,input_dims) per channel with
            prediction X values. A dict maps channel index or name to its values;
            any other value is applied to every channel.

    Examples:
        >>> dataset.set_prediction_x([[5.0, 5.5, 6.0, 6.5, 7.0], [0.1, 0.2, 0.3]])
        >>> dataset.set_prediction_x({'A': [5.0, 5.5, 6.0, 6.5, 7.0], 'B': [0.1, 0.2, 0.3]})
    """
    if isinstance(x, list):
        if len(x) != len(self.channels):
            raise ValueError("prediction x expected to be a list of shape (output_dims,n)")
        for channel, channel_x in zip(self.channels, x):
            channel.set_prediction_x(channel_x)
    elif isinstance(x, dict):
        for key in x:
            self.get(key).set_prediction_x(x[key])
    else:
        for channel in self.channels:
            channel.set_prediction_x(x)