Module mogptk.errors
Expand source code Browse git
import numpy as np
import pandas as pd # TODO: remove dependency on sklearn?
from sklearn import metrics # TODO: remove dependency on sklearn?
def mean_absolute_percentage_error(y_true, y_pred):
    """
    Return the mean absolute percentage error (MAPE) between two arrays.

    Entries where `y_true` is zero are left out, because the relative error
    is undefined there.

    Args:
        y_true (array_like): True values.
        y_pred (array_like): Predicted values, same shape as `y_true`.

    Returns:
        float: MAPE in percent over the non-zero entries of `y_true`,
            or NaN when `y_true` has no non-zero entries.
    """
    # Coerce first: plain lists/tuples cannot be indexed with the tuple
    # of index arrays returned by np.nonzero.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    idx = np.nonzero(y_true)
    y_true = y_true[idx]
    y_pred = y_pred[idx]

    # Nothing left to average: return NaN explicitly instead of letting
    # np.mean emit a RuntimeWarning on an empty array.
    if y_true.size == 0:
        return np.nan
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
# TODO: use relative error per channel
def errors(*models, **kwargs):
    """
    Return error metrics for the given models.

    For each model the deleted (test) observations of all its channels are
    compared with the predicted means and summarised as MAE, MSE and MAPE.
    The predicted means are interpolated linearly to the X positions of those
    observations. If a channel defines a latent function `F`, its values are
    used as the true values instead, which gets rid of the imposed Gaussian
    error on the observations. Channels without predictions (`X_pred` empty)
    or without observations to compare against are skipped.

    Args:
        *models: mogptk models to evaluate. Each must have exactly one input
            dimension.
        all_obs (bool, optional): If True, compare against the values from
            `channel.get_data()` instead of `channel.get_test_data()`.
            Defaults to False.
        print (bool, optional): If True, show the results as a table indexed
            by model name instead of returning them. Defaults to False.

    Returns:
        list of dict: One dictionary per model with the keys 'model' (model
            name), 'MAE' (mean absolute error), 'MSE' (mean squared error)
            and 'MAPE' (mean absolute percentage error), each computed over
            all channels of that model together. Nothing is returned when
            `print` is True.

    Raises:
        Exception: If a model has more than one input dimension.
    """
    all_obs = kwargs.get("all_obs", False)
    output = kwargs.get("print", False)

    results = []
    for model in models:
        if model.get_input_dims() != 1:
            raise Exception("cannot (yet) estimate errors when using multi input dimensions")

        # Pool the values of all channels so the metrics are per model.
        Y_true = np.empty(0)
        Y_pred = np.empty(0)
        for channel in model.data:
            if len(channel.X_pred) == 0:
                continue

            if all_obs:
                x, y_true = channel.get_data()
            else:
                x, y_true = channel.get_test_data()
            if len(x) == 0:
                continue

            if channel.F is not None:
                y_true = channel.F(x)  # use exact latent function to remove imposed Gaussian error on data points

            y_pred = np.interp(x, channel.X_pred.reshape(-1), channel.Y_mu_pred)  # TODO: multi input dims

            Y_true = np.append(Y_true, y_true)
            Y_pred = np.append(Y_pred, y_pred)

        results.append({
            "model": model.name,
            "MAE": metrics.mean_absolute_error(Y_true, Y_pred),
            "MSE": metrics.mean_squared_error(Y_true, Y_pred),
            "MAPE": mean_absolute_percentage_error(Y_true, Y_pred),
        })

    if not output:
        return results

    # Fixing the columns keeps set_index working when no models were given.
    df = pd.DataFrame(results, columns=["model", "MAE", "MSE", "MAPE"])
    df.set_index('model', inplace=True)

    # `display` is only defined inside IPython/Jupyter sessions; fall back to
    # a plain print elsewhere instead of failing with a NameError.
    try:
        show = display
    except NameError:
        show = print
    show(df)
def test_errors(*models, x_test, y_test, raw_errors=False):
"""
Return test errors given a model and test set.
The function assumes all models have been trained and all models
share equal number of inputs and outputs (channels).
Args:
models (mogptk.model): Trained model to evaluate, can be more than one
x_test (list): List of numpy arrays with the inputs of the test set.
Length is the output dimension.
y_test (list): List of numpy array with the true ouputs of test set.
Length is the output dimension.
raw_errors (bool): If true returns for each model a list is returned
with the errors of each channel (y_true - y_pred).
If false returns for each model a list of 4 arrays with the
mean absolute error (MAE), range-normalized mean absolute error (nMAE),
root mean squared error (RMSE) and range-normalized root mean
squared error (nRMSE) for each channel.
Returns:
List with length equal to the number of models, each element
contains a list of length of the output dim and each
element is an array with the errors.
Example:
Given model1, model2, x_test, y_test of correct format.
>>> errors = mogptk.test_errors(model1, model2, x_test, y_test)
>>> errors[i][j]
numpy array with errors from model 'i' at channel 'j'
"""
error_per_model = []
for model in models:
n_channels = model.dataset.get_output_dims()
if n_channels==1:
if not isinstance(y_test, list):
y_test = [y_test]
error_per_channel = []
# print([a.std() for a in y_test])
# predict with model
y_pred, _, _ = model.predict(x_test)
for i in range(n_channels):
errors = y_test[i] - y_pred[i]
# if only error values
if raw_errors:
error_per_channel.append(errors)
# composite errors
else:
y_range = y_test[i].max() - y_test[i].min()
mae = np.abs(errors).mean()
nmae = mae / y_range
rmse = np.sqrt((errors**2).mean())
nrmse = rmse / y_range
error_per_channel.append(np.array([mae, nmae, rmse, nrmse]))
error_per_model.append(error_per_channel)
return error_per_model
Functions
def errors(*models, **kwargs)
-
Return error metrics for given models.
errors returns error measures (MAE, MSE, MAPE) for each model by comparing the deleted observations with the predicted means. The predicted values are interpolated linearly to match the X positions of the deleted observations. However, if a latent function is defined in the data, it is used as the true values, which gets rid of the imposed Gaussian error on the observations.
Args
*models
- mogptk models to evaluate, passed as positional arguments.
Returns
errors()
:dic
- List with one dictionary of error metrics per model, computed over all channels of that model.
Each dictionary has four keys: 'model' contains the model name; 'MAE' the mean absolute error; 'MSE' the mean squared error; 'MAPE' the mean absolute percentage error.
Expand source code Browse git
def errors(*models, **kwargs):
    """
    Return error metrics for the given models.

    For each model the deleted (test) observations of all its channels are
    compared with the predicted means and summarised as MAE, MSE and MAPE.
    The predicted means are interpolated linearly to the X positions of those
    observations. If a channel defines a latent function `F`, its values are
    used as the true values instead, which gets rid of the imposed Gaussian
    error on the observations. Channels without predictions (`X_pred` empty)
    or without observations to compare against are skipped.

    Args:
        *models: mogptk models to evaluate. Each must have exactly one input
            dimension.
        all_obs (bool, optional): If True, compare against the values from
            `channel.get_data()` instead of `channel.get_test_data()`.
            Defaults to False.
        print (bool, optional): If True, show the results as a table indexed
            by model name instead of returning them. Defaults to False.

    Returns:
        list of dict: One dictionary per model with the keys 'model' (model
            name), 'MAE' (mean absolute error), 'MSE' (mean squared error)
            and 'MAPE' (mean absolute percentage error), each computed over
            all channels of that model together. Nothing is returned when
            `print` is True.

    Raises:
        Exception: If a model has more than one input dimension.
    """
    all_obs = kwargs.get("all_obs", False)
    output = kwargs.get("print", False)

    results = []
    for model in models:
        if model.get_input_dims() != 1:
            raise Exception("cannot (yet) estimate errors when using multi input dimensions")

        # Pool the values of all channels so the metrics are per model.
        Y_true = np.empty(0)
        Y_pred = np.empty(0)
        for channel in model.data:
            if len(channel.X_pred) == 0:
                continue

            if all_obs:
                x, y_true = channel.get_data()
            else:
                x, y_true = channel.get_test_data()
            if len(x) == 0:
                continue

            if channel.F is not None:
                y_true = channel.F(x)  # use exact latent function to remove imposed Gaussian error on data points

            y_pred = np.interp(x, channel.X_pred.reshape(-1), channel.Y_mu_pred)  # TODO: multi input dims

            Y_true = np.append(Y_true, y_true)
            Y_pred = np.append(Y_pred, y_pred)

        results.append({
            "model": model.name,
            "MAE": metrics.mean_absolute_error(Y_true, Y_pred),
            "MSE": metrics.mean_squared_error(Y_true, Y_pred),
            "MAPE": mean_absolute_percentage_error(Y_true, Y_pred),
        })

    if not output:
        return results

    # Fixing the columns keeps set_index working when no models were given.
    df = pd.DataFrame(results, columns=["model", "MAE", "MSE", "MAPE"])
    df.set_index('model', inplace=True)

    # `display` is only defined inside IPython/Jupyter sessions; fall back to
    # a plain print elsewhere instead of failing with a NameError.
    try:
        show = display
    except NameError:
        show = print
    show(df)
def mean_absolute_percentage_error(y_true, y_pred)
-
Expand source code Browse git
def mean_absolute_percentage_error(y_true, y_pred):
    """
    Return the mean absolute percentage error (MAPE) between two arrays.

    Entries where `y_true` is zero are left out, because the relative error
    is undefined there.

    Args:
        y_true (array_like): True values.
        y_pred (array_like): Predicted values, same shape as `y_true`.

    Returns:
        float: MAPE in percent over the non-zero entries of `y_true`,
            or NaN when `y_true` has no non-zero entries.
    """
    # Coerce first: plain lists/tuples cannot be indexed with the tuple
    # of index arrays returned by np.nonzero.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    idx = np.nonzero(y_true)
    y_true = y_true[idx]
    y_pred = y_pred[idx]

    # Nothing left to average: return NaN explicitly instead of letting
    # np.mean emit a RuntimeWarning on an empty array.
    if y_true.size == 0:
        return np.nan
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
def test_errors(*models, x_test, y_test, raw_errors=False)
-
Return test errors given a model and test set.
The function assumes all models have been trained and all models share equal number of inputs and outputs (channels).
Args
models
:mogptk.model
- Trained model to evaluate, can be more than one
x_test
:list
- List of numpy arrays with the inputs of the test set. Length is the output dimension.
y_test
:list
- List of numpy arrays with the true outputs of the test set. Length is the output dimension.
raw_errors
:bool
- If true, a list with the raw errors of each channel (y_true - y_pred) is returned for each model. If false, a list with one array of 4 values per channel is returned for each model: the mean absolute error (MAE), range-normalized mean absolute error (nMAE), root mean squared error (RMSE) and range-normalized root mean squared error (nRMSE).
Returns
List with length equal to the number of models; each element contains a list with length equal to the output dimension, and each element of that list is an array with the errors.
Example
Given model1, model2, x_test, y_test of correct format.
>>> errors = mogptk.test_errors(model1, model2, x_test=x_test, y_test=y_test)
>>> errors[i][j]
numpy array with errors from model 'i' at channel 'j'
Expand source code Browse git
def test_errors(*models, x_test, y_test, raw_errors=False):
    """
    Return test errors given one or more models and a test set.

    The function assumes all models have been trained and that all models
    share an equal number of inputs and outputs (channels).

    Args:
        *models (mogptk.model): Trained models to evaluate, can be more than
            one.
        x_test (list): List of numpy arrays with the inputs of the test set.
            Length is the output dimension. Passed unchanged to each model's
            `predict`.
        y_test (list): List of array-likes with the true outputs of the test
            set. Length is the output dimension. For a single-output model a
            value that is not a list is treated as the only channel.
        raw_errors (bool): If True, each channel's entry is the array of raw
            errors (y_true - y_pred). If False, each channel's entry is an
            array of four values: mean absolute error (MAE), range-normalized
            mean absolute error (nMAE), root mean squared error (RMSE) and
            range-normalized root mean squared error (nRMSE).

    Returns:
        List with length equal to the number of models. Each element is a
        list with one entry per channel, and each entry is an array with the
        errors. The normalized values are divided by max(y_test) - min(y_test)
        of the channel, so they are not finite for a constant channel.

    Example:
        Given model1, model2, x_test, y_test of correct format.

        >>> errors = mogptk.test_errors(model1, model2, x_test=x_test, y_test=y_test)
        >>> errors[i][j]
        numpy array with errors from model 'i' at channel 'j'
    """
    error_per_model = []
    for model in models:
        n_channels = model.dataset.get_output_dims()
        if n_channels == 1 and not isinstance(y_test, list):
            y_test = [y_test]

        # predict with model
        y_pred, _, _ = model.predict(x_test)

        error_per_channel = []
        for i in range(n_channels):
            # Coerce so list channels also support .max()/.min() below.
            y_true = np.asarray(y_test[i])
            residuals = y_true - y_pred[i]

            if raw_errors:
                error_per_channel.append(residuals)
                continue

            # composite errors
            y_range = y_true.max() - y_true.min()
            mae = np.abs(residuals).mean()
            nmae = mae / y_range
            rmse = np.sqrt((residuals ** 2).mean())
            nrmse = rmse / y_range
            error_per_channel.append(np.array([mae, nmae, rmse, nrmse]))

        error_per_model.append(error_per_channel)
    return error_per_model


# The name matches pytest's default `test_*` pattern; mark this library
# function so pytest does not collect it when it is imported into a test module.
test_errors.__test__ = False