Module mogptk.model
import os
import json
import time
import numpy as np
import gpflow
import tensorflow as tf
from .dataset import DataSet
import matplotlib
import matplotlib.pyplot as plt
import matplotlib as mpl
from mpl_toolkits.axes_grid1 import make_axes_locatable
from IPython.display import display, HTML
from tabulate import tabulate
import logging
logging.getLogger('tensorflow').propagate = False
logging.getLogger('tensorflow').setLevel(logging.ERROR)
tf.autograph.set_verbosity(0) # TODO: remove and fix problem
gpflow.config.set_default_positive_minimum(1e-6)
logger = logging.getLogger('mogptk')
class model:
def __init__(self, name, dataset):
"""
Base class for Multi-Output Gaussian process models. See subclasses for instantiation.
Args:
name (str): Name of the model.
dataset (mogptk.dataset.DataSet, mogptk.data.Data): DataSet with Data objects for all the channels.
When a (list or dict of) Data object is passed, it will automatically be converted to a DataSet.
"""
if not isinstance(dataset, DataSet):
dataset = DataSet(dataset)
if dataset.get_output_dims() == 0:
raise Exception("dataset must have at least one channel")
if len(set(dataset.get_names())) != len(dataset.get_names()):
raise Exception("all data channels must have unique names")
if len(set(dataset.get_input_dims())) != 1:
raise Exception("all data channels must have the same amount of input dimensions")
for channel in dataset:
for dim in range(channel.X.shape[1]):
xran = np.max(channel.X[:,dim]) - np.min(channel.X[:,dim])
                if xran < 1e-3:
                    logger.warning("Very small X range may cause problems; consider scaling up your X axis")
                elif 1e4 < xran:
                    logger.warning("Very large X range may cause problems; consider scaling down your X axis")
self.name = name
self.dataset = dataset
def _build(self, kernel, likelihood, variational, sparse, like_params):
"""
Build the model using the given kernel and likelihood. The variational and sparse booleans decide which GPflow model will be used.
Args:
kernel (gpflow.Kernel): Kernel to use.
            likelihood (gpflow.likelihoods): Likelihood to use from GPflow; if None,
                a default exact-inference Gaussian likelihood is used.
            variational (bool): If True, use variational inference to approximate
                the function values as Gaussian. If False, use Markov chain Monte Carlo.
sparse (bool): If True, will use sparse GP regression.
like_params (dict): Parameters to GPflow likelihood.
"""
x, y = self.dataset._to_kernel()
# Gaussian likelihood
        if likelihood is None:
if not sparse:
self.model = gpflow.models.GPR((x, y), kernel)
else:
                # TODO: test if inducing points are set
                self.name += ' (sparse)'
                self.model = gpflow.models.SGPR((x, y), kernel)
# MCMC
elif not variational:
self.likelihood = likelihood(**like_params)
if not sparse:
self.name += ' (MCMC)'
                self.model = gpflow.models.GPMC((x, y), kernel, self.likelihood)
else:
self.name += ' (sparse MCMC)'
                self.model = gpflow.models.SGPMC((x, y), kernel, self.likelihood)
# Variational
else:
self.likelihood = likelihood(**like_params)
if not sparse:
self.name += ' (variational)'
                self.model = gpflow.models.VGP((x, y), kernel, self.likelihood)
else:
self.name += ' (sparse variational)'
                # note: GPflow's SVGP takes inducing inputs rather than a data tuple;
                # here the training inputs are used as inducing points (an assumption)
                self.model = gpflow.models.SVGP(kernel, self.likelihood, inducing_variable=x)
################################################################
def print_parameters(self):
"""
Print the parameters of the model in a table.
Examples:
>>> model.print_parameters()
"""
with np.printoptions(precision=3, floatmode='fixed'):
try:
get_ipython # fails if we're not in a notebook
table = '<table><tr><th>Kernel</th><th>Name</th><th>Train</th><th>Shape</th><th>Dtype</th><th>Value</th></tr>'
for q, params in enumerate(self.get_parameters()):
kernel = None
if hasattr(self.model.kernel, 'kernels'):
kernel = self.model.kernel.kernels[q]
else:
kernel = self.model.kernel
first = True
for key in params.keys():
param = getattr(kernel, key)
val = params[key]
if val.ndim == 0:
val = '%.3f' % (val,)
else:
val = str(val)
tr_style = ''
name = ''
if first:
if q != 0:
tr_style = ' style="border-top:1px solid darkgrey"'
name = '<th rowspan="%d" style="text-align:center">%s<br>Q=%d</th>' % (len(params.keys()), kernel.name, q)
first = False
table += '<tr%s>%s<td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>' % (tr_style, name, key, param.trainable, params[key].shape, params[key].dtype, val)
first = True
params = self.get_likelihood_parameters()
for key in params:
param = getattr(self.model.likelihood, key)
val = params[key]
if val.ndim == 0:
val = '%.3f' % (val,)
else:
val = str(val)
name = ''
tr_style = ''
if first:
tr_style = ' style="border-top:1px solid darkgrey"'
name = '<th rowspan="%d" style="text-align:center">%s<br>likelihood</th>' % (len(params.keys()), self.model.likelihood.name)
first = False
table += '<tr%s>%s<td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>' % (tr_style, name, key, param.trainable, params[key].shape, params[key].dtype, val)
table += '</table>'
display(HTML(table))
            except Exception:
                # not running in a notebook (or HTML display failed): fall back to plain text
contents = []
for q, params in enumerate(self.get_parameters()):
kernel = None
if hasattr(self.model.kernel, 'kernels'):
kernel = self.model.kernel.kernels[q]
else:
kernel = self.model.kernel
first = True
for key in params.keys():
param = getattr(kernel, key)
val = params[key]
if val.ndim == 0:
val = '%.3f' % (val,)
else:
val = str(val)
name = ''
if first:
name = '%s Q=%d' % (kernel.name, q)
first = False
contents.append([name, key, param.trainable, params[key].shape, params[key].dtype, val])
first = True
params = self.get_likelihood_parameters()
for key in params.keys():
param = getattr(self.model.likelihood, key)
val = params[key]
if val.ndim == 0:
val = '%.3f' % (val,)
else:
val = str(val)
name = ''
if first:
name = '%s likelihood' % (self.model.likelihood.name,)
first = False
contents.append([name, key, param.trainable, params[key].shape, params[key].dtype, val])
print(tabulate(contents, headers=['Kernel', 'Name', 'Train', 'Shape', 'Dtype', 'Value']))
def get_parameters(self):
"""
Returns all parameters set for the kernel per component.
Examples:
>>> params = model.get_parameters()
"""
params = []
if hasattr(self.model.kernel, 'kernels'):
for kernel_i, kernel in enumerate(self.model.kernel.kernels):
params.append({})
for param_name, param_val in kernel.__dict__.items():
if isinstance(param_val, gpflow.base.Parameter):
params[kernel_i][param_name] = param_val.read_value().numpy()
else:
params.append({})
for param_name, param_val in self.model.kernel.__dict__.items():
if isinstance(param_val, gpflow.base.Parameter):
params[0][param_name] = param_val.read_value().numpy()
return params
def get_likelihood_parameters(self):
"""
Returns all parameters set for the likelihood.
Examples:
>>> params = model.get_likelihood_parameters()
"""
params = {}
for param_name, param_val in self.model.likelihood.__dict__.items():
if isinstance(param_val, gpflow.base.Parameter):
params[param_name] = param_val.read_value().numpy()
return params
def get_parameter(self, q, key):
"""
Gets a kernel parameter for component 'q' with key the parameter name.
Args:
q (int): Component of kernel.
            key (str): Name of the parameter.
Returns:
val (numpy.ndarray): Value of parameter.
Examples:
>>> val = model.get_parameter(0, 'variance') # for Q=0 get the parameter called 'variance'
"""
if hasattr(self.model.kernel, 'kernels'):
if q < 0 or len(self.model.kernel.kernels) <= q:
raise Exception("qth component %d does not exist" % (q,))
kern = self.model.kernel.kernels[q].__dict__
else:
if q != 0:
raise Exception("qth component %d does not exist" % (q,))
kern = self.model.kernel.__dict__
if key not in kern or not isinstance(kern[key], gpflow.base.Parameter):
raise Exception("parameter name '%s' does not exist for q=%d" % (key, q))
return kern[key].read_value().numpy()
def set_parameter(self, q, key, val):
"""
Sets a kernel parameter for component 'q' with key the parameter name.
Args:
q (int): Component of kernel.
            key (str): Name of the parameter.
val (float, numpy.ndarray): Value of parameter.
Examples:
>>> model.set_parameter(0, 'variance', np.array([5.0, 3.0])) # for Q=0 set the parameter called 'variance'
"""
if isinstance(val, (int, float, list)):
val = np.array(val)
if not isinstance(val, np.ndarray):
raise Exception("value %s of type %s is not a number type or ndarray" % (val, type(val)))
if hasattr(self.model.kernel, 'kernels'):
if q < 0 or len(self.model.kernel.kernels) <= q:
raise Exception("qth component %d does not exist" % (q,))
kern = self.model.kernel.kernels[q].__dict__
else:
if q != 0:
raise Exception("qth component %d does not exist" % (q,))
kern = self.model.kernel.__dict__
if key not in kern or not isinstance(kern[key], gpflow.base.Parameter):
raise Exception("parameter name '%s' does not exist for q=%d" % (key, q))
if kern[key].shape != val.shape:
raise Exception("parameter name '%s' must have shape %s and not %s for q=%d" % (key, kern[key].shape, val.shape, q))
for i, v in np.ndenumerate(val):
if v < gpflow.config.default_positive_minimum():
val[i] = gpflow.config.default_positive_minimum()
kern[key].assign(val)
def set_likelihood_parameter(self, key, val):
"""
Sets a likelihood parameter with key the parameter name.
Args:
            key (str): Name of the parameter.
val (float, ndarray): Value of parameter.
Examples:
>>> model.set_likelihood_parameter('variance', np.array([5.0, 3.0])) # set the parameter called 'variance'
"""
if isinstance(val, (int, float, list)):
val = np.array(val)
if not isinstance(val, np.ndarray):
raise Exception("value %s of type %s is not a number type or ndarray" % (val, type(val)))
likelihood = self.model.likelihood.__dict__
if key not in likelihood or not isinstance(likelihood[key], gpflow.base.Parameter):
raise Exception("parameter name '%s' does not exist" % (key))
if likelihood[key].shape != val.shape:
raise Exception("parameter name '%s' must have shape %s and not %s" % (key, likelihood[key].shape, val.shape))
for i, v in np.ndenumerate(val):
if v < gpflow.config.default_positive_minimum():
val[i] = gpflow.config.default_positive_minimum()
likelihood[key].assign(val)
def fix_parameter(self, q, key):
"""
        Make a parameter untrainable (undo with `unfix_parameter`).
        Args:
            q (int, list, or array-like of ints): Components to fix.
            key (str): Name of the parameter.
Examples:
>>> model.fix_parameter([0, 1], 'variance')
"""
if isinstance(q, int):
q = [q]
if hasattr(self.model.kernel, 'kernels'):
for kernel_i in q:
kernel = self.model.kernel.kernels[kernel_i]
for param_name, param_val in kernel.__dict__.items():
if param_name == key and isinstance(param_val, gpflow.base.Parameter):
getattr(self.model.kernel.kernels[kernel_i], param_name).trainable = False
else:
for param_name, param_val in self.model.kernel.__dict__.items():
if param_name == key and isinstance(param_val, gpflow.base.Parameter):
getattr(self.model.kernel, param_name).trainable = False
def unfix_parameter(self, q, key):
"""
        Make a parameter trainable again (that was previously fixed, see `fix_parameter`).
        Args:
            q (int, list, or array-like of ints): Components to unfix.
            key (str): Name of the parameter.
        Examples:
            >>> model.unfix_parameter([0, 1], 'variance')
"""
if isinstance(q, int):
q = [q]
if hasattr(self.model.kernel, 'kernels'):
for kernel_i in q:
kernel = self.model.kernel.kernels[kernel_i]
for param_name, param_val in kernel.__dict__.items():
if param_name == key and isinstance(param_val, gpflow.base.Parameter):
getattr(self.model.kernel.kernels[kernel_i], param_name).trainable = True
else:
for param_name, param_val in self.model.kernel.__dict__.items():
if param_name == key and isinstance(param_val, gpflow.base.Parameter):
getattr(self.model.kernel, param_name).trainable = True
def save_parameters(self, filename):
"""
Save model parameters to a given file that can then be loaded with `load_parameters()`.
Args:
            filename (str): Filename to save to; the model name and '.params' are appended automatically.
Examples:
>>> model.save_parameters('filename')
"""
filename += "." + self.name + ".params"
try:
os.remove(filename)
except OSError:
pass
class NumpyEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, np.ndarray):
return obj.tolist()
return json.JSONEncoder.default(self, obj)
data = {
'model': self.__class__.__name__,
'likelihood': self.get_likelihood_parameters(),
'params': self.get_parameters()
}
with open(filename, 'w') as w:
json.dump(data, w, cls=NumpyEncoder)
def load_parameters(self, filename):
"""
Load model parameters from a given file that was previously saved with `save_parameters()`.
Args:
            filename (str): Filename to load from; the model name and '.params' are appended automatically.
Examples:
>>> model.load_parameters('filename')
"""
filename += "." + self.name + ".params"
with open(filename) as r:
data = json.load(r)
if not isinstance(data, dict) or 'model' not in data or 'likelihood' not in data or 'params' not in data:
raise Exception('parameter file has bad format')
if not isinstance(data['params'], list) or not all(isinstance(param, dict) for param in data['params']):
raise Exception('parameter file has bad format')
if data['model'] != self.__class__.__name__:
raise Exception("parameter file uses model '%s' which is different from current model '%s'" % (data['model'], self.__class__.__name__))
cur_params = self.get_parameters()
if len(data['params']) != len(cur_params):
raise Exception("parameter file uses model with %d kernels which is different from current model that uses %d kernels, is the model's Q different?" % (len(data['params']), len(cur_params)))
for key, val in data['likelihood'].items():
self.set_likelihood_parameter(key, val)
for q, param in enumerate(data['params']):
for key, val in param.items():
self.set_parameter(q, key, val)
def train(
self,
method='L-BFGS-B',
tol=1e-6,
maxiter=500,
params={},
verbose=False):
"""
        Trains the model using the kernel and its parameters.
        For the available optimizers, see scipy.optimize.minimize. Training is
        bounded by a maximum number of iterations, and verbose output prints
        optimization information. When using the 'Adam' optimizer, a
        learning_rate can be passed through params.
        Args:
            method (str): Optimizer to use. If "Adam" is chosen, tf.optimizers.Adam
                will be used; otherwise the given scipy optimizer is used.
                Defaults to scipy's 'L-BFGS-B'.
            tol (float): Tolerance for the optimizer. Defaults to 1e-6.
            maxiter (int): Maximum number of iterations. Defaults to 500.
            params (dict): Additional parameters passed to the optimizer.
            verbose (bool): Print verbose output about the state of the optimizer.
        Examples:
            >>> model.train(tol=1e-6, maxiter=10000)
            >>> model.train(method='Adam', params={'learning_rate': 0.01})
"""
        initial_time = time.time()
if verbose:
print('Starting optimization\n >Model: {}\n >Channels: {}\
\n >Components: {}\n >Training points: {}\n >Parameters: {}\n >Initial NLL: {:.3f}'.format(
self.name,
len(self.dataset),
self.Q,
sum([len(channel.get_train_data()[0]) for channel in self.dataset]),
sum([int(np.prod(var.shape)) for var in self.model.trainable_variables]),
-self.model.log_marginal_likelihood().numpy()))
        @tf.function  # compile the loss for faster repeated evaluation
def loss():
return -self.model.log_marginal_likelihood()
        if method.lower() == "adam":
            # Adam performs one gradient step per minimize() call, so iterate up to maxiter
            learning_rate = params.get('learning_rate', 0.001)
            opt = tf.optimizers.Adam(learning_rate=learning_rate)
            for _ in range(maxiter):
                opt.minimize(loss, self.model.trainable_variables)
else:
opt = gpflow.optimizers.Scipy()
            opt.minimize(closure=loss, variables=self.model.trainable_variables, method=method, tol=tol, options={'maxiter': maxiter, 'disp': verbose}, **params)
        elapsed_time = time.time() - initial_time
if verbose:
print('Optimization finished in {:.2f} minutes\n >Final NLL: {:.3f} \n'.format(elapsed_time / 60, -self.model.log_marginal_likelihood().numpy()))
################################################################################
# Predictions ##################################################################
################################################################################
def predict(self, x=None, plot=False):
"""
Predict with model.
        Will make a prediction using x as input. If no input value is passed, the
        prediction will be made with the attribute self.X_pred, which can be set
        with other functions. It returns the posterior mean and confidence
        interval per channel.
        Args:
            x (list, dict): Prediction inputs; a dictionary where keys are channel
                indices and values are numpy arrays with channel inputs, or a list
                of such arrays.
        Returns:
            mu (ndarray): Posterior mean.
            lower (ndarray): Lower confidence interval.
            upper (ndarray): Upper confidence interval.
Examples:
>>> model.predict(plot=True)
"""
if x is not None:
self.dataset.set_prediction_x(x)
x = self.dataset._to_kernel_prediction()
if len(x) == 0:
raise Exception('no prediction x range set, use x argument or set manually using DataSet.set_prediction_x() or Data.set_prediction_x()')
mu, var = self.model.predict_f(x)
self.dataset._from_kernel_prediction(self.name, mu, var)
if plot:
self.plot_prediction()
_, mu, lower, upper = self.dataset.get_prediction(self.name)
return mu, lower, upper
def plot_prediction(self, grid=None, figsize=None, ylims=None, names=None, title=''):
"""
Plot training points, all data and prediction for training range for all channels.
Args:
            grid (tuple): Tuple with the 2 dimensions of the subplot grid.
            figsize (tuple): Figure size, defaults to (12, 2.6 * number of rows).
            ylims (list): List of tuples with Y-axis limits for each channel.
            names (list): List of titles for each channel.
            title (str): Title of the plot.
        Returns:
            fig: Matplotlib figure.
            axes: Array of matplotlib axes.
"""
#TODO: Add case for single output SM kernel.
# get data
x_train, y_train = self.dataset.get_train_data()
x_all, y_all = self.dataset.get_data()
x_pred, mu, lower, upper = self.dataset.get_prediction(self.name)
n_dim = self.dataset.get_output_dims()
if n_dim == 1:
grid = (1, 1)
elif grid is None:
grid = (int(np.ceil(n_dim/2)), 2)
if (grid[0] * grid[1]) < n_dim:
raise Exception('grid not big enough for all channels')
if figsize is None:
figsize = (12, 2.6 * grid[0])
fig, axes = plt.subplots(grid[0], grid[1], sharex=False, figsize=figsize)
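        # flatten the axes grid so channels can be indexed linearly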
axes = np.array(axes).reshape(-1)
colors = list(matplotlib.colors.TABLEAU_COLORS)
for i in range(n_dim):
axes[i].fill_between(x_pred[i][:,0].reshape(-1),
lower[i],
upper[i],
label='95% c.i',
color=colors[i%len(colors)],
alpha=0.4,
zorder=1)
axes[i].plot(x_pred[i][:,0], mu[i], label='Post.Mean', c=colors[i%len(colors)], zorder=4, lw=1.8)
axes[i].plot(x_all[i][:,0], y_all[i], '--k', label='Test', lw=1, alpha=0.8, zorder=2)
axes[i].plot(x_train[i][:,0], y_train[i], '.k', label='Train', ms=11, mew=0.8, markeredgecolor='white', zorder=3)
axes[i].xaxis.set_major_locator(plt.MaxNLocator(5))
formatter = matplotlib.ticker.FuncFormatter(lambda x,pos: self.dataset.get(i).formatters[0].format(x))
axes[i].xaxis.set_major_formatter(formatter)
xmin = min(x_all[i].min(), x_pred[i].min())
xmax = max(x_all[i].max(), x_pred[i].max())
axes[i].set_xlim(xmin - (xmax - xmin)*0.005, xmax + (xmax - xmin)*0.005)
# set channels name
if names is not None:
axes[i].set_title(names[i])
else:
channel_name = self.dataset.get_names()[i]
if channel_name != '':
axes[i].set_title(channel_name)
elif n_dim == 1:
pass
else:
axes[i].set_title('Channel ' + str(i))
# set y lims
if ylims is not None:
axes[i].set_ylim(ylims[i])
plt.suptitle(title, y=1.02, fontsize=20)
plt.tight_layout()
return fig, axes
def plot_gram_matrix(self, xmin=None, xmax=None, n_points=31, figsize=(10, 10), title=''):
"""
        Plot the Gram matrix of the associated kernel.
        The Gram matrix is evaluated on an equally spaced grid of n_points
        between [xmin_i, xmax_i] for each channel i = 0, ..., n_channels - 1.
        Args:
            xmin (float, list, array): Lower input bound per channel; defaults to each channel's minimum.
            xmax (float, list, array): Upper input bound per channel; defaults to each channel's maximum.
            n_points (int): Number of points per channel.
figsize (2-tuple of ints): Figure size.
title (str): Figure title.
Returns:
fig : Matplotlib figure
ax : Matplotlib axis
"""
if xmin is None:
xmin = [data.X.min() for data in self.dataset]
if xmax is None:
xmax = [data.X.max() for data in self.dataset]
M = len(self.dataset)
if not isinstance(xmin, (list, np.ndarray)):
xmin = [xmin] * M
if not isinstance(xmax, (list, np.ndarray)):
xmax = [xmax] * M
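        # build augmented inputs: column 0 holds the channel index, column 1 the input
        # location, matching the input format expected by the multi-output kernel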
xx = np.zeros((M * n_points, 2))
xx[:, 0] = np.repeat(np.arange(M), n_points)
for m in range(M):
xx[m * n_points: (m + 1) * n_points, 1] = np.linspace(xmin[m], xmax[m], n_points)
K_gram = self.model.kernel.K(xx)
fig, ax = plt.subplots(figsize=figsize)
color_range = np.abs(K_gram).max()
norm = mpl.colors.Normalize(vmin=-color_range, vmax=color_range)
im = ax.matshow(K_gram, cmap='coolwarm', norm=norm)
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.3)
fig.colorbar(im, cax=cax)
        # one major tick per channel block so the grid lines separate the channels
        major_ticks = np.arange(-0.5, M * n_points, n_points)
ax.set_xticks(major_ticks)
ax.set_yticks(major_ticks)
ax.grid(which='major', alpha=.8, linewidth=1.5, color='k')
        ax.set_xticklabels([])
        ax.set_yticklabels([])
ax.set_title(title)
return fig, ax
Classes
class model (name, dataset)
Base class for Multi-Output Gaussian process models. See subclasses for instantiation.
Args
name : str
    Name of the model.
dataset : DataSet, Data
    DataSet with Data objects for all the channels. When a (list or dict of) Data object is passed, it will automatically be converted to a DataSet.
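For example, a minimal sketch assuming the SM subclass and an existing mogptk DataSet (the subclass choice and Q are illustrative):
>>> model = mogptk.SM(dataset, Q=2)
>>> model.train()
>>> model.predict(plot=True)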
Subclasses
Methods
def fix_parameter(self, q, key)
Make a parameter untrainable (undo with unfix_parameter).
Args
q : int, list, or array-like of ints
    Components to fix.
key : str
    Name of the parameter.
Examples
>>> model.fix_parameter([0, 1], 'variance')
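A typical workflow fixes a parameter before training and unfixes it afterwards, for instance (assuming components 0 and 1 define a 'variance' parameter):
>>> model.fix_parameter([0, 1], 'variance')
>>> model.train()
>>> model.unfix_parameter([0, 1], 'variance')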
def get_likelihood_parameters(self)
Returns all parameters set for the likelihood.
Examples
>>> params = model.get_likelihood_parameters()
def get_parameter(self, q, key)
Gets a kernel parameter for component q, with key the parameter name.
Args
q : int
    Component of kernel.
key : str
    Name of the parameter.
Returns
val : numpy.ndarray
    Value of parameter.
Examples
>>> val = model.get_parameter(0, 'variance') # for Q=0 get the parameter called 'variance'
def get_parameters(self)
Returns all parameters set for the kernel per component.
Examples
>>> params = model.get_parameters()
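The result is a list with one dict per component, mapping parameter names to numpy arrays. For instance (the parameter name is illustrative and depends on the kernel):
>>> params = model.get_parameters()
>>> params[0]['variance']  # value for component Q=0, if the kernel defines 'variance'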
def load_parameters(self, filename)
Load model parameters from a given file that was previously saved with save_parameters().
Args
filename : str
    Filename to load from; the model name and '.params' are appended automatically.
Examples
>>> model.load_parameters('filename')
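For example, a save/load round trip (the file on disk is named 'filename.<model name>.params'):
>>> model.save_parameters('filename')
>>> model.load_parameters('filename')  # model class and number of components must match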
def plot_gram_matrix(self, xmin=None, xmax=None, n_points=31, figsize=(10, 10), title='')
Plot the Gram matrix of the associated kernel.
The Gram matrix is evaluated on an equally spaced grid of n_points between [xmin_i, xmax_i] for each channel i = 0, …, n_channels - 1.
Args
xmin : float, list, array
    Lower input bound per channel; defaults to each channel's minimum.
xmax : float, list, array
    Upper input bound per channel; defaults to each channel's maximum.
n_points : int
    Number of points per channel.
figsize : 2-tuple of ints
    Figure size.
title : str
    Figure title.
Returns
fig : Matplotlib figure
ax : Matplotlib axis
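For example, a sketch evaluating the kernel on a finer grid over each channel's data range:
>>> fig, ax = model.plot_gram_matrix(n_points=100, title='Gram matrix')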
def plot_prediction(self, grid=None, figsize=None, ylims=None, names=None, title='')
Plot training points, all data, and the prediction for all channels.
Args
grid : tuple
    Tuple with the 2 dimensions of the subplot grid.
figsize : tuple
    Figure size, defaults to (12, 2.6 * number of rows).
ylims : list
    List of tuples with Y-axis limits for each channel.
names : list
    List of titles for each channel.
title : str
    Title of the plot.
Returns
fig : Matplotlib figure
axes : Array of matplotlib axes
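For example, assuming a model with four channels arranged on a 2x2 grid:
>>> model.predict()
>>> fig, axes = model.plot_prediction(grid=(2, 2), title='Predictions')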
def predict(self, x=None, plot=False)
Predict with model.
Will make a prediction using x as input. If no input value is passed, the prediction will be made with the attribute self.X_pred, which can be set with other functions. It returns the posterior mean and confidence interval per channel.
Args
x : list, dict
    Prediction inputs; a dictionary where keys are channel indices and values are numpy arrays with channel inputs, or a list of such arrays.
Returns
mu : ndarray
    Posterior mean.
lower : ndarray
    Lower confidence interval.
upper : ndarray
    Upper confidence interval.
Examples
>>> model.predict(plot=True)
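For example, predicting on explicit inputs for a two-channel model (the input ranges are illustrative):
>>> x = {0: np.linspace(0, 10, 100), 1: np.linspace(0, 10, 100)}
>>> mu, lower, upper = model.predict(x)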
def print_parameters(self)
Print the parameters of the model in a table.
Examples
>>> model.print_parameters()
def save_parameters(self, filename)
Save model parameters to a given file that can then be loaded with load_parameters().
Args
filename : str
    Filename to save to; the model name and '.params' are appended automatically.
Examples
>>> model.save_parameters('filename')
def set_likelihood_parameter(self, key, val)
Sets a likelihood parameter with key the parameter name.
Args
key : str
    Name of the parameter.
val : float, ndarray
    Value of parameter.
Examples
>>> model.set_likelihood_parameter('variance', np.array([5.0, 3.0])) # set the parameter called 'variance'
def set_parameter(self, q, key, val)
Sets a kernel parameter for component q, with key the parameter name.
Args
q : int
    Component of kernel.
key : str
    Name of the parameter.
val : float, numpy.ndarray
    Value of parameter.
Examples
>>> model.set_parameter(0, 'variance', np.array([5.0, 3.0])) # for Q=0 set the parameter called 'variance'
def train(self, method='L-BFGS-B', tol=1e-06, maxiter=500, params={}, verbose=False)
Trains the model using the kernel and its parameters.
For the available optimizers, see scipy.optimize.minimize. Training is bounded by a maximum number of iterations, and verbose output prints optimization information. When using the 'Adam' optimizer, a learning_rate can be passed through params.
Args
method : str
    Optimizer to use. If "Adam" is chosen, tf.optimizers.Adam will be used; otherwise the given scipy optimizer is used. Defaults to scipy's 'L-BFGS-B'.
tol : float
    Tolerance for the optimizer. Defaults to 1e-6.
maxiter : int
    Maximum number of iterations. Defaults to 500.
params : dict
    Additional parameters passed to the optimizer.
verbose : bool
    Print verbose output about the state of the optimizer.
Examples
>>> model.train(tol=1e-6, maxiter=10000)
>>> model.train(method='Adam', params={'learning_rate': 0.01})
def unfix_parameter(self, q, key)
Make a parameter trainable again (that was previously fixed, see fix_parameter).
Args
q : int, list, or array-like of ints
    Components to unfix.
key : str
    Name of the parameter.
Examples
>>> model.unfix_parameter([0, 1], 'variance')