Module mogptk.sm

import numpy as np
from .model import model
from .kernels import SpectralMixture, sm_init, Noise
from .plot import plot_spectrum
from scipy.stats import norm
import matplotlib.pyplot as plt
import logging

logger = logging.getLogger('mogptk')

def _estimate_from_sm(dataset, Q, method='BNSE', optimizer='BFGS', maxiter=2000, plot=False, fix_means=False):
    """
    Estimate kernel parameters with a single-output GP-SM fitted per channel.

    Args:
        dataset (mogptk.DataSet): DataSet object with the data for all channels.
        Q (int): Number of components.
        method (str): Method of estimation, 'BNSE', 'LS' or 'AGW'.
        optimizer (str): Optimization method, either 'Adam' or any
            scipy optimizer.
        maxiter (int): Maximum number of iterations.
        plot (bool): If True, plot the kernel PSD of each channel.
        fix_means (bool): Fix the spectral means to zero during training.

    Returns:
        params[q][name][input dim, output dim]
    """
    input_dims = dataset.get_input_dims()[0]
    output_dims = dataset.get_output_dims()

    params = []
    for q in range(Q):
        params.append({
            'weight': np.empty((input_dims, output_dims)),
            'mean': np.empty((input_dims, output_dims)),
            'scale': np.empty((input_dims, output_dims)),
        })

    for channel in range(output_dims):
        for i in range(input_dims):  # TODO one SM per channel
            sm = SM(dataset[channel], Q)
            sm.init_parameters(method)

            if fix_means:
                sm.set_parameter(0, 'mixture_means', np.zeros((Q, input_dims)))
                sm.set_parameter(0, 'mixture_scales', sm.get_parameter(0, 'mixture_scales') * 50)
                sm.fix_parameters(0, 'mixture_means')

            sm.train(method=optimizer, maxiter=maxiter)

            if plot:
                nyquist = dataset[channel].get_nyquist_estimation()
                means = sm.get_parameter(0, 'mixture_means')
                weights = sm.get_parameter(0, 'mixture_weights')
                scales = sm.get_parameter(0, 'mixture_scales').T
                plot_spectrum(means, scales, weights=weights, nyquist=nyquist, title=dataset[channel].name)

            for q in range(Q):
                params[q]['weight'][i,channel] = sm.get_parameter(0, 'mixture_weights')[q]
                params[q]['mean'][i,channel] = sm.get_parameter(0, 'mixture_means')[q,:] * np.pi * 2
                params[q]['scale'][i,channel] = sm.get_parameter(0, 'mixture_scales')[:,q]

    return params

class SM(model):
    """
    A single-output Gaussian process with a spectral mixture (SM) kernel, as proposed by [1].

    The model contains the dataset and the associated GPflow model.
    When the mogptk.Model is instantiated, the GPflow model is built
    using random parameters.

    Args:
        dataset (mogptk.dataset.DataSet): DataSet object of data for all channels. Only one channel allowed for SM.
        Q (int, optional): Number of components.
        name (str, optional): Name of the model.
        likelihood (gpflow.likelihoods, optional): Likelihood to use from GPFlow, if None a default exact inference Gaussian likelihood is used.
        variational (bool, optional): If True, use variational inference to approximate function values as Gaussian. If False it will use Monte Carlo Markov Chain.
        sparse (bool, optional): If True, will use sparse GP regression.
        like_params (dict, optional): Parameters to GPflow likelihood.

    Examples:

    >>> import numpy as np
    >>> t = np.linspace(0, 10, 100)
    >>> y = np.sin(0.5 * t)
    >>> import mogptk
    >>> data = mogptk.Data(t, y)
    >>> model = mogptk.SM([data], Q=1)
    >>> model.build()
    >>> model.train()
    >>> model.predict([np.linspace(1, 15, 150)])

    [1] A.G. Wilson and R.P. Adams, "Gaussian Process Kernels for Pattern Discovery and Extrapolation", International Conference on Machine Learning 30, 2013
    """
    def __init__(self, dataset, Q=1, name="SM", likelihood=None, variational=False, sparse=False, like_params={}):
        model.__init__(self, name, dataset)
        self.Q = Q

        if self.dataset.get_output_dims() != 1:
            raise Exception("single output spectral mixture kernel can only have one output dimension in the data")

        kernel = SpectralMixture(
            self.dataset.get_input_dims()[0],
            self.Q,
        )
        self._build(kernel, likelihood, variational, sparse, like_params)

    def init_parameters(self, method='BNSE'):
        """
        Initialize parameters of kernel from data using different methods.

        Kernel parameters can be initialized from the training data using three heuristics:

        'AGW': (taken from the PhD thesis of Andrew Wilson, 2014) takes the inverse
            of lengthscales drawn from a truncated Gaussian N(0, max_dist^2), the
            means drawn from Unif(0, 0.5 / minimum distance between two points),
            and the mixture weights as the standard deviation of the y values
            divided by the number of mixtures.
        'LS': uses the Lomb-Scargle periodogram to estimate the PSD,
            and uses the first Q peaks as the means and mixture weights.
        'BNSE': uses BNSE (Tobar 2018) to estimate the PSD
            and uses the first Q peaks as the means and mixture weights.

        In all cases the noise is initialized to 1/30 of the variance of each channel.

        Note: only supports a single input dimension for each channel.
        """

        if method not in ['AGW', 'LS', 'BNSE']:
            raise Exception("possible methods are 'AGW', 'LS' and 'BNSE' (see documentation).")

        if method == 'AGW':
            x, y = self.dataset[0].get_train_data()
            weights, means, scales = sm_init(x, y, self.Q)
            self.set_parameter(0, 'mixture_weights', weights)
            self.set_parameter(0, 'mixture_means', np.array(means))
            self.set_parameter(0, 'mixture_scales', np.array(scales.T))

        elif method == 'LS':
            amplitudes, means, variances = self.dataset[0].get_lombscargle_estimation(self.Q)
            if len(amplitudes) == 0:
                logger.warning('LS could not find peaks for SM')
                return
            
            mixture_weights = amplitudes.mean(axis=0) / amplitudes.sum() * self.dataset[0].Y[self.dataset[0].mask].var() * 2

            self.set_parameter(0, 'mixture_weights', mixture_weights)
            self.set_parameter(0, 'mixture_means', means.T)
            self.set_parameter(0, 'mixture_scales', variances * 2.0)

        elif method == 'BNSE':
            amplitudes, means, variances = self.dataset[0].get_bnse_estimation(self.Q)
            if np.sum(amplitudes) == 0.0:
                logger.warning('BNSE could not find peaks for SM')
                return

            mixture_weights = amplitudes.mean(axis=0) / amplitudes.sum() * self.dataset[0].Y[self.dataset[0].mask].var() * 2

            self.set_parameter(0, 'mixture_weights', mixture_weights)
            self.set_parameter(0, 'mixture_means', means.T)
            self.set_parameter(0, 'mixture_scales', variances * 2.0)

    def plot_psd(self, figsize=(10, 4), title='', log_scale=False):
        """
        Plot power spectral density of single output GP-SM.
        """
        means = self.get_parameter(0, 'mixture_means') * 2.0 * np.pi
        weights = self.get_parameter(0, 'mixture_weights')
        scales = self.get_parameter(0, 'mixture_scales').T
        
        # calculate bounds
        x_low = norm.ppf(0.001, loc=means, scale=scales).min()
        x_high = norm.ppf(0.99, loc=means, scale=scales).max()
        
        x = np.linspace(0, x_high + 1, 10000)

        psd = np.zeros_like(x)

        fig, axes = plt.subplots(1, 1, figsize=figsize)
        for q in range(self.Q):
            single_psd = weights[q] * norm.pdf(x, loc=means[q], scale=scales[q])
            axes.plot(x, single_psd, '--', lw=1.2, c='xkcd:strawberry', zorder=2)
            axes.axvline(means[q], ymin=0.001, ymax=0.1, lw=2, color='grey')
            psd = psd + single_psd
            
        # symmetrize PSD
        if psd[x<0].size != 0:
            psd = psd + np.r_[psd[x<0][::-1], np.zeros((x>=0).sum())]
            
        axes.plot(x, psd, lw=2.5, c='r', alpha=0.7, zorder=1)
        axes.set_xlim(0, x[-1] + 0.1)
        if log_scale:
            axes.set_yscale('log')
        axes.set_xlabel('Frequency [Hz]')
        axes.set_ylabel('PSD')
        axes.set_title(title)

        return fig, axes

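The private helper _estimate_from_sm above returns, for each component q, a dict of arrays indexed as [input dim, output dim]. A minimal sketch of how a caller might read the result; the dataset and shapes are illustrative, assuming one input dimension and two channels:

>>> params = _estimate_from_sm(dataset, Q=2)   # dataset: a mogptk.DataSet with 2 channels
>>> params[0]['weight'].shape                  # (input_dims, output_dims)
(1, 2)
>>> params[0]['mean'][0, 1]                    # component 0, input dim 0, channel 1; means are scaled by 2*pi
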
Classes

class SM (dataset, Q=1, name='SM', likelihood=None, variational=False, sparse=False, like_params={})

A single-output Gaussian process with a spectral mixture (SM) kernel, as proposed by [1].

The model contains the dataset and the associated GPflow model. When the mogptk.Model is instantiated, the GPflow model is built using random parameters.

Args

dataset : DataSet
DataSet object of data for all channels. Only one channel allowed for SM.
Q : int, optional
Number of components.
name : str, optional
Name of the model.
likelihood : gpflow.likelihoods, optional
Likelihood to use from GPFlow, if None a default exact inference Gaussian likelihood is used.
variational : bool, optional
If True, use variational inference to approximate function values as Gaussian. If False it will use Monte Carlo Markov Chain.
sparse : bool, optional
If True, will use sparse GP regression.
like_params : dict, optional
Parameters to GPflow likelihood.

Examples:

>>> import numpy as np
>>> t = np.linspace(0, 10, 100)
>>> y = np.sin(0.5 * t)
>>> import mogptk
>>> data = mogptk.Data(t, y)
>>> model = mogptk.SM([data], Q=1)
>>> model.build()
>>> model.train()
>>> model.predict([np.linspace(1, 15, 150)])

[1] A.G. Wilson and R.P. Adams, "Gaussian Process Kernels for Pattern Discovery and Extrapolation", International Conference on Machine Learning 30, 2013

Base class for Multi-Output Gaussian process models. See subclasses for instantiation.

Args

name : str
Name of the model.
dataset : DataSet, Data
DataSet with Data objects for all the channels.

When a (list or dict of) Data object is passed, it will automatically be converted to a DataSet.


Ancestors

mogptk.model.model

Methods

def init_parameters(self, method='BNSE')

Initialize parameters of kernel from data using different methods.

Kernel parameters can be initialized from the training data using three heuristics:

'AGW': (taken from the PhD thesis of Andrew Wilson, 2014) takes the inverse of lengthscales drawn from a truncated Gaussian N(0, max_dist^2), means drawn from Unif(0, 0.5 / minimum distance between two points), and the mixture weights as the standard deviation of the y values divided by the number of mixtures.

'LS': uses the Lomb-Scargle periodogram to estimate the PSD, and uses the first Q peaks as the means and mixture weights.

'BNSE': uses BNSE (Tobar 2018) to estimate the PSD and uses the first Q peaks as the means and mixture weights.

In all cases the noise is initialized to 1/30 of the variance of each channel.

Note: only supports a single input dimension for each channel.

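A minimal usage sketch, mirroring how the module's own _estimate_from_sm helper calls this method; Q and the training settings are illustrative:

>>> model = mogptk.SM([data], Q=2)
>>> model.init_parameters('BNSE')             # or 'LS', 'AGW'
>>> model.train(method='BFGS', maxiter=2000)
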
def plot_psd(self, figsize=(10, 4), title='', log_scale=False)

Plot power spectral density of single output GP-SM.

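A short usage sketch; the curve drawn is the sum of one weighted Gaussian per mixture component, exactly as in the loop inside this method (the component values below are purely illustrative):

>>> import numpy as np
>>> from scipy.stats import norm
>>> fig, axes = model.plot_psd(figsize=(10, 4), log_scale=True)
>>> # the same mixture PSD computed directly, with illustrative parameter values
>>> weights, means, scales = [1.0, 0.5], [0.08, 0.25], [0.02, 0.05]
>>> x = np.linspace(0, 0.5, 1000)
>>> psd = sum(w * norm.pdf(x, loc=m, scale=s) for w, m, s in zip(weights, means, scales))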

Inherited members