In [1]:
import mogptk
import pandas as pd
import numpy as np
In [65]:
df = pd.read_table('data/AirQualityUCI.csv', sep=';')
df.replace(-200.0, np.nan, inplace=True)

df['NMHC(GT)'] = df['NMHC(GT)'].shift(12)

# First 2 columns are date and time, we convert it to a single column with datetime format
df['Date'] = pd.to_datetime(df['Date'] + ' ' + df['Time'], format='%d/%m/%Y %H.%M.%S')
df['Time'] = (df['Date'] - pd.Timestamp('2004-03-12')) / pd.Timedelta(hours=1)

df = df[(df['Date'] >= '2004-03-12') & (df['Date'] < '2004-03-19')]

data = mogptk.LoadDataFrame(df,
                            x_col='Time',
                            y_col=['CO(GT)', 'NOx(GT)', 'NO2(GT)', 'T'])

data[0].remove_relative_range(0.2, 0.3)
#data[1].remove_relative_range(0.8, 1.0)
#data[2].remove_relative_range(0.9, None)
data[1].remove_relative_range(0.8, 1.0)
data[2].remove_relative_range(0.0, 0.2)
data[3].set_name('Temperature')

for channel in data:
    channel.remove_randomly(pct=0.3)
    channel.transform(mogptk.TransformDetrend(degree=1))
    channel.transform(mogptk.TransformWhiten())
data.plot()
Out[65]:
(<Figure size 864x720 with 4 Axes>,
 array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7f66044b55d0>],
        [<matplotlib.axes._subplots.AxesSubplot object at 0x7f65edb8eb10>],
        [<matplotlib.axes._subplots.AxesSubplot object at 0x7f65eec1c3d0>],
        [<matplotlib.axes._subplots.AxesSubplot object at 0x7f65ee573890>]],
       dtype=object))
In [70]:
mosm = mogptk.MOSM(data, Q=3)
mosm.init_parameters('BNSE')
mosm.print_parameters()
mosm.predict();

import matplotlib.patches as patches

f, ax = mosm.plot_prediction(grid=(2, 2), figsize=(15, 5), title='Initialized MOSM on air quality data')
pos = [[0.21, 0.3], [0.8, 1.0], [0.0, 0.2]]
for i in range(3):
    x0 = ax[i].get_xlim()[0]
    x1 = ax[i].get_xlim()[1]
    ax[i].add_patch(patches.Rectangle(
                 (x0+pos[i][0]*(x1-x0), ax[i].get_ylim()[0]),           # (x,y)
                 (pos[i][1]-pos[i][0])*(x1-x0),                                 # width
                 ax[i].get_ylim()[1] - ax[i].get_ylim()[0],  # height
                 fill=True,
                 color='xkcd:strawberry',
                 alpha=0.25,
                 lw=0,
                 ))

f.savefig('air_quality_pre.pdf', bbox_inches='tight')
KernelNameTrainShapeDtypeValue
mosm
Q=0
magnitudeTrue(4,)float64[1.347 1.423 1.581 1.825]
meanTrue(1, 4)float64[[0.425 0.199 0.783 0.037]]
varianceTrue(1, 4)float64[[0.006 0.006 0.007 0.004]]
delayTrue(1, 4)float64[[0.000 0.000 0.000 0.000]]
phaseTrue(4,)float64[0.000 0.000 0.000 0.000]
mosm
Q=1
magnitudeTrue(4,)float64[1.283 1.178 1.072 0.633]
meanTrue(1, 4)float64[[0.425 0.199 0.783 0.037]]
varianceTrue(1, 4)float64[[0.006 0.006 0.007 0.004]]
delayTrue(1, 4)float64[[0.000 0.000 0.000 0.000]]
phaseTrue(4,)float64[0.000 0.000 0.000 0.000]
mosm
Q=2
magnitudeTrue(4,)float64[0.734 0.766 0.592 0.518]
meanTrue(1, 4)float64[[0.425 0.199 0.783 0.037]]
varianceTrue(1, 4)float64[[0.006 0.006 0.007 0.004]]
delayTrue(1, 4)float64[[0.000 0.000 0.000 0.000]]
phaseTrue(4,)float64[0.000 0.000 0.000 0.000]
noise
Q=3
noiseTrue(4,)float64[0.033 0.033 0.033 0.033]
gaussian
likelihood
varianceTrue()float641.000
In [67]:
#for q in range(3):
#    param = mosm.get_parameter(q, 'delay')
#    param[0,1] = 12
#    mosm.set_parameter(q, 'delay', param)
#    
#    param = mosm.get_parameter(q, 'phase')
#    param[1] = 12
#    mosm.set_parameter(q, 'phase', param)
#mosm.print_parameters()
#mosm.predict();
#mosm.plot_prediction(grid=(2, 2), figsize=(15, 5))
In [72]:
mosm.train('L-BFGS-B', verbose=True)
mosm.print_parameters()
mosm.predict();
Starting optimization
 >Model: MOSM
 >Channels: 4                    
 >Components: 3
 >Training points: 399
 >Parameters: 65
 >Initial NLL: 558.024
Optimization finished in 1.26 minutes
 >Final NLL: 181.386 

KernelNameTrainShapeDtypeValue
mosm
Q=0
magnitudeTrue(4,)float64[1.273 1.326 1.293 1.562]
meanTrue(1, 4)float64[[0.173 0.156 0.315 0.310]]
varianceTrue(1, 4)float64[[0.005 0.005 0.007 0.019]]
delayTrue(1, 4)float64[[-2.358 1.226 3.103 -1.971]]
phaseTrue(4,)float64[ 0.562 -0.183 -1.001 0.622]
mosm
Q=1
magnitudeTrue(4,)float64[ 0.728 0.717 0.579 -1.073]
meanTrue(1, 4)float64[[0.615 0.508 0.566 0.099]]
varianceTrue(1, 4)float64[[2.167e-01 2.673e-01 2.661e-01 1.707e-05]]
delayTrue(1, 4)float64[[ 0.031 -0.247 -0.414 0.631]]
phaseTrue(4,)float64[ 0.004 0.093 0.007 -0.104]
mosm
Q=2
magnitudeTrue(4,)float64[ 0.269 -0.029 0.145 0.048]
meanTrue(1, 4)float64[[2.472 0.080 2.013 0.042]]
varianceTrue(1, 4)float64[[0.134 0.007 0.693 0.004]]
delayTrue(1, 4)float64[[ 1.225 0.074 -1.229 -0.069]]
phaseTrue(4,)float64[-3.332 0.374 3.289 -0.331]
noise
Q=3
noiseTrue(4,)float64[0.002 0.069 0.001 0.021]
gaussian
likelihood
varianceTrue()float640.001
In [73]:
import matplotlib.patches as patches

f, ax = mosm.plot_prediction(grid=(2, 2), figsize=(15, 5), title='Trained MOSM on air quality data')
pos = [[0.21, 0.3], [0.8, 1.0], [0.0, 0.2]]
for i in range(3):
    x0 = ax[i].get_xlim()[0]
    x1 = ax[i].get_xlim()[1]
    ax[i].add_patch(patches.Rectangle(
                 (x0+pos[i][0]*(x1-x0), ax[i].get_ylim()[0]),           # (x,y)
                 (pos[i][1]-pos[i][0])*(x1-x0),                                 # width
                 ax[i].get_ylim()[1] - ax[i].get_ylim()[0],  # height
                 fill=True,
                 color='xkcd:strawberry',
                 alpha=0.25,
                 lw=0,
                 ))
f.savefig('air_quality.pdf', bbox_inches='tight')
In [16]:
mosm.plot_gram_matrix()
Out[16]:
(<Figure size 720x720 with 2 Axes>,
 <matplotlib.axes._subplots.AxesSubplot at 0x7faeec883110>)
In [17]:
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 2, figsize=(16, 4))
data[0].plot(ax=ax[0])
data[1].plot(ax=ax[1])

ax[0].xaxis.set_major_locator(plt.MaxNLocator(6))
ax[1].xaxis.set_major_locator(plt.MaxNLocator(6))
In [60]:
import matplotlib.pyplot as plt
import matplotlib

x = np.linspace(0, 10, 500)
one = np.sin(x) + np.random.normal(0.0, 0.1, len(x))
two = np.sin(x) + np.random.normal(0.0, 0.1, len(x))
three = -0.5 + x*0.1 + np.random.normal(0.0, 0.1, len(x))
four = -np.sin(x) + np.random.normal(0.0, 0.1, len(x))
data = [one, three, four]

fig, axes = plt.subplots(1,1, sharex=True, sharey=True, figsize=(4,3))
axes = np.array(axes).reshape(-1)

colors = list(matplotlib.colors.TABLEAU_COLORS)
for i, channel in enumerate(data):
    #axes[i].plot(x_pred[i][:,0], mu[i], label='Post.Mean', c=colors[i%len(colors)], zorder=4, lw=1.8)
    axes[0].plot(x, channel, '-k', label='Test', lw=1, alpha=1, zorder=2, c=colors[i%len(colors)])
    #axes[i].plot(x_train[i][:,0], y_train[i], '.k', label='Train', ms=11, mew=0.8, markeredgecolor='white', zorder=3)

    axes[0].xaxis.set_major_locator(plt.MaxNLocator(5))
    axes[0].set_xlim(0, 10)
    axes[0].set_ylim(-2, 2)

plt.suptitle('', y=1.02, fontsize=20)
plt.tight_layout()
plt.savefig('channel_correlation.pdf', bbox_inches='tight')
In [ ]: