import mogptk
import pandas as pd
import numpy as np
df = pd.read_table('data/AirQualityUCI.csv', sep=';')
df.replace(-200.0, np.nan, inplace=True)
df['NMHC(GT)'] = df['NMHC(GT)'].shift(12)
# First 2 columns are date and time, we convert it to a single column with datetime format
df['Date'] = pd.to_datetime(df['Date'] + ' ' + df['Time'], format='%d/%m/%Y %H.%M.%S')
df['Time'] = (df['Date'] - pd.Timestamp('2004-03-12')) / pd.Timedelta(hours=1)
df = df[(df['Date'] >= '2004-03-12') & (df['Date'] < '2004-03-19')]
data = mogptk.LoadDataFrame(df,
x_col='Time',
y_col=['CO(GT)', 'NOx(GT)', 'NO2(GT)', 'T'])
data[0].remove_relative_range(0.2, 0.3)
#data[1].remove_relative_range(0.8, 1.0)
#data[2].remove_relative_range(0.9, None)
data[1].remove_relative_range(0.8, 1.0)
data[2].remove_relative_range(0.0, 0.2)
data[3].set_name('Temperature')
for channel in data:
channel.remove_randomly(pct=0.3)
channel.transform(mogptk.TransformDetrend(degree=1))
channel.transform(mogptk.TransformWhiten())
data.plot()
(<Figure size 864x720 with 4 Axes>, array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7f66044b55d0>], [<matplotlib.axes._subplots.AxesSubplot object at 0x7f65edb8eb10>], [<matplotlib.axes._subplots.AxesSubplot object at 0x7f65eec1c3d0>], [<matplotlib.axes._subplots.AxesSubplot object at 0x7f65ee573890>]], dtype=object))
mosm = mogptk.MOSM(data, Q=3)
mosm.init_parameters('BNSE')
mosm.print_parameters()
mosm.predict();
import matplotlib.patches as patches
f, ax = mosm.plot_prediction(grid=(2, 2), figsize=(15, 5), title='Initialized MOSM on air quality data')
pos = [[0.21, 0.3], [0.8, 1.0], [0.0, 0.2]]
for i in range(3):
x0 = ax[i].get_xlim()[0]
x1 = ax[i].get_xlim()[1]
ax[i].add_patch(patches.Rectangle(
(x0+pos[i][0]*(x1-x0), ax[i].get_ylim()[0]), # (x,y)
(pos[i][1]-pos[i][0])*(x1-x0), # width
ax[i].get_ylim()[1] - ax[i].get_ylim()[0], # height
fill=True,
color='xkcd:strawberry',
alpha=0.25,
lw=0,
))
f.savefig('air_quality_pre.pdf', bbox_inches='tight')
Kernel | Name | Train | Shape | Dtype | Value |
---|---|---|---|---|---|
mosm Q=0 | magnitude | True | (4,) | float64 | [1.347 1.423 1.581 1.825] |
mean | True | (1, 4) | float64 | [[0.425 0.199 0.783 0.037]] | |
variance | True | (1, 4) | float64 | [[0.006 0.006 0.007 0.004]] | |
delay | True | (1, 4) | float64 | [[0.000 0.000 0.000 0.000]] | |
phase | True | (4,) | float64 | [0.000 0.000 0.000 0.000] | |
mosm Q=1 | magnitude | True | (4,) | float64 | [1.283 1.178 1.072 0.633] |
mean | True | (1, 4) | float64 | [[0.425 0.199 0.783 0.037]] | |
variance | True | (1, 4) | float64 | [[0.006 0.006 0.007 0.004]] | |
delay | True | (1, 4) | float64 | [[0.000 0.000 0.000 0.000]] | |
phase | True | (4,) | float64 | [0.000 0.000 0.000 0.000] | |
mosm Q=2 | magnitude | True | (4,) | float64 | [0.734 0.766 0.592 0.518] |
mean | True | (1, 4) | float64 | [[0.425 0.199 0.783 0.037]] | |
variance | True | (1, 4) | float64 | [[0.006 0.006 0.007 0.004]] | |
delay | True | (1, 4) | float64 | [[0.000 0.000 0.000 0.000]] | |
phase | True | (4,) | float64 | [0.000 0.000 0.000 0.000] | |
noise Q=3 | noise | True | (4,) | float64 | [0.033 0.033 0.033 0.033] |
gaussian likelihood | variance | True | () | float64 | 1.000 |
#for q in range(3):
# param = mosm.get_parameter(q, 'delay')
# param[0,1] = 12
# mosm.set_parameter(q, 'delay', param)
#
# param = mosm.get_parameter(q, 'phase')
# param[1] = 12
# mosm.set_parameter(q, 'phase', param)
#mosm.print_parameters()
#mosm.predict();
#mosm.plot_prediction(grid=(2, 2), figsize=(15, 5))
mosm.train('L-BFGS-B', verbose=True)
mosm.print_parameters()
mosm.predict();
Starting optimization >Model: MOSM >Channels: 4 >Components: 3 >Training points: 399 >Parameters: 65 >Initial NLL: 558.024 Optimization finished in 1.26 minutes >Final NLL: 181.386
Kernel | Name | Train | Shape | Dtype | Value |
---|---|---|---|---|---|
mosm Q=0 | magnitude | True | (4,) | float64 | [1.273 1.326 1.293 1.562] |
mean | True | (1, 4) | float64 | [[0.173 0.156 0.315 0.310]] | |
variance | True | (1, 4) | float64 | [[0.005 0.005 0.007 0.019]] | |
delay | True | (1, 4) | float64 | [[-2.358 1.226 3.103 -1.971]] | |
phase | True | (4,) | float64 | [ 0.562 -0.183 -1.001 0.622] | |
mosm Q=1 | magnitude | True | (4,) | float64 | [ 0.728 0.717 0.579 -1.073] |
mean | True | (1, 4) | float64 | [[0.615 0.508 0.566 0.099]] | |
variance | True | (1, 4) | float64 | [[2.167e-01 2.673e-01 2.661e-01 1.707e-05]] | |
delay | True | (1, 4) | float64 | [[ 0.031 -0.247 -0.414 0.631]] | |
phase | True | (4,) | float64 | [ 0.004 0.093 0.007 -0.104] | |
mosm Q=2 | magnitude | True | (4,) | float64 | [ 0.269 -0.029 0.145 0.048] |
mean | True | (1, 4) | float64 | [[2.472 0.080 2.013 0.042]] | |
variance | True | (1, 4) | float64 | [[0.134 0.007 0.693 0.004]] | |
delay | True | (1, 4) | float64 | [[ 1.225 0.074 -1.229 -0.069]] | |
phase | True | (4,) | float64 | [-3.332 0.374 3.289 -0.331] | |
noise Q=3 | noise | True | (4,) | float64 | [0.002 0.069 0.001 0.021] |
gaussian likelihood | variance | True | () | float64 | 0.001 |
import matplotlib.patches as patches
f, ax = mosm.plot_prediction(grid=(2, 2), figsize=(15, 5), title='Trained MOSM on air quality data')
pos = [[0.21, 0.3], [0.8, 1.0], [0.0, 0.2]]
for i in range(3):
x0 = ax[i].get_xlim()[0]
x1 = ax[i].get_xlim()[1]
ax[i].add_patch(patches.Rectangle(
(x0+pos[i][0]*(x1-x0), ax[i].get_ylim()[0]), # (x,y)
(pos[i][1]-pos[i][0])*(x1-x0), # width
ax[i].get_ylim()[1] - ax[i].get_ylim()[0], # height
fill=True,
color='xkcd:strawberry',
alpha=0.25,
lw=0,
))
f.savefig('air_quality.pdf', bbox_inches='tight')
mosm.plot_gram_matrix()
(<Figure size 720x720 with 2 Axes>, <matplotlib.axes._subplots.AxesSubplot at 0x7faeec883110>)
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 2, figsize=(16, 4))
data[0].plot(ax=ax[0])
data[1].plot(ax=ax[1])
ax[0].xaxis.set_major_locator(plt.MaxNLocator(6))
ax[1].xaxis.set_major_locator(plt.MaxNLocator(6))
import matplotlib.pyplot as plt
import matplotlib
x = np.linspace(0, 10, 500)
one = np.sin(x) + np.random.normal(0.0, 0.1, len(x))
two = np.sin(x) + np.random.normal(0.0, 0.1, len(x))
three = -0.5 + x*0.1 + np.random.normal(0.0, 0.1, len(x))
four = -np.sin(x) + np.random.normal(0.0, 0.1, len(x))
data = [one, three, four]
fig, axes = plt.subplots(1,1, sharex=True, sharey=True, figsize=(4,3))
axes = np.array(axes).reshape(-1)
colors = list(matplotlib.colors.TABLEAU_COLORS)
for i, channel in enumerate(data):
#axes[i].plot(x_pred[i][:,0], mu[i], label='Post.Mean', c=colors[i%len(colors)], zorder=4, lw=1.8)
axes[0].plot(x, channel, '-k', label='Test', lw=1, alpha=1, zorder=2, c=colors[i%len(colors)])
#axes[i].plot(x_train[i][:,0], y_train[i], '.k', label='Train', ms=11, mew=0.8, markeredgecolor='white', zorder=3)
axes[0].xaxis.set_major_locator(plt.MaxNLocator(5))
axes[0].set_xlim(0, 10)
axes[0].set_ylim(-2, 2)
plt.suptitle('', y=1.02, fontsize=20)
plt.tight_layout()
plt.savefig('channel_correlation.pdf', bbox_inches='tight')