from sklearn.datasets import make_circles, load_iris
from sklearn.model_selection import train_test_split
import torch
import numpy as np
import yellowbrick as yb
import matplotlib
import matplotlib.pyplot as plt
# dtype = torch.long
# device = torch.device("cpu")
X, y = make_circles(n_samples=1000, noise=0.1)
# 75/25 train/test split
orig_X_train, orig_X_test, orig_y_train, orig_y_test = train_test_split(X, y, test_size=0.25)
# Transform data into tensors.
X = torch.tensor(orig_X_train, dtype=torch.float)
y = torch.tensor(orig_y_train, dtype=torch.long)
import yellowbrick.contrib.scatter
visualizer = yellowbrick.contrib.scatter.ScatterVisualizer()
visualizer.fit(orig_X_train, orig_y_train)
visualizer.poof()
from torch import nn
# Sequential model allows easy model experimentation
model = nn.Sequential(
    nn.Linear(2, 16),    # input dim 2; 16 neurons in the first layer
    nn.ReLU(),           # ReLU activation
    # nn.Dropout(p=0.2), # Optional dropout
    nn.Linear(16, 4),    # Linear from 16 neurons down to 4
    nn.ReLU(),
    nn.Linear(4, 2),     # Linear from 4 neurons down to the 2 output classes
    nn.Softmax(dim=1)    # Softmax activation to normalize the outputs
)
# Loss function. CrossEntropy is valid for classification problems.
loss_fn = nn.CrossEntropyLoss()
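# Note: nn.CrossEntropyLoss expects raw logits and applies log-softmax internally,
# so the explicit nn.Softmax at the end of the model above is non-standard (it still
# trains, but unnormalized scores are the usual input to this loss).
# A minimal sketch (illustrative values, not from the original) verifying that
# CrossEntropyLoss is LogSoftmax followed by NLLLoss:
_logits = torch.randn(5, 2)
_targets = torch.randint(0, 2, (5,))
assert torch.allclose(nn.CrossEntropyLoss()(_logits, _targets),
                      nn.NLLLoss()(nn.LogSoftmax(dim=1)(_logits), _targets))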
# Optimizer. Many to choose from.
optimizer = torch.optim.Adam(params=model.parameters())
# Optimizer iterations
for i in range(1000):
    # Clear the gradient at the start of each step.
    optimizer.zero_grad()
    # Compute the forward pass
    output = model(X)
    # Compute the loss
    loss = loss_fn(output, y)
    # Backprop to compute the gradients
    loss.backward()
    # Update the model parameters
    optimizer.step()
# Final training loss
print(loss.item())
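The loop above only ever sees the training split; as a quick sanity check (a sketch, not part of the original notebook), the held-out arrays from the split can be scored directly:
# Accuracy on the held-out split (sketch): no gradients are needed for evaluation.
with torch.no_grad():
    test_X = torch.tensor(orig_X_test, dtype=torch.float)
    test_y = torch.tensor(orig_y_test, dtype=torch.long)
    test_acc = (model(test_X).argmax(dim=1) == test_y).float().mean().item()
print("test accuracy:", test_acc)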
Visualizing the network's output over a grid (an exercise in Tensor math)
%matplotlib inline
# Make a grid
ns = 25
xx, yy = np.meshgrid(np.linspace(-1.5, 1.5, 2*ns), np.linspace(-1.5, 1.5, 2*ns))
# Shape of each is [2*ns, 2*ns]
# Combine into a single tensor
G = torch.tensor(np.array([xx, yy]), dtype=torch.float)
# Shape is [2, 2*ns, 2*ns]
# reshape to be convenient to work with
G = G.reshape((2, G.shape[1]*G.shape[2])).transpose(0,1)
# Now a tensor of shape [(2*ns)*(2*ns), 2]: a sequence of (x, y) coordinate pairs
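# Quick shape check (illustrative): for ns = 25 the grid has (2*ns)**2 = 2500 points.
print(G.shape)  # torch.Size([2500, 2])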
result = model(G).detach()
# For each row (sample) in G, get the prediction under the model.
# The variables inside the model are tracked for gradients;
# call detach() to stop tracking gradients in further computations.
# Result has shape [(2*ns)*(2*ns), 2] since the model takes 2-dim vectors and generates a 2-dim prediction.
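# An equivalent way to get gradient-free predictions (a sketch, not in the original):
# run the forward pass inside torch.no_grad(), so no autograd graph is built
# and no detach() is needed afterwards.
with torch.no_grad():
    result = model(G)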
c0 = result[:,0]
# weights assigned to class 0
c1 = result[:,1]
# weights assigned to class 1
plt.hexbin(G[:,0].detach().numpy(), G[:,1].detach().numpy(), c0.numpy(), gridsize=ns, cmap='viridis')
# Gridsize is half that of the meshgrid for clean rendering.
plt.title("Class 0 Activation")
plt.axis('equal')
plt.show()
plt.hexbin(G[:,0].detach().numpy(), G[:,1].detach().numpy(), c1.numpy(), gridsize=ns, cmap='viridis')
plt.title("Class 1 Activation")
plt.axis('equal')
plt.show()
Case study in working with Yellowbrick
from sklearn.base import BaseEstimator
class NetWrapper(BaseEstimator):
    """
    Wrap our model as a BaseEstimator
    """
    _estimator_type = "classifier"  # Tell yellowbrick this is a classifier

    def __init__(self, model):
        # save a reference to the model
        self.model = model
        self.classes_ = None

    def fit(self, X, y):
        # save the list of classes (sorted, so class order matches the prediction columns)
        self.classes_ = sorted(set(y))
        return self

    def predict_proba(self, X):
        """
        Define predict_proba or decision_function.
        Compute predictions from the model:
        transform the input into a Tensor, compute the prediction,
        and transform the prediction back into a numpy array.
        """
        v = self.model(torch.tensor(X, dtype=torch.float)).detach().numpy()
        print("v:", v.shape)
        return v
wrapped_net = NetWrapper(model)
# Wrap the model
# Use ROCAUC as per usual
import yellowbrick.classifier
ROCAUC = yb.classifier.ROCAUC(wrapped_net)
ROCAUC.fit(orig_X_train, orig_y_train)
print(orig_X_test.shape, orig_y_test.shape)
print(orig_X_train.shape, orig_y_train.shape)
ROCAUC.score(orig_X_test, orig_y_test)
ROCAUC.poof()
Implementing new functionality, e.g. radial activation regions for "circular" neurons
# Each neuron computes a * (x-c)^T(x-c), where a is a real scale and c is a 2-d center
class Circle(torch.nn.Module):
    """
    Extend torch.nn.Module for a new "layer" in a neural network
    """
    def __init__(self, k, data):
        """
        k is the number of neurons to use.
        data is passed in to use as samples to initialize the centers.
        """
        super().__init__()
        # k is not a Parameter, so there is no gradient and it is not updated in optimization
        self.k = int(k)
        # Parameters always have gradients computed
        self.alpha = torch.nn.Parameter(torch.normal(mean=torch.zeros(k), std=torch.ones(k)*0.5).unsqueeze(1))
        self.C = torch.nn.Parameter(data[np.random.choice(data.shape[0], k, replace=False), :].unsqueeze(1))

    def forward(self, x):
        # x: [n, 2]; self.C: [k, 1, 2]; broadcasting gives diff: [k, n, 2]
        diff = x - self.C
        # einsum is a compact way of writing inner products, outer products, etc.
        # 'kij,kij->ki' sums diff*diff over the coordinate axis: the squared distance to each center
        sq_dist = torch.einsum('kij,kij->ki', [diff, diff])
        # scale by alpha and transpose to [n, k]
        return (self.alpha * sq_dist).transpose(0, 1)
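Before training with the new layer, a small sanity check (a sketch, not part of the original) that the einsum really computes the scaled squared distance a * (x-c)^T(x-c):
_layer = Circle(3, X)
_x = torch.randn(5, 2)
_diff = _x - _layer.C                                   # [k, n, 2] via broadcasting
_einsum = torch.einsum('kij,kij->ki', [_diff, _diff])   # [k, n]
_explicit = (_diff ** 2).sum(dim=-1)                    # the same quantity, written directly
assert torch.allclose(_einsum, _explicit)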
from tqdm import tqdm
loss_fn = torch.nn.CrossEntropyLoss()
model = nn.Sequential(
    Circle(16, X),
    nn.ReLU(),
    nn.Linear(16, 4),
    nn.ReLU(),
    nn.Linear(4, 2),
    nn.Softmax(dim=1)
)
optimizer = torch.optim.Adam(params=model.parameters())
for i in tqdm(range(1000)):
    optimizer.zero_grad()
    output = model(X)
    loss = loss_fn(output, y)
    loss.backward()
    optimizer.step()
%matplotlib inline
ns = 25
xx, yy = np.meshgrid(np.linspace(-1.5, 1.5, 2*ns), np.linspace(-1.5, 1.5, 2*ns))
G = torch.tensor(np.array([xx, yy]), dtype=torch.float)
# reshape...
G = G.reshape((2, G.shape[1]*G.shape[2])).transpose(0,1)
result = model(G).detach()
c0 = result[:,0]
c1 = result[:,1]
plt.hexbin(G[:,0].detach().numpy(), G[:,1].detach().numpy(), c0.numpy(), gridsize=ns, cmap='viridis')
plt.title("Class 0 Activation")
plt.axis('equal')
plt.show()
plt.hexbin(G[:,0].detach().numpy(), G[:,1].detach().numpy(), c1.numpy(), gridsize=ns, cmap='viridis')
plt.title("Class 1 Activation")
plt.axis('equal')
plt.show()
wrapped_net = NetWrapper(model)
ROCAUC = yb.classifier.ROCAUC(wrapped_net)
ROCAUC.fit(orig_X_train, orig_y_train)
wrapped_net.predict_proba(orig_X_test)
ROCAUC.score(orig_X_test, orig_y_test)
ROCAUC.poof()
%matplotlib inline
# Show the centers of each "kernel"
centers = model[0].C.squeeze().detach().numpy()
scales = model[0].alpha.squeeze().detach().numpy()
plt.scatter(centers[:,0], centers[:,1])
plt.scatter(X[:,0], X[:,1], alpha=0.1)
plt.axis('equal')
print(centers.shape)
%matplotlib inline
from matplotlib import cm
# Show the contours of the activation regions of each kernel
ns = 25
xx, yy = np.meshgrid(np.linspace(-2, 2, ns), np.linspace(-2, 2, ns))
G = torch.tensor(np.array([xx, yy]), dtype=torch.float)
G = G.reshape((2, G.shape[1]*G.shape[2])).transpose(0,1)
G = G.expand(centers.shape[0], ns*ns, 2)
Z = torch.tensor(scales).unsqueeze(1) * torch.einsum('kij,kij->ki', [G-torch.tensor(centers).unsqueeze(1), G-torch.tensor(centers).unsqueeze(1)])
plt.scatter(centers[:,0], centers[:,1])
plt.scatter(X[:,0], X[:,1], alpha=0.1)
cmap = cm.get_cmap('tab20')
for i in range(Z.shape[0]):
    if scales[i] > 0:
        plt.contour(np.linspace(-2, 2, ns), np.linspace(-2, 2, ns), Z[i].reshape(ns, ns), [-0.5, 0.5], antialiased=True, colors=[cmap(i)], alpha=0.8, linestyles='dotted')
    else:
        plt.contour(np.linspace(-2, 2, ns), np.linspace(-2, 2, ns), Z[i].reshape(ns, ns), [-0.5, 0.5], antialiased=True, colors=[cmap(i)], alpha=0.3, linestyles='solid')
plt.axis('equal')