Source code for distil.active_learning_strategies.baseline_sampling

import numpy as np
from torch.utils.data import DataLoader
from .strategy import Strategy

def gram_red(L, L_inv, u_loc):
    # Remove the item at position u_loc from the Gram matrix L and update
    # its inverse in O(n^2) using the Schur-complement downdate formula.
    n = np.shape(L_inv)[0]
    ms = np.array([False for i in range(n)])
    ms[u_loc] = True

    L_red = L[~ms][:, ~ms]
    D = L_inv[~ms][:, ~ms]
    e = L_inv[~ms][:, ms]
    f = L_inv[ms][:, ms]
    L_red_inv = D - e.dot(e.T) / f

    return L_red, L_red_inv
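
# A minimal sanity check for gram_red (an illustrative sketch, not part of
# the library; the _demo_* helper below is a hypothetical addition): the
# downdated inverse should match a direct inversion of the reduced Gram
# matrix.
def _demo_gram_red():
    rng = np.random.RandomState(0)
    Phi = rng.randn(5, 8)                    # 5 items with 8-dim features
    L = Phi.dot(Phi.T)                       # full Gram matrix
    L_inv = np.linalg.inv(L)
    L_red, L_red_inv = gram_red(L, L_inv, u_loc=2)
    assert np.allclose(L_red_inv, np.linalg.inv(L_red))
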
def gram_aug(L_Y, L_Y_inv, b_u, c_u):
    # Append an item to the Gram matrix L_Y and update its inverse using
    # the block-matrix inversion formula; d_u is the Schur complement.
    d_u = c_u - b_u.T.dot(L_Y_inv.dot(b_u))
    g_u = L_Y_inv.dot(b_u)

    L_aug = np.block([[L_Y, b_u], [b_u.T, c_u]])
    L_aug_inv = np.block([[L_Y_inv + g_u.dot(g_u.T / d_u), -g_u / d_u],
                          [-g_u.T / d_u, 1.0 / d_u]])

    return L_aug, L_aug_inv
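
# A matching sanity check for gram_aug (illustrative sketch; _demo_gram_aug
# is a hypothetical addition): augmenting a reduced Gram matrix with a new
# item should yield a valid inverse of the enlarged matrix.
def _demo_gram_aug():
    rng = np.random.RandomState(0)
    Phi = rng.randn(6, 8)
    Y = [0, 1, 2, 3, 4]                      # current subset of items
    L_Y = Phi[Y].dot(Phi[Y].T)
    L_Y_inv = np.linalg.inv(L_Y)
    b_u = Phi[Y].dot(Phi[[5]].T)             # cross terms with the new item
    c_u = Phi[[5]].dot(Phi[[5]].T)           # squared norm of the new item
    L_aug, L_aug_inv = gram_aug(L_Y, L_Y_inv, b_u, c_u)
    assert np.allclose(L_aug_inv.dot(L_aug), np.eye(6), atol=1e-6)
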
def sample_k_imp(Phi, k, max_iter, rng=np.random):
    # MCMC sampler for a k-DPP over the rows of Phi: start from a uniform
    # random size-k subset and repeatedly propose swapping an in-sample item
    # u for an out-of-sample item v, accepting with the Metropolis ratio of
    # the corresponding Gram-matrix determinants.
    n = np.shape(Phi)[0]
    Ind = rng.choice(range(n), size=k, replace=False)

    if n == k:
        return Ind

    X = [False] * n
    for i in Ind:
        X[i] = True
    X = np.array(X)

    L_X = Phi[Ind, :].dot(Phi[Ind, :].T)
    L_X_inv = np.linalg.pinv(L_X)

    for i in range(1, max_iter):
        u = rng.choice(np.arange(n)[X])
        v = rng.choice(np.arange(n)[~X])

        # Locate u's position within the current index list.
        for j in range(len(Ind)):
            if Ind[j] == u:
                u_loc = j

        L_Y, L_Y_inv = gram_red(L_X, L_X_inv, u_loc)

        Ind_red = [i for i in Ind if i != u]

        b_u = Phi[Ind_red, :].dot(Phi[[u], :].T)
        c_u = Phi[[u], :].dot(Phi[[u], :].T)
        b_v = Phi[Ind_red, :].dot(Phi[[v], :].T)
        c_v = Phi[[v], :].dot(Phi[[v], :].T)

        # Acceptance probability: determinant ratio of the swapped subset
        # over the current subset, computed from the reduced inverse.
        p = min(1, (c_v - b_v.T.dot(L_Y_inv.dot(b_v))) / (c_u - b_u.T.dot(L_Y_inv.dot(b_u))))

        if rng.uniform() <= p:
            X[u] = False
            X[v] = True
            Ind = Ind_red + [v]
            L_X, L_X_inv = gram_aug(L_Y, L_Y_inv, b_v, c_v)

    return Ind
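
# Example driver for sample_k_imp (illustrative sketch; _demo_sample_k_imp
# is a hypothetical addition): draw a diverse subset of 10 rows from 100
# random feature vectors, with a fixed seed for reproducibility.
def _demo_sample_k_imp():
    rng = np.random.RandomState(0)
    Phi = rng.randn(100, 16)                 # 100 candidate feature vectors
    chosen = sample_k_imp(Phi, k=10, max_iter=200, rng=rng)
    assert len(set(chosen)) == 10            # 10 distinct indices returned
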
class BaselineSampling(Strategy):
    """
    Implementation of the Baseline Sampling Strategy. This class extends
    :class:`active_learning_strategies.strategy.Strategy` and selects data
    points for active learning by sampling a diverse subset (via a k-DPP)
    of the gradient embeddings of the unlabeled pool.

    Parameters
    ----------
    X: numpy array
        Present training/labeled data
    Y: numpy array
        Labels of present training data
    unlabeled_x: numpy array
        Data without labels
    net: class
        PyTorch model class
    handler: class
        Data handler, which can load data even without labels
    nclasses: int
        Number of unique target variables
    args: dict
        Specify optional parameters, e.g. 'batch_size', the batch size to
        be used inside the strategy class (int, optional)
    """

    def __init__(self, X, Y, unlabeled_x, net, handler, nclasses, args={}):
        """
        Constructor method
        """
        super(BaselineSampling, self).__init__(X, Y, unlabeled_x, net, handler, nclasses, args)

    def select(self, budget):
        """
        Select next set of points

        Parameters
        ----------
        budget: int
            Number of indexes to be returned for next set

        Returns
        ----------
        chosen: list
            List of selected data point indexes with respect to unlabeled_x
        """
        gradEmbedding = self.get_grad_embedding(self.unlabeled_x, bias_grad=False).numpy()
        chosen = sample_k_imp(gradEmbedding, budget, max_iter=int(5 * budget * np.log(budget)))
        return chosen
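
# Hypothetical usage sketch (not part of the library). `MyModel` and
# `MyDataHandler` stand in for a user-supplied PyTorch model class and a
# DISTIL data handler; neither is defined in this file:
#
#   strategy = BaselineSampling(X_labeled, y_labeled, X_unlabeled,
#                               MyModel, MyDataHandler, nclasses=10,
#                               args={'batch_size': 64})
#   chosen = strategy.select(budget=100)     # indices into X_unlabeled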