Source code for hypertools.tools.reduce

#!/usr/bin/env python

# libraries
import warnings
import numpy as np

## reduction models
from sklearn.decomposition import PCA, FastICA, IncrementalPCA, KernelPCA, FactorAnalysis, TruncatedSVD, SparsePCA, MiniBatchSparsePCA, DictionaryLearning, MiniBatchDictionaryLearning
from sklearn.manifold import TSNE, MDS, SpectralEmbedding, LocallyLinearEmbedding, Isomap

# internal libraries
from ..tools.df2mat import df2mat
from .._shared.helpers import *
from .normalize import normalize as normalizer
from .align import align as aligner

# main function
@memoize
def reduce(x, reduce='IncrementalPCA', ndims=3, normalize=None, align=None,
           model=None, model_params=None, internal=False):
    """
    Reduces dimensionality of an array, or list of arrays

    All arrays are stacked row-wise, a single model is fit on the stack, and
    the transformed rows are split back into one array per input.

    Parameters
    ----------
    x : Numpy array or list of arrays
        Data to reduce.

    reduce : str, dict, callable or None
        Decomposition/manifold learning model to use.  Models supported: PCA,
        IncrementalPCA, SparsePCA, MiniBatchSparsePCA, KernelPCA, FastICA,
        FactorAnalysis, TruncatedSVD, DictionaryLearning,
        MiniBatchDictionaryLearning, TSNE, Isomap, SpectralEmbedding,
        LocallyLinearEmbedding, and MDS.  Can be passed as a string, but for
        finer control of the model parameters, pass as a dictionary, e.g.
        reduce={'model' : 'PCA', 'params' : {'whiten' : True}}.  When custom
        params are given they are passed to the model as-is, so include
        'n_components' there if it is needed.  A model class (any callable
        returning an object with ``fit_transform``) is also accepted in place
        of the name.  If None, x is returned untouched.  See scikit-learn
        specific model docs for details on parameters supported for each
        model.

    ndims : int
        Number of dimensions to reduce to

    internal : bool
        If True, always return a list, even for a single array.

    model : None
        Deprecated argument.  Please use reduce.

    model_params : None
        Deprecated argument.  Please use reduce.

    align : None
        Deprecated argument.  Please use new analyze function to perform
        combinations of transformations

    normalize : None
        Deprecated argument.  Please use new analyze function to perform
        combinations of transformations

    Returns
    ----------
    x_reduced : Numpy array or list of arrays
        The reduced data with ndims dimensionality is returned.  If the input
        is a list, a list is returned.  If every array already has ndims
        columns or fewer, the formatted data is returned without fitting a
        model.

    Raises
    ----------
    KeyError
        If the model name is not one of the supported models, or a reduce
        dictionary has no 'model' key.
    TypeError
        If the model is neither a name nor a callable.

    """

    # deprecated warning
    if (model is not None) or (model_params is not None):
        warnings.warn('Model and model params will be deprecated. Please use '
                      'the reduce keyword. See API docs for more info: '
                      'http://hypertools.readthedocs.io/en/latest/hypertools.tools.reduce.html#hypertools.tools.reduce')
        if model is None:
            # only the params were overridden: keep the model that `reduce`
            # already names instead of replacing it with None
            model = reduce['model'] if isinstance(reduce, dict) else reduce
        reduce = {'model': model, 'params': model_params}

    # if no model is requested, just return data
    if reduce is None:
        return x

    # common format
    # NOTE(review): format_data is an opaque helper from the shared helpers
    # star import; the code below relies on it returning an iterable of 2-D
    # arrays (it reads .shape[1] on each element) -- confirm.
    x = format_data(x, ppca=True)

    # deprecation warnings
    if normalize is not None:
        warnings.warn('The normalize argument will be deprecated for this '
                      'function. Please use the analyze function to perform '
                      'combinations of these transformations. See API docs '
                      'for more info: '
                      'http://hypertools.readthedocs.io/en/latest/hypertools.analyze.html#hypertools.analyze')
        x = normalizer(x, normalize=normalize)

    if align is not None:
        warnings.warn('The align argument will be deprecated for this '
                      'function. Please use the analyze function to perform '
                      'combinations of these transformations. See API docs '
                      'for more info: '
                      'http://hypertools.readthedocs.io/en/latest/hypertools.analyze.html#hypertools.analyze')
        x = aligner(x, align=align)

    # if the shape of the data is already less than ndims, just return it
    if all(xi.shape[1] <= ndims for xi in x):
        return x

    # find the model class and its parameters, then initialize the model
    model_class, params = _resolve_reduce_model(reduce, ndims)
    fitted_model = model_class(**params)

    # reduce data
    x_reduced = _fit_transform_stacked(x, fitted_model)

    # return data: a bare array only for a single input outside internal use
    if internal or len(x_reduced) > 1:
        return x_reduced
    return x_reduced[0]


def _resolve_reduce_model(spec, ndims):
    """Turn a ``reduce`` specification into ``(model_class, model_params)``.

    ``spec`` is a model name, a model class/callable, or a dictionary with a
    'model' entry (name or callable) and an optional 'params' entry.  When no
    params are supplied, ``{'n_components': ndims}`` is used.
    """

    # dictionary of models
    models = {
        'PCA' : PCA,
        'IncrementalPCA' : IncrementalPCA,
        'SparsePCA' : SparsePCA,
        'MiniBatchSparsePCA' : MiniBatchSparsePCA,
        'KernelPCA' : KernelPCA,
        'FastICA' : FastICA,
        'FactorAnalysis' : FactorAnalysis,
        'TruncatedSVD' : TruncatedSVD,
        'DictionaryLearning' : DictionaryLearning,
        'MiniBatchDictionaryLearning' : MiniBatchDictionaryLearning,
        'TSNE' : TSNE,
        'Isomap' : Isomap,
        'SpectralEmbedding' : SpectralEmbedding,
        'LocallyLinearEmbedding' : LocallyLinearEmbedding,
        'MDS' : MDS
    }

    # if its a dict, use custom params
    if isinstance(spec, dict):
        model_spec = spec['model']
        params = spec.get('params')
    else:
        model_spec = spec
        params = None

    # byte strings (e.g. numpy byte scalars) name a model just like text does;
    # undecodable bytes fall through to the KeyError of the lookup below
    if isinstance(model_spec, bytes) and not isinstance(model_spec, str):
        model_spec = model_spec.decode('ascii', 'replace')

    if isinstance(model_spec, str):
        # str() strips numpy string scalar subclasses down to a plain key
        model_class = models[str(model_spec)]
    elif callable(model_spec):
        model_class = model_spec
    else:
        raise TypeError('reduce must name a supported model or be a model '
                        'class, got %r' % (model_spec,))

    if params is None:
        params = {'n_components' : ndims}

    return model_class, params


def _fit_transform_stacked(x, model):
    """Fit ``model`` on all arrays stacked row-wise; return one array per input."""

    # row offsets in the stack at which each array after the first begins
    split = np.cumsum([len(xi) for xi in x])[:-1]
    return list(np.vsplit(model.fit_transform(np.vstack(x)), split))