Source code for hypertools.tools.load

import requests
import pickle
import pandas as pd
import deepdish as dd
import sys
from warnings import warn
from .analyze import analyze
from .._shared.helpers import format_data
from ..datageometry import DataGeometry

def load(dataset, reduce=None, ndims=None, align=None, normalize=None):
    """
    Load a .geo file or example data

    Parameters
    ----------
    dataset : string
        The name of the example dataset. Can be a `.geo` file, or one of a
        number of example datasets listed below.

        `weights` is an fMRI dataset comprised of 36 subjects. For each
        subject, the rows are fMRI measurements and the columns are
        parameters of a model fit to the fMRI data.

        `weights_sample` is a sample of 3 subjects from that dataset.

        `weights_avg` is the dataset split in half and averaged into two
        groups.

        `spiral` is a 3D spiral used to highlight the `procrustes` function.

        `mushrooms` is an example dataset comprised of features (columns) of
        a collection of mushroom samples (rows).

    normalize : str or False or None
        If set to 'across', the columns of the input data will be z-scored
        across lists (default). That is, the z-scores will be computed with
        respect to column n across all arrays passed in the list. If set to
        'within', the columns will be z-scored within each list that is
        passed. If set to 'row', each row of the input data will be z-scored.
        If set to False, the input data will be returned with no z-scoring.

    reduce : str or dict
        Decomposition/manifold learning model to use. Models supported: PCA,
        IncrementalPCA, SparsePCA, MiniBatchSparsePCA, KernelPCA, FastICA,
        FactorAnalysis, TruncatedSVD, DictionaryLearning,
        MiniBatchDictionaryLearning, TSNE, Isomap, SpectralEmbedding,
        LocallyLinearEmbedding, and MDS. Can be passed as a string, but for
        finer control of the model parameters, pass as a dictionary, e.g.
        reduce={'model' : 'PCA', 'params' : {'whiten' : True}}. See
        scikit-learn specific model docs for details on parameters supported
        for each model.

    ndims : int
        Number of dimensions to reduce to.

    align : str or dict
        If str, either 'hyper' or 'SRM'. If 'hyper', the alignment algorithm
        will be hyperalignment. If 'SRM', the alignment algorithm will be the
        shared response model. You can also pass a dictionary for finer
        control, where the 'model' key is a string that specifies the model
        and the 'params' key is a dictionary of parameter values (default:
        'hyper').
    Returns
    -------
    data : Numpy Array
        Example data
    """

    # Data was pickled under Python 2; Python 3 needs latin1 decoding to
    # unpickle it
    if sys.version_info[0] == 3:
        pickle_options = {'encoding': 'latin1'}
    else:
        pickle_options = {}

    if dataset[-4:] == '.geo':
        # load a DataGeometry object saved to disk as a .geo (HDF5) file
        geo = dd.io.load(dataset)
        data = DataGeometry(fig=None, ax=None, data=geo['data'],
                            xform_data=geo['xform_data'], line_ani=None,
                            reduce=geo['reduce'], align=geo['align'],
                            normalize=geo['normalize'], kwargs=geo['kwargs'],
                            version=geo['version'])
    elif dataset == 'weights':
        fileid = '0B7Ycm4aSYdPPREJrZ2stdHBFdjg'
        url = 'https://docs.google.com/uc?export=download&id=' + fileid
        data = pickle.loads(requests.get(url, stream=True).content,
                            **pickle_options)
    elif dataset == 'weights_avg':
        fileid = '0B7Ycm4aSYdPPRmtPRnBJc3pieDg'
        url = 'https://docs.google.com/uc?export=download&id=' + fileid
        data = pickle.loads(requests.get(url, stream=True).content,
                            **pickle_options)
    elif dataset == 'weights_sample':
        fileid = '0B7Ycm4aSYdPPTl9IUUVlamJ2VjQ'
        url = 'https://docs.google.com/uc?export=download&id=' + fileid
        data = pickle.loads(requests.get(url, stream=True).content,
                            **pickle_options)
    elif dataset == 'spiral':
        fileid = '0B7Ycm4aSYdPPQS0xN3FmQ1FZSzg'
        url = 'https://docs.google.com/uc?export=download&id=' + fileid
        data = pickle.loads(requests.get(url, stream=True).content,
                            **pickle_options)
    elif dataset == 'mushrooms':
        fileid = '0B7Ycm4aSYdPPY3J0U2tRNFB4T3c'
        url = 'https://docs.google.com/uc?export=download&id=' + fileid
        data = pd.read_csv(url)

    return analyze(data, reduce=reduce, ndims=ndims, align=align,
                   normalize=normalize)
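
A minimal usage sketch, assuming `load` is exported at the hypertools package top level (as in the hypertools docs). The dataset names and parameter values come from the docstring above; the `.geo` filename is hypothetical.

import hypertools as hyp

# Load the 3-subject sample dataset, reduce each array to 3 dimensions with
# PCA, hyperalign the subjects, and z-score columns across lists
data = hyp.load('weights_sample', reduce='PCA', ndims=3,
                align='hyper', normalize='across')

# Finer control over reduction: pass reduce as a dict of model and params
data = hyp.load('weights_sample',
                reduce={'model': 'PCA', 'params': {'whiten': True}})

# Reload a previously saved DataGeometry object ('mydata.geo' is a
# hypothetical file path)
data = hyp.load('mydata.geo')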