Source code for hypertools.tools.cluster
#!/usr/bin/env python
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import numpy as np
from .._shared.helpers import *
def cluster(x, n_clusters=8, ndims=None):
    """
    Performs k-means clustering and returns a list of cluster labels

    Parameters
    ----------
    x : A Numpy array, Pandas Dataframe or list of arrays/dfs
        The data to be clustered. You can pass a single array/df or a list.
        If a list is passed, the arrays will be stacked and the clustering
        will be performed across all lists (i.e. not within each list).

    n_clusters : int
        The number of clusters to discover (i.e. k)

    ndims : int or None
        This parameter allows you to first reduce dimensionality (using PCA)
        before running k-means. A falsy value (None or 0) skips the
        reduction step.

    Returns
    -------
    cluster_labels : list
        A list of cluster labels, one per sample (row) of the stacked data.
        K-means is initialized without a fixed random state here, so the
        label assigned to a given cluster may differ between calls.
    """
    # Normalize the input via the shared helper.
    # NOTE(review): presumably this yields a list of 2D arrays - confirm
    # against _shared.helpers.format_data.
    x = format_data(x)

    # Stack all arrays row-wise so clustering runs across the whole dataset.
    # isinstance (rather than an exact type check) also accepts list subclasses.
    if isinstance(x, list):
        x = np.vstack(x)

    # Optional dimensionality reduction before clustering.
    if ndims:
        x = PCA(n_components=ndims).fit_transform(x)

    kmeans = KMeans(init='k-means++', n_clusters=n_clusters, n_init=10)
    kmeans.fit(x)

    return list(kmeans.labels_)