Module hummingbird.ml.operator_converters.skl_array_feature_extractor
Converters for scikit-learn feature selectors: SelectKBest, SelectPercentile, VarianceThreshold
Expand source code
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------
"""
Converters for scikit-learn feature selectors: SelectKBest, SelectPercentile, VarianceThreshold
"""
import numpy as np
from onnxconverter_common.registration import register_converter
import torch
from ._array_feature_extractor_implementations import ArrayFeatureExtractor
def convert_sklearn_select_k_best(operator, device, extra_config):
    """
    Converter for `sklearn.feature_selection.SelectKBest`.

    The kept columns are read from the fitted selector's own support mask
    instead of being re-derived from `scores_`. Re-deriving them with
    `scores_.argsort()[-k:]` disagrees with scikit-learn in several cases:

    * `k="all"` is not an integer, so the slice raises a `TypeError`;
    * `k=0` turns the slice into `[-0:]`, which keeps every feature
      instead of none;
    * NaN scores are sorted last by NumPy and would be picked as the
      "best" features (presumably the chi2 failure tracked in issue #200
      -- confirm against that issue);
    * tied scores at the cut-off may be resolved differently from the
      stable sort scikit-learn uses.

    Args:
        operator: An operator wrapping a `sklearn.feature_selection.SelectKBest` model
        device: String defining the type of device the converted operator should be run on
        extra_config: Extra configuration used to select the best conversion strategy

    Returns:
        A PyTorch model
    """
    selector = operator.raw_operator
    # `get_support(indices=True)` yields the integer positions of the selected
    # features in ascending order, i.e. the same column order that
    # `selector.transform` produces.
    indices = np.asarray(selector.get_support(indices=True))
    return ArrayFeatureExtractor(np.ascontiguousarray(indices), device)
def convert_sklearn_variance_threshold(operator, device, extra_config):
    """
    Converter for `sklearn.feature_selection.VarianceThreshold`.

    Collects the positions of the features whose fitted variance is strictly
    greater than the selector's threshold and wraps them in an
    `ArrayFeatureExtractor`.

    Args:
        operator: An operator wrapping a `sklearn.feature_selection.VarianceThreshold` model
        device: String defining the type of device the converted operator should be run on
        extra_config: Extra configuration used to select the best conversion strategy

    Returns:
        A PyTorch model
    """
    selector = operator.raw_operator
    variances = selector.variances_
    cutoff = selector.threshold
    # Strict comparison: a feature whose variance equals the threshold is dropped.
    kept_positions = [
        position
        for position, variance in enumerate(variances)
        if variance > cutoff
    ]
    selected = np.array(kept_positions)
    return ArrayFeatureExtractor(np.ascontiguousarray(selected), device)
# Register the converters defined above with the `register_converter` helper
# imported from `onnxconverter_common.registration`. The first argument is the
# operator name string (presumably the key used to look the converter up --
# confirm against the registry), the second is the converter function.
# NOTE(review): the module docstring also lists SelectPercentile, but no
# converter for it is registered in the code visible here.
register_converter("SklearnSelectKBest", convert_sklearn_select_k_best)
register_converter("SklearnVarianceThreshold", convert_sklearn_variance_threshold)
Functions
def convert_sklearn_select_k_best(operator, device, extra_config)
-
Converter for sklearn.feature_selection.SelectKBest.
Args
operator
- An operator wrapping a sklearn.feature_selection.SelectKBest model
device
- String defining the type of device the converted operator should be run on
extra_config
- Extra configuration used to select the best conversion strategy
Returns
A PyTorch model
Expand source code
def convert_sklearn_select_k_best(operator, device, extra_config):
    """
    Converter for `sklearn.feature_selection.SelectKBest`.

    The kept columns are read from the fitted selector's own support mask
    instead of being re-derived from `scores_`. Re-deriving them with
    `scores_.argsort()[-k:]` disagrees with scikit-learn in several cases:

    * `k="all"` is not an integer, so the slice raises a `TypeError`;
    * `k=0` turns the slice into `[-0:]`, which keeps every feature
      instead of none;
    * NaN scores are sorted last by NumPy and would be picked as the
      "best" features (presumably the chi2 failure tracked in issue #200
      -- confirm against that issue);
    * tied scores at the cut-off may be resolved differently from the
      stable sort scikit-learn uses.

    Args:
        operator: An operator wrapping a `sklearn.feature_selection.SelectKBest` model
        device: String defining the type of device the converted operator should be run on
        extra_config: Extra configuration used to select the best conversion strategy

    Returns:
        A PyTorch model
    """
    selector = operator.raw_operator
    # `get_support(indices=True)` yields the integer positions of the selected
    # features in ascending order, i.e. the same column order that
    # `selector.transform` produces.
    indices = np.asarray(selector.get_support(indices=True))
    return ArrayFeatureExtractor(np.ascontiguousarray(indices), device)
def convert_sklearn_variance_threshold(operator, device, extra_config)
-
Converter for sklearn.feature_selection.VarianceThreshold.
Args
operator
- An operator wrapping a sklearn.feature_selection.VarianceThreshold model
device
- String defining the type of device the converted operator should be run on
extra_config
- Extra configuration used to select the best conversion strategy
Returns
A PyTorch model
Expand source code
def convert_sklearn_variance_threshold(operator, device, extra_config):
    """
    Converter for `sklearn.feature_selection.VarianceThreshold`.

    Collects the positions of the features whose fitted variance is strictly
    greater than the selector's threshold and wraps them in an
    `ArrayFeatureExtractor`.

    Args:
        operator: An operator wrapping a `sklearn.feature_selection.VarianceThreshold` model
        device: String defining the type of device the converted operator should be run on
        extra_config: Extra configuration used to select the best conversion strategy

    Returns:
        A PyTorch model
    """
    selector = operator.raw_operator
    variances = selector.variances_
    cutoff = selector.threshold
    # Strict comparison: a feature whose variance equals the threshold is dropped.
    kept_positions = [
        position
        for position, variance in enumerate(variances)
        if variance > cutoff
    ]
    selected = np.array(kept_positions)
    return ArrayFeatureExtractor(np.ascontiguousarray(selected), device)