Module hummingbird.ml.operator_converters.sklearn.discretizer
Converter for scikit-learn discretizers: Binarizer and KBinsDiscretizer.
Expand source code Browse git
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------
"""
Converter for scikit-learn discretizers: Binarizer and KBinsDiscretizer.
"""
import torch
import numpy as np
from onnxconverter_common.registration import register_converter
from .._base_operator import BaseOperator
from .._discretizer_implementations import Binarizer, KBinsDiscretizer
def convert_sklearn_binarizer(operator, device, extra_config):
    """
    Build the PyTorch counterpart of a `sklearn.preprocessing.Binarizer`.

    Args:
        operator: An operator wrapping a `sklearn.preprocessing.Binarizer` model
        device: String defining the type of device the converted operator should be run on
        extra_config: Extra configuration used to select the best conversion strategy
            (not read by this converter)

    Returns:
        A PyTorch model
    """
    # The only piece of the wrapped model that is forwarded is its threshold.
    sklearn_model = operator.raw_operator
    threshold = sklearn_model.threshold
    return Binarizer(threshold, device)
def convert_sklearn_k_bins_discretizer(operator, device, extra_config):
    """
    Build the PyTorch counterpart of a `sklearn.preprocessing.KBinsDiscretizer`.

    Each entry of the wrapped model's `bin_edges_` is flattened to a list. Entries
    shorter than the longest one are padded with `np.inf` values inserted just
    before their last edge, so that every row ends up with the same length.

    Args:
        operator: An operator wrapping a `sklearn.preprocessing.KBinsDiscretizer` model
        device: String defining the type of device the converted operator should be run on
        extra_config: Extra configuration used to select the best conversion strategy
            (not read by this converter)

    Returns:
        A PyTorch model
    """
    fitted = operator.raw_operator

    # One plain Python list of edges per entry of `bin_edges_`.
    edges_per_feature = [edges.flatten().tolist() for edges in fitted.bin_edges_]
    widest = max((len(edges) for edges in edges_per_feature), default=0)

    labels = []
    padded_edges = []
    for edges in edges_per_feature:
        edge_count = len(edges)
        # Labels are 0 .. (number of edges - 2), computed from the unpadded edges.
        labels.append(np.array(list(range(edge_count - 1))))

        missing = widest - edge_count
        if missing > 0:
            # Keep the first and last edges in place; the padding goes between
            # the interior edges and the last edge.
            edges = [edges[0]] + edges[1:-1] + [np.inf] * missing + [edges[-1]]
        padded_edges.append(edges)

    return KBinsDiscretizer(fitted.encode, np.array(padded_edges), labels, device)
# Register both converters at import time under the operator names
# "SklearnBinarizer" and "SklearnKBinsDiscretizer".
# NOTE(review): presumably these names are the keys the conversion framework uses
# to look up the converter for a given operator -- confirm against the registry.
register_converter("SklearnBinarizer", convert_sklearn_binarizer)
register_converter("SklearnKBinsDiscretizer", convert_sklearn_k_bins_discretizer)
Functions
def convert_sklearn_binarizer(operator, device, extra_config)
-
Converter for
sklearn.preprocessing.Binarizer
Args
operator
- An operator wrapping a
sklearn.preprocessing.Binarizer
model
device
- String defining the type of device the converted operator should be run on
extra_config
- Extra configuration used to select the best conversion strategy
Returns
A PyTorch model
Expand source code Browse git
def convert_sklearn_binarizer(operator, device, extra_config):
    """
    Translate a `sklearn.preprocessing.Binarizer` into its PyTorch implementation.

    Args:
        operator: An operator wrapping a `sklearn.preprocessing.Binarizer` model
        device: String defining the type of device the converted operator should be run on
        extra_config: Extra configuration used to select the best conversion strategy
            (not read by this converter)

    Returns:
        A PyTorch model
    """
    # Only the threshold of the wrapped model is passed on.
    binarizer_threshold = operator.raw_operator.threshold
    return Binarizer(binarizer_threshold, device)
def convert_sklearn_k_bins_discretizer(operator, device, extra_config)
-
Converter for
sklearn.preprocessing.KBinsDiscretizer
Args
operator
- An operator wrapping a
sklearn.preprocessing.KBinsDiscretizer
model
device
- String defining the type of device the converted operator should be run on
extra_config
- Extra configuration used to select the best conversion strategy
Returns
A PyTorch model
Expand source code Browse git
def convert_sklearn_k_bins_discretizer(operator, device, extra_config):
    """
    Translate a `sklearn.preprocessing.KBinsDiscretizer` into its PyTorch implementation.

    The entries of the wrapped model's `bin_edges_` are flattened to lists, and any
    entry shorter than the longest one is padded with `np.inf` values placed right
    before its last edge so that all rows have equal length.

    Args:
        operator: An operator wrapping a `sklearn.preprocessing.KBinsDiscretizer` model
        device: String defining the type of device the converted operator should be run on
        extra_config: Extra configuration used to select the best conversion strategy
            (not read by this converter)

    Returns:
        A PyTorch model
    """
    discretizer = operator.raw_operator

    rows = []
    longest = 0
    for raw_edges in discretizer.bin_edges_:
        row = raw_edges.flatten().tolist()
        rows.append(row)
        if len(row) > longest:
            longest = len(row)

    labels = []
    for position, row in enumerate(rows):
        size = len(row)
        # Labels run from 0 to (number of edges - 2), based on the unpadded row.
        labels.append(np.array(list(range(size - 1))))
        if size < longest:
            filler = [np.inf] * (longest - size)
            # First and last edges stay at the ends; the filler sits before the last edge.
            rows[position] = [row[0]] + row[1:-1] + filler + [row[-1]]

    return KBinsDiscretizer(discretizer.encode, np.array(rows), labels, device)