Module hummingbird.operator_converters._tree_commons
Collections of classes and functions shared among all tree converters.
Expand source code
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------
"""
Collections of classes and functions shared among all tree converters.
"""
import copy
import numpy as np
from ._tree_implementations import TreeImpl
from . import constants
class Node:
"""
Class defining a tree node.
"""
def __init__(self, id=None):
"""
Args:
id: A unique ID for the node
left: The id of the left node
right: The id of the right node
feature: The feature used to make a decision (if not leaf node, ignored otherwise)
threshold: The threshold used in the decision (if not leaf node, ignored otherwise)
value: The value stored in the leaf (ignored if not leaf node).
"""
self.id = id
self.left = None
self.right = None
self.feature = None
self.threshold = None
self.value = None
class TreeParameters:
"""
Class containing a convenient in-memory representation of a decision tree.
"""
def __init__(self, lefts, rights, features, thresholds, values):
"""
Args:
lefts: The id of the left nodes
rights: The id of the right nodes
feature: The features used to make decisions
thresholds: The thresholds used in the decisions
values: The value stored in the leaves
"""
self.lefts = lefts
self.rights = rights
self.features = features
self.thresholds = thresholds
self.values = values
def _find_max_depth(tree_parameters):
"""
Function traversing all trees in sequence and returning the maximum depth.
"""
depth = 0
for tree in tree_parameters:
tree = copy.deepcopy(tree)
lefts = tree.lefts
rights = tree.rights
ids = [i for i in range(len(lefts))]
nodes = list(zip(ids, lefts, rights))
nodes_map = {0: Node(0)}
current_node = 0
for i, node in enumerate(nodes):
id, left, right = node
if left != -1:
l_node = Node(left)
nodes_map[left] = l_node
else:
lefts[i] = id
l_node = -1
if right != -1:
r_node = Node(right)
nodes_map[right] = r_node
else:
rights[i] = id
r_node = -1
nodes_map[current_node].left = l_node
nodes_map[current_node].right = r_node
current_node += 1
depth = max(depth, _find_depth(nodes_map[0], -1))
return depth
def _find_depth(node, current_depth):
"""
Recursive function traversing a tree and returning the maximum depth.
"""
if node.left == -1 and node.right == -1:
return current_depth + 1
elif node.left != -1 and node.right == -1:
return _find_depth(node.l, current_depth + 1)
elif node.right != -1 and node.left == -1:
return _find_depth(node.r, current_depth + 1)
elif node.right != -1 and node.left != -1:
return max(_find_depth(node.left, current_depth + 1), _find_depth(node.right, current_depth + 1))
def get_tree_implementation_by_config_or_depth(extra_config, max_depth, low=3, high=10):
"""
Utility function used to pick the tree implementation based on input parameters and heurstics.
The current heuristic is such that GEMM <= low < PerfTreeTrav <= high < TreeTrav
Args:
max_depth: The maximum tree-depth found in the tree model.
low: The maximum depth below which GEMM strategy is used
high: The maximum depth for which PerfTreeTrav strategy is used
Returns: A tree implementation
"""
if constants.TREE_IMPLEMENTATION not in extra_config:
if max_depth is not None and max_depth <= low:
return TreeImpl.gemm
elif max_depth is not None and max_depth <= high:
return TreeImpl.perf_tree_trav
else:
return TreeImpl.tree_trav
if extra_config[constants.TREE_IMPLEMENTATION] == TreeImpl.gemm.name:
return TreeImpl.gemm
elif extra_config[constants.TREE_IMPLEMENTATION] == TreeImpl.tree_trav.name:
return TreeImpl.tree_trav
elif extra_config[constants.TREE_IMPLEMENTATION] == TreeImpl.perf_tree_trav.name:
return TreeImpl.perf_tree_trav
else:
raise ValueError("Tree implementation {} not found".format(extra_config))
def get_tree_params_and_type(tree_infos, get_tree_parameters, extra_config):
"""
Populate the parameters from the trees and pick the tree implementation strategy.
Args:
tree_infos: The information representaing a tree (ensemble)
get_tree_parameters: A function specifying how to parse the tree_infos into a
`operator_converters._tree_commons_TreeParameters` object
extra_config: param extra_config: Extra configuration used also to select the best conversion strategy
Returns:
The tree parameters, the maximum tree-depth and the tre implementation to use
"""
tree_parameters = [get_tree_parameters(tree_info) for tree_info in tree_infos]
max_depth = max(1, _find_max_depth(tree_parameters))
tree_type = get_tree_implementation_by_config_or_depth(extra_config, max_depth)
return tree_parameters, max_depth, tree_type
def get_parameters_for_sklearn_common(tree_infos):
"""
Parse sklearn-based trees, including
SklearnRandomForestClassifier/Regressor and SklearnGradientBoostingClassifier
Args:
tree_infos: The information representaing a tree (ensemble)
Returns: The tree parameters wrapped into an instance of
`operator_converters._tree_commons_TreeParameters`
"""
trees = tree_infos
lefts = trees.tree_.children_left
rights = trees.tree_.children_right
features = trees.tree_.feature
thresholds = trees.tree_.threshold
values = trees.tree_.value
return TreeParameters(lefts, rights, features, thresholds, values)
def get_parameters_for_tree_trav_common(lefts, rights, features, thresholds, values):
"""
Common functions used by all tree algorithms to generate the parameters according to the tree_trav strategies.
Args:
left: The left nodes
right: The right nodes
features: The features used in the decision nodes
thresholds: The thresholds used in the decision nodes
values: The values stored in the leaf nodes
Returns:
An array containing the extracted parameters
"""
if len(lefts) == 1:
# Model creating tree with just a single leaf node. We transform it
# to a model with one internal node.
lefts = [1, -1, -1]
rights = [2, -1, -1]
features = [0, 0, 0]
thresholds = [0, 0, 0]
values = [np.array([0.0]), values[0], values[0]]
ids = [i for i in range(len(lefts))]
nodes = list(zip(ids, lefts, rights, features, thresholds, values))
# Refactor the tree parameters in the proper format.
nodes_map = {0: Node(0)}
current_node = 0
for i, node in enumerate(nodes):
id, left, right, feature, threshold, value = node
if left != -1:
l_node = Node(left)
nodes_map[left] = l_node
else:
lefts[i] = id
l_node = -1
feature = -1
if right != -1:
r_node = Node(right)
nodes_map[right] = r_node
else:
rights[i] = id
r_node = -1
feature = -1
nodes_map[current_node].left = l_node
nodes_map[current_node].right = r_node
nodes_map[current_node].feature = feature
nodes_map[current_node].threshold = threshold
nodes_map[current_node].value = value
current_node += 1
lefts = np.array(lefts)
rights = np.array(rights)
features = np.array(features)
thresholds = np.array(thresholds)
values = np.array(values)
return [nodes_map, ids, lefts, rights, features, thresholds, values]
def get_parameters_for_tree_trav_sklearn(lefts, rights, features, thresholds, values):
"""
This function is used to generate tree parameters for sklearn trees accordingy to the tree_trav strategy.
Includes SklearnRandomForestClassifier/Regressor and SklearnGradientBoostingClassifier
Args:
left: The left nodes
right: The right nodes
features: The features used in the decision nodes
thresholds: The thresholds used in the decision nodes
values: The values stored in the leaf nodes
Returns:
An array containing the extracted parameters
"""
features = [max(x, 0) for x in features]
values = np.array(values)
if len(values.shape) == 3:
values = values.reshape(values.shape[0], -1)
if values.shape[1] > 1:
values /= np.sum(values, axis=1, keepdims=True)
return get_parameters_for_tree_trav_common(lefts, rights, features, thresholds, values)
def get_parameters_for_gemm_common(lefts, rights, features, thresholds, values, n_features):
"""
Common functions used by all tree algorithms to generate the parameters according to the GEMM strategy.
Args:
left: The left nodes
right: The right nodes
features: The features used in the decision nodes
thresholds: The thresholds used in the decision nodes
values: The values stored in the leaf nodes
n_features: The number of expected input features
Returns:
The weights and bias for the GEMM implementation
"""
if len(lefts) == 1:
# Model creating trees with just a single leaf node. We transform it
# to a model with one internal node.
lefts = [1, -1, -1]
rights = [2, -1, -1]
features = [0, 0, 0]
thresholds = [0, 0, 0]
values = [np.array([0.0]), values[0], values[0]]
values = np.array(values)
weights = []
biases = []
# First hidden layer has all inequalities.
hidden_weights = []
hidden_biases = []
for left, feature, thresh in zip(lefts, features, thresholds):
if left != -1:
hidden_weights.append([1 if i == feature else 0 for i in range(n_features)])
hidden_biases.append(thresh)
weights.append(np.array(hidden_weights).astype("float32"))
biases.append(np.array(hidden_biases).astype("float32"))
n_splits = len(hidden_weights)
# Second hidden layer has ANDs for each leaf of the decision tree.
# Depth first enumeration of the tree in order to determine the AND by the path.
hidden_weights = []
hidden_biases = []
path = [0]
n_nodes = len(lefts)
visited = [False for _ in range(n_nodes)]
class_proba = []
nodes = list(zip(lefts, rights, features, thresholds, values))
while True and len(path) > 0:
i = path[-1]
visited[i] = True
left, right, feature, threshold, value = nodes[i]
if left == -1 and right == -1:
vec = [0 for _ in range(n_splits)]
# Keep track of positive weights for calculating bias.
num_positive = 0
for j, p in enumerate(path[:-1]):
num_leaves_before_p = list(lefts[:p]).count(-1)
if path[j + 1] in lefts:
vec[p - num_leaves_before_p] = 1
num_positive += 1
elif path[j + 1] in rights:
vec[p - num_leaves_before_p] = -1
else:
raise RuntimeError("Inconsistent state encountered while tree translation.")
if values.shape[-1] > 1:
class_proba.append((values[i] / np.sum(values[i])).flatten())
else:
# We have only a single value. e.g., GBDT
class_proba.append(values[i].flatten())
hidden_weights.append(vec)
hidden_biases.append(num_positive)
path.pop()
elif not visited[left]:
path.append(left)
elif not visited[right]:
path.append(right)
else:
path.pop()
weights.append(np.array(hidden_weights).astype("float32"))
biases.append(np.array(hidden_biases).astype("float32"))
# OR neurons from the preceding layer in order to get final classes.
weights.append(np.transpose(np.array(class_proba).astype("float32")))
biases.append(None)
return weights, biases
Functions
def get_parameters_for_gemm_common(lefts, rights, features, thresholds, values, n_features)
-
Common functions used by all tree algorithms to generate the parameters according to the GEMM strategy.
Args
left
- The left nodes
right
- The right nodes
features
- The features used in the decision nodes
thresholds
- The thresholds used in the decision nodes
values
- The values stored in the leaf nodes
n_features
- The number of expected input features
Returns
The weights and bias for the GEMM implementation
Expand source code
def get_parameters_for_gemm_common(lefts, rights, features, thresholds, values, n_features): """ Common functions used by all tree algorithms to generate the parameters according to the GEMM strategy. Args: left: The left nodes right: The right nodes features: The features used in the decision nodes thresholds: The thresholds used in the decision nodes values: The values stored in the leaf nodes n_features: The number of expected input features Returns: The weights and bias for the GEMM implementation """ if len(lefts) == 1: # Model creating trees with just a single leaf node. We transform it # to a model with one internal node. lefts = [1, -1, -1] rights = [2, -1, -1] features = [0, 0, 0] thresholds = [0, 0, 0] values = [np.array([0.0]), values[0], values[0]] values = np.array(values) weights = [] biases = [] # First hidden layer has all inequalities. hidden_weights = [] hidden_biases = [] for left, feature, thresh in zip(lefts, features, thresholds): if left != -1: hidden_weights.append([1 if i == feature else 0 for i in range(n_features)]) hidden_biases.append(thresh) weights.append(np.array(hidden_weights).astype("float32")) biases.append(np.array(hidden_biases).astype("float32")) n_splits = len(hidden_weights) # Second hidden layer has ANDs for each leaf of the decision tree. # Depth first enumeration of the tree in order to determine the AND by the path. hidden_weights = [] hidden_biases = [] path = [0] n_nodes = len(lefts) visited = [False for _ in range(n_nodes)] class_proba = [] nodes = list(zip(lefts, rights, features, thresholds, values)) while True and len(path) > 0: i = path[-1] visited[i] = True left, right, feature, threshold, value = nodes[i] if left == -1 and right == -1: vec = [0 for _ in range(n_splits)] # Keep track of positive weights for calculating bias. num_positive = 0 for j, p in enumerate(path[:-1]): num_leaves_before_p = list(lefts[:p]).count(-1) if path[j + 1] in lefts: vec[p - num_leaves_before_p] = 1 num_positive += 1 elif path[j + 1] in rights: vec[p - num_leaves_before_p] = -1 else: raise RuntimeError("Inconsistent state encountered while tree translation.") if values.shape[-1] > 1: class_proba.append((values[i] / np.sum(values[i])).flatten()) else: # We have only a single value. e.g., GBDT class_proba.append(values[i].flatten()) hidden_weights.append(vec) hidden_biases.append(num_positive) path.pop() elif not visited[left]: path.append(left) elif not visited[right]: path.append(right) else: path.pop() weights.append(np.array(hidden_weights).astype("float32")) biases.append(np.array(hidden_biases).astype("float32")) # OR neurons from the preceding layer in order to get final classes. weights.append(np.transpose(np.array(class_proba).astype("float32"))) biases.append(None) return weights, biases
def get_parameters_for_sklearn_common(tree_infos)
-
Parse sklearn-based trees, including SklearnRandomForestClassifier/Regressor and SklearnGradientBoostingClassifier
Args
tree_infos
- The information representaing a tree (ensemble)
Returns
:The tree parameters wrapped into an instance of
operator_converters._tree_commons_TreeParameters
Expand source code
def get_parameters_for_sklearn_common(tree_infos): """ Parse sklearn-based trees, including SklearnRandomForestClassifier/Regressor and SklearnGradientBoostingClassifier Args: tree_infos: The information representaing a tree (ensemble) Returns: The tree parameters wrapped into an instance of `operator_converters._tree_commons_TreeParameters` """ trees = tree_infos lefts = trees.tree_.children_left rights = trees.tree_.children_right features = trees.tree_.feature thresholds = trees.tree_.threshold values = trees.tree_.value return TreeParameters(lefts, rights, features, thresholds, values)
def get_parameters_for_tree_trav_common(lefts, rights, features, thresholds, values)
-
Common functions used by all tree algorithms to generate the parameters according to the tree_trav strategies.
Args
left
- The left nodes
right
- The right nodes
features
- The features used in the decision nodes
thresholds
- The thresholds used in the decision nodes
values
- The values stored in the leaf nodes
Returns
An array containing the extracted parameters
Expand source code
def get_parameters_for_tree_trav_common(lefts, rights, features, thresholds, values): """ Common functions used by all tree algorithms to generate the parameters according to the tree_trav strategies. Args: left: The left nodes right: The right nodes features: The features used in the decision nodes thresholds: The thresholds used in the decision nodes values: The values stored in the leaf nodes Returns: An array containing the extracted parameters """ if len(lefts) == 1: # Model creating tree with just a single leaf node. We transform it # to a model with one internal node. lefts = [1, -1, -1] rights = [2, -1, -1] features = [0, 0, 0] thresholds = [0, 0, 0] values = [np.array([0.0]), values[0], values[0]] ids = [i for i in range(len(lefts))] nodes = list(zip(ids, lefts, rights, features, thresholds, values)) # Refactor the tree parameters in the proper format. nodes_map = {0: Node(0)} current_node = 0 for i, node in enumerate(nodes): id, left, right, feature, threshold, value = node if left != -1: l_node = Node(left) nodes_map[left] = l_node else: lefts[i] = id l_node = -1 feature = -1 if right != -1: r_node = Node(right) nodes_map[right] = r_node else: rights[i] = id r_node = -1 feature = -1 nodes_map[current_node].left = l_node nodes_map[current_node].right = r_node nodes_map[current_node].feature = feature nodes_map[current_node].threshold = threshold nodes_map[current_node].value = value current_node += 1 lefts = np.array(lefts) rights = np.array(rights) features = np.array(features) thresholds = np.array(thresholds) values = np.array(values) return [nodes_map, ids, lefts, rights, features, thresholds, values]
def get_parameters_for_tree_trav_sklearn(lefts, rights, features, thresholds, values)
-
This function is used to generate tree parameters for sklearn trees accordingy to the tree_trav strategy. Includes SklearnRandomForestClassifier/Regressor and SklearnGradientBoostingClassifier
Args
left
- The left nodes
right
- The right nodes
features
- The features used in the decision nodes
thresholds
- The thresholds used in the decision nodes
values
- The values stored in the leaf nodes
Returns
An array containing the extracted parameters
Expand source code
def get_parameters_for_tree_trav_sklearn(lefts, rights, features, thresholds, values): """ This function is used to generate tree parameters for sklearn trees accordingy to the tree_trav strategy. Includes SklearnRandomForestClassifier/Regressor and SklearnGradientBoostingClassifier Args: left: The left nodes right: The right nodes features: The features used in the decision nodes thresholds: The thresholds used in the decision nodes values: The values stored in the leaf nodes Returns: An array containing the extracted parameters """ features = [max(x, 0) for x in features] values = np.array(values) if len(values.shape) == 3: values = values.reshape(values.shape[0], -1) if values.shape[1] > 1: values /= np.sum(values, axis=1, keepdims=True) return get_parameters_for_tree_trav_common(lefts, rights, features, thresholds, values)
def get_tree_implementation_by_config_or_depth(extra_config, max_depth, low=3, high=10)
-
Utility function used to pick the tree implementation based on input parameters and heurstics. The current heuristic is such that GEMM <= low < PerfTreeTrav <= high < TreeTrav
Args
max_depth
- The maximum tree-depth found in the tree model.
low
- The maximum depth below which GEMM strategy is used
high
- The maximum depth for which PerfTreeTrav strategy is used
Returns
:A tree implementation
Expand source code
def get_tree_implementation_by_config_or_depth(extra_config, max_depth, low=3, high=10): """ Utility function used to pick the tree implementation based on input parameters and heurstics. The current heuristic is such that GEMM <= low < PerfTreeTrav <= high < TreeTrav Args: max_depth: The maximum tree-depth found in the tree model. low: The maximum depth below which GEMM strategy is used high: The maximum depth for which PerfTreeTrav strategy is used Returns: A tree implementation """ if constants.TREE_IMPLEMENTATION not in extra_config: if max_depth is not None and max_depth <= low: return TreeImpl.gemm elif max_depth is not None and max_depth <= high: return TreeImpl.perf_tree_trav else: return TreeImpl.tree_trav if extra_config[constants.TREE_IMPLEMENTATION] == TreeImpl.gemm.name: return TreeImpl.gemm elif extra_config[constants.TREE_IMPLEMENTATION] == TreeImpl.tree_trav.name: return TreeImpl.tree_trav elif extra_config[constants.TREE_IMPLEMENTATION] == TreeImpl.perf_tree_trav.name: return TreeImpl.perf_tree_trav else: raise ValueError("Tree implementation {} not found".format(extra_config))
def get_tree_params_and_type(tree_infos, get_tree_parameters, extra_config)
-
Populate the parameters from the trees and pick the tree implementation strategy.
Args
tree_infos
- The information representaing a tree (ensemble)
get_tree_parameters
- A function specifying how to parse the tree_infos into a
operator_converters._tree_commons_TreeParameters
object extra_config
- param extra_config: Extra configuration used also to select the best conversion strategy
Returns
The tree parameters, the maximum tree-depth and the tre implementation to use
Expand source code
def get_tree_params_and_type(tree_infos, get_tree_parameters, extra_config): """ Populate the parameters from the trees and pick the tree implementation strategy. Args: tree_infos: The information representaing a tree (ensemble) get_tree_parameters: A function specifying how to parse the tree_infos into a `operator_converters._tree_commons_TreeParameters` object extra_config: param extra_config: Extra configuration used also to select the best conversion strategy Returns: The tree parameters, the maximum tree-depth and the tre implementation to use """ tree_parameters = [get_tree_parameters(tree_info) for tree_info in tree_infos] max_depth = max(1, _find_max_depth(tree_parameters)) tree_type = get_tree_implementation_by_config_or_depth(extra_config, max_depth) return tree_parameters, max_depth, tree_type
Classes
class Node (id=None)
-
Class defining a tree node.
Args
id
- A unique ID for the node
left
- The id of the left node
right
- The id of the right node
feature
- The feature used to make a decision (if not leaf node, ignored otherwise)
threshold
- The threshold used in the decision (if not leaf node, ignored otherwise)
value
- The value stored in the leaf (ignored if not leaf node).
Expand source code
class Node: """ Class defining a tree node. """ def __init__(self, id=None): """ Args: id: A unique ID for the node left: The id of the left node right: The id of the right node feature: The feature used to make a decision (if not leaf node, ignored otherwise) threshold: The threshold used in the decision (if not leaf node, ignored otherwise) value: The value stored in the leaf (ignored if not leaf node). """ self.id = id self.left = None self.right = None self.feature = None self.threshold = None self.value = None
class TreeParameters (lefts, rights, features, thresholds, values)
-
Class containing a convenient in-memory representation of a decision tree.
Args
lefts
- The id of the left nodes
rights
- The id of the right nodes
feature
- The features used to make decisions
thresholds
- The thresholds used in the decisions
values
- The value stored in the leaves
Expand source code
class TreeParameters: """ Class containing a convenient in-memory representation of a decision tree. """ def __init__(self, lefts, rights, features, thresholds, values): """ Args: lefts: The id of the left nodes rights: The id of the right nodes feature: The features used to make decisions thresholds: The thresholds used in the decisions values: The value stored in the leaves """ self.lefts = lefts self.rights = rights self.features = features self.thresholds = thresholds self.values = values