Module imodels.experimental.bartpy.initializers.sklearntreeinitializer
from typing import Tuple
from operator import gt, le
from sklearn.ensemble import GradientBoostingRegressor
from imodels.experimental.bartpy.initializers.initializer import Initializer
from imodels.experimental.bartpy.mutation import GrowMutation
from imodels.experimental.bartpy.node import split_node, LeafNode
from imodels.experimental.bartpy.splitcondition import SplitCondition
from imodels.experimental.bartpy.tree import Tree, mutate
class SklearnTreeInitializer(Initializer):
    """
    Initialize tree structure and leaf node values by fitting a single Sklearn GBR tree
    Both tree structure and leaf node parameters are copied across
    """

    def __init__(self,
                 max_depth: int = 4,
                 min_samples_split: int = 2,
                 loss: str = 'ls'):
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.loss = loss

    def initialize_tree(self,
                        tree: Tree) -> None:
        params = {
            'n_estimators': 1,
            'max_depth': self.max_depth,
            'min_samples_split': self.min_samples_split,
            'learning_rate': 0.8,
            'loss': self.loss
        }
        clf = GradientBoostingRegressor(**params)
        fit = clf.fit(tree.nodes[0].data.X.data, tree.nodes[0].data.y.data)
        sklearn_tree = fit.estimators_[0][0].tree_
        map_sklearn_tree_into_bartpy(tree, sklearn_tree)
def map_sklearn_split_into_bartpy_split_conditions(sklearn_tree, index: int) -> Tuple[SplitCondition, SplitCondition]:
    """
    Convert how a split is stored in sklearn's gradient boosted trees library to the bartpy representation

    Parameters
    ----------
    sklearn_tree: The full tree object
    index: The index of the node in the tree object

    Returns
    -------
    Tuple[SplitCondition, SplitCondition]
        The left (x[feature] <= threshold) and right (x[feature] > threshold) conditions of the split
    """
    return (
        SplitCondition(sklearn_tree.feature[index], sklearn_tree.threshold[index], le),
        SplitCondition(sklearn_tree.feature[index], sklearn_tree.threshold[index], gt)
    )
def map_sklearn_tree_into_bartpy(bartpy_tree: Tree, sklearn_tree):
    nodes = [None for x in sklearn_tree.children_left]
    nodes[0] = bartpy_tree.nodes[0]

    def search(index: int = 0):
        left_child_index, right_child_index = sklearn_tree.children_left[index], sklearn_tree.children_right[index]
        if left_child_index == -1:  # Trees are binary splits, so only need to check left tree
            return

        searched_node: LeafNode = nodes[index]
        split_conditions = map_sklearn_split_into_bartpy_split_conditions(sklearn_tree, index)
        decision_node = split_node(searched_node, split_conditions)

        left_child: LeafNode = decision_node.left_child
        right_child: LeafNode = decision_node.right_child
        left_child.set_value(sklearn_tree.value[left_child_index][0][0])
        right_child.set_value(sklearn_tree.value[right_child_index][0][0])

        mutation = GrowMutation(searched_node, decision_node)
        mutate(bartpy_tree, mutation)

        nodes[index] = decision_node
        nodes[left_child_index] = decision_node.left_child
        nodes[right_child_index] = decision_node.right_child

        search(left_child_index)
        search(right_child_index)

    search()
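The heart of initialize_tree is the expression fit.estimators_[0][0].tree_: for a GradientBoostingRegressor, estimators_ is a 2-D array of DecisionTreeRegressor objects (one column per output, so [0][0] is the single fitted tree) and .tree_ exposes sklearn's low-level array representation, which is then mapped into bartpy. A minimal sketch of that extraction on made-up data; the loss argument is omitted because recent sklearn versions replaced 'ls' with 'squared_error'.

import numpy as np
from sklearn.ensemble import GradientBoostingRegressor

# Made-up data purely for illustration
rng = np.random.default_rng(0)
X = rng.normal(size=(100, 3))
y = X[:, 0] + 0.1 * rng.normal(size=100)

# Mirror the settings used by SklearnTreeInitializer.initialize_tree
clf = GradientBoostingRegressor(n_estimators=1, max_depth=4,
                                min_samples_split=2, learning_rate=0.8)
fit = clf.fit(X, y)

single_tree = fit.estimators_[0][0].tree_   # the one DecisionTreeRegressor's low-level tree
print(single_tree.node_count)               # number of nodes in the fitted tree
print(single_tree.children_left[:5])        # -1 marks leaf nodes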
Functions
def map_sklearn_split_into_bartpy_split_conditions(sklearn_tree, index: int) -> Tuple[SplitCondition, SplitCondition]
Convert how a split is stored in sklearn's gradient boosted trees library to the bartpy representation

Parameters
sklearn_tree: The full tree object
index: The index of the node in the tree object

Returns
Tuple[SplitCondition, SplitCondition]: the left (x[feature] <= threshold) and right (x[feature] > threshold) conditions of the split
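As an illustration of the arrays this function reads, the sketch below fits a depth-1 sklearn tree and builds the split-condition pair for its root node; the data is made up, but the import path is the module documented above.

import numpy as np
from sklearn.tree import DecisionTreeRegressor
from imodels.experimental.bartpy.initializers.sklearntreeinitializer import map_sklearn_split_into_bartpy_split_conditions

# Made-up data: a single split at x <= 1.5 separates the two groups
X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0.0, 0.0, 1.0, 1.0])
sk_tree = DecisionTreeRegressor(max_depth=1).fit(X, y).tree_

print(sk_tree.feature[0], sk_tree.threshold[0])  # feature index and threshold of the root split

# Left condition uses operator.le (x[feature] <= threshold),
# right condition uses operator.gt (x[feature] > threshold)
left_condition, right_condition = map_sklearn_split_into_bartpy_split_conditions(sk_tree, 0)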
def map_sklearn_tree_into_bartpy(bartpy_tree: Tree, sklearn_tree)
Recursively copy the structure and leaf node values of a fitted sklearn tree onto the given bartpy tree by growing it with the corresponding split conditions
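map_sklearn_tree_into_bartpy walks sklearn's flat array representation: children_left and children_right give the child indices of each node (-1 for leaves), and value[index][0][0] is a leaf's predicted value. The same traversal on a plain sklearn tree, without any bartpy objects, looks roughly like this (illustration only):

import numpy as np
from sklearn.tree import DecisionTreeRegressor

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 2))
y = X[:, 0] + X[:, 1]
sk_tree = DecisionTreeRegressor(max_depth=2).fit(X, y).tree_

def walk(index: int = 0, depth: int = 0):
    left, right = sk_tree.children_left[index], sk_tree.children_right[index]
    if left == -1:  # sklearn marks leaves with -1 in both child arrays
        print("  " * depth + f"leaf, value={sk_tree.value[index][0][0]:.3f}")
        return
    print("  " * depth + f"split on feature {sk_tree.feature[index]} at {sk_tree.threshold[index]:.3f}")
    walk(left, depth + 1)
    walk(right, depth + 1)

walk()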
Classes
class SklearnTreeInitializer (max_depth: int = 4, min_samples_split: int = 2, loss: str = 'ls')
Initialize tree structure and leaf node values by fitting a single Sklearn GBR tree
Both tree structure and leaf node parameters are copied across
Ancestors
imodels.experimental.bartpy.initializers.initializer.Initializer
Methods
def initialize_tree(self, tree: Tree) -> None
Fit a single-tree GradientBoostingRegressor to the data held at the tree's root node and copy its structure and leaf values into the given bartpy tree
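A minimal usage sketch of the class is below. The SklearnModel import path and its initializer keyword argument mirror upstream bartpy and are assumptions here, not guaranteed by this module; check the port's sklearnmodel module for the exact signature.

import numpy as np
from imodels.experimental.bartpy.initializers.sklearntreeinitializer import SklearnTreeInitializer
# Assumed import path and keyword, mirroring upstream bartpy
from imodels.experimental.bartpy.sklearnmodel import SklearnModel

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 3))
y = X[:, 0] - 2 * X[:, 1] + 0.1 * rng.normal(size=200)

# Warm-start every BART tree from a shallow sklearn GBR tree instead of a single root leaf
model = SklearnModel(n_trees=20, initializer=SklearnTreeInitializer(max_depth=3))
model.fit(X, y)
preds = model.predict(X)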