Module imodels.tree.shrunk_tree
Expand source code
from copy import deepcopy
from typing import List
import numpy as np
from sklearn import datasets
from sklearn.base import BaseEstimator
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import roc_auc_score, r2_score
from imodels.util import checks
class ShrunkTree(BaseEstimator):
"""Experimental ShrunkTree. Gets passed a sklearn tree or tree ensemble model.
"""
def __init__(self, estimator_: BaseEstimator, reg_param: float = 1, shrinkage_scheme_: str = 'node_based'):
"""
Params
------
reg_param: float
Higher is more regularization (can be arbitrarily large, should not be < 0)
shrinkage_scheme: str
Experimental: Used to experiment with different forms of shrinkage. options are:
(i) node_based shrinks based on number of samples in parent node
(ii) leaf_based only shrinks leaf nodes based on number of leaf samples
(iii) constant shrinks every node by a constant lambda
"""
super().__init__()
self.reg_param = reg_param
# print('est', estimator_)
self.estimator_ = estimator_
self.shrinkage_scheme_ = shrinkage_scheme_
self._init_prediction_task()
if checks.check_is_fitted(self.estimator_):
self.shrink()
def __init__prediction_task(self):
self.prediction_task = 'regression'
def fit(self, *args, **kwargs):
self.estimator_.fit(*args, **kwargs)
self.shrink()
def shrink_tree(self, tree, reg_param, i=0, parent_val=None, parent_num=None, cum_sum=0):
"""Shrink the tree
"""
if reg_param is None:
reg_param = 1.0
left = tree.children_left[i]
right = tree.children_right[i]
is_leaf = left == right
n_samples = tree.n_node_samples[i]
if self.prediction_task == 'regression':
val = tree.value[i][0, 0]
else:
if len(tree.value[i][0]) == 1:
val = tree.value[i][0, 0]
else:
val = tree.value[i][0, 1] / (tree.value[i][0, 0] + tree.value[i][0, 1]) # binary classification
# if root
if parent_val is None and parent_num is None:
if not is_leaf:
self.shrink_tree(tree, reg_param, left,
parent_val=val, parent_num=n_samples, cum_sum=val)
self.shrink_tree(tree, reg_param, right,
parent_val=val, parent_num=n_samples, cum_sum=val)
# if has parent
else:
if self.shrinkage_scheme_ == 'node_based':
val_new = (val - parent_val) / (1 + reg_param / parent_num)
elif self.shrinkage_scheme_ == 'constant':
val_new = (val - parent_val) / (1 + reg_param)
else:
val_new = val
cum_sum += val_new
if is_leaf:
if self.prediction_task == 'regression':
if self.shrinkage_scheme_ == 'node_based' or self.shrinkage_scheme_ == 'constant':
tree.value[i, 0, 0] = cum_sum
else:
# tree.value[i, 0, 0] = cum_sum/(1 + reg_param/n_samples)
tree.value[i, 0, 0] = tree.value[0][0, 0] + (val - tree.value[0][0, 0]) / (
1 + reg_param / n_samples)
else:
if len(tree.value[i][0]) == 1:
if self.shrinkage_scheme_ == 'node_based' or self.shrinkage_scheme_ == 'constant':
tree.value[i, 0, 0,] = cum_sum
else:
tree.value[i, 0, 0,] = tree.value[0][0, 0] + (val - tree.value[0][0, 0]) / (
1 + reg_param / n_samples)
else:
if self.shrinkage_scheme_ == 'node_based' or self.shrinkage_scheme_ == 'constant':
tree.value[i, 0, 1] = cum_sum
tree.value[i, 0, 0] = 1.0 - cum_sum
else:
root_prediction = tree.value[0][0, 1] / (tree.value[0][0, 0] + tree.value[0][0, 1])
tree.value[i, 0, 1] = root_prediction + (val - root_prediction) / (
1 + reg_param / n_samples)
tree.value[i, 0, 0] = 1.0 - tree.value[i, 0, 1]
else:
if self.prediction_task == 'regression':
tree.value[i][0, 0] = parent_val + val_new
else:
if len(tree.value[i][0]) == 1:
tree.value[i][0, 0] = parent_val + val_new
else:
tree.value[i][0, 1] = parent_val + val_new
tree.value[i][0, 0] = 1.0 - parent_val + val_new
self.shrink_tree(tree, reg_param, left,
parent_val=val, parent_num=n_samples, cum_sum=cum_sum)
self.shrink_tree(tree, reg_param, right,
parent_val=val, parent_num=n_samples, cum_sum=cum_sum)
# edit the non-leaf nodes for later visualization (doesn't effect predictions)
# pass # not sure exactly what to put here
return tree
def shrink(self):
if hasattr(self.estimator_, 'tree_'):
self.shrink_tree(self.estimator_.tree_, self.reg_param)
elif hasattr(self.estimator_, 'estimators_'):
for t in self.estimator_.estimators_:
if isinstance(t, np.ndarray):
assert t.size == 1, 'multiple trees stored under tree_?'
t = t[0]
self.shrink_tree(t.tree_, self.reg_param)
def predict(self, *args, **kwargs):
return self.estimator_.predict(*args, **kwargs)
def predict_proba(self, *args, **kwargs):
if hasattr(self.estimator_, 'predict_proba'):
return self.estimator_.predict_proba(*args, **kwargs)
else:
return NotImplemented
def score(self, *args, **kwargs):
if hasattr(self.estimator_, 'score'):
return self.estimator_.score(*args, **kwargs)
else:
return NotImplemented
class ShrunkTreeRegressor(ShrunkTree):
def _init_prediction_task(self):
self.prediction_task = 'regression'
class ShrunkTreeClassifier(ShrunkTree):
def _init_prediction_task(self):
self.prediction_task = 'classification'
class ShrunkTreeClassifierCV(ShrunkTreeClassifier):
def __init__(self, estimator_: BaseEstimator,
reg_param_list: List[float] = [0.1, 1, 10, 50, 100, 500], shrinkage_scheme_: str = 'node_based',
cv: int = 3, scoring=None, *args, **kwargs):
"""Note: args, kwargs are not used but left so that imodels-experiments can still pass redundant args
"""
super().__init__(estimator_, reg_param=None)
self.reg_param_list = np.array(reg_param_list)
self.cv = cv
self.scoring = scoring
self.shrinkage_scheme_ = shrinkage_scheme_
# print('estimator', self.estimator_,
# 'checks.check_is_fitted(estimator)', checks.check_is_fitted(self.estimator_))
# if checks.check_is_fitted(self.estimator_):
# raise Warning('Passed an already fitted estimator,'
# 'but shrinking not applied until fit method is called.')
def fit(self, X, y, *args, **kwargs):
self.scores_ = []
for reg_param in self.reg_param_list:
est = ShrunkTreeClassifier(deepcopy(self.estimator_), reg_param)
cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring)
self.scores_.append(np.mean(cv_scores))
self.reg_param = self.reg_param_list[np.argmax(self.scores_)]
super().fit(X=X, y=y)
class ShrunkTreeRegressorCV(ShrunkTreeRegressor):
def __init__(self, estimator_: BaseEstimator,
reg_param_list: List[float] = [0.1, 1, 10, 50, 100, 500], shrinkage_scheme_: str = 'node_based',
cv: int = 3, scoring=None, *args, **kwargs):
"""Note: args, kwargs are not used but left so that imodels-experiments can still pass redundant args
"""
super().__init__(estimator_, reg_param=None)
self.reg_param_list = np.array(reg_param_list)
self.cv = cv
self.scoring = scoring
self.shrinkage_scheme_ = shrinkage_scheme_
# print('estimator', self.estimator_,
# 'checks.check_is_fitted(estimator)', checks.check_is_fitted(self.estimator_))
# if checks.check_is_fitted(self.estimator_):
# raise Warning('Passed an already fitted estimator,'
# 'but shrinking not applied until fit method is called.')
def fit(self, X, y):
self.scores_ = []
for reg_param in self.reg_param_list:
est = ShrunkTreeRegressor(deepcopy(self.estimator_), reg_param)
cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring)
self.scores_.append(np.mean(cv_scores))
self.reg_param = self.reg_param_list[np.argmax(self.scores_)]
super().fit(X=X, y=y)
if __name__ == '__main__':
np.random.seed(15)
# X, y = datasets.fetch_california_housing(return_X_y=True) # regression
# X, y = datasets.load_breast_cancer(return_X_y=True) # binary classification
X, y = datasets.load_diabetes(return_X_y=True) # regression
# X = np.random.randn(500, 10)
# y = (X[:, 0] > 0).astype(float) + (X[:, 1] > 1).astype(float)
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.33, random_state=10
)
print('X.shape', X.shape)
print('ys', np.unique(y_train))
# m = ShrunkTree(estimator_=DecisionTreeClassifier(), reg_param=0.1)
# m = DecisionTreeClassifier(max_leaf_nodes = 20,random_state=1, max_features=None)
m = DecisionTreeRegressor(random_state=42, max_leaf_nodes=20)
# print('best alpha', m.reg_param)
m.fit(X_train, y_train)
# m.predict_proba(X_train) # just run this
print('score', r2_score(y_test, m.predict(X_test)))
print('running again....')
# x = DecisionTreeRegressor(random_state = 42, ccp_alpha = 0.3)
# x.fit(X_train,y_train)
# m = ShrunkTree(estimator_=DecisionTreeRegressor(random_state=42, max_features=None), reg_param=10)
# m = ShrunkTree(estimator_=DecisionTreeClassifier(random_state=42, max_features=None), reg_param=0)
m = ShrunkTreeClassifierCV(estimator_=DecisionTreeRegressor(max_leaf_nodes=10, random_state=1),
shrinkage_scheme_='node_based',
reg_param_list=[0.1, 1, 2, 5, 10, 25, 50, 100, 500])
# m = ShrunkTreeCV(estimator_=DecisionTreeClassifier())
# m = ShrunkTreeClassifier(estimator_ = GradientBoostingClassifier(random_state = 10),reg_param = 5)
m.fit(X_train, y_train)
print('best alpha', m.reg_param)
# m.predict_proba(X_train) # just run this
# print('score', m.score(X_test, y_test))
print('score', r2_score(y_test, m.predict(X_test)))
Classes
class ShrunkTree (estimator_: sklearn.base.BaseEstimator, reg_param: float = 1, shrinkage_scheme_: str = 'node_based')
-
Experimental ShrunkTree. Gets passed a sklearn tree or tree ensemble model.
Params
reg_param: float Higher is more regularization (can be arbitrarily large, should not be < 0)
shrinkage_scheme: str Experimental: Used to experiment with different forms of shrinkage. options are: (i) node_based shrinks based on number of samples in parent node (ii) leaf_based only shrinks leaf nodes based on number of leaf samples (iii) constant shrinks every node by a constant lambda
Expand source code
class ShrunkTree(BaseEstimator): """Experimental ShrunkTree. Gets passed a sklearn tree or tree ensemble model. """ def __init__(self, estimator_: BaseEstimator, reg_param: float = 1, shrinkage_scheme_: str = 'node_based'): """ Params ------ reg_param: float Higher is more regularization (can be arbitrarily large, should not be < 0) shrinkage_scheme: str Experimental: Used to experiment with different forms of shrinkage. options are: (i) node_based shrinks based on number of samples in parent node (ii) leaf_based only shrinks leaf nodes based on number of leaf samples (iii) constant shrinks every node by a constant lambda """ super().__init__() self.reg_param = reg_param # print('est', estimator_) self.estimator_ = estimator_ self.shrinkage_scheme_ = shrinkage_scheme_ self._init_prediction_task() if checks.check_is_fitted(self.estimator_): self.shrink() def __init__prediction_task(self): self.prediction_task = 'regression' def fit(self, *args, **kwargs): self.estimator_.fit(*args, **kwargs) self.shrink() def shrink_tree(self, tree, reg_param, i=0, parent_val=None, parent_num=None, cum_sum=0): """Shrink the tree """ if reg_param is None: reg_param = 1.0 left = tree.children_left[i] right = tree.children_right[i] is_leaf = left == right n_samples = tree.n_node_samples[i] if self.prediction_task == 'regression': val = tree.value[i][0, 0] else: if len(tree.value[i][0]) == 1: val = tree.value[i][0, 0] else: val = tree.value[i][0, 1] / (tree.value[i][0, 0] + tree.value[i][0, 1]) # binary classification # if root if parent_val is None and parent_num is None: if not is_leaf: self.shrink_tree(tree, reg_param, left, parent_val=val, parent_num=n_samples, cum_sum=val) self.shrink_tree(tree, reg_param, right, parent_val=val, parent_num=n_samples, cum_sum=val) # if has parent else: if self.shrinkage_scheme_ == 'node_based': val_new = (val - parent_val) / (1 + reg_param / parent_num) elif self.shrinkage_scheme_ == 'constant': val_new = (val - parent_val) / (1 + reg_param) else: val_new = val cum_sum += val_new if is_leaf: if self.prediction_task == 'regression': if self.shrinkage_scheme_ == 'node_based' or self.shrinkage_scheme_ == 'constant': tree.value[i, 0, 0] = cum_sum else: # tree.value[i, 0, 0] = cum_sum/(1 + reg_param/n_samples) tree.value[i, 0, 0] = tree.value[0][0, 0] + (val - tree.value[0][0, 0]) / ( 1 + reg_param / n_samples) else: if len(tree.value[i][0]) == 1: if self.shrinkage_scheme_ == 'node_based' or self.shrinkage_scheme_ == 'constant': tree.value[i, 0, 0,] = cum_sum else: tree.value[i, 0, 0,] = tree.value[0][0, 0] + (val - tree.value[0][0, 0]) / ( 1 + reg_param / n_samples) else: if self.shrinkage_scheme_ == 'node_based' or self.shrinkage_scheme_ == 'constant': tree.value[i, 0, 1] = cum_sum tree.value[i, 0, 0] = 1.0 - cum_sum else: root_prediction = tree.value[0][0, 1] / (tree.value[0][0, 0] + tree.value[0][0, 1]) tree.value[i, 0, 1] = root_prediction + (val - root_prediction) / ( 1 + reg_param / n_samples) tree.value[i, 0, 0] = 1.0 - tree.value[i, 0, 1] else: if self.prediction_task == 'regression': tree.value[i][0, 0] = parent_val + val_new else: if len(tree.value[i][0]) == 1: tree.value[i][0, 0] = parent_val + val_new else: tree.value[i][0, 1] = parent_val + val_new tree.value[i][0, 0] = 1.0 - parent_val + val_new self.shrink_tree(tree, reg_param, left, parent_val=val, parent_num=n_samples, cum_sum=cum_sum) self.shrink_tree(tree, reg_param, right, parent_val=val, parent_num=n_samples, cum_sum=cum_sum) # edit the non-leaf nodes for later visualization (doesn't effect predictions) # pass # not sure exactly what to put here return tree def shrink(self): if hasattr(self.estimator_, 'tree_'): self.shrink_tree(self.estimator_.tree_, self.reg_param) elif hasattr(self.estimator_, 'estimators_'): for t in self.estimator_.estimators_: if isinstance(t, np.ndarray): assert t.size == 1, 'multiple trees stored under tree_?' t = t[0] self.shrink_tree(t.tree_, self.reg_param) def predict(self, *args, **kwargs): return self.estimator_.predict(*args, **kwargs) def predict_proba(self, *args, **kwargs): if hasattr(self.estimator_, 'predict_proba'): return self.estimator_.predict_proba(*args, **kwargs) else: return NotImplemented def score(self, *args, **kwargs): if hasattr(self.estimator_, 'score'): return self.estimator_.score(*args, **kwargs) else: return NotImplemented
Ancestors
- sklearn.base.BaseEstimator
Subclasses
Methods
def fit(self, *args, **kwargs)
-
Expand source code
def fit(self, *args, **kwargs): self.estimator_.fit(*args, **kwargs) self.shrink()
def predict(self, *args, **kwargs)
-
Expand source code
def predict(self, *args, **kwargs): return self.estimator_.predict(*args, **kwargs)
def predict_proba(self, *args, **kwargs)
-
Expand source code
def predict_proba(self, *args, **kwargs): if hasattr(self.estimator_, 'predict_proba'): return self.estimator_.predict_proba(*args, **kwargs) else: return NotImplemented
def score(self, *args, **kwargs)
-
Expand source code
def score(self, *args, **kwargs): if hasattr(self.estimator_, 'score'): return self.estimator_.score(*args, **kwargs) else: return NotImplemented
def shrink(self)
-
Expand source code
def shrink(self): if hasattr(self.estimator_, 'tree_'): self.shrink_tree(self.estimator_.tree_, self.reg_param) elif hasattr(self.estimator_, 'estimators_'): for t in self.estimator_.estimators_: if isinstance(t, np.ndarray): assert t.size == 1, 'multiple trees stored under tree_?' t = t[0] self.shrink_tree(t.tree_, self.reg_param)
def shrink_tree(self, tree, reg_param, i=0, parent_val=None, parent_num=None, cum_sum=0)
-
Shrink the tree
Expand source code
def shrink_tree(self, tree, reg_param, i=0, parent_val=None, parent_num=None, cum_sum=0): """Shrink the tree """ if reg_param is None: reg_param = 1.0 left = tree.children_left[i] right = tree.children_right[i] is_leaf = left == right n_samples = tree.n_node_samples[i] if self.prediction_task == 'regression': val = tree.value[i][0, 0] else: if len(tree.value[i][0]) == 1: val = tree.value[i][0, 0] else: val = tree.value[i][0, 1] / (tree.value[i][0, 0] + tree.value[i][0, 1]) # binary classification # if root if parent_val is None and parent_num is None: if not is_leaf: self.shrink_tree(tree, reg_param, left, parent_val=val, parent_num=n_samples, cum_sum=val) self.shrink_tree(tree, reg_param, right, parent_val=val, parent_num=n_samples, cum_sum=val) # if has parent else: if self.shrinkage_scheme_ == 'node_based': val_new = (val - parent_val) / (1 + reg_param / parent_num) elif self.shrinkage_scheme_ == 'constant': val_new = (val - parent_val) / (1 + reg_param) else: val_new = val cum_sum += val_new if is_leaf: if self.prediction_task == 'regression': if self.shrinkage_scheme_ == 'node_based' or self.shrinkage_scheme_ == 'constant': tree.value[i, 0, 0] = cum_sum else: # tree.value[i, 0, 0] = cum_sum/(1 + reg_param/n_samples) tree.value[i, 0, 0] = tree.value[0][0, 0] + (val - tree.value[0][0, 0]) / ( 1 + reg_param / n_samples) else: if len(tree.value[i][0]) == 1: if self.shrinkage_scheme_ == 'node_based' or self.shrinkage_scheme_ == 'constant': tree.value[i, 0, 0,] = cum_sum else: tree.value[i, 0, 0,] = tree.value[0][0, 0] + (val - tree.value[0][0, 0]) / ( 1 + reg_param / n_samples) else: if self.shrinkage_scheme_ == 'node_based' or self.shrinkage_scheme_ == 'constant': tree.value[i, 0, 1] = cum_sum tree.value[i, 0, 0] = 1.0 - cum_sum else: root_prediction = tree.value[0][0, 1] / (tree.value[0][0, 0] + tree.value[0][0, 1]) tree.value[i, 0, 1] = root_prediction + (val - root_prediction) / ( 1 + reg_param / n_samples) tree.value[i, 0, 0] = 1.0 - tree.value[i, 0, 1] else: if self.prediction_task == 'regression': tree.value[i][0, 0] = parent_val + val_new else: if len(tree.value[i][0]) == 1: tree.value[i][0, 0] = parent_val + val_new else: tree.value[i][0, 1] = parent_val + val_new tree.value[i][0, 0] = 1.0 - parent_val + val_new self.shrink_tree(tree, reg_param, left, parent_val=val, parent_num=n_samples, cum_sum=cum_sum) self.shrink_tree(tree, reg_param, right, parent_val=val, parent_num=n_samples, cum_sum=cum_sum) # edit the non-leaf nodes for later visualization (doesn't effect predictions) # pass # not sure exactly what to put here return tree
class ShrunkTreeClassifier (estimator_: sklearn.base.BaseEstimator, reg_param: float = 1, shrinkage_scheme_: str = 'node_based')
-
Experimental ShrunkTree. Gets passed a sklearn tree or tree ensemble model.
Params
reg_param: float Higher is more regularization (can be arbitrarily large, should not be < 0)
shrinkage_scheme: str Experimental: Used to experiment with different forms of shrinkage. options are: (i) node_based shrinks based on number of samples in parent node (ii) leaf_based only shrinks leaf nodes based on number of leaf samples (iii) constant shrinks every node by a constant lambda
Expand source code
class ShrunkTreeClassifier(ShrunkTree): def _init_prediction_task(self): self.prediction_task = 'classification'
Ancestors
- ShrunkTree
- sklearn.base.BaseEstimator
Subclasses
Inherited members
class ShrunkTreeClassifierCV (estimator_: sklearn.base.BaseEstimator, reg_param_list: List[float] = [0.1, 1, 10, 50, 100, 500], shrinkage_scheme_: str = 'node_based', cv: int = 3, scoring=None, *args, **kwargs)
-
Experimental ShrunkTree. Gets passed a sklearn tree or tree ensemble model.
Note: args, kwargs are not used but left so that imodels-experiments can still pass redundant args
Expand source code
class ShrunkTreeClassifierCV(ShrunkTreeClassifier): def __init__(self, estimator_: BaseEstimator, reg_param_list: List[float] = [0.1, 1, 10, 50, 100, 500], shrinkage_scheme_: str = 'node_based', cv: int = 3, scoring=None, *args, **kwargs): """Note: args, kwargs are not used but left so that imodels-experiments can still pass redundant args """ super().__init__(estimator_, reg_param=None) self.reg_param_list = np.array(reg_param_list) self.cv = cv self.scoring = scoring self.shrinkage_scheme_ = shrinkage_scheme_ # print('estimator', self.estimator_, # 'checks.check_is_fitted(estimator)', checks.check_is_fitted(self.estimator_)) # if checks.check_is_fitted(self.estimator_): # raise Warning('Passed an already fitted estimator,' # 'but shrinking not applied until fit method is called.') def fit(self, X, y, *args, **kwargs): self.scores_ = [] for reg_param in self.reg_param_list: est = ShrunkTreeClassifier(deepcopy(self.estimator_), reg_param) cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring) self.scores_.append(np.mean(cv_scores)) self.reg_param = self.reg_param_list[np.argmax(self.scores_)] super().fit(X=X, y=y)
Ancestors
- ShrunkTreeClassifier
- ShrunkTree
- sklearn.base.BaseEstimator
Methods
def fit(self, X, y, *args, **kwargs)
-
Expand source code
def fit(self, X, y, *args, **kwargs): self.scores_ = [] for reg_param in self.reg_param_list: est = ShrunkTreeClassifier(deepcopy(self.estimator_), reg_param) cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring) self.scores_.append(np.mean(cv_scores)) self.reg_param = self.reg_param_list[np.argmax(self.scores_)] super().fit(X=X, y=y)
Inherited members
class ShrunkTreeRegressor (estimator_: sklearn.base.BaseEstimator, reg_param: float = 1, shrinkage_scheme_: str = 'node_based')
-
Experimental ShrunkTree. Gets passed a sklearn tree or tree ensemble model.
Params
reg_param: float Higher is more regularization (can be arbitrarily large, should not be < 0)
shrinkage_scheme: str Experimental: Used to experiment with different forms of shrinkage. options are: (i) node_based shrinks based on number of samples in parent node (ii) leaf_based only shrinks leaf nodes based on number of leaf samples (iii) constant shrinks every node by a constant lambda
Expand source code
class ShrunkTreeRegressor(ShrunkTree): def _init_prediction_task(self): self.prediction_task = 'regression'
Ancestors
- ShrunkTree
- sklearn.base.BaseEstimator
Subclasses
Inherited members
class ShrunkTreeRegressorCV (estimator_: sklearn.base.BaseEstimator, reg_param_list: List[float] = [0.1, 1, 10, 50, 100, 500], shrinkage_scheme_: str = 'node_based', cv: int = 3, scoring=None, *args, **kwargs)
-
Experimental ShrunkTree. Gets passed a sklearn tree or tree ensemble model.
Note: args, kwargs are not used but left so that imodels-experiments can still pass redundant args
Expand source code
class ShrunkTreeRegressorCV(ShrunkTreeRegressor): def __init__(self, estimator_: BaseEstimator, reg_param_list: List[float] = [0.1, 1, 10, 50, 100, 500], shrinkage_scheme_: str = 'node_based', cv: int = 3, scoring=None, *args, **kwargs): """Note: args, kwargs are not used but left so that imodels-experiments can still pass redundant args """ super().__init__(estimator_, reg_param=None) self.reg_param_list = np.array(reg_param_list) self.cv = cv self.scoring = scoring self.shrinkage_scheme_ = shrinkage_scheme_ # print('estimator', self.estimator_, # 'checks.check_is_fitted(estimator)', checks.check_is_fitted(self.estimator_)) # if checks.check_is_fitted(self.estimator_): # raise Warning('Passed an already fitted estimator,' # 'but shrinking not applied until fit method is called.') def fit(self, X, y): self.scores_ = [] for reg_param in self.reg_param_list: est = ShrunkTreeRegressor(deepcopy(self.estimator_), reg_param) cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring) self.scores_.append(np.mean(cv_scores)) self.reg_param = self.reg_param_list[np.argmax(self.scores_)] super().fit(X=X, y=y)
Ancestors
- ShrunkTreeRegressor
- ShrunkTree
- sklearn.base.BaseEstimator
Methods
def fit(self, X, y)
-
Expand source code
def fit(self, X, y): self.scores_ = [] for reg_param in self.reg_param_list: est = ShrunkTreeRegressor(deepcopy(self.estimator_), reg_param) cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring) self.scores_.append(np.mean(cv_scores)) self.reg_param = self.reg_param_list[np.argmax(self.scores_)] super().fit(X=X, y=y)
Inherited members