Module imodels.experimental.bartpy.model

Expand source code
from copy import deepcopy, copy
from typing import List, Generator, Optional

import numpy as np
import pandas as pd

from imodels.experimental.bartpy.data import Data
from imodels.experimental.bartpy.initializers.initializer import Initializer
from imodels.experimental.bartpy.initializers.sklearntreeinitializer import SklearnTreeInitializer
from imodels.experimental.bartpy.sigma import Sigma
from imodels.experimental.bartpy.split import Split
from imodels.experimental.bartpy.tree import Tree, LeafNode, deep_copy_tree


class Model:
    """A BART sum-of-trees model.

    The ensemble prediction is the sum of the predictions of all trees.
    Holds the (deep-copied) training data, the noise model ``sigma``, the
    list of trees, and the prior hyperparameters ``alpha``/``beta``/``k``.
    """

    def __init__(self,
                 data: Optional[Data],
                 sigma: Sigma,
                 trees: Optional[List[Tree]] = None,
                 n_trees: int = 50,
                 alpha: float = 0.95,
                 beta: float = 2.,
                 k: float = 2.,
                 initializer: Initializer = SklearnTreeInitializer(),
                 classification: bool = False):
        """
        Parameters
        ----------
        data: training data wrapper; may be None (e.g. when copying a model
            whose trees are supplied directly).
        sigma: noise standard-deviation model.
        trees: pre-built trees. When given, ``n_trees`` is ignored and the
            initializer is not run; the trees are adopted as-is.
        n_trees: number of single-leaf trees to create when ``trees`` is None.
        alpha, beta: tree-structure prior parameters.
        k: scaling of the leaf-value prior; see ``sigma_m``.
            (Annotation corrected from ``int`` to ``float`` to match the
            default value ``2.``.)
        initializer: strategy used to seed the initial tree structures;
            skipped if None.
        classification: if True, ``update_z_values`` performs latent-variable
            (probit-style) target updates.
        """
        self.data = deepcopy(data)
        self.alpha = float(alpha)
        self.beta = float(beta)
        self.k = k
        self._sigma = sigma
        # Cached sum-of-trees in-sample prediction, maintained incrementally
        # by refreshed_trees().
        self._prediction = None
        self._initializer = initializer
        self.classification = classification

        if trees is None:
            self.n_trees = n_trees
            self._trees = self.initialize_trees()
            if self._initializer is not None:
                self._initializer.initialize_trees(self.refreshed_trees())
        else:
            # Trees supplied directly (e.g. by deep_copy_model): adopt them.
            self.n_trees = len(trees)
            self._trees = trees

    def initialize_trees(self) -> List[Tree]:
        """Create ``n_trees`` single-leaf trees, each targeting y / n_trees."""
        trees = [Tree([LeafNode(Split(deepcopy(self.data)))]) for _ in range(self.n_trees)]
        for tree in trees:
            # BUG FIX: the original read
            #   tree.update_y(tree.update_y(self.data.y.values / self.n_trees))
            # passing update_y's return value (presumably None for an in-place
            # mutator) back into update_y, clobbering the targets just set.
            # A single call with the scaled targets is what is intended.
            tree.update_y(self.data.y.values / self.n_trees)
        return trees

    def residuals(self) -> np.ndarray:
        """Residuals of the in-sample prediction against the stored targets."""
        return self.data.y.values - self.predict()

    def unnormalized_residuals(self) -> np.ndarray:
        """Residuals expressed on the original (unnormalized) scale of y."""
        return self.data.y.unnormalized_y - self.data.y.unnormalize_y(self.predict())

    def predict(self, X: Optional[np.ndarray] = None) -> np.ndarray:
        """Sum-of-trees prediction: in-sample when X is None, else on X."""
        if X is not None:
            return self._out_of_sample_predict(X)
        return np.sum([tree.predict() for tree in self.trees], axis=0)

    def _out_of_sample_predict(self, X: np.ndarray) -> np.ndarray:
        """Predict on new covariates, accepting a DataFrame as a convenience."""
        # isinstance instead of `type(X) == pd.DataFrame` (idiomatic, and
        # also handles DataFrame subclasses).
        if isinstance(X, pd.DataFrame):
            X = X.values
        return np.sum([tree.predict(X) for tree in self.trees], axis=0)

    @property
    def trees(self) -> List[Tree]:
        """The list of trees making up the ensemble."""
        return self._trees

    def refreshed_trees(self) -> Generator[Tree, None, None]:
        """Yield each tree with its targets set to the current partial residuals.

        Before yielding a tree, its contribution is subtracted from the cached
        ensemble prediction and the tree's targets are set to what remains;
        after the caller mutates the tree, its (new) prediction is added back.
        """
        if self._prediction is None:
            self._prediction = self.predict()
        for tree in self._trees:
            self._prediction -= tree.predict()
            tree.update_y(self.data.y.values - self._prediction)
            yield tree
            self._prediction += tree.predict()

    def update_z_values(self, y):
        """Probit-style latent-variable update of the targets (classification only).

        Samples z ~ Normal(prediction, 1) and truncates it to be non-negative
        where y == 1 and non-positive where y == 0. No-op for regression.
        """
        if not self.classification:
            return
        z = np.random.normal(loc=self.predict(self.data.X.values))
        one_label = np.maximum(z[y == 1], 0)
        zero_label = np.minimum(z[y == 0], 0)
        z[y == 1] = one_label
        z[y == 0] = zero_label
        self.data.update_y(z)

    @property
    def sigma_m(self) -> float:
        """Prior standard deviation of a leaf value.

        Uses a wider (3 vs 0.5) numerator for classification; both shrink
        with the square root of the number of trees, scaled by k.
        """
        if self.classification:
            return 3 / (self.k * np.power(self.n_trees, 0.5))
        return 0.5 / (self.k * np.power(self.n_trees, 0.5))

    @property
    def sigma(self) -> Sigma:
        """The noise standard-deviation model."""
        return self._sigma


def deep_copy_model(model: Model) -> Model:
    """Return an independent copy of *model*: trees and sigma are deep-copied,
    the training data is not carried over (the copy is built with data=None)."""
    cloned_trees = [deep_copy_tree(tree) for tree in model.trees]
    return Model(None, deepcopy(model.sigma), cloned_trees)

Functions

def deep_copy_model(model: Model) -> Model
Expand source code
def deep_copy_model(model: Model) -> Model:
    copied_model = Model(None, deepcopy(model.sigma), [deep_copy_tree(tree) for tree in model.trees])
    return copied_model

Classes

class Model (data: Optional[Data], sigma: Sigma, trees: Optional[List[Tree]] = None, n_trees: int = 50, alpha: float = 0.95, beta: float = 2.0, k: int = 2.0, initializer: Initializer = <imodels.experimental.bartpy.initializers.sklearntreeinitializer.SklearnTreeInitializer object>, classification: bool = False)
Expand source code
class Model:

    def __init__(self,
                 data: Optional[Data],
                 sigma: Sigma,
                 trees: Optional[List[Tree]] = None,
                 n_trees: int = 50,
                 alpha: float = 0.95,
                 beta: float = 2.,
                 k: int = 2.,
                 initializer: Initializer = SklearnTreeInitializer(),
                 classification: bool = False):

        self.data = deepcopy(data)
        self.alpha = float(alpha)
        self.beta = float(beta)
        self.k = k
        self._sigma = sigma
        self._prediction = None
        self._initializer = initializer
        self.classification = classification

        if trees is None:
            self.n_trees = n_trees
            self._trees = self.initialize_trees()
            if self._initializer is not None:
                self._initializer.initialize_trees(self.refreshed_trees())
        else:
            self.n_trees = len(trees)
            self._trees = trees

    def initialize_trees(self) -> List[Tree]:
        trees = [Tree([LeafNode(Split(deepcopy(self.data)))]) for _ in range(self.n_trees)]
        for tree in trees:
            tree.update_y(tree.update_y(self.data.y.values / self.n_trees))
        return trees

    def residuals(self) -> np.ndarray:
        return self.data.y.values - self.predict()

    def unnormalized_residuals(self) -> np.ndarray:
        return self.data.y.unnormalized_y - self.data.y.unnormalize_y(self.predict())

    def predict(self, X: np.ndarray = None) -> np.ndarray:
        if X is not None:
            return self._out_of_sample_predict(X)
        return np.sum([tree.predict() for tree in self.trees], axis=0)

    def _out_of_sample_predict(self, X: np.ndarray) -> np.ndarray:
        if type(X) == pd.DataFrame:
            X: pd.DataFrame = X
            X = X.values
        return np.sum([tree.predict(X) for tree in self.trees], axis=0)

    @property
    def trees(self) -> List[Tree]:
        return self._trees

    def refreshed_trees(self) -> Generator[Tree, None, None]:
        if self._prediction is None:
            self._prediction = self.predict()
        for tree in self._trees:
            self._prediction -= tree.predict()
            tree.update_y(self.data.y.values - self._prediction)
            yield tree
            self._prediction += tree.predict()

    def update_z_values(self, y):
        if not self.classification:
            return
        z = np.random.normal(loc=self.predict(self.data.X.values))
        one_label = np.maximum(z[y == 1], 0)
        zero_label = np.minimum(z[y == 0], 0)
        z[y == 1] = one_label
        z[y == 0] = zero_label
        self.data.update_y(z)

    @property
    def sigma_m(self) -> float:
        if self.classification:
            return 3 / (self.k * np.power(self.n_trees, 0.5))
        return 0.5 / (self.k * np.power(self.n_trees, 0.5))

    @property
    def sigma(self) -> Sigma:
        return self._sigma

Instance variables

var sigma : Sigma
Expand source code
@property
def sigma(self) -> Sigma:
    return self._sigma
var sigma_m : float
Expand source code
@property
def sigma_m(self) -> float:
    if self.classification:
        return 3 / (self.k * np.power(self.n_trees, 0.5))
    return 0.5 / (self.k * np.power(self.n_trees, 0.5))
var trees : List[Tree]
Expand source code
@property
def trees(self) -> List[Tree]:
    return self._trees

Methods

def initialize_trees(self) ‑> List[Tree]
Expand source code
def initialize_trees(self) -> List[Tree]:
    trees = [Tree([LeafNode(Split(deepcopy(self.data)))]) for _ in range(self.n_trees)]
    for tree in trees:
        tree.update_y(tree.update_y(self.data.y.values / self.n_trees))
    return trees
def predict(self, X: numpy.ndarray = None) ‑> numpy.ndarray
Expand source code
def predict(self, X: np.ndarray = None) -> np.ndarray:
    if X is not None:
        return self._out_of_sample_predict(X)
    return np.sum([tree.predict() for tree in self.trees], axis=0)
def refreshed_trees(self) ‑> Generator[Tree, None, None]
Expand source code
def refreshed_trees(self) -> Generator[Tree, None, None]:
    if self._prediction is None:
        self._prediction = self.predict()
    for tree in self._trees:
        self._prediction -= tree.predict()
        tree.update_y(self.data.y.values - self._prediction)
        yield tree
        self._prediction += tree.predict()
def residuals(self) ‑> numpy.ndarray
Expand source code
def residuals(self) -> np.ndarray:
    return self.data.y.values - self.predict()
def unnormalized_residuals(self) ‑> numpy.ndarray
Expand source code
def unnormalized_residuals(self) -> np.ndarray:
    return self.data.y.unnormalized_y - self.data.y.unnormalize_y(self.predict())
def update_z_values(self, y)
Expand source code
def update_z_values(self, y):
    if not self.classification:
        return
    z = np.random.normal(loc=self.predict(self.data.X.values))
    one_label = np.maximum(z[y == 1], 0)
    zero_label = np.minimum(z[y == 0], 0)
    z[y == 1] = one_label
    z[y == 0] = zero_label
    self.data.update_y(z)