Loss from Variable Dropout

variable_dropout(explainer, loss_function = function(observed, predicted)
  sum((observed - predicted)^2), ..., type = "raw", n_sample = 1000)

Arguments

explainer

a model to be explained, preprocessed by the 'explain' function

loss_function

a function thet will be used to assess variable importance

...

other parameters

type

character, type of transformation that should be applied for dropout loss. 'raw' results raw drop lossess, 'ratio' returns drop_loss/drop_loss_full_model while 'difference' returns drop_loss - drop_loss_full_model

n_sample

number of observations that should be sampled for calculations of variable leverages

Value

An object of the class 'variable_leverage_explainer'. It's a data frame with calculated average response.

Examples

library("randomForest") HR_rf_model <- randomForest(left~., data = breakDown::HR_data, ntree = 100)
#> Warning: The response has five or fewer unique values. Are you sure you want to do regression?
explainer_rf <- explain(HR_rf_model, data = HR_data, y = HR_data$left) vd_rf <- variable_dropout(explainer_rf, type = "raw") vd_rf
#> variable dropout_loss label #> 1 _full_model_ 3.518106 randomForest #> 2 left 3.518106 randomForest #> 3 promotion_last_5years 3.593906 randomForest #> 4 Work_accident 4.894278 randomForest #> 5 salary 5.492415 randomForest #> 6 sales 10.826912 randomForest #> 7 time_spend_company 76.792620 randomForest #> 8 average_montly_hours 93.316933 randomForest #> 9 last_evaluation 112.648191 randomForest #> 10 number_project 133.269477 randomForest #> 11 satisfaction_level 180.030137 randomForest #> 12 _baseline_ 357.524222 randomForest
HR_glm_model <- glm(left~., data = breakDown::HR_data, family = "binomial") explainer_glm <- explain(HR_glm_model, data = HR_data, y = HR_data$left) logit <- function(x) exp(x)/(1+exp(x)) vd_glm <- variable_dropout(explainer_glm, type = "raw", loss_function = function(observed, predicted) sum((observed - logit(predicted))^2)) vd_glm
#> variable dropout_loss label #> 1 _full_model_ 131.9339 lm #> 2 sales 131.2187 lm #> 3 promotion_last_5years 131.8669 lm #> 4 average_montly_hours 131.9053 lm #> 5 left 131.9339 lm #> 6 last_evaluation 132.1117 lm #> 7 time_spend_company 136.0778 lm #> 8 salary 143.5271 lm #> 9 number_project 143.7606 lm #> 10 Work_accident 146.3567 lm #> 11 satisfaction_level 200.0032 lm #> 12 _baseline_ 215.0226 lm
library("xgboost") model_martix_train <- model.matrix(left~.-1, breakDown::HR_data) data_train <- xgb.DMatrix(model_martix_train, label = breakDown::HR_data$left) param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2, objective = "binary:logistic", eval_metric = "auc") HR_xgb_model <- xgb.train(param, data_train, nrounds = 50) explainer_xgb <- explain(HR_xgb_model, data = model_martix_train, y = HR_data$left, label = "xgboost") vd_xgb <- variable_dropout(explainer_xgb, type = "raw") vd_xgb
#> variable dropout_loss label #> 1 _full_model_ 18.84525 xgboost #> 2 salesRandD 18.61675 xgboost #> 3 salessupport 18.78564 xgboost #> 4 salesaccounting 18.84525 xgboost #> 5 salesIT 18.84525 xgboost #> 6 salesmanagement 18.84525 xgboost #> 7 salesmarketing 18.84525 xgboost #> 8 salessales 18.85145 xgboost #> 9 saleshr 18.86079 xgboost #> 10 promotion_last_5years 18.89052 xgboost #> 11 salesproduct_mng 18.89147 xgboost #> 12 salestechnical 19.03387 xgboost #> 13 salarymedium 19.34897 xgboost #> 14 Work_accident 19.57376 xgboost #> 15 salarylow 20.08231 xgboost #> 16 number_project 41.27843 xgboost #> 17 last_evaluation 48.45253 xgboost #> 18 average_montly_hours 48.70331 xgboost #> 19 time_spend_company 78.97156 xgboost #> 20 satisfaction_level 150.05115 xgboost #> 21 _baseline_ 351.57701 xgboost