contextual is an R package that facilitates the simulation and analysis of Contextual Multi-Armed Bandit (CMAB) policies and algorithms.
# Install the development version of 'contextual' from GitHub.
# This requires the 'devtools' package; install it first if needed:
# install.packages("devtools")
devtools::install_github("Nth-iteration-labs/contextual")
If you encounter a clear bug, please file a minimal reproducible example on GitHub.
Running a simulation to compare basic Multi-Armed Bandit policies:
library(contextual)

# Simulation size: steps per run and number of repeated runs.
n_steps   <- 100L
n_repeats <- 1000L

# Expected reward per arm of a three-armed Bernoulli bandit.
arm_weights <- c(0.9, 0.1, 0.1)
bandit      <- SyntheticBandit$new(data = arm_weights)

# One agent per policy, all playing the same bandit instance.
agents <- list(
  Agent$new(EpsilonGreedyPolicy$new(0.1, "\U190-greedy"), bandit),
  Agent$new(RandomPolicy$new("Random"), bandit),
  Agent$new(OraclePolicy$new("Oracle"), bandit),
  Agent$new(ThompsonSamplingPolicy$new(1.0, 1.0, "TS"), bandit),
  Agent$new(Exp3Policy$new(0.1, "Exp3"), bandit),
  Agent$new(LinUCBPolicy$new(1.0, "LinUCB"), bandit)
)

# Run the simulation and plot cumulative results per policy in a grid.
simulator <- Simulator$new(agents, n_steps, n_repeats)
history   <- simulator$run()
plot(history, type = "grid")
Running a simulation to compare a contextual (LinUCB) and some non-contextual Multi-Armed Bandit policies:
library(contextual)

# Simulation size: steps per run and number of repeated runs.
horizon     <- 100L
simulations <- 5000L

# Reward weight matrix: k = 3 arms (columns) by d = 3 contextual
# features (rows).
#                     k1   k2   k3
weights <- matrix(c( 0.9, 0.3, 0.2,    # d1
                     0.5, 0.6, 0.2,    # d2
                     0.2, 0.1, 0.5 ),  # d3 -> d features in context
                  nrow = 3, ncol = 3)

bandit <- SyntheticBandit$new(data = weights)

# One agent per policy, all playing the same bandit instance.
agents <- list(
  Agent$new(EpsilonGreedyPolicy$new(0.1, "\U190-greedy"), bandit),
  Agent$new(LinUCBPolicy$new(1.0, "LinUCB"), bandit),
  Agent$new(OraclePolicy$new("Oracle"), bandit),
  Agent$new(RandomPolicy$new("Random"), bandit),
  # Fixed: R6 classes must be instantiated with $new(); calling the
  # generator directly (ThompsonSamplingPolicy(...)) errors at runtime.
  Agent$new(ThompsonSamplingPolicy$new(1.0, 1.0, "TSampling"), bandit)
)

# Run the simulation and plot cumulative results per policy in a grid.
simulation <- Simulator$new(agents, horizon, simulations)
history <- simulation$run()
plot(history, type = "grid")