R/bandit_offline_replay_evaluator_lookup.R
OfflineLookupReplayEvaluatorBandit.Rd
Alternative interface for replay style bandit.
TODO: Needs to be documented more fully.
bandit <- OfflineLookupReplayEvaluatorBandit(offline_data, k, shared_lookup = NULL, unique_lookup = NULL, unique_col = NULL, unique = NULL, shared = NULL, randomize = TRUE)
offline_data
data.table; offline data source (required)
k
integer; number of arms (required)
d
integer; number of contextual features (required)
randomize
logical; randomize rows of data stream per simulation (optional, default: TRUE)
unique
integer vector; index of disjoint features (optional)
shared
integer vector; index of shared features (optional)
new(offline_data, k, shared_lookup = NULL, unique_lookup = NULL,
unique_col = NULL, unique = NULL, shared = NULL, randomize = TRUE)
generates
and instantializes a new OfflineLookupReplayEvaluatorBandit
instance.
get_context(t)
argument:
t
: integer, time step t
.
list
containing the current d x k
dimensional matrix context$X
,
the number of arms context$k
and the number of features context$d
.get_reward(t, context, action)
arguments:
t
: integer, time step t
.
context
: list, containing the current context$X
(d x k context matrix),
context$k
(number of arms) and context$d
(number of context features)
(as set by bandit
).
action
: list, containing action$choice
(as set by policy
).
list
containing reward$reward
and, where computable,
reward$optimal
(used by "oracle" policies and to calculate regret).post_initialization()
Randomize offline data by shuffling the offline data.table before the start of each individual simulation when self$randomize is TRUE (default)
Agrawal, R. (1995). The continuum-armed bandit problem. SIAM journal on control and optimization, 33(6), 1926-1951.
Core contextual classes: Bandit
, Policy
, Simulator
,
Agent
, History
, Plot
Bandit subclass examples: BasicBernoulliBandit
, ContextualLogitBandit
, OfflineLookupReplayEvaluatorBandit
Policy subclass examples: EpsilonGreedyPolicy
, ContextualLinTSPolicy
if (FALSE) { library(contextual) library(data.table) library(splitstackshape) library(RCurl) # Import MovieLens ml-10M # Info: https://d1ie9wlkzugsxr.cloudfront.net/data_movielens/ml-10M/README.html movies_dat <- "http://d1ie9wlkzugsxr.cloudfront.net/data_movielens/ml-10M/movies.dat" ratings_dat <- "http://d1ie9wlkzugsxr.cloudfront.net/data_movielens/ml-10M/ratings.dat" movies_dat <- readLines(movies_dat) movies_dat <- gsub( "::", "~", movies_dat ) movies_dat <- paste0(movies_dat, collapse = "\n") movies_dat <- fread(movies_dat, sep = "~", quote="") setnames(movies_dat, c("V1", "V2", "V3"), c("MovieID", "Name", "Type")) movies_dat <- splitstackshape::cSplit_e(movies_dat, "Type", sep = "|", type = "character", fill = 0, drop = TRUE) movies_dat[[3]] <- NULL ratings_dat <- RCurl::getURL(ratings_dat) ratings_dat <- readLines(textConnection(ratings_dat)) ratings_dat <- gsub( "::", "~", ratings_dat ) ratings_dat <- paste0(ratings_dat, collapse = "\n") ratings_dat <- fread(ratings_dat, sep = "~", quote="") setnames(ratings_dat, c("V1", "V2", "V3", "V4"), c("UserID", "MovieID", "Rating", "Timestamp")) all_movies <- ratings_dat[movies_dat, on=c(MovieID = "MovieID")] all_movies <- na.omit(all_movies,cols=c("MovieID", "UserID")) rm(movies_dat,ratings_dat) all_movies[, UserID := as.numeric(as.factor(UserID))] count_movies <- all_movies[,.(MovieCount = .N), by = MovieID] top_50 <- as.vector(count_movies[order(-MovieCount)][1:50]$MovieID) not_50 <- as.vector(count_movies[order(-MovieCount)][51:nrow(count_movies)]$MovieID) top_50_movies <- all_movies[MovieID %in% top_50] # Create feature lookup tables - to speed up, MovieID and UserID are # ordered and lined up with the (dt/matrix) default index. # Arm features # MovieID of top 50 ordered from 1 to N: top_50_movies[, MovieID := as.numeric(as.factor(MovieID))] arm_features <- top_50_movies[,head(.SD, 1),by = MovieID][,c(1,6:24)] setorder(arm_features,MovieID) # User features # Count of categories for non-top-50 movies normalized per user user_features <- all_movies[MovieID %in% not_50] user_features[, c("MovieID", "Rating", "Timestamp", "Name"):=NULL] user_features <- user_features[, lapply(.SD, sum, na.rm=TRUE), by=UserID ] user_features[, total := rowSums(.SD, na.rm = TRUE), .SDcols = 2:20] user_features[, 2:20 := lapply(.SD, function(x) x/total), .SDcols = 2:20] user_features$total <- NULL # Add users that were not in the set of non-top-50 movies (4 in 10m dataset) all_users <- as.data.table(unique(all_movies$UserID)) user_features <- user_features[all_users, on=c(UserID = "V1")] user_features[is.na(user_features)] <- 0 setorder(user_features,UserID) rm(all_movies, not_50, top_50, count_movies) # Contextual format top_50_movies[, t := .I] top_50_movies[, sim := 1] top_50_movies[, agent := "Offline"] top_50_movies[, choice := MovieID] top_50_movies[, reward := ifelse(Rating <= 4, 0, 1)] setorder(top_50_movies,Timestamp,Name) # Run simulation simulations <- 1 horizon <- nrow(top_50_movies) bandit <- OfflineLookupReplayEvaluatorBandit$new(top_50_movies, k = 50, unique_col = "UserID", shared_lookup = arm_features, unique_lookup = user_features) agents <- list(Agent$new(ThompsonSamplingPolicy$new(), bandit, "Thompson"), Agent$new(UCB1Policy$new(), bandit, "UCB1"), Agent$new(RandomPolicy$new(), bandit, "Random"), Agent$new(LinUCBHybridOptimizedPolicy$new(0.9), bandit, "LinUCB Hyb 0.9"), Agent$new(LinUCBDisjointOptimizedPolicy$new(2.1), bandit, "LinUCB Dis 2.1")) simulation <- Simulator$new( agents = agents, simulations = simulations, horizon = horizon ) results <- simulation$run() plot(results, type = "cumulative", regret = FALSE, rate = TRUE, legend_position = "topleft") }