Using this setting
# Example data: fix the RNG seed so the sampled ids are reproducible.
set.seed(123)
# 10000 ids drawn with replacement from the letters "a".."e".
id <- sample(letters[1:5], 10000, replace = TRUE)
# Ids to look up, and for each one which occurrence (1-based) is wanted.
value <- c("a", "b", "c")
index <- c(150, 50, 500)
Index the positions of the id vector, then split those positions by id
# Group the positions 1..length(id) by the id found at each position.
index_by_id <- split(x = seq_along(id), f = id)
Match the values with their elements in index_by_id
# For each requested value, the position of its group inside index_by_id
# (NA when the value has no group).
value_idx <- match(x = value, table = names(index_by_id))
Choose the i-th element of each match
# From each matched group of positions, take the requested occurrence.
mapply(
  function(positions, nth) positions[nth],
  index_by_id[value_idx],
  index
)
And as a function:
# Position in `id` of the index-th occurrence of each element of `value`.
#
# id:    vector of ids to search.
# value: ids to look up.
# index: for each element of `value`, which occurrence (1-based) to return;
#        recycled against `value` by mapply().
#
# Returns a vector of positions into `id`, named by `value`. An entry is NA
# when the value does not occur in `id` or has fewer than `index` occurrences.
f1 <- function(id, value, index) {
  # Split the positions once, so each lookup is a cheap list access instead
  # of a fresh scan over `id`.
  positions_by_id <- split(seq_along(id), id)
  group <- match(value, names(positions_by_id))

  nth_position <- function(g, nth) {
    # A value absent from `id` has no group; return NA rather than NULL so
    # mapply() can still simplify the result to a vector.
    if (is.na(g)) {
      return(NA_integer_)
    }
    positions_by_id[[g]][nth]
  }

  out <- mapply(nth_position, group, index)
  names(out) <- as.character(value)
  out
}
It will be fast when value is long but contains only a few distinct levels; for example, with
# Baseline: for each (value, index) pair, scan all of `id` for the positions
# equal to the value and take the index-th one. Returns a vector named by
# `value`; an entry is NA when there is no such occurrence.
f0 <- function(id, value, index) {
  mapply(
    function(target, nth) which(id == target)[nth],
    value,
    index
  )
}

# data.table variant: join the ids against a lookup table of (id, index)
# pairs and extract one row number per lookup row.
# NOTE(review): relies on data.table being attached; no library() call is
# visible in this section.
viq <- function(id, value, index) {
  # One row per requested lookup: which id, and which occurrence of it.
  dti <- data.table(id = value, index = index)
  # by = .EACHI evaluates j once per row of dti; .I[index] presumably picks
  # the index-th matching row number of `id`, with `index` resolving to dti's
  # column - TODO confirm against the data.table documentation.
  data.table(id)[dti, .I[index], by = .EACHI, on = "id"]$V1
}
and
> value <- rep(value, 100)
> identical(f0(id, value, index), f1(id, value, index))
[1] TRUE
> all.equal(f0(id, value, index), viq(id, value, index),
+           check.attributes=FALSE)
[1] TRUE
> microbenchmark(f0(id, value, index), f1(id, value, index),
+                viq(id, value, index))
Unit: milliseconds
                  expr       min        lq      mean    median        uq
  f0(id, value, index) 53.166878 54.909566 56.917717 55.336116 56.503741
  f1(id, value, index)  1.682265  1.716843  1.883576  1.755070  1.831189
 viq(id, value, index)  4.304148  4.381708  4.667590  4.656087  4.757184
       max neval
 99.621742   100
  3.291769   100
  6.590130   100