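As far as I can tell, most of the running time is spent in split. With 200 groups of roughly 150 000 rows each, split takes about 50 of the 54 seconds.
The split step can be sped up considerably by doing it with data.table instead.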
# Split `test` by `letters`: each group's .SD is wrapped in a list so the
# grouped result carries a list column, which is then pulled out as V1.
s.test <- test[, .(list(.SD)), by = letters][["V1"]]
data.table + mapply:
# Benchmark: split `test` into per-group tables with data.table, then, for
# every pair of consecutive groups, count how many `numbers` of the first
# group do not occur in the second.
library(data.table)  # fail loudly if missing; require() would only return FALSE

set.seed(1L)
k <- 200L     # number of distinct group ids
n <- 150000L  # expected rows per group (n * k rows are sampled in total)
test <- data.frame(
  letters = sample(paste0("id", 1:k), n * k, TRUE),
  numbers = sample(1e6, n * k, TRUE),
  stringsAsFactors = FALSE
)
setDT(test)  # convert to a data.table by reference (no copy)

system.time({
  # One list element per group; each element is that group's .SD.
  s.test <- test[, list(list(.SD)), by = letters]$V1
  # `by` returns groups in order of first appearance, which is the same
  # order unique() reports, so the names line up with the list elements.
  setattr(s.test, "names", unique(test$letters))
  # Pair every group with its successor. The index ranges are derived from
  # the list itself instead of being hard-coded to 1:199 / 2:200.
  notIn <- mapply(
    function(x, y) sum(!s.test[[x]]$numbers %in% s.test[[y]]$numbers),
    x = head(names(s.test), -1L),
    y = tail(names(s.test), -1L)
  )
})
That is a ~7.5x speedup. Is that fast enough?