Summarize by group using a function that involves the next group

Suppose I have the following data:

# Reproducible toy data: 40 rows, the letters A-D cycling in order, each paired
# with an integer drawn at random (with replacement) from 1..50.
set.seed(1)
test <- data.frame(
  letters = rep(c("A", "B", "C", "D"), times = 10),
  numbers = sample(1:50, size = 40, replace = TRUE)
)

I want to know how many of the numbers whose letter is `A` are not in `B`, how many of the numbers whose letter is `B` are not in `C`, and so on.

I came up with a solution for this using the base functions `split` and `mapply`:

# Split the rows into one data frame per letter; the list names are the letters.
s.test <- split(test, test$letters)
# Pair every group with the group that follows it and count the rows of the
# current group whose number does not occur anywhere in the next group.
# The pairs are derived from the group names (all but the last vs. all but the
# first), so this works for any number of groups rather than exactly four.
# The result is named after the current group of each pair.
notIn <- mapply(
  function(x, y) sum(!s.test[[x]]$numbers %in% s.test[[y]]$numbers),
  x = head(names(s.test), -1L),
  y = tail(names(s.test), -1L)
)

Which gives:

> notIn
A B C 
9 7 7 

But I would also like to do this with `dplyr` or `data.table`. Is it possible?

+4
source share
2 answers

As far as I can tell, the bottleneck is `split`. With 200 groups of 150,000 observations each, `split` takes about 50 of the 54 seconds.

The `split` step can be done much faster with data.table, as follows.

## assumes `test` has already been converted to a data.table
## one list element per letter, each holding that group's .SD
s.test <- test[, list(list(.SD)), by = "letters"][["V1"]]

data.table + mapply:

## simulate the benchmark data: n * k rows, each labelled with one of the
## k ids "id1".."idk" drawn at random, plus a random number from 1..1e6
set.seed(1L)
k <- 200L
n <- 150000L
test <- data.frame(
    letters = sample(paste0("id", seq_len(k)), size = n * k, replace = TRUE),
    numbers = sample(1e6, size = n * k, replace = TRUE),
    stringsAsFactors = FALSE
)

## library() stops with an error if data.table is not installed, whereas
## require() would only return FALSE and let the script fail later.
library(data.table)   ## timings below were taken with v1.9.2
setDT(test)           ## convert to data.table by reference (no copy)
system.time({
    ## split: one list element per group, each holding that group's .SD
    s.test <- test[, list(list(.SD)), by = letters]$V1
    ## name the elements by reference; this relies on `by` returning the
    ## groups in order of first appearance, the same order unique() yields
    setattr(s.test, "names", unique(test$letters))
    ## pair each group with the one after it and count the current group's
    ## numbers that are absent from the next; the pairs come from the names,
    ## so no group count is hard-coded
    notIn <- mapply(function(x, y)
         sum(!s.test[[x]]$numbers %in% s.test[[y]]$numbers),
              x = head(names(s.test), -1L), y = tail(names(s.test), -1L))
})

##   user  system elapsed 
##  4.840   1.643   6.624 

That is a ~7.5x speedup. Is that fast enough?

+4

The same idea works without data.table, in base R (the changed line is marked with `##`):

## generate data - same setup as in Arun's post:
## n * k rows, ids sampled from "id1".."idk", numbers sampled from 1..1e6
set.seed(1L)
k <- 200L
n <- 150000L
test <- data.frame(
    letters = sample(paste0("id", seq_len(k)), size = n * k, replace = TRUE),
    numbers = sample(1e6, size = n * k, replace = TRUE),
    stringsAsFactors = FALSE
)

system.time({
    ## changed line: split only the `numbers` vector by `letters`, giving one
    ## plain vector per group instead of one data frame per group
    s.numbers <- with(test, split(numbers, letters)) ##
    ## pair each group with the one after it and count the current group's
    ## numbers that are absent from the next; the pairs come from the names,
    ## so no group count is hard-coded
    notIn <- mapply(function(x, y)
         sum(!s.numbers[[x]] %in% s.numbers[[y]]),
              x = head(names(s.numbers), -1L), y = tail(names(s.numbers), -1L))
})
+4

Source: https://habr.com/ru/post/1533020/


All Articles