Not-join (anti-join) in R

I am looking for a quick way to do a "not join" (that is, keep the rows that don't merge — the inverse of an inner join). The way I have been doing it is to use data.table for X and Y, and then set the key. For instance:

require(data.table)

X <- data.table(category = c('A','B','C','D'), val1 = c(0.2,0.3,0.8,0.7))
Y <- data.table(category = c('B','C','D','E'), val2 = c(2,3,5,7))
XY <- merge(X,Y,by='category')

> XY
   category val1 val2
1:        B  0.3    2
2:        C  0.8    3
3:        D  0.7    5

But I need the opposite, so I have to do:

XY_All <- merge(X,Y,by='category',all=TRUE)
setkey(XY,category)
setkey(XY_All,category)
notXY <- XY_All[!XY]    #data.table not join (finally)

> notXY
   category val1 val2
1:        A  0.2   NA
2:        E   NA    7

This seems rather long-winded to me (especially when starting from a data.frame). Am I missing something?

EDIT: I arrived at this after thinking more about not-joins:

X <- data.table(category = c('A','B','C','D'), val1 = c(0.2,0.3,0.8,0.7),key = "category")
Y <- data.table(category = c('B','C','D','E'), val2 = c(2,3,5,7), key = "category")
notXY <- merge(X[!Y],Y[!X],all=TRUE)

But WheresTheAnyKey's answer below is clearer. One last hurdle is having to pre-set the data.table keys; it would be nice not to have to do that.

EDIT: To clarify, the solution is:

merge(anti_join(X, Y, by = 'category'),anti_join(Y, X, by = 'category'), by = 'category', all = TRUE)
+4
4
require(dplyr)
rbind_list(anti_join(X, Y), anti_join(Y, X))

EDIT: Since an explanation was requested, here is what happens:

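The first anti_join() returns the rows of X that have no matching row in Y, where the match is determined by the columns the two tables have in common. The second does the same with the roles of X and Y reversed. rbind_list() then combines both results into a single tbl containing every column from either input, filling the missing values with NA.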

+5
setkey(X,category)
setkey(Y,category)

rbind(X[!Y], Y[!X], fill = TRUE)
+5

Alternatively, with keyed data.tables, in a single expression:

X <- data.table(category = c('A','B','C','D'), val1 = c(0.2,0.3,0.8,0.7),key = "category")
Y <- data.table(category = c('B','C','D','E'), val2 = c(2,3,5,7), key = "category")
notXY <- merge(X,Y,all = TRUE)[!merge(X,Y)]
+4

Another approach. First, merge with "all" set to "TRUE". Then keep only the rows that are not complete cases:

XY_All <- merge(X,Y,by='category',all=TRUE)
notXY  <- XY_All[!complete.cases(XY_All),]
0

Source: https://habr.com/ru/post/1544369/


All Articles