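Building on DavidArenberg's answer: once the intersection is computed, setdiff can be used to get the elements unique to each source. Reproducing the setup first (example data and the intersection step):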
=====
,
# Build two reproducible example tables. Each row has a single-letter `id`,
# a `target` string of five distinct letters joined by ";", and a constant
# `source` tag ("A" for df.1, "B" for df.2).
# The order of the sample() calls is kept exactly as before so that the
# seeded random draws are unchanged.
set.seed(123)
df.1 <- data.frame(
  id = LETTERS[sample(length(LETTERS), 10, replace = FALSE)],
  # vapply() pins the result to one string per row (sapply() is type-unstable).
  target = vapply(
    1:10,
    function(x) {
      paste(LETTERS[sample(length(LETTERS), 5, replace = FALSE)],
            collapse = ";")
    },
    character(1)
  ),
  source = "A",
  stringsAsFactors = FALSE
)
df.2 <- data.frame(
  id = LETTERS[sample(length(LETTERS), 5, replace = FALSE)],
  target = vapply(
    1:5,
    function(x) {
      paste(LETTERS[sample(length(LETTERS), 5, replace = FALSE)],
            collapse = ";")
    },
    character(1)
  ),
  source = "B",
  stringsAsFactors = FALSE
)
library(data.table)
library(stringi)
# Stack both sources and reshape wide: one row per id, with that id's target
# string in column A and/or B (NA where the id does not occur in that source).
res <- dcast(
  rbind(setDT(df.1), setDT(df.2)),
  id ~ source,
  value.var = "target"
)
# Only for ids present in both sources: turn B's ";"-separated letters into a
# regex alternation (e.g. "F|J|O|I|C") and extract every match from A.
res[
  !(is.na(A) | is.na(B)),
  intersected.targets := stri_extract_all_regex(
    A,
    gsub(";", "|", B, fixed = TRUE)
  )
]
res
==========================
Create list columns A2 and B2 by splitting the strings in A and B into their individual letters:
# Split the ";"-joined strings into list columns holding one character vector
# of single letters per row (NA entries stay NA).
res[, `:=`(
  A2 = stri_extract_all_regex(A, "[[:alpha:]]"),
  B2 = stri_extract_all_regex(B, "[[:alpha:]]")
)]
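Then use Map() with setdiff to remove the intersected letters row by row, leaving the letters unique to each source: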
# For each row, drop the shared letters from each source's letter vector to
# get the letters unique to that source. Both calls must use the split list
# columns (A2, B2): passing the raw ";"-joined string column B would compare
# the whole string against single letters, so nothing would ever be removed.
res[, SourceA := Map(setdiff, A2, intersected.targets)]
res[, SourceB := Map(setdiff, B2, intersected.targets)]
res
id A B intersected.targets A2 B2 SourceA SourceB
1: A M;S;F;H;X NA NULL M,S,F,H,X NA M,S,F,H,X NA
2: C NA T;P;R;A;K NULL NA T,P,R,A,K NA T,P,R,A,K
3: G NA G;Q;K;S;C NULL NA G,Q,K,S,C NA G,Q,K,S,C
4: H Y;L;Q;N;C NA NULL Y,L,Q,N,C NA Y,L,Q,N,C NA
5: J X;R;P;W;O F;J;O;I;C O X,R,P,W,O F,J,O,I,C X,R,P,W F,J,I,C
6: K D;K;J;I;Z NA NULL D,K,J,I,Z NA D,K,J,I,Z NA
7: Q D;F;L;G;S NA NULL D,F,L,G,S NA D,F,L,G,S NA
8: R NA L;U;T;S;J NULL NA L,U,T,S,J NA L,U,T,S,J
9: T X;G;B;H;U NA NULL X,G,B,H,U NA X,G,B,H,U NA
10: U S;N;O;G;D NA NULL S,N,O,G,D NA S,N,O,G,D NA
11: W Z;W;Q;S;A NA NULL Z,W,Q,S,A NA Z,W,Q,S,A NA
12: X B;L;T;C;M NA NULL B,L,T,C,M NA B,L,T,C,M NA
13: Z F;D;S;U;I L;Y;V;U;D D,U F,D,S,U,I L,Y,V,U,D F,S,I L,Y,V
.