How to combine lists of vectors based on one vector belonging to another vector?

In R, I have two data frames that contain list columns

# First input table: a group id plus a list column holding one character
# vector per group.
d1 <- data.table(
  group_id1 = 1:4,
  Cat_grouped = list(
    letters[1:2],
    letters[3:2],
    letters[3:6],
    letters[11:12]
  )
)

and

# Second input table: a group id plus a list column holding the candidate
# "container" vectors.
d_grouped <- data.table(
  group_id2 = 1:4,
  Cat_grouped = list(
    letters[1:5],
    letters[6:10],
    letters[1:2],
    letters[1]
  )
)

I would like to combine these two data.tables based on the vectors in d1$Cat_grouped being contained in the vectors in d_grouped$Cat_grouped.

To be more precise, there can be two matching criteria:

a) all elements of each vector in d1$Cat_grouped must be in the corresponding vector of d_grouped$Cat_grouped

The result is the following match:

# Expected matches under criterion a): every element of the d1 vector is
# contained in the d_grouped vector. (The arguments must be comma-separated;
# without the comma the call does not parse.)
result_a <- data.table(
  group_id1 = c(1, 2),
  group_id2 = c(1, 1)
)

b) at least one of the elements of each vector in d1$Cat_grouped must be in the corresponding vector of d_grouped$Cat_grouped

The result is the following match:

# Expected matches under criterion b): the d1 vector shares at least one
# element with the d_grouped vector.
result_b <- data.table(
  group_id1 = c(1, 2, 3, 3),
  group_id2 = c(1, 1, 1, 2)
)

How can I implement a) or b)? Preferably in the form of a data table.

EDIT1: Expected Results a) and b) added

EDIT2: More vectors were added to d_grouped, so a vector in d1 can now match more than one vector in d_grouped.

+4
3

) .

Harland approach, data.table , , .

library(data.table)
# Sample input: each table has an integer id column and a list column of
# character vectors.
d1 <- data.table(
  group_id1 = 1:4,
  Cat_grouped = list(
    c("a", "b"),
    c("c", "b"),
    c("c", "d", "e", "f"),
    c("k", "l")
  )
)

d_grouped <- data.table(
  group_id2 = 1:2,
  Cat_grouped = list(
    c("a", "b", "c", "d", "e"),
    c("f", "g", "h", "i", "j")
  )
)

a)

# Columns that identify a matched pair of groups.
grp_cols <- c("group_id1", "group_id2")
# Criterion a): keep (group_id1, group_id2) pairs where every element of the
# d1 vector occurs in the d_grouped vector.
#   1. Unnest d1 to one row per element (V1) and record the vector length (V2).
#   2. Unnest d_grouped the same way and join on the element V1;
#      nomatch = 0L drops elements that have no partner.
#   3. Per pair, count the joined rows (.N) and keep the pairs whose count
#      equals the vector length V2; unique() collapses the repeated rows.
# NOTE(review): comparing a row count with the vector length presumably
# assumes the elements are unique within each vector - confirm for real data.
unique(d1[, .(unlist(Cat_grouped), lengths(Cat_grouped)), by = group_id1][
  d_grouped[, unlist(Cat_grouped), by = group_id2], on = "V1", nomatch = 0L][
    , .(V2, .N), by = grp_cols][V2 == N, ..grp_cols])

   group_id1 group_id2
1:         1         1
2:         2         1

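Both d1 and d_grouped are unnested to long format; for d1, the number of elements in each vector is additionally recorded with lengths(). lengths() (the plural form, not length()) has been available since R 3.2.0.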

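After the inner join (nomatch = 0L), the number of joined rows (.N) is counted for each combination of grp_cols. Only the combinations where this count equals the length of the d1 vector are kept. Finally, the unique combinations of grp_cols are returned.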

b)

For b), the same join is used, but the counting step is omitted:

# Criterion b): unnest both tables to one row per element, join them on the
# element (V1, with nomatch = 0L dropping unmatched elements), and keep each
# (group_id1, group_id2) pair once. A pair shows up as soon as the two
# vectors share at least one element.
unique(d1[, unlist(Cat_grouped), by = group_id1][
  d_grouped[, unlist(Cat_grouped), by = group_id2], on = "V1", nomatch = 0L][
      , c("group_id1", "group_id2")])
   group_id1 group_id2
1:         1         1
2:         2         1
3:         3         1
4:         3         2
+2

, , , . , -, , :

, :

# Unnest each list column to long format: one row per (group id, element).
d1_long <- d1[, .(cat = unlist(Cat_grouped)), by = group_id1]
d_grouped_long <- d_grouped[, .(cat = unlist(Cat_grouped)), by = group_id2]

:

# Join the two long tables on the element, giving one row per element that
# a d1 group and a d_grouped group have in common.
result_b <- merge(x = d1_long, y = d_grouped_long, by = "cat")

, , ...

# The element itself is no longer needed; remove the column by reference.
result_b[, "cat" := NULL]

, group_id, , . .

# A pair sharing several elements appears several times; keep each pair once.
result_b <- result_b[!duplicated(result_b)]

The resulting result_b:

   group_id1 group_id2
1:          1          1
2:          2          1
3:          3          1
4:          3          2

b a, .

, ,

# Attach the original list columns to every matched pair. The second merge
# finds Cat_grouped on both sides and suffixes them: .x comes from d1 and
# .y from d_grouped.
result_a <- merge(
  merge(result_b, d1, by = "group_id1"),
  d_grouped,
  by = "group_id2"
)

Now, for each row, count the elements of Cat_grouped.x and how many of them satisfy Cat_grouped.x %in% Cat_grouped.y, and compare the two counts.

, . , :

Add a row column to use in by:

# Give every row its own index so the next steps can group by single rows.
# seq_len(.N) yields an empty vector when result_a has no rows, whereas
# 1:.N would yield c(1, 0).
result_a[, row := seq_len(.N)]

...

# Per row: the size of the d1 vector, and how many of its elements also
# occur in the d_grouped vector. Grouping by `row` makes each list column a
# one-element list, hence the [[1L]].
result_a[
  ,
  `:=`(
    x.length = length(Cat_grouped.x[[1L]]),
    matches = sum(Cat_grouped.x[[1L]] %in% Cat_grouped.y[[1L]])
  ),
  by = row
]

,

# Keep only the pairs in which every element of the d1 vector was matched.
result_a <- result_a[matches == x.length]
+4

:

:

# Cross join: every combination of a d1 group id with a d_grouped group id.
Y <- CJ(
  group_id1 = d1$group_id1,
  group_id2 = d_grouped$group_id2
)

:

# Bring in the list column of each table; the column joined second is
# prefixed with "i." because the name Cat_grouped is already taken.
Y <- Y[d1, on = "group_id1"]
Y <- Y[d_grouped, on = "group_id2"]

#    group_id1 group_id2 Cat_grouped i.Cat_grouped
# 1:         1         1         a,b     a,b,c,d,e
# 2:         2         1         c,b     a,b,c,d,e
# 3:         3         1     c,d,e,f     a,b,c,d,e
# 4:         4         1         k,l     a,b,c,d,e
# 5:         1         2         a,b     f,g,h,i,j
# 6:         2         2         c,b     f,g,h,i,j
# 7:         3         2     c,d,e,f     f,g,h,i,j
# 8:         4         2         k,l     f,g,h,i,j

Then use mapply to test each pair of vectors, and keep the rows that satisfy the criterion:

# Criterion a): keep the pairs where nothing in the d1 vector is missing
# from the d_grouped vector; return only the first two (id) columns.
Y[
  mapply(
    function(u, v) length(setdiff(u, v)) == 0L,
    Cat_grouped,
    i.Cat_grouped
  ),
  1:2
]
#    group_id1 group_id2
# 1:         1         1
# 2:         2         1

# Criterion b): keep the pairs where at least one element of the d1 vector
# occurs in the d_grouped vector; return only the first two (id) columns.
Y[
  mapply(
    function(u, v) any(u %in% v),
    Cat_grouped,
    i.Cat_grouped
  ),
  1:2
]
#    group_id1 group_id2
# 1:         1         1
# 2:         2         1
# 3:         3         1
# 4:         3         2
+2

Source: https://habr.com/ru/post/1682632/


All Articles