Convert a list type column to a long form by separating items

I have a table with two columns of interest:

Status_id | hashtag
947306525726527488 | NEWYEARSEVEPARTY919
947306316959281153 | MakeItALifestyle
947306315952611330 | c ("Ejuice", "vape", "vaping")
947306265520328704 | c ("vapefam", "vapenation", "vapefamily")
947305941522771968 | nowplaying

str(juice) #df name
Classes ‘tbl_df’, ‘tbl’ and 'data.frame':   5 obs. of  2 variables:
$ status_id: chr  "947306525726527488" "947306316959281153" 
"947306315952611330" "947306265520328704" 
$ hashtags :List of 5
..$ : chr "NEWYEARSEVEPARTY919"
..$ : chr "MakeItALifestyle"
..$ : chr  "Ejuice" "vape" "vaping" "eliquid"
..$ : chr  "vapefam" "vapenation" "vapefamily"
..$ : chr "nowplaying"

Data

# Reproducible example data: 5 rows with a character column `status_id` and a
# list column `hashtags`; each list element holds one or more hashtag strings.
structure(list(status_id = c("947306525726527488", "947306316959281153", 
"947306315952611330", "947306265520328704", "947305941522771968"
), hashtags = list("NEWYEARSEVEPARTY919", "MakeItALifestyle", 
    c("Ejuice", "vape", "vaping", "eliquid", "ecigjuice", "ecig", 
    "vapejuice"), c("vapefam", "vapenation", "vapefamily", "vapelife", 
    "vapelyfe", "vapeon", "positivity"), "nowplaying")), .Names = c("status_id", 
"hashtags"), row.names = c(NA, -5L), class = c("tbl_df", "tbl", 
"data.frame"))

Expected Result

I need the following two tables (of course, in the actual df original, there are more columns that I deleted, since they are not relevant to the question):

df1
Status_id
947306525726527488
947306316959281153
947306315952611330
947306265520328704
947305941522771968

df2
status_id | hashtag
947306525726527488 | NEWYEARSEVEPARTY919
947306316959281153 | MakeItALifestyle
947306315952611330 | Ejuice
947306315952611330 | vape
947306315952611330 | vaping
947306265520328704 | vapefam
947306265520328704 | vapenation
947306265520328704 | vapefamily
947305941522771968 | nowplaying

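The problem is that some status_id values have more than 1 hashtag, stored as c (...) - the column type is "list". I need df2 in long form, with one hashtag per row.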

I searched Google, but could not find how to handle a column of type "list".

+4
3

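Here is one way. Your data is called mydf. You have hashtags stored in a list column. For each row, flatten hashtags with unlist() and join the items into a single comma-separated string with paste(). Alternatively, you can use toString() instead of paste(). Once hashtags is a plain string column, you can split it. As you said, the 3rd and 4th rows hold more than one item, so they have to be split into long format; cSplit() from the splitstackshape package does this. The result is df2. Creating df1 is then easy: take the status_id column and keep the unique status_id values.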

library(dplyr)
library(splitstackshape)

# Step 1 (inner call): work row by row and collapse each row's hashtags into
# a single comma-separated string, so the list column becomes plain text.
# Step 2 (outer call): split that string on "," and stack the pieces, giving
# one row per (status_id, hashtag) pair.
df2 <- cSplit(
  mutate(rowwise(mydf), hashtags = paste(unlist(hashtags), collapse = ",")),
  splitCols = "hashtags",
  sep = ",",
  direction = "long"
)

             status_id            hashtags
 1: 947306525726527488 NEWYEARSEVEPARTY919
 2: 947306316959281153    MakeItALifestyle
 3: 947306315952611330              Ejuice
 4: 947306315952611330                vape
 5: 947306315952611330              vaping
 6: 947306315952611330             eliquid
 7: 947306315952611330           ecigjuice
 8: 947306315952611330                ecig
 9: 947306315952611330           vapejuice
10: 947306265520328704             vapefam
11: 947306265520328704          vapenation
12: 947306265520328704          vapefamily
13: 947306265520328704            vapelife
14: 947306265520328704            vapelyfe
15: 947306265520328704              vapeon
16: 947306265520328704          positivity
17: 947305941522771968          nowplaying

# Keep only the id column (the first column of df2) and drop repeated ids,
# leaving one row per status_id.
df1 <- unique(df2[, "status_id", with = FALSE])

            status_id
1: 947306525726527488
2: 947306316959281153
3: 947306315952611330
4: 947306265520328704
5: 947305941522771968

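The splitstackshape package also has listCol_l(), which flattens a list column into long form directly, so the paste-and-split steps are not needed. In that case, a single call is enough.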

# Flatten the list column `hashtags` of mydf into long form using
# splitstackshape::listCol_l().
# NOTE(review): the flattened column may come back under a modified name
# (e.g. with a suffix) - check names(df2) before relying on it.
df2 <- listCol_l(mydf, "hashtags") 
+3
library(data.table)
library(dplyr)

# Toy example: five keys, each paired with one or more values held in a list.
# Note: no rm(list = ls()) here - wiping the caller's workspace is a
# destructive side effect that a snippet like this should never have.
k <- LETTERS[1:5]
v <- list("a", "b", c("c", "d", "e"), c("f", "g"), "h")

# Build the frame from the atomic key vector first, then attach the list as a
# list column. (cbind(k, v) would create a list-matrix and turn `k` into a
# list column too, which then needs extra unlist() calls downstream.)
df <- data.frame(k = k, stringsAsFactors = FALSE)
df$v <- v
df

# Number of items stored in each row; each key is repeated that many times.
n_items <- lengths(df$v)

# df2: long form, one row per (key, value) pair.
df2 <- data.frame(
  k = rep(df$k, times = n_items),
  v = unlist(df$v),
  stringsAsFactors = FALSE
)

# df1: the keys on their own, one row per key.
df1 <- data.frame(k = df$k, stringsAsFactors = FALSE)

df1
df2

enter image description here
enter image description here

+2

For completeness, there is also a data.table solution:

library(data.table)
# Convert juice to a data.table by reference (juice itself is modified).
setDT(juice)

# df2: for each status_id, unlist its hashtags so every hashtag gets its own
# row.
df2 <- juice[, list(hashtag = unlist(hashtags)), by = "status_id"]

# df1: the distinct status_id values as a one-column table.
df1 <- unique(juice[, list(status_id)])

df2
             status_id             hashtag
 1: 947306525726527488 NEWYEARSEVEPARTY919
 2: 947306316959281153    MakeItALifestyle
 3: 947306315952611330              Ejuice
 4: 947306315952611330                vape
 5: 947306315952611330              vaping
 6: 947306315952611330             eliquid
 7: 947306315952611330           ecigjuice
 8: 947306315952611330                ecig
 9: 947306315952611330           vapejuice
10: 947306265520328704             vapefam
11: 947306265520328704          vapenation
12: 947306265520328704          vapefamily
13: 947306265520328704            vapelife
14: 947306265520328704            vapelyfe
15: 947306265520328704              vapeon
16: 947306265520328704          positivity
17: 947305941522771968          nowplaying
df1
            status_id
1: 947306525726527488
2: 947306316959281153
3: 947306315952611330
4: 947306265520328704
5: 947305941522771968
+1
source

Source: https://habr.com/ru/post/1691558/


All Articles