Using tidyr:
library(tidyverse)
library(magrittr)
# Toy purchase log in long format: one row per (userid, productid) pair
# together with how often that user bought that product.
example_data <- data.frame(
  userid    = c(293994, 293994, 949859, 949859, 123234, 123234, 123234),
  productid = c(8, 5, 2, 1, 1, 3, 4),
  freq      = c(3, 3, 1, 1, 1, 1, 1)
)

# Reshape to wide format: one row per userid, one column per productid;
# user/product combinations that never occur are filled with 0.
example_data %>%
  spread(key = productid, value = freq, fill = 0)
Besides spread, there are other ways to do this reshaping: base R table, data.table, or tidyr/dplyr. For large inputs, data.table's dcast is another option.
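I tried tidyr on a large data set (2 mio records). If the data cannot be handled in one pass, it can be processed in chunks (combined with rbind) or on a cluster (e.g. rhadoop or sparklyr).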
, " " , - - .
# Build a random key made of uppercase letters.
#
# digits: number of characters in the key.
# Returns a single string of `digits` letters drawn (with replacement)
# from LETTERS.
randomkey <- function(digits) {
  drawn_letters <- sample(LETTERS, digits, replace = TRUE)
  paste(drawn_letters, collapse = '')
}
# Synthetic benchmark data: 10 random product keys and 500000 random
# customer keys (unique() guards against the unlikely duplicate key).
products <- replicate(10, randomkey(20)) %>% unique
customers <- replicate(500000, randomkey(50)) %>% unique

# Every customer gets this many rows, each with a different product.
products_per_customer <- 4
n_rows <- length(customers) * products_per_customer

# All three columns are generated at full length on purpose. Passing short
# vectors and letting data.frame() recycle them would repeat the same
# (useruid, productid) pair for a customer, and spread() requires every
# output cell to be identified by a unique key combination.
big_example_data <- data.frame(
  useruid = rep(customers, each = products_per_customer),
  # replicate() returns a products_per_customer x n_customers matrix;
  # flattening it column-wise lines up with rep(..., each = ...).
  productid = as.vector(
    replicate(length(customers), sample(products, products_per_customer))
  ),
  freq = sample(1:5, n_rows, replace = TRUE)
)

dim(big_example_data)
head(big_example_data)

# Time the long-to-wide reshape on the large table.
# NOTE: spread() is superseded in tidyr by pivot_wider(); it is kept here
# because it is the function being benchmarked.
system.time(
  big_matrix <- spread(big_example_data, key = productid, value = freq, fill = 0)
)