Using data.table:
library(data.table) #1.9.6+
# Convert `test` to a data.table in place.
setDT(test)
# Count the rows for every (start.station.id, hour) pair first (`.N` is the
# per-group row count), then spread those counts into one column per hour.
dcast(
  data = test[ , .N, by = c("start.station.id", "hour")],
  formula = start.station.id ~ hour,
  value.var = "N"
)
Or (in a single step, but slower):
# One-step alternative: reshape the raw rows directly and let dcast() do the
# counting by applying length() to the `hour` values landing in each cell.
dcast(
  data = test,
  formula = start.station.id ~ hour,
  value.var = "hour",
  fun.aggregate = length
)
Benchmark on a larger data set:
# Build a reproducible synthetic data set: NN rows with ids drawn from
# 1..1000 and hours drawn from 1..24, both sampled with replacement.
set.seed(10932)
NN <- 1e6
test <- data.table(
  # `replace = TRUE` is spelled out: the shorthand `T` is an ordinary
  # variable that can be reassigned, and a named argument is self-describing.
  start.station.id = sample(1000, NN, replace = TRUE),
  hour = sample(24, NN, replace = TRUE)
)

library(microbenchmark)
# Time both approaches over 100 runs each:
#   preagg  - count with `.N` by group, then reshape the small summary table
#   postagg - reshape the raw rows and let dcast() count via length()
microbenchmark(
  times = 100L,
  preagg = dcast(test[ , .N, by = .(start.station.id, hour)],
                 start.station.id ~ hour, value.var = "N"),
  postagg = dcast(test, start.station.id ~ hour,
                  fun.aggregate = length, value.var = "hour")
)
Unit: milliseconds
expr min lq mean median uq max neval
preagg 55.83240 59.88939 66.56289 61.37408 64.37049 166.8902 100
postagg 91.16012 93.68588 101.17297 96.04823 101.20717 203.4270 100
So, as the timings show, pre-aggregating with test[ , .N, by = vars] is the faster approach in data.table.