IIUC, your final data set will have ~200 million rows in 3 columns, all of type numeric, which takes up a total space of:
200e6 (rows) * 3 (cols) * 8 (bytes) / (1024 ^ 3)
# ~ 4.5GB
This is pretty big data, so copying should be avoided wherever possible.
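Here is a solution that uses data.table's (unexported) vecseq function (written in C, so it is fast and memory-efficient) together with its assign-by-reference operator :=, to avoid copying.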
# Build every pair (x[i], x[j]) with i < j, together with the difference
# x[j] - x[i], as a data.table with columns v1, v2 and v3.
#
# x: a numeric vector.
#
# Returns a data.table with length(x) * (length(x) - 1) / 2 rows, ordered by
# i and then by j. Returns a zero-row table when x has fewer than two
# elements. Stops with an error when the number of pairs does not fit in an
# integer.
fn1 <- function(x) {
  # library() instead of require(): a missing package must stop here with a
  # clear message rather than fail later with "could not find function".
  library(data.table)
  lx <- length(x)
  if (lx < 2L) {
    # No pairs exist; the index arithmetic below is only valid for lx >= 2.
    none <- x[0L]
    return(setDT(list(v1 = none, v2 = none, v3 = none - none)))
  }
  n_pairs <- lx * (lx - 1) / 2
  if (n_pairs > .Machine$integer.max) {
    # as.integer() would silently turn the count into NA; fail before the
    # large allocations below are attempted.
    stop("length(x) = ", lx, " yields ", n_pairs,
         " pairs, which exceeds the integer row limit", call. = FALSE)
  }
  vx <- as.integer(n_pairs)
  # v1 repeats x[i] once for every j > i.
  # NOTE(review): vecseq is an unexported data.table internal; it is expected
  # to expand the starts 2:lx with run lengths (lx-1):1 into
  # c(2:lx, 3:lx, ..., lx), with vx as the total-length check -- confirm
  # against the installed data.table version.
  ans <- setDT(list(
    v1 = rep.int(head(x, -1L), (lx - 1L):1L),
    v2 = x[data.table:::vecseq(2:lx, (lx - 1L):1, vx)]
  ))
  # Add the difference by reference, so the two large columns are not copied.
  ans[, v3 := v2 - v1]
  # `:=` returns its result invisibly; `[]` makes the returned table print
  # normally at the console.
  ans[]
}
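Benchmarks:
I've put together functions for the other answers. Note that I'm using R v3.0.2, and fn1() should perform even better (in both speed and memory) on R v3.1.0, because list(.) no longer results in a copy there.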
# Pairwise table built from the full difference matrix.
#
# x: a numeric vector.
#
# Returns a data.frame with one row per index pair (i, j), i < j, in the
# column-major order produced by which(): v1 = x[i], v2 = x[j] and
# v3 = x[i] - x[j].
# NOTE(review): v3 is v1 - v2 here, whereas fn1() and fn3() compute v2 - v1;
# confirm which sign is intended before comparing results.
fn2 <- function(x) {
  # Entry [i, j] holds x[i] - x[j]; the whole length(x)^2 matrix is built.
  pairwise <- outer(x, x, "-")
  # Row/column positions of the strictly upper triangle (i < j).
  upper <- which(upper.tri(pairwise), arr.ind = TRUE)
  first <- x[upper[, 1]]
  second <- x[upper[, 2]]
  data.frame(v1 = first, v2 = second, v3 = pairwise[upper])
}
# Pairwise table built from combn() index pairs.
#
# x: a numeric vector.
#
# Returns a data.frame with one row per index pair (i, j), i < j, in combn()
# order: v1 = x[i], v2 = x[j] and v3 = x[j] - x[i]. Returns a zero-row frame
# when x has fewer than two elements.
fn3 <- function(x) {
  if (length(x) < 2L) {
    # combn() stops with "n < m" when fewer than two indices are available;
    # there are simply no pairs, so return an empty frame of the same shape.
    none <- x[0L]
    return(data.frame(v1 = none, v2 = none, v3 = none - none))
  }
  # 2 x choose(length(x), 2) matrix: row 1 holds i, row 2 holds j.
  idx <- combn(seq_along(x), 2)
  out2 <- data.frame(v1 = x[idx[1, ]], v2 = x[idx[2, ]])
  out2$v3 <- out2$v2 - out2$v1
  out2
}
# Benchmark: time the three implementations on the same input.
# The seed is fixed so every run draws the same 20,000 uniform values.
set.seed(45L)
x <- runif(20000L)
# Each result has 20000 * 19999 / 2 = 199,990,000 rows; fn2() additionally
# builds a 20000 x 20000 matrix of doubles (about 3 GB) along the way.
system.time(ans1 <- fn1(x))
system.time(ans2 <- fn2(x))
system.time(ans3 <- fn3(x))
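Note that fn2(), because it uses outer, requires a lot of memory (peak usage was >= 19 GB), far more than fn1(). fn3() is just very slow (because of combn).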