When is it appropriate to use sparse matrices in R?

I have been doing power simulation recently, and I have the following code:

library(MASS)
library(Matrix)

simdat <- data.frame(mmm = rep(rep(factor(1:2,
                                          labels=c("m1", "m2")),
                                   each = 2),
                               times = 2800),
                 ttt = rep(factor(1:2,
                                  labels = c("t1", "t2")),
                           times = 5600),
                 sss = rep(factor(1:70),
                            each = 160),
                 iii = rep(rep(factor(1:40),
                               each = 4),
                           times = 70))

beta <- c(1, 2)

X1 <- model.matrix(~ mmm,
                   data = simdat)

Z1 <- model.matrix(~ ttt,
                   data = simdat)

X1and Z1are matrices 11200x2. With Stackoverflow, I was able to make my calculations much more efficient than before:

funab <- function(){
    ran_sub <- mvrnorm(70, mu = c(0,0), Sigma = matrix(c(10, 3, 3, 2), ncol = 2))

    ran_ite <- mvrnorm(40, mu = c(0,0), Sigma = matrix(c(10, 3, 3, 2), ncol = 2))

    Mb <- as.vector(X1 %*% beta)

    M1 <- rowSums(Z1 * ran_sub[rep(1:70,
                                        each = 160),])

    M2 <- rowSums(Z1 * ran_ite[rep(rep(1:40, each = 4),
                                        times = 70),])

    Mout <- Mb + M1 + M2

    Y <- as.vector(Mout) + rnorm(length(Mout), mean = 0 , sd = 0.27)
}

Ywill then be a length vector 11200. Then I repeat this function a lot (say 1000once):

sim <- replicate(n        = 1000,
                 expr     = funab()},
                 simplify = FALSE)

simwill be 11200x1000. Given that I want to do this a lot more and maybe add more code to funab(), I wonder if it is advisable to use calculations with sparse matrices for X1and Z1in calculations in funab(), as of now?

+4
source share
1

, , , microbenchmark. , :

library(MASS)
library(Matrix)

simdat <- data.frame(mmm = rep(rep(factor(1:2,
                                          labels=c("m1", "m2")),
                                   each = 2),
                               times = 2800),
                 ttt = rep(factor(1:2,
                                  labels = c("t1", "t2")),
                           times = 5600),
                 sss = rep(factor(1:70),
                            each = 160),
                 iii = rep(rep(factor(1:40),
                               each = 4),
                           times = 70))

beta <- c(1, 2)

X1 <- model.matrix(~ mmm,
                   data = simdat)

Z1 <- model.matrix(~ ttt,
                   data = simdat)

, :

sparseX1 <- sparse.model.matrix(~ mmm,
                                data = simdat)

sparseZ1 <- sparse.model.matrix(~ ttt,
                                data = simdat)

:

funab_sparse <- function(){
    ran_sub <- mvrnorm(70, mu = c(0,0), Sigma = matrix(c(10, 3, 3, 2), ncol = 2))

    ran_ite <- mvrnorm(40, mu = c(0,0), Sigma = matrix(c(10, 3, 3, 2), ncol = 2))

    Mb <- as.vector(sparseX1 %*% beta)

    M1 <- Matrix::rowSums(sparseZ1 * ran_sub[rep(1:70,
                                        each = 160),])

    M2 <- Matrix::rowSums(sparseZ1 * ran_ite[rep(rep(1:40, each = 4),
                                        times = 70),])

    Mout <- Mb + M1 + M2

    Y <- as.vector(Mout) + rnorm(length(Mout), mean = 0 , sd = 0.27)
}

funab <- function(){
    ran_sub <- mvrnorm(70, mu = c(0,0), Sigma = matrix(c(10, 3, 3, 2), ncol = 2))

    ran_ite <- mvrnorm(40, mu = c(0,0), Sigma = matrix(c(10, 3, 3, 2), ncol = 2))

    Mb <- as.vector(X1 %*% beta)

    M1 <- rowSums(Z1 * ran_sub[rep(1:70,
                                        each = 160),])

    M2 <- rowSums(Z1 * ran_ite[rep(rep(1:40, each = 4),
                                        times = 70),])

    Mout <- Mb + M1 + M2

    Y <- as.vector(Mout) + rnorm(length(Mout), mean = 0 , sd = 0.27)
}

library(microbenchmark)
res <- microbenchmark(funab(), funab_sparse(), times = 1000)

:

> res <- microbenchmark(funab(), funab_sparse(), times = 1000)
> res
Unit: milliseconds
           expr      min       lq   median       uq      max neval
        funab() 2.200342 2.277006 2.309587 2.481627 69.99895  1000
 funab_sparse() 8.419564 8.568157 9.666248 9.874024 75.88907  1000

, , , .

+1

Source: https://habr.com/ru/post/1547881/


All Articles