How to vectorize this operation for each row of the matrix

I have a matrix filled with TRUE/ values FALSE, and I'm trying to find the index position of the first value TRUEfor each row (or return NAif there is no value TRUEin the row). The following code does the job, but it uses a call apply(), which I believe is only a wrapper around the for loop. I work with some large datasets and performance suffers. Is there a faster way?

> x <- matrix(rep(c(F,T,T),10), nrow=10)
> x
       [,1]  [,2]  [,3]
 [1,] FALSE  TRUE  TRUE
 [2,]  TRUE  TRUE FALSE
 [3,]  TRUE FALSE  TRUE
 [4,] FALSE  TRUE  TRUE
 [5,]  TRUE  TRUE FALSE
 [6,]  TRUE FALSE  TRUE
 [7,] FALSE  TRUE  TRUE
 [8,]  TRUE  TRUE FALSE
 [9,]  TRUE FALSE  TRUE
[10,] FALSE  TRUE  TRUE

> apply(x,1,function(y) which(y)[1])
 [1] 2 1 1 2 1 1 2 1 1 2
+3
source share
3 answers

Not sure if this is better, but this is one solution:

> x2 <- t(t(matrix(as.numeric(x), nrow=10)) * 1:3)
> x2[x2 == 0] <- Inf
> rowMins(x2)
 [1] 2 1 1 2 1 1 2 1 1 2

Edit: here is the best solution using the R base:

> x2 <- (x2 <- which(x, arr=TRUE))[order(x2[,1]),]
> x2[as.logical(c(1,diff(x2[,1]) != 0)),2]
 [1] 2 1 1 2 1 1 2 1 1 2
+4
source

.

1) max.col:

> max.col(x, "first")
 [1] 2 1 1 2 1 1 2 1 1 2

2) aggregate:

> aggregate(col ~ row, data = which(x, arr.ind = TRUE), FUN = min)$col
 [1] 2 1 1 2 1 1 2 1 1 2

, . :

abiel <- function(n){apply(n, 1, function(y) which(y)[1])}
maxcol <- function(n){max.col(n, "first")}
aggr.min <- function(n){aggregate(col ~ row, data = which(n, arr.ind = TRUE), FUN = min)$col}
shane.bR <- function(n){x2 <- (x2 <- which(n, arr=TRUE))[order(x2[,1]),]; x2[as.logical(c(1,diff(x2[,1]) != 0)),2]}
joris <- function(n){z <- which(t(n))-1;((z%%ncol(n))+1)[match(1:nrow(n), (z%/%ncol(n))+1)]}

-, :

xl <- matrix(sample(c(F,T),9e5,replace=TRUE), nrow=1e5)

-, :

library(microbenchmark)
microbenchmark(abiel(xl), maxcol(xl), aggr.min(xl), shane.bR(xl), joris(xl),
               unit = 'relative')

:

Unit: relative
         expr        min         lq       mean     median         uq       max neval   cld
    abiel(xl)  55.102815  33.458994  15.781460  33.243576  33.196486  2.911675   100    d 
   maxcol(xl)   1.000000   1.000000   1.000000   1.000000   1.000000  1.000000   100 a    
 aggr.min(xl) 439.863935 262.595535 118.436328 263.387427 256.815607 16.709754   100     e
 shane.bR(xl)  12.477856   8.522470   7.389083  13.549351  24.626431  1.748501   100   c  
    joris(xl)   7.922274   5.449662   4.418423   5.964554   9.855588  1.491417   100  b   
+3

, %% %/%:

x <- matrix(rep(c(F,T,T),10), nrow=10)

z <- which(t(x))-1
((z%%ncol(x))+1)[match(1:nrow(x), (z%/%ncol(x))+1)]

: , .

1 000 000 5:

x <- matrix(sample(c(F,T),5000000,replace=T), ncol=5)

system.time(apply(x,1,function(y) which(y)[1]))

#>   user  system elapsed 
#>  12.61    0.07   12.70 

system.time({
 z <- which(t(x))-1
 (z%%ncol(x)+1)[match(1:nrow(x), (z%/%ncol(x))+1)]}
)

#>   user  system elapsed 
#>   1.11    0.00    1.11 

.

+2

Source: https://habr.com/ru/post/1761318/


All Articles