Find the first sequence of length n in R

Say I have a data.frame like this:

# Example input: a single column, `signal`, holding 0/1 values.
df <- data.frame(signal = c(0, 0, 1, 0, 1, 1, 0, 1, 1, 1)) 

What is the best way to find the first signal, i.e. the first run of n consecutive 1s, and to mark the element where that run ends? For example, if n = 1, the signal is the third element, and I would like to get the following answer:

 c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0) 

If n = 2, the answer will be:

 c(0, 0, 0, 0, 0, 1, 0, 0, 0, 0) 

And for n = 3, the signal is the last element, which completes three 1s in a row:

 c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1) 
+5
source share
3 answers
 #' Mark the end of the first run of exactly `n` consecutive 1s.
 #'
 #' @param signal Numeric vector of 0/1 values.
 #' @param n Run length to look for.
 #' @return A numeric vector of zeros, the same length as `signal`, with a
 #'   single 1 at the last position of the first run of 1s whose length is
 #'   exactly `n`; all zeros when there is no such run.
 fun <- function(signal, n) {
   runs <- rle(signal == 1)
   # Only runs of 1s may match. Requiring `runs$values` keeps n = 0 from
   # matching a run of 0s (whose lengths * values product is also 0).
   hit <- head(which(runs$values & runs$lengths == n), 1)
   out <- numeric(length(signal))
   if (length(hit) > 0) {
     # The run lengths up to and including the hit add up to its end position.
     out[sum(runs$lengths[seq_len(hit)])] <- 1
   }
   out
 }
 fun(df$signal, 1)
 # [1] 0 0 1 0 0 0 0 0 0 0
 fun(df$signal, 2)
 # [1] 0 0 0 0 0 1 0 0 0 0
 fun(df$signal, 3)
 # [1] 0 0 0 0 0 0 0 0 0 1
 fun(df$signal, 4)
 # [1] 0 0 0 0 0 0 0 0 0 0
+3
source
 x <- c(0, 0, 1, 0, 1, 1, 0, 1, 1, 1)

 # Label every element with its run length times its value, so each element of
 # a run of 1s carries the length of that run and elements of runs of 0s stay 0.
 y <- rle(x)
 y$values <- y$lengths * y$values
 (y <- inverse.rle(y))
 # [1] 0 0 1 0 2 2 0 3 3 3

 #' Mark the end of the first run of exactly `n` consecutive 1s.
 #'
 #' Reads the run-length labels from the global `y` computed above.
 #'
 #' @param n Run length to look for.
 #' @return A numeric vector of zeros, the same length as `y`, with a single 1
 #'   at the last element of the first matching run; all zeros when no run
 #'   matches.
 f <- function(n) {
   z <- rep(0, length(y))
   hits <- which(y == n)
   # The first run of length n owns the first n hits, so hits[n] is its last
   # element. Guarding on the number of hits leaves z untouched when nothing
   # matches (which.max() on an all-zero cumsum would return index 1).
   if (isTRUE(n >= 1) && length(hits) >= n) {
     z[hits[n]] <- 1
   }
   z
 }
 f(1)
 # [1] 0 0 1 0 0 0 0 0 0 0
 f(2)
 # [1] 0 0 0 0 0 1 0 0 0 0
 f(3)
 # [1] 0 0 0 0 0 0 0 0 0 1

The full function would be:

 #' Mark the end of the first run of exactly `n` consecutive 1s in `x`.
 #'
 #' @param x Numeric vector of 0/1 values.
 #' @param n Run length to look for.
 #' @return A numeric vector of zeros, the same length as `x`, with a single 1
 #'   at the last element of the first matching run; all zeros when no run
 #'   matches.
 g <- function(x, n) {
   # Label every element with its run length times its value, so each element
   # of a run of 1s carries the length of that run.
   runs <- rle(x)
   runs$values <- runs$lengths * runs$values
   run_len <- inverse.rle(runs)

   z <- rep_len(0, length(x))
   hits <- which(run_len == n)
   # The first run of length n owns the first n hits, so hits[n] is its last
   # element. Guarding on the number of hits leaves z untouched when nothing
   # matches (which.max() on an all-zero cumsum would return index 1).
   if (isTRUE(n >= 1) && length(hits) >= n) {
     z[hits[n]] <- 1
   }
   z
 }
 g(x, 1)
 g(x, 2)
 g(x, 3)

Edit: version 2, with a `ties` argument for choosing among several matching runs

 #' Mark the end of one run of exactly `n` consecutive 1s in `x`.
 #'
 #' @param x Numeric vector of 0/1 values.
 #' @param n Run length to look for.
 #' @param ties Which matching run to mark when several runs have length `n`:
 #'   the first one, the last one, or one picked at random.
 #' @return A numeric vector of zeros, the same length as `x`, with a single 1
 #'   at the last element of the chosen run; all zeros when no run matches.
 g <- function(x, n, ties = c("first", "random", "last")) {
   ties <- match.arg(ties)

   # Label every element with its run length times its value.
   runs <- rle(x)
   runs$values <- runs$lengths * runs$values
   run_len <- inverse.rle(runs)

   out <- rep_len(0, length(x))
   hits <- which(run_len == n)
   if (length(hits) == 0) {
     return(out)
   }

   # Every n-th hit closes a run of length n.
   run_ends <- hits[seq_along(hits) %% n == 0]
   chosen <- switch(ties,
     first = min(run_ends),
     last = max(run_ends),
     random = run_ends[sample.int(length(run_ends), 1)]
   )
   out[chosen] <- 1
   out
 }

 x <- c(0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1)
 g(x, 1, "first")
 # [1] 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
 g(x, 1, "last")
 # [1] 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
 g(x, 1, "random")
 # [1] 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
 g(x, 4)
 # [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+5
source

The first 1 in the rolling product of the signal with window size n marks the window where the first run of n ones begins; adding n - 1 to its index gives the element where that run ends. Therefore:

 #' Mark the end of the first window of `n` elements whose product is 1.
 #'
 #' @param x Numeric vector of 0/1 values.
 #' @param n Window size.
 #' @return A numeric vector of zeros, the same length as `x`, with a 1 at the
 #'   last element of the first such window.
 f <- function(x, n) {
   window_prod <- RcppRoll::roll_prod(x, n)
   # Index of the first window whose product is 1.
   first_start <- which(window_prod == 1)[1]
   out <- numeric(length(x))
   # Shift from the window's index to its last element.
   out[first_start + n - 1] <- 1
   out
 }
 f(df$signal, 1)
 # [1] 0 0 1 0 0 0 0 0 0 0
 f(df$signal, 2)
 # [1] 0 0 0 0 0 1 0 0 0 0
 f(df$signal, 3)
 # [1] 0 0 0 0 0 0 0 0 0 1

Sanity check

 # Compare the three approaches on a random signal. The vector printed below
 # has runs of two and four 1s, but no run of exactly three.
 # NOTE(review): the printed vector presumably comes from the sampler used
 # before R 3.6.0; newer R versions may draw a different vector for the same
 # seed -- confirm before relying on the outputs shown.
 set.seed(1)
 signal <- sample(0:1, 10, replace = TRUE)
 signal
 # [1] 0 0 1 1 0 1 1 1 1 0
 f(signal, 3)
 # [1] 0 0 0 0 0 0 0 1 0 0
 g(signal, 3)
 # [1] 1 0 0 0 0 0 0 0 0 0
 fun(signal, 3)
 # Error in 1:which(r$len * r$val == n)[1] : NA/NaN argument
+5
source

Source: https://habr.com/ru/post/1245102/


All Articles