Count sequences of numbers

Question

Count sequences of numbers

I have the following framework with 0 , 1 and NA for identifiers A through E for one year:

dat <- data.frame(
 id = c("A", "B", "C", "D", "E"),
 jan = c(0, 0, NA, 1, 0),
 feb = c(0, 1, 1, 0, 0),
 mar = c(0, 0, 1, 0, 1),
 apr = c(0, NA, 0, NA, 1),
 may = c(0, NA, 0, 0, 0),
 jun = c(0, 0, 0, 0, 0),
 jul = c(0, 0, 0, 0, 1),
 aug = c(NA, 0, 0, 1, 1),
 sep = c(NA, 0, 0, 1, NA),
 okt = c(NA, 0, 0, 0, NA),
 nov = c(NA, 0, 0, 0, 1),
 dez = c(NA, 0, 0, 0, 0)
)

> dat
  id jan feb mar apr may jun jul aug sep okt nov dez
   A   0   0   0   0   0   0   0  NA  NA  NA  NA  NA
   B   0   1   0  NA  NA   0   0   0   0   0   0   0
   C  NA   1   1   0   0   0   0   0   0   0   0   0
   D   1   0   0  NA   0   0   0   1   1   0   0   0
   E   0   0   1   1   0   0   1   1  NA  NA   1   0

I would like to calculate the amount of 1s for each identifier for this one year, but the following conditions must be met:

The first occurrence in 1 is always considered 1
NA should be considered as 0s
The second occurrence of 1 is only considered if preceded by six or more 0s / NAs

In my example, the counter will be:

> dat
   id jan feb mar apr may jun jul aug sep okt nov dez     count
 1  A   0   0   0   0   0   0   0  NA  NA  NA  NA  NA      => 0
 2  B   0   1   0  NA  NA   0   0   0   0   0   0   0      => 1
 3  C  NA   1   1   0   0   0   0   0   0   0   0   0      => 1
 4  D   1   0   0  NA   0   0   0   1   1   0   0   0      => 2
 5  E   0   0   1   1   0   0   1   1  NA  NA   1   0      => 1

The function must be applied in the form apply(dat[, -1], 1, my_fun)and return a vector containing the counter (i.e. 0, 1, 1, 2, 1). Does anyone know how to achieve this?

+4

r dataframe

piptoma Jul 26 '17 at 6:27

source share

4

, 2 , . , - , ( 1 .)

yoursum <- function(x)
{
  x[is.na(x)]<-0
  booleans = with(rle(x),values==0 & lengths>5)
  if(any(booleans))
  {
    if(which(booleans)<length(booleans) & which(booleans)>1 )
      return(2)
  }

  if(any(x>0))
    return(1)
  else
    return(0)
}

apply(dat[,-1],1,yoursum)

:

[1] 0 1 1 2 1

+2

Florian 26 . '17 6:57

, 12 , 1 1, 1 , - , . - . , ..

#Create the pattern to accept 6 or more 0 before the second 1
#Compliments of @DavidArenburg
ptn <- "10{6,}1"


replace(grepl(ptn, do.call(paste0, dat[-1]))+1, rowSums(dat[-1]) == 0, 0)
#[1] 0 1 1 2 1

,

get_counts <- function(df, ptn = "10{6,}1"){
  v1 <- paste0(ptn, collapse = '')
  replace(grepl(v1, do.call(paste0, df[-1]))+1, rowSums(df[-1]) == 0, 0)
}

get_counts(dat)
#[1] 0 1 1 2 1

+2

Sotos 26 . '17 7:08

- , , "1". R , 12 , 12 .

, - :

last_seen_one = integer(nrow(dat))

:

ones_nr = integer(nrow(dat))

, , :

for(j in 2:length(dat)) {
    has_one = dat[[j]] == 1L
    no_one = !last_seen_one
    i = which(has_one & (no_one | ((j - last_seen_one) >= 6)))
    ones_nr[i] = ones_nr[i] + 1L
    last_seen_one[has_one] = j
}

We get:

ones_nr
#[1] 0 1 1 2 1

Thus, instead of a loop over each id / row, only a 12 month / column loop is required.

+1

alexis_laz Jul 26 '17 at 11:02

source share

demirev · Accepted Answer · 2017-07-26T06:53:49+0000

rollapply zoo:

library(zoo)
library(magrittr)

myfun <- function(y, pattern = c(0,0,0,0,0,0,1)){
    y[is.na(y)] <- 0 # to account for both 0s and NAs
    first <- sum(y[1:(length(pattern)-1)])!=0
    rest  <- y %>% as.numeric() %>% rollapply(7, identical, pattern) %>% sum
    return(first+rest)
}

apply(dat[,-1],1,myfun)

[1] 0 1 1 2 1

rollapply 0, 1 .

1 6 ( , ). myfun.

Count sequences of numbers

More articles: