Find similar vector elements and change everything in between.

I have a large data frame (df) with binary values that are either 1 or 2; the data also contains NA. As a practical example, I will create a short vector containing a subset of the user data:

df <- c(NA,NA,2,1,1,1,2,1,2,2,1,1,1,NA,2,2,1,2,1,1,1,2)

In principle, I would like a function that finds the first and second occurrences of 2 and converts everything within that interval to 2. However, if the difference between the positions of the second and the first 2 is greater than 3, the values remain as they are and no changes are made.

In addition to the above, the function must loop over every value of df. For example, consider the case again:

df <- c(NA,NA,2,1,1,1,2,1,2,2,1,1,1,NA,2,2,1,2,1,1,1,2)

The function should have the following result:

df_outcome <- c(NA,NA,2,1,1,1,2,2,2,2,1,1,1,NA,2,2,2,2,1,1,1,2)

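Note that in df_outcome the values between the first and second 2 were left unchanged, because the difference between their positions is > 3. Elsewhere, the values between nearby 2s were changed to 2.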

( ):

rollapply zoo , 2 , .

# Fill the span between the first two occurrences of 2 in `q` with 2.
#
# Args:
#   q: A vector (may contain NA) to scan for the value 2.
#
# Returns:
#   `q` with every element from the first 2 through the second 2 (inclusive)
#   set to 2. If `q` contains fewer than two 2s it is returned unchanged.
func <- function(q) {
  # `%in%` never yields NA, so NA elements are simply treated as "not 2".
  twos <- which(q %in% 2)

  # With fewer than two matches, twos[2] would be NA and `a:NA` raises
  # "NA/NaN argument"; there is no interval to fill, so return early.
  if (length(twos) < 2L) {
    return(q)
  }

  q[twos[1L]:twos[2L]] <- 2
  q
}

rollapply, , ().

# Apply `func` to left-aligned rolling windows of width 3 over `df`;
# `partial = TRUE` and `fill = NA` are passed through to rollapply.
df_outcome <- rollapply(
  df,
  width = 3,
  FUN = func,
  align = "left",
  partial = TRUE,
  fill = NA
)

, , . , rollapply, :

Error in (which(q %in% 2)[1]):(which(q %in% 2)[2]) : NA/NaN argument. Calls: rollapply ... FUN(data[replace(posns, !ix, 0)], ...)

, rollapply , , , , . rollapply, . , , , User_ID ( .variables ddply by data.table).

.

+4
3

rle:

# Collapse df into runs of equal values.
runs <- rle(df)

# For every run, the value of the run that follows it and the run before it
# (NA where there is no neighbour).
next_value <- c(tail(runs$values, -1), NA)
prev_value <- c(NA, head(runs$values, -1))

# A run of 1s shorter than 3 with a run of 2s on both sides gets bridged.
# which() drops comparisons that evaluate to NA.
is_short_gap <- runs$values == 1 &
  prev_value == 2 &
  next_value == 2 &
  runs$lengths < 3
runs$values[which(is_short_gap)] <- 2

# Expand the edited runs back into a full-length vector.
df_out <- inverse.rle(runs)

:

> df_out
 [1] NA NA  2  1  1  1  2  2  2  2  1  1  1 NA  2  2  2  2  1  1  1  2

> identical(df_outcome,df_out)
[1] TRUE
+6

2 df. , , , 2:

# Positions of the 2s (which() skips NA comparisons).
two_pos <- which(df == 2)
# Consecutive pairs of 2s whose positions differ by at most 3.
near_pair <- which(diff(two_pos) <= 3)
# Every index from the first to the second 2 of each such pair.
fill_idx <- unique(unlist(Map(seq, two_pos[near_pair], two_pos[near_pair + 1])))
# Overwrite those positions with 2.
df[fill_idx] <- 2
df
#[1] NA NA  2  1  1  1  2  2  2  2  1  1  1 NA  2  2  2  2  1  1  1  2
+5

data.table (, , ):

df <- c(
  NA, NA, 2, 1, 1, 2, 2, 1, 2, 2, 1,
  1, 1, NA, 2, 2, 1, 2, 1, 1, 1, 2
)
dt <- data.table(val = df)
# Run id of val, plus the previous (p) and next (n) value for every row.
dt[, c("id", "p", "n") := list(
  rleid(val),
  shift(val, type = "lag"),
  shift(val, type = "lead")
)]
# Per run: its length (s), the value just before it (f) and just after it (e).
dt[, c("s", "f", "e") := list(.N, p[1], n[.N]), by = id]
# Runs shorter than 3 that sit between two 2s are set to 2.
dt[f == 2 & e == 2 & s < 3, val := 2]

:

df , 1

df <- c(
  NA, NA, 2, 1, 1, 2, 2, 1, 2, 2, 1,
  1, 1, NA, 2, 2, 1, 2, 1, 1, 1, 2
)
# Wrap the vector in a one-column data.table named val.
dt <- data.table(val = df)

Add a run id (rleid) for val, together with the lag (previous) and lead (next) values of val:

# id: run id of val; p: previous value of val; n: next value of val.
dt[, c("id", "p", "n") := list(
  rleid(val),
  shift(val, type = "lag"),
  shift(val, type = "lead")
)]

For each id (group), compute the run length and the values immediately before and after the run:

# s: rows in the run; f: value preceding the run; e: value following the run.
dt[, c("s", "f", "e") := list(.N, p[1], n[.N]), by = id]

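Where the condition holds (the values before and after the run are both 2, and the run is shorter than 3), set val to 2: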

# Runs shorter than 3 that have a 2 on both sides are overwritten with 2.
dt[f == 2 & e == 2 & s < 3, val := 2]

# Show the resulting column as a plain vector.
dt[["val"]]
[1] NA NA  2  2  2  2  2  2  2  2  1  1  1 NA  2  2  2  2  1  1  1  2

:

[1] NA NA  2  1  1  2  2  1  2  2  1  1  1 NA  2  2  1  2  1  1  1  2   

, , .

+4
source

Source: https://habr.com/ru/post/1659041/


All Articles