Flag observations before and after a certain value in another column

Say I have df:

# Example data: 20 rows, `include` starts at 1 everywhere and `flag` is 1
# only in rows 4, 8 and 16.
df <- data.frame(flag = numeric(20), include = rep(1, 20))
df$flag[c(4, 8, 16)] <- 1
df

   flag include
1     0       1
2     0       1
3     0       1
4     1       1
5     0       1
6     0       1
7     0       1
8     1       1
9     0       1
10    0       1
11    0       1
12    0       1
13    0       1
14    0       1
15    0       1
16    1       1
17    0       1
18    0       1
19    0       1
20    0       1

I want to change include to 0 if the row is within +/- two rows of a row where flag == 1. The result will look like this:

   flag include
1     0       1
2     0       0
3     0       0
4     1       1
5     0       0
6     0       0
7     0       0
8     1       1
9     0       0
10    0       0
11    0       1
12    0       1
13    0       1
14    0       0
15    0       0
16    1       1
17    0       0
18    0       0
19    0       1
20    0       1

I thought of some "innovative" (read: inefficient and complicated) ways to do this, but I suspect there is an easy way that I am missing.

It would be nice if the answer could be generalized to +/- n rows, since I have a lot more data and will potentially need to search within +/- 10 rows ...

+4
source share
4 answers

data.table:

library(data.table)
n <- 2
# row numbers whose flag equals one
flag_one <- which(df$flag == 1)

# every row within n of a flagged row, minus the flagged rows themselves
idx <- setdiff(as.vector(outer(flag_one, -n:n, "+")), flag_one)

# discard positions outside the table, then set include by reference
setDT(df)[idx[idx >= 1 & idx <= nrow(df)], include := 0][]

# base R equivalent of the last step (suggested by @Frank in a comment):
# df$include[idx[idx >= 1 & idx <= nrow(df)]] <- 0

#    flag include
# 1:    0       1
# 2:    0       0
# 3:    0       0
# 4:    1       1
# 5:    0       0
# 6:    0       0
# 7:    0       0
# 8:    1       1
# 9:    0       0
#10:    0       0
#11:    0       1
#12:    0       1
#13:    0       1
#14:    0       0
#15:    0       0
#16:    1       1
#17:    0       0
#18:    0       0
#19:    0       1
#20:    0       1

Wrapped in a function:

#' Set `include` to 0 for rows close to a flagged row
#'
#' @param df A data frame with columns `flag` and `include`.
#' @param n Number of rows on each side of a flagged row (`flag == 1`)
#'   to update.
#' @return `df` with `include` set to 0 in every row that lies within `n`
#'   rows of a flagged row. Flagged rows themselves are left unchanged.
update_n <- function(df, n) {
  n_rows <- nrow(df)
  flagged <- which(df$flag == 1)

  # Every position covered by a window of offsets -n..n around a flagged row.
  window <- as.vector(outer(flagged, -n:n, "+"))

  # Flagged rows keep their value; positions outside 1..n_rows do not exist.
  targets <- setdiff(window, flagged)
  targets <- targets[targets >= 1 & targets <= n_rows]

  df$include[targets] <- 0
  df
}
+3

In base R, using sapply and which:

# Rows one or two positions away from each flagged row. intersect() drops
# positions before row 1 or past the last row: without it, a flag near either
# end of the data yields invalid subscripts and the assignment fails.
df$include[intersect(
  unlist(lapply(which(df$flag == 1), function(x) c(x - 2, x - 1, x + 1, x + 2))),
  seq_len(nrow(df))
)] <- 0

df
#   flag include
#1     0       1
#2     0       0
#3     0       0
#4     1       1
#5     0       0
#6     0       0
#7     0       0
#8     1       1
#9     0       0
#10    0       0
#11    0       1
#12    0       1
#13    0       1
#14    0       0
#15    0       0
#16    1       1
#17    0       0
#18    0       0
#19    0       1
#20    0       1

Find the rows where flag is 1 and set include to 0 for the rows around them.


For a general n:

n <- 2
# Rows within n positions of each flagged row; setdiff() removes the flagged
# row from its own window. intersect() drops positions before row 1 or past
# the last row, which would otherwise be invalid subscripts.
df$include[intersect(
  unlist(lapply(which(df$flag == 1), function(x) setdiff(seq(x - n, x + n), x))),
  seq_len(nrow(df))
)] <- 0
+2
# For every row, check whether any OTHER row within two positions is flagged,
# and return include with those rows set to 0.
replace(x = df$include,
        list = vapply(seq_len(NROW(df)), function(i) {
            # Window of rows i-2..i+2, clipped to the data, without row i
            # itself, so the first and last rows never test their own flag.
            neighbours <- setdiff(intersect(seq(i - 2, i + 2),
                                            seq_len(NROW(df))), i)
            any(df$flag[neighbours] == 1)
        }, logical(1)), values = 0)
# [1] 1 0 0 1 0 0 0 1 0 0 1 1 1 0 0 1 0 0 1 1

For a general n:

# For every row, check whether any OTHER row within n positions is flagged,
# and return include with those rows set to 0.
replace(x = df$include,
        list = vapply(seq_len(NROW(df)), function(i) {
            # Window of rows i-n..i+n, clipped to the data, without row i
            # itself, so the first and last rows never test their own flag.
            neighbours <- setdiff(intersect(seq(i - n, i + n),
                                            seq_len(NROW(df))), i)
            any(df$flag[neighbours] == 1)
        }, logical(1)), values = 0)
+1

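Another option is zoo::rollapply. Take the rolling maximum of flag over a window of +/- n rows: it equals 1 wherever a row with flag == 1 falls inside the window, including the row whose own flag is 1.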

This uses rollapply rather than rollmax so that the partial argument can be set.

#' Test whether each position lies within `n` positions of a flagged one
#'
#' @param flag Vector of flags; the rolling maximum is compared with 1.
#' @param n Half-width of the window; the rolling window spans
#'   `2 * n + 1` positions.
#' @return The element-wise comparison of the rolling maximum of `flag`
#'   with 1.
is_within_flag_window <- function(flag, n) {
  # partial = TRUE applies max() to the in-range part of the window near the
  # ends of the vector. Spelled TRUE because T is an ordinary, reassignable
  # variable.
  zoo::rollapply(flag, width = (2 * n) + 1, partial = TRUE, FUN = max) == 1
}

# Flagged rows keep include = 1; other rows inside a +/- 2 window around a
# flagged row get 0; every remaining row gets 1.
df %>%
  mutate(
    include = ifelse(
      flag == 1,
      1,
      ifelse(is_within_flag_window(flag, 2), 0, 1)
    )
  )
+1
source

Source: https://habr.com/ru/post/1685707/


All Articles