Find the previous value in the vector and apply certain conditions

For each value in myvector1, I would like to know the mycategory of the immediately previous occurrence of that same value in myvector1, provided that its mystatus is ON; otherwise, I keep looking at the next earlier occurrence until I find one whose status is ON.

The instructions look like this:

  • For this position in "myvector1", take its value.
  • Find the position of the immediately previous occurrence of that value in "myvector1".
  • Check the associated status. If it is ON, give me its "mycategory". If it is OFF, go back to step 2.
  • Assign the resulting "mycategory" to the new "mysolution" vector.

Given the dataset mydf, I'm looking for DesiredSolution (which I filled out manually).

# Example data (dput output): a 32-row data.frame with four factor columns.
#   myvector1       - levels "0".."4"
#   mystatus        - levels "OFF", "ON"
#   mycategory      - levels "bye", "hi", "stay"
#   DesiredSolution - hand-filled expected result; note that "NA" here is a
#                     literal factor level, not a true missing value.
mydf <- structure(list(myvector1 = structure(c(1L, 2L, 3L, 4L, 5L, 1L, 
2L, 4L, 5L, 2L, 3L, 4L, 5L, 2L, 3L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 
2L, 4L, 5L, 1L, 1L, 2L, 3L, 4L, 5L, 3L), .Label = c("0", "1", 
"2", "3", "4"), class = "factor"), mystatus = structure(c(2L, 
1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 
1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L), .Label = c("OFF", 
"ON"), class = "factor"), mycategory = structure(c(2L, 2L, 3L, 
1L, 1L, 1L, 1L, 3L, 3L, 1L, 2L, 2L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 
2L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L), .Label = c("bye", 
"hi", "stay"), class = "factor"), DesiredSolution = structure(c(3L, 
3L, 3L, 3L, 3L, 2L, 3L, 1L, 3L, 1L, 4L, 4L, 4L, 1L, 2L, 4L, 1L, 
1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L), .Label = c("bye", 
"hi", "NA", "stay"), class = "factor")), .Names = c("myvector1", 
"mystatus", "mycategory", "DesiredSolution"), row.names = c(NA, 
-32L), class = "data.frame")
+4
3

A straightforward loop-based approach:

# For each position i, look backwards through the earlier positions that hold
# exactly the same myvector1 value and take mycategory from the nearest one
# whose mystatus is "ON". The result is NA when no such earlier position exists.
#
# Reads myvector1, mystatus and mycategory from the calling environment and
# writes the character vector DesiredSolution (same length as myvector1).
#
# The result is preallocated as character so that category labels are stored;
# appending factor values with c() onto a non-factor vector would keep only
# their integer codes.
DesiredSolution <- rep(NA_character_, length(myvector1))
for (i in seq_along(myvector1)) {
  # Exact equality instead of grep(): a regex match on the label would also
  # hit values that merely contain it (e.g. "1" inside "10").
  same_value <- which(myvector1 == myvector1[i])
  earlier <- same_value[same_value < i]

  # Keep only earlier occurrences that are ON; which() drops NA statuses.
  on_earlier <- earlier[which(mystatus[earlier] == "ON")]

  if (length(on_earlier) > 0) {
    # The nearest previous occurrence is the one with the largest index.
    DesiredSolution[i] <- as.character(mycategory[max(on_earlier)])
  }
}
+1

Using data.table:

library(data.table)
# Convert mydf to a data.table in place.
setDT(mydf)
# r holds each row's index so that "earlier row" can be expressed in the join.
mydf[, r := .I]
# Non-equi self-join: for every row of mydf, match rows of the ON-only subset
# that have the same myvector1 and a strictly smaller r; mult = "last" keeps
# the last such match and x.mycategory takes its mycategory (NA when no match).
mydf[, v := mydf[mystatus == "ON"][mydf, on=.(r < r, myvector1), mult="last", x.mycategory]]

    myvector1 mystatus mycategory DesiredSolution  r    v
 1:         0       ON         hi              NA  1   NA
 2:         1      OFF         hi              NA  2   NA
 3:         2       ON       stay              NA  3   NA
 4:         3       ON        bye              NA  4   NA
 5:         4      OFF        bye              NA  5   NA
 6:         0       ON        bye              hi  6   hi
 7:         1       ON        bye              NA  7   NA
 8:         3       ON       stay             bye  8  bye
 9:         4       ON       stay              NA  9   NA
10:         1      OFF        bye             bye 10  bye
11:         2       ON         hi            stay 11 stay
12:         3       ON         hi            stay 12 stay
13:         4       ON       stay            stay 13 stay
14:         1      OFF        bye             bye 14  bye
15:         2       ON        bye              hi 15   hi
16:         4       ON        bye            stay 16 stay
17:         0       ON        bye             bye 17  bye
18:         1      OFF        bye             bye 18  bye
19:         2       ON         hi             bye 19  bye
20:         3       ON         hi              hi 20   hi
21:         4      OFF       stay             bye 21  bye
22:         0      OFF        bye             bye 22  bye
23:         1       ON        bye             bye 23  bye
24:         3      OFF        bye              hi 24   hi
25:         4       ON        bye             bye 25  bye
26:         0      OFF        bye             bye 26  bye
27:         0      OFF         hi             bye 27  bye
28:         1      OFF         hi             bye 28  bye
29:         2      OFF         hi              hi 29   hi
30:         3      OFF         hi              hi 30   hi
31:         4      OFF       stay             bye 31  bye
32:         2       ON       stay              hi 32   hi
    myvector1 mystatus mycategory DesiredSolution  r    v

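How it works: each row of mydf is joined against mydf[mystatus == "ON"], matching rows with a smaller r and the same myvector1. The mycategory of the last matching row is taken, and rows with no match get NA.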

+4

A solution using a plain data.frame and zoo::na.locf

First, record the original position of each row:

# Remember each row's original position so the row order can be restored later.
mydf[["pos"]] <- seq_len(nrow(mydf))

Then split the data.frame by myvector1:

# One sub-data.frame per myvector1 level.
spl_mydf <- split(x = mydf, f = mydf[["myvector1"]])

Then apply to each "slice" (which contains a single myvector1 value) a function that keeps the mycategory values only where mystatus is ON, sets the others to NA, and replaces each NA with the previous non-NA value. An NA is then prepended and the last element dropped, which gives the desired lag.

# For each myvector1 slice, compute the lagged "last ON category":
#   1. keep mycategory only where mystatus is not "OFF" (the rest become NA),
#   2. carry the last non-NA value forward,
#   3. shift by one so a row only ever sees strictly earlier rows.
# Returns a list of data.frames with columns pos (original row position) and
# out (character category or NA), one per slice of spl_mydf.
my_out <- lapply(spl_mydf, function(sl_df) {
  # Work on character values so factor levels never leak into the result.
  out <- as.character(sl_df$mycategory)
  out[sl_df$mystatus == "OFF"] <- NA

  if (length(out) > 0) {
    # zoo:: is spelled out so the snippet works without library(zoo).
    filled <- zoo::na.locf(out, na.rm = FALSE)
    lagged <- c(NA, head(filled, -1))
  } else {
    # An unused factor level yields an empty slice; keep lengths consistent.
    lagged <- character(0)
  }

  # stringsAsFactors = FALSE keeps `out` character on every R version.
  data.frame(pos = sl_df$pos, out = lagged, stringsAsFactors = FALSE)
})

Finally, put everything back in the original row order and drop the pos column:

# Stack the per-slice results into one data.frame.
out <- do.call(rbind, my_out)
# Sorting by pos realigns the values with mydf's original row order.
mydf[["output"]] <- out[order(out[["pos"]]), "out"]
# The helper column is no longer needed.
mydf[["pos"]] <- NULL

head(mydf, 10)
   myvector1 mystatus mycategory DesiredSolution output
1          0       ON         hi              NA   <NA>
2          1      OFF         hi              NA   <NA>
3          2       ON       stay              NA   <NA>
4          3       ON        bye              NA   <NA>
5          4      OFF        bye              NA   <NA>
6          0       ON        bye              hi     hi
7          1       ON        bye              NA   <NA>
8          3       ON       stay             bye    bye
9          4       ON       stay              NA   <NA>
10         1      OFF        bye             bye    bye

check everything is fine:

# Every non-missing output must equal the hand-filled solution.
with(mydf, all(DesiredSolution == output, na.rm = TRUE)) # TRUE
# The literal "NA" level in DesiredSolution must line up with real NAs in output.
with(mydf, all((as.character(DesiredSolution) == "NA") == is.na(output))) # TRUE

(In your data.frame, "NA" is one of the factor levels of DesiredSolution rather than a true missing value.)

+3
source

Source: https://habr.com/ru/post/1690791/


All Articles