R: Aggregating history by ID within a given date window

I asked a similar question before and got a lot of help: R: Aggregating history by ID by date

The difference is that in the previous post I wanted to aggregate ALL historical information, whereas now I only want to look back over the preceding 90 days.

Here is an example of how my data might look:

# Example data: twelve encounters, six per ID, listed newest-first within
# each ID. Event is a 0/1 flag recorded for every encounter.
strDates <- c(
  "09/09/16", "5/7/16", "5/6/16", "2/13/16", "2/11/16", "1/7/16",
  "11/8/16", "6/8/16", "5/8/16", "2/13/16", "1/3/16", "1/1/16"
)
Date <- as.Date(strDates, format = "%m/%d/%y")
ID <- rep(c("A", "B"), each = 6)
Event <- c(
  1, 0, 1, 0, 1, 1,
  0, 1, 1, 1, 0, 1
)
sample_df <- data.frame(Date = Date, ID = ID, Event = Event)

Which looks like this:

enter image description here

Background Information

I want to keep all the information attached to each encounter, and then add the following historical information, aggregated by ID and limited to the preceding 90 days:

  • The number of previous encounters in the last 90 days
  • The number of previous events in the last 90 days

Example

As an example, consider line 2.

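Row 2 has ID A, so rows 3-6 are the relevant ones (they occurred prior to the Row 2 Encounter). Within this group, rows 3, 4 and 5 all occurred within the last 90 days; row 6 did not.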

Number of previous encounters in the last 90 days for row 2: 3

Number of previous events in the last 90 days for row 2: 2 (5/6/16 and 2/11/16)

Desired output:

enter image description here

+4
4

Here is a data.table solution using a non-equi self-join (written against v 1.10.0) combined with by = .EACHI:

# Non-equi self-join with data.table: for every encounter, count the same ID's
# encounters (and total their Event flags) dated strictly between
# Date - 90 and Date.
library(data.table) # answer was written against v1.10.0
# setDT() converts sample_df to a data.table by reference, and `:=` adds Date2
# in place, so sample_df keeps the extra column after this line runs.
# Date2 is the exclusive lower bound of each row's 90-day look-back window.
setDT(sample_df)[, Date2 := Date - 90] # helper column for the range (a future data.table may make it unnecessary)
# In `on`, the left-hand side of each condition names a column of the outer
# table (x) and the right-hand side a column of the table passed as `i`.
sample_df[sample_df, # join the table with itself: every row of `i` is matched against all rows of `x`
          .(Enc90D = .N, Ev90D = sum(Event, na.rm = TRUE)), # per `i` row: number of matched rows and their Event total
          on = .(ID = ID, Date < Date, Date > Date2), # same ID, x.Date < i.Date and x.Date > i.Date2
          by = .EACHI] # evaluate j once for each row of `i`
#     ID       Date       Date Enc90D Ev90D
#  1:  A 2016-09-09 2016-06-11      0     0
#  2:  A 2016-05-07 2016-02-07      3     2
#  3:  A 2016-05-06 2016-02-06      2     1
#  4:  A 2016-02-13 2015-11-15      2     2
#  5:  A 2016-02-11 2015-11-13      1     1
#  6:  A 2016-01-07 2015-10-09      0     0
#  7:  B 2016-11-08 2016-08-10      0     0
#  8:  B 2016-06-08 2016-03-10      1     1
#  9:  B 2016-05-08 2016-02-08      1     1
# 10:  B 2016-02-13 2015-11-15      2     1
# 11:  B 2016-01-03 2015-10-05      1     1
# 12:  B 2016-01-01 2015-10-03      0     0
+9

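With dplyr, you can use do() (to operate on each ID group) together with rowwise() (so that Date refers to the current row's date, while .$Date refers to all the dates in the group):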

# This snippet uses dplyr verbs and the pipe, so load dplyr here rather than
# relying on a library() call that appears further down the page.
library(dplyr)

# For every encounter, count the same ID's encounters (PrevEnc90D) and total
# their Event flags (PrevEvent90D) that happened more than 0 and fewer than
# 90 days earlier.
#
# A grouped mutate() replaces the superseded do(rowwise(.)) construct: inside
# the group, `Date` and `Event` are the whole per-ID columns and `Date[i]` is
# the current row's date. Rows keep their input order and the result stays
# grouped by ID.
sample_df %>%
  group_by(ID) %>%
  mutate(
    PrevEnc90D = vapply(
      seq_along(Date),
      function(i) {
        # Days elapsed from every encounter of this ID to encounter i.
        gap <- as.numeric(Date[i] - Date)
        sum(gap > 0 & gap < 90)
      },
      integer(1)
    ),
    PrevEvent90D = vapply(
      seq_along(Date),
      function(i) {
        gap <- as.numeric(Date[i] - Date)
        sum(Event[gap > 0 & gap < 90])
      },
      numeric(1)
    )
  )

#Source: local data frame [12 x 5]
#Groups: ID [2]

#         Date     ID Event PrevEnc90D PrevEvent90D
#       <date> <fctr> <dbl>      <int>        <dbl>
#1  2016-09-09      A     1          0            0
#2  2016-05-07      A     0          3            2
#3  2016-05-06      A     1          2            1
#4  2016-02-13      A     0          2            2
#5  2016-02-11      A     1          1            1
#6  2016-01-07      A     1          0            0
#7  2016-11-08      B     0          0            0
#8  2016-06-08      B     1          1            1
#9  2016-05-08      B     1          1            1
#10 2016-02-13      B     1          2            1
#11 2016-01-03      B     0          1            1
#12 2016-01-01      B     1          0            0
+2

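Here is another dplyr approach: expand the data to one row per ID per calendar day, compute rolling 90-day sums over that daily series, and then keep only the rows that correspond to actual encounters. Note that rollsum() comes from the zoo package, which must be available.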

library(dplyr)

# Calendar-expansion approach: build one row per ID per calendar day, take
# rolling sums over the days before each day, then keep only the days on
# which an encounter actually took place.

# The calendar starts 90 days before the first encounter so that even the
# earliest encounters have a complete look-back window.
dates <- data.frame(
  Date = seq(from = min(sample_df$Date) - 90, to = max(sample_df$Date), by = 1)
)

extended_df <- data.frame(ID = unique(sample_df$ID)) %>%
  # The two data frames share no columns, so merge() yields every ID x Date pair.
  merge(dates) %>%
  left_join(sample_df, by = c("ID", "Date")) %>%
  # Newest day first, so lead() below steps to the previous calendar day.
  arrange(ID, desc(Date)) %>%
  mutate(
    # Days without an encounter have Event == NA after the left join.
    Encounter = as.integer(!is.na(Event)),
    Event = ifelse(is.na(Event), 0, Event)
  ) %>%
  group_by(ID) %>%
  mutate(
    # rollsum() is provided by zoo, which was never attached; call it through
    # its namespace so the snippet does not fail with "could not find function".
    # lead() drops the current day, so the window is the 90 days d-1 .. d-90.
    # NOTE(review): an encounter exactly 90 days earlier IS counted here,
    # whereas comparisons of the form `Date > Date - 90` exclude it; use
    # k = 89 if the strict bound is the intended one.
    PrevEnc90D = zoo::rollsum(lead(Encounter), k = 90, fill = 0, align = "left"),
    PrevEvent90D = zoo::rollsum(lead(Event), k = 90, fill = 0, align = "left")
  ) %>%
  # Keep only the real encounters; the join keys are stated explicitly
  # instead of being guessed from the shared column names.
  inner_join(sample_df[, c("ID", "Date")], by = c("ID", "Date")) %>%
  arrange(ID, desc(Date))

extended_df

Source: local data frame [12 x 6]
Groups: ID [2]

       ID       Date Event Encounter PrevEnc90D PrevEvent90D
   <fctr>     <date> <dbl>     <int>      <dbl>        <dbl>
1       A 2016-09-09     1         1          0            0
2       A 2016-05-07     0         1          3            2
3       A 2016-05-06     1         1          2            1
4       A 2016-02-13     0         1          2            2
5       A 2016-02-11     1         1          1            1
6       A 2016-01-07     1         1          0            0
7       B 2016-11-08     0         1          0            0
8       B 2016-06-08     1         1          1            1
9       B 2016-05-08     1         1          1            1
10      B 2016-02-13     1         1          2            1
11      B 2016-01-03     0         1          1            1
12      B 2016-01-01     1         1          0            0
+2

Also, taking advantage of the fact that the data is sorted, a base R approach:

# Base R approach, applied separately to each ID:
#   (i)   order the ID's rows newest-first;
#   (ii)  for every row, use findInterval() to locate the oldest row that is
#         still fewer than 90 days older;
#   (iii) derive both results from row positions and a running sum of Event.
do.call(
  rbind,
  lapply(
    split(sample_df, sample_df$ID),
    function(x) {
      # The position arithmetic below is only valid for newest-first rows, and
      # findInterval() rejects an unsorted `vec`, so sort explicitly instead
      # of relying on the row order of the input.
      x <- x[order(x$Date, decreasing = TRUE), , drop = FALSE]
      n <- nrow(x)
      # findInterval() counts the dates <= Date - 90. The other n - count rows
      # are dated later than Date - 90, and because rows are newest-first that
      # number is also the position of the oldest row inside the window.
      i <- n - findInterval(x$Date - 90, rev(x$Date))
      cs <- cumsum(x$Event)
      cbind(
        x,
        # Rows strictly between the current position and position i.
        PrevEnc90D = i - seq_len(n),
        # Events accumulated over those same rows.
        PrevEvent90D = cs[i] - cs
      )
    }
  )
)
#           Date ID Event PrevEnc90D PrevEvent90D
#A.1  2016-09-09  A     1          0            0
#A.2  2016-05-07  A     0          3            2
#A.3  2016-05-06  A     1          2            1
#A.4  2016-02-13  A     0          2            2
#A.5  2016-02-11  A     1          1            1
#A.6  2016-01-07  A     1          0            0
#B.7  2016-11-08  B     0          0            0
#B.8  2016-06-08  B     1          1            1
#B.9  2016-05-08  B     1          1            1
#B.10 2016-02-13  B     1          2            1
#B.11 2016-01-03  B     0          1            1
#B.12 2016-01-01  B     1          0            0

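Here, the approach relies on "Date" being sorted "decreasingly" (within each ID, as in the example). For each row we then (i) find the oldest row that still falls inside its 90-day window, (ii) count the rows between the two, and (iii) take the difference of the cumsum of events to get the event total. The split/lapply part simply applies this per "ID"; it could be replaced with any other group-by mechanism.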

+1

Source: https://habr.com/ru/post/1666175/


All Articles