Cumulative sum based on certain conditions

This is my data frame:

       X   Y  Date   Qty  CumSumA  CumSumB
    1  A   B   1/1     1        1        0
    2  A   A   1/1     2        3        2
    3  A   E   1/1     2        5        2
    4  B   A   1/1     1        1        1
    5  B   B   1/1     3        4        4
    6  B   C   1/1     2        6        4
    7  C   D   1/1     2        2        2
    8  C   E   1/1     4        6        2
    9  C   A   1/1     1        7        2
   10  A   C   1/2     2        2        0
   11  A   D   1/2     3        5        0
   12  A   E   1/2     2        7        0
   13  B   A   1/2     5        5        0
   14  B   B   1/2     1        6        1
   15  B   C   1/2     2        8        1
   16  C   D   1/2     2        2        4
   17  C   E   1/2     1        1        4
   18  C   A   1/2     3        4        4

I compute the CumSumA column with:

library(dplyr)
# Running total of Qty within each (Date, X) group, following the current
# row order of the data frame.
data <- data %>%
  group_by(Date, X) %>%
  mutate(CumSumA = cumsum(Qty)) %>%
  ungroup()  # drop the grouping so later verbs are not silently run per group

How can I get a CumSumB column that is the cumulative sum of Qty over all rows up to and including the current one that have (a) the same Date value and (b) a Y value equal to the current row's X value?

So, for example, row 16 has an X value of C and a Date value of 1/2. I want the cumulative sum of Qty over all rows up to that point with a Y value of C and a Date value of 1/2. Those are rows 10 and 15, so CumSumB is 2 + 2 = 4.

Note that columns X and Y have more than 140 unique values.

+4
source share
2

Here is a data.table solution that uses a join with allow.cartesian=TRUE:

# library() stops with an error if data.table is not installed, whereas
# require() only warns and returns FALSE, deferring the failure to setDT().
library(data.table)
# Convert DT to a data.table by reference.
setDT(DT)

First, create a second data.table that keeps X, Y and Date together with a row index, and key it on Date and X.

# Lookup side of the join: the identifying columns plus each row's original
# position (indx), keyed on Date and X.
DT_X <- DT[, list(X, Y, Date, indx = .I)]
setkeyv(DT_X, c("Date", "X"))

Next, remove column X from DT, add a row index, and key DT on Date and Y:

# Record each row's original position as indy, drop column X by reference,
# then key the table on Date and Y.
DT[, indy := .I]
DT[, X := NULL]
setkeyv(DT, c("Date", "Y"))

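Now join the two tables so that X = Y within each Date (this requires allow.cartesian=TRUE) and store the result in DT_join. This is an X[Y] join of two data.tables; see ?data.table for details.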

# Keyed X[Y] join: every row of DT (keyed on Date, Y) is matched against the
# rows of DT_X whose key (Date, X) has the same values. allow.cartesian=TRUE
# lets the result grow beyond nrow(DT_X) + nrow(DT), which a many-to-many
# match like this one can do.
DT_join <- DT_X[DT, allow.cartesian=TRUE]

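The condition indy<=indx keeps only the matches located at or above the current row - this is the "cumulative" part - and the remaining Qty values are then summed.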

# Drop the unmatched join rows (Y is NA), then for each (X, Y, Date) group
# add up Qty over the matches whose position indy is not after indx.
DT_join[!is.na(Y)][
  ,
  list(CumSumB = sum(Qty * (indy <= indx))),
  by = c("X", "Y", "Date")
]

(Thanks to aosmith): by=.(X,Y,Date) can be replaced with by=indx.

Result:

    X Y Date CumSumB
 1: A B  1/1       0
 2: A A  1/1       2
 3: A E  1/1       2
 4: B A  1/1       1
 5: B B  1/1       4
 6: B C  1/1       4
 7: C D  1/1       2
 8: C E  1/1       2
 9: C A  1/1       2
10: A C  1/2       0
11: A D  1/2       0
12: A E  1/2       0
13: B A  1/2       0
14: B B  1/2       1
15: B C  1/2       1
16: C D  1/2       4
17: C E  1/2       4
18: C A  1/2       4
+3

Here is a dplyr alternative, along the same lines as @Floo0's answer.

First, add a row-number column. It is used later to work out CumSumB.

library(dplyr)

# Tag every row with its position so it can be compared after the self-join.
dat <- mutate(dat, row = row_number())

Next, join the dataset to itself, matching X to Y within the same Date. All rows of the X side must be kept (i.e. use left_join).

There are now two row columns: row.x, which belongs to the X side, and row.y, which belongs to the Y side.

# Self-join: the key columns of dat (left side) are matched to the full dat
# (right side) wherever left X equals right Y on the same Date.
dat %>%
  select(X, Date, Y, row) %>%
  left_join(dat, by = c("X" = "Y", "Date" = "Date"))

Then group by row.x and sum Qty over the matches where row.y is less than or equal to row.x.

# Self-join on X = Y within Date, then for each left-hand row (row.x) sum
# the Qty of the matched rows whose position row.y is not after row.x.
dat %>%
  select(X, Date, Y, row) %>%
  left_join(dat, by = c("X" = "Y", "Date" = "Date")) %>%
  group_by(row.x) %>%
  summarise(CumSumB = sum(Qty[row.y <= row.x]))

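Finally, join the result back to the original dataset. If you do not want to keep the row column, add select(-row) at the end.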

# Full pipeline: self-join on X = Y within Date, sum the Qty of matches at
# or above each row, then attach that per-row total back onto dat by row.
dat %>%
  select(X, Date, Y, row) %>%
  left_join(dat, by = c("X" = "Y", "Date" = "Date")) %>%
  group_by(row.x) %>%
  summarise(CumSumB = sum(Qty[row.y <= row.x])) %>%
  left_join(x = dat, y = ., by = c("row" = "row.x"))

   X Y Date Qty CumSumA CumSumB.x row CumSumB.y
1  A B  1/1   1       1         0   1         0
2  A A  1/1   2       3         2   2         2
3  A E  1/1   2       5         2   3         2
4  B A  1/1   1       1         1   4         1
5  B B  1/1   3       4         4   5         4
6  B C  1/1   2       6         4   6         4
7  C D  1/1   2       2         2   7         2
8  C E  1/1   4       6         2   8         2
9  C A  1/1   1       7         2   9         2
10 A C  1/2   2       2         0  10         0
11 A D  1/2   3       5         0  11         0
12 A E  1/2   2       7         0  12         0
13 B A  1/2   5       5         0  13         0
14 B B  1/2   1       6         1  14         1
15 B C  1/2   2       8         1  15         1
16 C D  1/2   2       2         4  16         4
17 C E  1/2   1       1         4  17         4
18 C A  1/2   3       4         4  18         4
+2

Source: https://habr.com/ru/post/1606577/


All Articles