How to lag an integer variable using R?

Let's say I have the following historical league results:

# Sample league table: two teams, five seasons, one row per team and season.
Season <- rep(c(1, 2, 3, 4, 5), each = 2)
Team <- rep(c("Diverpool", "Deverton"), times = 5)
End.Rank <- c(
  8, 17,  # season 1
  4, 15,  # season 2
  3, 6,   # season 3
  4, 16,  # season 4
  3, 17   # season 5
)
# Note: cbind() on mixed numeric/character vectors yields a character matrix,
# so Season and End.Rank are stored as strings here.
PLRank <- cbind(Season, Team, End.Rank)

I want to (efficiently) create a one-year lagged variable for each team, based on two criteria:

  • lag `End.Rank` by `Season` (i.e. take the value at t-1, with `Season` as the time variable)
  • do this separately by team (Deverton's lagged `End.Rank` comes from Deverton, Diverpool's lagged `End.Rank` from Diverpool)

Essentially, I would like the result to be as follows:

# Desired result: each team's End.Rank from the previous season
# (NA in the first season, where there is no earlier value).
l.End.Rank <- c(NA, NA, 8, 17, 4, 15, 3, 6, 4, 16)

I tried `lag()` and am currently lost trying to do this in a `for()` loop.

+4
source share
1 answer

You can try one of the following ...

First, it is better to store the data in a `data.frame` rather than the matrix that `cbind` produces:

# Build a data.frame so each column keeps its own type
# (unlike the character matrix produced by cbind()).
PLRank <- data.frame(
  Season = Season,
  Team = Team,
  End.Rank = End.Rank
)

"data.table":

library(data.table)
# setDT() converts PLRank to a data.table by reference and `:=` adds the
# new column in place. shift() lags by row position, so order(Season) is
# supplied in `i`: each team's rows are then seen in season order even if
# PLRank itself is not sorted. The row order of PLRank is left unchanged.
setDT(PLRank)[order(Season), l.End.Rank := shift(End.Rank), by = .(Team)][]
#     Season      Team End.Rank l.End.Rank
#  1:      1 Diverpool        8         NA
#  2:      1  Deverton       17         NA
#  3:      2 Diverpool        4          8
#  4:      2  Deverton       15         17
#  5:      3 Diverpool        3          4
#  6:      3  Deverton        6         15
#  7:      4 Diverpool        4          3
#  8:      4  Deverton       16          6
#  9:      5 Diverpool        3          4
# 10:      5  Deverton       17         16

"dplyr":

library(dplyr)
# lag() shifts by row position within each Team group; order_by = Season
# makes the lag follow the season order even if the rows are not sorted.
PLRank %>%
  group_by(Team) %>%
  mutate(l.End.Rank = lag(End.Rank, order_by = Season))
# Source: local data frame [10 x 4]
# Groups: Team [2]
# 
#    Season      Team End.Rank l.End.Rank
#     (dbl)    (fctr)    (dbl)      (dbl)
# 1       1 Diverpool        8         NA
# 2       1  Deverton       17         NA
# 3       2 Diverpool        4          8
# 4       2  Deverton       15         17
# 5       3 Diverpool        3          4
# 6       3  Deverton        6         15
# 7       4 Diverpool        4          3
# 8       4  Deverton       16          6
# 9       5 Diverpool        3          4
# 10      5  Deverton       17         16

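In both cases the lag is taken along the rows of each team, so the rows must be in `Season` order within each team (either sorted beforehand or ordered explicitly in the call).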

, , , . .

:

Use `dcast` from "data.table" to spread "End.Rank" into one column per "Team", then lag each of those columns.

library(data.table)
# Team names; these become the value columns of the wide table.
teams <- unique(as.character(PLRank$Team))
# Cast to one row per Season with one End.Rank column per team, then
# replace every team column with its one-row lag.
dcast(
  as.data.table(PLRank),
  Season ~ Team,
  value.var = "End.Rank"
)[, (teams) := lapply(.SD, shift, n = 1L, type = "lag"), .SDcols = teams][]
#    Season Deverton Diverpool
# 1:      1       NA        NA
# 2:      2       17         8
# 3:      3       15         4
# 4:      4        6         3
# 5:      5       16         4

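Or, if you also want to keep the team names as columns, number the teams within each season and cast on that index: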

# Number the rows within each Season (1, 2, ...), cast both Team and
# End.Rank on that index, then lag only the End.Rank_* columns.
dcast(
  as.data.table(PLRank)[, ind := seq_len(.N), by = .(Season)],
  Season ~ ind,
  value.var = c("Team", "End.Rank")
)[
  ,
  c("End.Rank_1", "End.Rank_2") := lapply(.SD, shift, n = 1L, type = "lag"),
  .SDcols = c("End.Rank_1", "End.Rank_2")
][]
#    Season    Team_1   Team_2 End.Rank_1 End.Rank_2
# 1:      1 Diverpool Deverton         NA         NA
# 2:      2 Diverpool Deverton          8         17
# 3:      3 Diverpool Deverton          4         15
# 4:      4 Diverpool Deverton          3          6
# 5:      5 Diverpool Deverton          4         16

A "dplyr" approach is similar, but the reshaping step needs "tidyr".

library(dplyr)
library(tidyr)
# Spread End.Rank into one column per Team, then lag every column except
# Season. mutate(across()) replaces the deprecated mutate_each(funs()).
# spread() is superseded by pivot_wider() but still supported; it is kept
# here so the printed result below is unchanged.
PLRank %>%
  spread(Team, End.Rank) %>%
  mutate(across(-Season, lag))
#   Season Deverton Diverpool
# 1      1       NA        NA
# 2      2       17         8
# 3      3       15         4
# 4      4        6         3
# 5      5       16         4
+2

Source: https://habr.com/ru/post/1622454/


All Articles