How to find a correlation for every n lines in R?

Question

How to find a correlation for every n lines in R?

Below is my data frame lanec:

read.table(textConnection(scan(,character(),sep="\n")))
   vehicle.id frame.id svel PrecVehVel
1           2        1   55         59
2           2        2   55         59
3           2        3   53         57
4           2        4   50         54
5           2        5   48         52
6           3        3   49         53
7           3        4   55         59
8           3        5   55         59
9           3        6   43         47
10          3        7   45         49
11          3        8   52         56
12          3        9   50         54
13          4        1   38         42
14          4        2   42         46
15          4        3   45         49
16          4        4   48         52
17          4        5   50         54
18          4        6   52         56
19          4        7   55         59
20          5        6   49         53
21          5        7   52         56
22          5        8   54         58
23          5        9   58         62
24          5       10   60         64
25          5       11   63         67
26          5       12   70         74

<Carriage return>

I want to find a correlation corbetween sveland PrecVehVel(vehicle speed and previous vehicle speed, respectively) on vehicle.idfor every three lines, but for consecutive lines. This means that in the data frame lanecfor vehicle.id==2R, the correlation between

   svel PrecVehVel
1    55         59
2    55         59
3    53         57

svel (55,55,53) and PrecVehVel (59,59,57), then start again from the second row and find the correlation between

   svel PrecVehVel
2    55         59
3    53         57
4    50         54

svel (55,53,50) and PrecVehVel (59,57,54) etc.

The result should be something like this:

vehicle.id     frames     speed.cor
2               1 - 3     1
2               2 - 4     1
2               3 - 5     1
2               4 - 5     1

, frames 2 , , 2. , R, :

ddply(lanec, 'vehicle.id', summarize, speed.cor = cor(svel, PrecVehVel) )

, vehicle.id

+4

r

umair durrani 15 '14 21:18

3

, rollapply zoo:

library(zoo)
ddply(lanec, 'vehicle.id', function(dat){
  dat$speed.cor = rollapply(data = dat, width = 3,
                            FUN = function(x) cor(x[ , "svel"], x[ , "PrecVehVel"]),
                            by.column = FALSE, fill = NA)
  dat
})

, 3. , "2 ".

OP. . . , .

ddply(lanec, 'vehicle.id', function(dat){
  speed.cor <- rollapply(data = dat, width = 3,
                         FUN = function(x) cor(x[ , "svel"], x[ , "PrecVehVel"]),
                         by.column = FALSE)
  len <- length(speed.cor)
  vehicle.id <- head(dat$vehicle.id, len)
  first.frame.id <- head(dat$frame.id, len)
  last.frame.id <- tail(dat$frame.id, len)
  data.frame(vehicle.id, first.frame.id, last.frame.id, speed.cor)
})

#    vehicle.id first.frame.id last.frame.id speed.cor
# 1           2              1             3         1
# 2           2              2             4         1
# 3           2              3             5         1
# 4           3              3             5         1
# 5           3              4             6         1
# 6           3              5             7         1
# 7           3              6             8         1
# 8           3              7             9         1
# 9           4              1             3         1
# 10          4              2             4         1
# 11          4              3             5         1
# 12          4              4             6         1
# 13          4              5             7         1
# 14          5              6             8         1
# 15          5              7             9         1
# 16          5              8            10         1
# 17          5              9            11         1
# 18          5             10            12         1

+1

Henrik 15 '14 21:55

This makes the matrix from which rows should be used in each group:

> bandSparse(5,4,-2:0)
5 x 4 sparse Matrix of class "ngCMatrix"

[1,] | . . .
[2,] | | . .
[3,] | | | .
[4,] . | | |
[5,] . . | |

We can apply columns above it for each car:

> vehicle2 <- subset(lanec, vehicle.id == 2)
> apply(bandSparse(5,4,-2:0), 2, function(i) list(r= cor(vehicle2[i,3:4]) ) )

Then we can do this for each car:

by(lanec vehicle$vehicle.id, function(x) apply(bandSparse(nrow(x),nrow(x)-1,-2:0), 2, function(i) list(r= cor(x[i,3:4]) ) ) )

0

Neal fultz May 15, '14 at 22:01

source share

Mike.Gahan · Accepted Answer · 2014-05-15T22:34:11+0000

. , R- "" . data.table (, - ). parallel, , . mc.cores=1 , Mac Linux, , , .

lanec <- structure(list(vehicle.id = c(2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L), frame.id = c(1L, 2L, 3L, 4L, 5L, 3L, 4L, 5L, 6L, 7L, 
8L, 9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 6L, 7L, 8L, 9L, 10L, 11L, 
12L), svel = c(55, 75, 53, 50, 32, 49, 55, 55, 43, 45, 52, 50, 
38, 42, 45, 48, 50, 52, 55, 49, 52, 54, 58, 60, 63, 70), PrecVehVel = c(59, 
59, 57, 54, 52, 53, 59, 59, 47, 49, 56, 54, 42, 46, 49, 52, 54, 
56, 59, 53, 56, 58, 62, 64, 67, 74)), .Names = c("vehicle.id", 
"frame.id", "svel", "PrecVehVel"), class = "data.frame", row.names = c(NA, 
-26L))

#Load data.table package
require("data.table")
require("parallel")
data <- data.table(lanec)

#What length of correlation vector do you want?
cor.vec <- 2

##Assign each customer an ID
data[,ID:=.GRP,by=c("vehicle.id")]

##Group values at the list level
Ref <- data[,list(frame.id=list(I(frame.id)),svel.list=list(I(svel)),PrecVehVel.list=list(I(PrecVehVel))),by=list(vehicle.id)]

#Calculate rolling calculation
data$Roll.Corr <- mcmapply(FUN = function(RD, NUM) {

  #mcmapply is a multicore version of mapply. If running Linux or Mac, you can up the amount of cores and have the code run faster

  #d is the difference between the current frame.id and all other frame.id within the   same vehicle id.
  d <- (Ref$frame.id[[NUM]] - RD)

  #The following checks whether d is within the "window" you want. If not in the desired "window", then svel1 and prec1 will have zero values. If in desired "window", then its value will be the respective "svel" and "prec" value in original data.
  svel1 <- (d >= 0 & d <= cor.vec)*Ref$svel.list[[NUM]]
  prec1 <- (d >= 0 & d <= cor.vec)*Ref$PrecVehVel.list[[NUM]]

  #Following discards all data points not in sliding "window" (deletes all of the zeros)
  keep <- which(d >= 0 & d <= cor.vec)
  svel1 <- svel1[keep]
  prec1 <- prec1[keep]

  #Following makes sure a correlation value is only provided if the number of points within the window is larger than the correlation "window" length
  if (length(svel1)>cor.vec){
      cor(svel1,prec1) 
    } else {
      NA
    }   
}, RD = data$frame.id,NUM=data$ID,mc.cores=1)

#Print data
data[,frame.start:=ifelse(is.na(Roll.Corr),NA,frame.id)]
data[,frame.end:=ifelse(is.na(Roll.Corr),NA,frame.id+cor.vec)]
head(data,10)

    vehicle.id frame.id svel PrecVehVel ID Roll.Corr frame.start frame.end
 1:          2        1   55         59  1 0.5694948           1         3
 2:          2        2   75         59  1 0.8635894           2         4
 3:          2        3   53         57  1 0.8746393           3         5
 4:          2        4   50         54  1        NA          NA        NA
 5:          2        5   32         52  1        NA          NA        NA
 6:          3        3   49         53  2 1.0000000           3         5
 7:          3        4   55         59  2 1.0000000           4         6
 8:          3        5   55         59  2 1.0000000           5         7
 9:          3        6   43         47  2 1.0000000           6         8
10:          3        7   45         49  2 1.0000000           7         9

How to find a correlation for every n lines in R?

More articles: