Converting interval time data to hourly average representation in LESS TIME

I have a data file containing readings taken at 30-second intervals. The file is structured as follows:

> head(dframe)
            timestamp    power
1 2015-08-01 00:00:04 584.1379
2 2015-08-01 00:00:34 585.8087
3 2015-08-01 00:01:04 584.9335
4 2015-08-01 00:01:34 584.4366
5 2015-08-01 00:02:04 584.2829

To convert the 30-second readings into hourly averages, I use the following R command:

# Bin every reading into its clock hour, then average the power within each bin.
df <- aggregate(
  list(power = dframe$power),
  by = list(timestamp = cut(as.POSIXct(dframe$timestamp), "hour")),
  FUN = mean
)

It works correctly, but it takes a long time on large files (one year of data). Is there a way to reduce the time needed for the conversion? In other words, is there a faster alternative for converting 30-second data to hourly averages in R?

UPDATE: Here is a comparison of 4 methods, including the ones suggested by @akrun and @Joshua.

# Benchmark four ways of averaging 30-second readings into hourly means.
# Assumes `path` holds the location of a CSV file with `timestamp` and
# `power` columns.
library(data.table)
library(dplyr)
library(xts)

dframe <- read.csv(path, header = TRUE, sep = ",")
dframe$timestamp <- as.POSIXct(dframe$timestamp)
xframe <- dframe

# using aggregate
system.time(
  df1 <- aggregate(
    list(power = dframe$power),
    by = list(timestamp = cut(dframe$timestamp, "hour")),
    FUN = mean
  )
)

# using data.table
# as.data.table() returns a new object, so `dframe` stays a plain data.frame
# for the remaining benchmarks (setDT() would convert it in place).
# `by = .(name = expr)` names the grouping column `timestamp` explicitly.
system.time(
  dfx <- as.data.table(dframe)[
    ,
    .(power = mean(power)),
    by = .(timestamp = cut(timestamp, "hour"))
  ]
)

# using dplyr
system.time(
  xframe %>%
    group_by(timestamp = cut(timestamp, "hour")) %>%
    summarise(power = mean(power))
)

# using xts
system.time({
  x <- xts(dframe$power, dframe$timestamp)
  # endpoints() marks the last row of every hour; period.apply() averages the
  # rows between consecutive marks.
  h <- period.apply(x, endpoints(x, "hours"), mean)
  # Label each hourly mean with the start of its hour.
  h <- data.frame(timestamp = trunc(index(h), "hours"), power = coredata(h))
})

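Timings in seconds (user, system, elapsed) for two datasets of different sizes are shown below. For the smaller dataset: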

Method       user  system elapsed 
Aggregate    0.137   0.005   0.142
data.table   0.031   0.001   0.032 
dplyr        0.035   0.001   0.036  
xts          0.053   0.000   0.053  

For the larger dataset:

Aggregate    0.456   0.019   0.475 
data.table   0.099   0.002   0.102  
dplyr        0.099   0.004   0.103  
xts          0.158   0.004   0.161

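Note: all approaches except xts change the type of the timestamp column from POSIXct to Factor. This means that if you need to convert the timestamp back afterwards, that conversion costs additional time. In short, if you need a POSIXct timestamp in the result, xts is the best choice; otherwise, go with data.table.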

DATASET

+4
2

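aggregate is generally slow. We can use data.table to make this faster. Convert the 'data.frame' to a 'data.table' (setDT(dframe)), use cut on "timestamp" as the grouping variable, and take the mean of "power".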

library(data.table)

# setDT() converts `dframe` to a data.table by reference. Each reading is then
# grouped by its hour bin and the mean power per bin is returned.
# `by = .(name = expr)` names the grouping column `timestamp` explicitly.
setDT(dframe)[
  ,
  .(power = mean(power)),
  by = .(timestamp = cut(as.POSIXct(timestamp), "hour"))
]
+3

You can do this considerably faster with the tools in xts.

library(xts)

# sample data: N readings spaced 30 seconds apart, starting at the current time
set.seed(21)
N <- 2e6
dframe <- data.frame(
  timestamp = seq(Sys.time(), by = "30 sec", length.out = N),
  power = rnorm(N)
)

# aggregate
system.time(
  a <- aggregate(
    list(power = dframe$power),
    by = list(timestamp = cut(dframe$timestamp, "hour")),
    FUN = mean
  )
)
#    user  system elapsed 
#   2.456   0.000   2.457 

# xts
system.time({
  x <- xts(dframe$power, dframe$timestamp)
  # endpoints() marks the last row of every hour; period.apply() averages the
  # rows between consecutive marks.
  h <- period.apply(x, endpoints(x, "hours"), mean)
  # Label each hourly mean with the start of its hour.
  h <- data.frame(timestamp = trunc(index(h), "hours"), power = coredata(h))
})
#    user  system elapsed 
#   0.888   0.004   0.893 
+4

Source: https://habr.com/ru/post/1612182/


All Articles