Calculate the duration of a time interval for a given period

I have a data frame with start time and length (in seconds):

# Sample data: 15 events, each with a start time and a duration in seconds.
# NOTE(review): no `tz` is passed to as.POSIXct(), so the timestamps are
# presumably read in the session time zone despite the " UTC" suffix — confirm.
dates <- data.frame(
  start = as.POSIXct(c(
    "2010-04-03 03:02:38 UTC", "2010-04-03 06:03:14 UTC",
    "2010-04-20 03:05:52 UTC", "2010-04-20 03:17:42 UTC",
    "2010-04-21 03:09:38 UTC", "2010-04-21 07:10:14 UTC",
    "2010-04-21 08:12:52 UTC", "2010-04-23 03:13:42 UTC",
    "2010-04-23 03:25:42 UTC", "2010-04-23 03:36:38 UTC",
    "2010-04-23 08:58:14 UTC", "2010-04-24 03:21:52 UTC",
    "2010-04-24 03:22:42 UTC", "2010-04-24 07:24:19 UTC",
    "2010-04-24 07:55:19 UTC"
  )),
  # The durations cycle through 3600, 300, 900 five times.
  length = rep(c(3600, 300, 900), times = 5)
)

> dates
                 start length
1  2010-04-03 03:02:38   3600
2  2010-04-03 06:03:14    300
3  2010-04-20 03:05:52    900
4  2010-04-20 03:17:42   3600
5  2010-04-21 03:09:38    300
6  2010-04-21 07:10:14    900
7  2010-04-21 08:12:52   3600
8  2010-04-23 03:13:42    300
9  2010-04-23 03:25:42    900
10 2010-04-23 03:36:38   3600
11 2010-04-23 08:58:14    300
12 2010-04-24 03:21:52    900
13 2010-04-24 03:22:42   3600
14 2010-04-24 07:24:19    300
15 2010-04-24 07:55:19    900

I need to find the total duration for the period from 2010-04-02 00:00:00 to 2010-04-21 09:00:00, and for the period from 2010-04-23 03:15:00 to 2010-04-24 08:00:00.

The tricky part is that an event's length can extend beyond the end of the specified period, and I do not want to count that extra duration.

I expect to receive:

  • 12428 seconds for 2010-04-02 00:00:00 to 2010-04-21 09:00:00
  • 10103 seconds for 2010-04-23 03:15:00 to 2010-04-24 08:00:00

I thought of using lubridate to define an interval for each row and then summing the durations, but I cannot figure it out.

+4
source
3

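For each row, use length to compute the end time. Then clip the row's start and end to the requested period and count only what remains (rows that fall entirely outside the period are dropped). For example, row 7 is cut off at 2010-04-21 09:00:00.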

, , , , . , , . , , !

# Example events: start timestamps paired with durations in seconds.
event_starts <- c(
  "2010-04-03 03:02:38 UTC",
  "2010-04-03 06:03:14 UTC",
  "2010-04-20 03:05:52 UTC",
  "2010-04-20 03:17:42 UTC",
  "2010-04-21 03:09:38 UTC",
  "2010-04-21 07:10:14 UTC",
  "2010-04-21 08:12:52 UTC",
  "2010-04-23 03:13:42 UTC",
  "2010-04-23 03:25:42 UTC",
  "2010-04-23 03:36:38 UTC",
  "2010-04-23 08:58:14 UTC",
  "2010-04-24 03:21:52 UTC",
  "2010-04-24 03:22:42 UTC",
  "2010-04-24 07:24:19 UTC",
  "2010-04-24 07:55:19 UTC"
)
dates <- data.frame(
  start = as.POSIXct(event_starts),
  length = c(
    3600, 300, 900,
    3600, 300, 900,
    3600, 300, 900,
    3600, 300, 900,
    3600, 300, 900
  )
)
library(dplyr)
library(lubridate)

#' Total number of seconds that the events in `tbl` spend inside a period
#'
#' Every row describes an event running from `start` to `start + length`.
#' Each event is clipped to `[interval_start, interval_end]` and only the
#' clipped part is counted, so an event that begins before the period or runs
#' past its end contributes just its overlapping seconds.
#'
#' @param tbl A data frame with a POSIXct column `start` and a numeric column
#'   `length` (duration in seconds).
#' @param interval_start,interval_end Period bounds; anything `as.POSIXct()`
#'   accepts. Character bounds are parsed by `as.POSIXct()` with its default
#'   time zone.
#' @return A single number: the overlapping seconds summed over all rows
#'   (0 when no row overlaps the period).
length_within <- function(tbl, interval_start, interval_end) {
  stopifnot(
    inherits(tbl[["start"]], "POSIXct"),
    is.numeric(tbl[["length"]])
  )

  # Work in plain epoch seconds. pmin()/pmax() clip the whole column at once,
  # and no date-time class has to survive an ifelse(), which would strip it.
  period_start <- as.numeric(as.POSIXct(interval_start))
  period_end <- as.numeric(as.POSIXct(interval_end))
  row_start <- as.numeric(tbl[["start"]])
  row_end <- row_start + tbl[["length"]]

  overlap <- pmin(row_end, period_end) - pmax(row_start, period_start)

  # A negative overlap means the event lies entirely outside the period.
  # Rows with missing values are ignored rather than turning the total into NA.
  sum(overlap[!is.na(overlap) & overlap > 0])
}

# First period: event 7 is cut off at 09:00:00.
length_within(
  dates,
  interval_start = "2010-04-02 00:00:00",
  interval_end = "2010-04-21 09:00:00"
)
#> [1] 12428

# Second period: event 8 starts before it and event 15 runs past its end.
length_within(
  dates,
  interval_start = "2010-04-23 03:15:00",
  interval_end = "2010-04-24 08:00:00"
)
#> [1] 10103
+3

first last dplyr. first last sum 1- .

library(dplyr)
#' Seconds of event time that fall inside a period, as a one-row data frame
#'
#' Every row of `df` is an event running from `start` to `start + length`.
#' Each event is clipped to `[start_time, end_time]` individually before the
#' durations are added up. Clipping only the earliest and latest event is not
#' enough: events may overlap one another, so an interior row can also cross a
#' period boundary.
#'
#' @param df A data frame with a POSIXct column `start` and a numeric column
#'   `length` (duration in seconds).
#' @param start_time,end_time Period bounds; anything `as.POSIXct()` accepts.
#' @return A one-row data frame with a single numeric column `sum`. The value
#'   is 0 (not NA) when no event overlaps the period.
calculate_duration <- function(df, start_time, end_time) {
  stopifnot(
    inherits(df[["start"]], "POSIXct"),
    is.numeric(df[["length"]])
  )

  # Plain epoch seconds keep the arithmetic free of difftime unit surprises.
  period_start <- as.numeric(as.POSIXct(start_time))
  period_end <- as.numeric(as.POSIXct(end_time))
  event_start <- as.numeric(df[["start"]])
  event_end <- event_start + df[["length"]]

  # Per-row overlap with the period; negative when the event is fully outside.
  overlap <- pmin(event_end, period_end) - pmax(event_start, period_start)
  counted <- overlap[!is.na(overlap) & overlap > 0]

  data.frame(sum = sum(counted))
}

# First period from the question.
calculate_duration(
  dates,
  start_time = "2010-04-02 00:00:00",
  end_time = "2010-04-21 09:00:00"
)
#    sum
#1 12428

# Second period from the question.
calculate_duration(
  dates,
  start_time = "2010-04-23 03:15:00",
  end_time = "2010-04-24 08:00:00"
)
#    sum
#1 10103


# Data

# Event start times, grouped by calendar day for readability.
start_strings <- c(
  # 2010-04-03
  "2010-04-03 03:02:38 UTC", "2010-04-03 06:03:14 UTC",
  # 2010-04-20
  "2010-04-20 03:05:52 UTC", "2010-04-20 03:17:42 UTC",
  # 2010-04-21
  "2010-04-21 03:09:38 UTC", "2010-04-21 07:10:14 UTC",
  "2010-04-21 08:12:52 UTC",
  # 2010-04-23
  "2010-04-23 03:13:42 UTC", "2010-04-23 03:25:42 UTC",
  "2010-04-23 03:36:38 UTC", "2010-04-23 08:58:14 UTC",
  # 2010-04-24
  "2010-04-24 03:21:52 UTC", "2010-04-24 03:22:42 UTC",
  "2010-04-24 07:24:19 UTC", "2010-04-24 07:55:19 UTC"
)

# Durations in seconds, one per start time (3600, 300, 900 repeated).
durations <- rep(c(3600, 300, 900), length.out = length(start_strings))

dates <- data.frame(start = as.POSIXct(start_strings), length = durations)
+2

Here is an example:

library(lubridate)

t0 <- as.POSIXct("2010-04-02 00:00:00")
t1 <- as.POSIXct("2010-04-21 09:00:00")

# Summing `length` for every row that merely *starts* inside the period also
# counts the 772 seconds of row 7 that run past t1 (giving 13200). Intersect
# each row's own interval with the period so only the overlap is counted.
period <- interval(t0, t1)
events <- interval(dates$start, dates$start + dates$length)

# `lubridate::` is spelled out because other attached packages (e.g. dplyr)
# also define intersect(). Rows that do not overlap the period yield NA.
overlap <- lubridate::intersect(events, period)

sum(int_length(overlap), na.rm = TRUE)
# [1] 12428
0
source

Source: https://habr.com/ru/post/1693754/


All Articles