In R, is it possible to include the same row in several groups, or is there another workaround?

I measured N2O flux from the soil at several unevenly spaced points in time per day. I am trying to calculate the total N2O flux from the soil over several days by finding the area under the curve for each day. I know how to do this using only the measures from that day; however, I would like to include the last measure of the previous day and the first measure of the next day to improve the curve estimate.

Here is an example to give a more specific idea:

library(MESS)
library(lubridate)
library(dplyr)

Create a reproducible example

# Hourly timestamps from 2015-04-07 11:20 up to midnight on 2015-04-13.
# Both endpoints are parsed with ymd_hm() so that `from` and `to` have the same
# date-time class; ymd() on its own can return a Date, which seq() may refuse
# to combine with a date-time start.
datetime <- seq(ymd_hm("2015-04-07 11:20"), ymd_hm("2015-04-13 00:00"),
                by = "hours")

# Fix the random seed so the simulated flux values are the same on every run.
set.seed(1)
dat <- data.frame(datetime,
                  day = day(datetime),
                  Flux = rnorm(n = length(datetime), mean = 400, sd = 20))

# Per-day flag (keyed by day of month) saying whether that day may be used.
useDate <- data.frame(day = 7:12,
                      DateGood = c("No", "Yes", "Yes", "No", "Yes", "No"))

# Attach the flag to every measurement; the join key is stated explicitly
# instead of relying on whichever column names happen to be shared.
dat <- left_join(dat, useDate, by = "day")

"" ( ), - "" (). , (), "" , .

  # Record, for every measurement, the DateGood flag of the row just before it
  # and of the row just after it, then drop the rows for which both of those
  # neighbouring flags are "No". Rows where the test evaluates to NA (a missing
  # neighbour at either end of the table) are dropped by filter() as well.
  out <- filter(
    mutate(dat,
           lagDateGood = lag(DateGood),
           leadDateGood = lead(DateGood)),
    !(lagDateGood == "No" & leadDateGood == "No")
  )

-

# Area under each day's flux curve using only the rows labelled with that day.
# Time is expressed as decimal hours since midnight and the spline through the
# measurements is integrated over the 0-24 hour window.
out2 <- out %>%
  mutate(hourOfday = hour(datetime) + minute(datetime) / 60) %>%
  group_by(day) %>%
  summarise(
    auc = auc(x = hourOfday, y = Flux, from = 0, to = 24, type = "spline")
  )

, AUC. , 10- , "" .

, . (, 8- AUC 8- 9- ). ? ?

0
2

, , . , . "out" :

# Now calculate the area under the curve for each day.
# The row indices of `out` are split by day, and each day's index range is
# widened by one row on either side (clamped to the table), so every piece also
# carries the measurement just before and just after that day. This relies on
# the rows of `out` being in time order.
n <- nrow(out)

# ix: integer row indices of `out` belonging to one day.
# Returns the rows from one before min(ix) to one after max(ix), as a data frame.
extract <- function(ix) {
  first_row <- max(1, min(ix) - 1)
  last_row <- min(n, max(ix) + 1)
  out[seq(first_row, last_row), , drop = FALSE]
}

# seq_len() rather than 1:n, so an empty `out` gives an empty list instead of
# the bogus indices c(1, 0).
res <- lapply(split(seq_len(n), out$day), extract)

# Integrate the flux over one calendar day.
#
# df: measurements for one day in time order, with `datetime` and `Flux`
#     columns; normally the first row is the last measure of the previous day
#     and the last row is the first measure of the next day.
# Returns NA when df has fewer than 10 rows; otherwise the result of
# summarize() with a single column TotalFlux, the spline-interpolated area
# under Flux between 0 and 1440 minutes after the day's midnight.
calcTotalFlux <- function(df) {
  # Make sure the day has at least 10 measures (neighbouring rows included).
  if (nrow(df) < 10) {
    return(NA)
  }

  # Row 2 is used to find the day because row 1 is normally the measure
  # borrowed from the previous day.
  day_midnight <- floor_date(df$datetime[2], "day")

  df %>%
    # Elapsed time is forced to minutes: plain subtraction of date-times picks
    # its units automatically (secs, mins, hours or days), which would silently
    # disagree with the 0-1440 integration window below.
    mutate(time = as.numeric(difftime(datetime, day_midnight,
                                      units = "mins"))) %>%
    summarize(TotalFlux = auc(x = time, y = Flux, from = 0, to = 1440,
                              type = "spline"))
}

# Run calcTotalFlux on every per-day piece and stack the results row-wise,
# one row per day.
do.call(rbind, lapply(X = res, FUN = calcTotalFlux))

    TotalFlux
7         NA
8   585230.2
9   579017.3
10        NA
11  563689.7
12        NA
0

. @Alex Brown.

 # Another way
# Take the latest measurement of every day and relabel it with the following
# day's number, so that it can be grouped with that day.
last <- ungroup(filter(group_by(out, day), datetime == max(datetime)))
last <- mutate(last, day = day + 1)

# Take the earliest measurement of every day and relabel it with the preceding
# day's number, so that it can be grouped with that day.
first <- ungroup(filter(group_by(out, day), datetime == min(datetime)))
first <- mutate(first, day = day - 1)

# Stack the original rows with the relabelled neighbour rows, group by day and
# order by time.
d <- rbind(out, last, first)
d <- arrange(group_by(d, day), datetime)

# Count the rows in each group of `d`.
n_measures_per_day <- d %>%
  summarize(n = n())

# Attach the counts and keep only the days with more than 4 rows. The join key
# is named explicitly so the match cannot silently widen to any other column
# the two tables happen to share.
d <- left_join(d, n_measures_per_day, by = "day") %>%
  filter(n > 4)

# Per-day total flux: spline-interpolated area under Flux between 0 and 1440
# minutes after midnight of each day group.
TotalFluxDF <- d %>%
  mutate(
    # Midnight of the group's day, taken from the group's third row.
    # NOTE(review): this assumes the third row already falls inside the day
    # itself (earlier rows may be measures borrowed from the previous day).
    timeAtMidnight = floor_date(datetime[3], "day"),
    # Elapsed time is forced to minutes: plain subtraction of date-times picks
    # its units automatically, which could disagree with the 0-1440 window and
    # even differ from one group to the next.
    time = as.numeric(difftime(datetime, timeAtMidnight, units = "mins"))
  ) %>%
  summarize(
    auc = auc(x = time, y = Flux, from = 0, to = 1440, type = "spline")
  )

TotalFluxDF

Source: local data frame [3 x 2]

    day      auc
  (dbl)    (dbl)
1     8 585230.2
2     9 579017.3
3    11 563689.7
0

Source: https://habr.com/ru/post/1616435/


All Articles