I need to group and summarise a data frame using various summary functions, depending on the variable that I am summarising. These functions can have different main and optional arguments, and I would like to write a single function that can handle all of this.
Here are the simpler functions that I managed to code to show their logic.
require(tidyverse)
require(magrittr)
require(rlang)
# Toy data set: one grouping factor (y) and three numeric columns, one of
# which (x1) contains missing values.
example <- data.frame(
  y  = factor(c("A", "B", "C", "A", "B")),
  x1 = c(7, 10, NA, NA, 2),
  x2 = c(13, 0, 0, 2, 1),
  z  = c(0, 1, 0, 1, 0)
)
#' Summarise, per group, every column whose name starts with a prefix
#'
#' @param dataset A data frame.
#' @param y Unquoted grouping column.
#' @param prefix Column-name prefix, given as a bare name or a string; every
#'   column whose name starts with it is summarised.
#' @param fun Summary function, or its name as a string (resolved with
#'   `match.fun()`).
#' @param ... Extra arguments forwarded to `fun` (e.g. `probs`, `na.rm`).
#' @return An ungrouped data frame holding the grouping column and the
#'   summarised columns, which keep their original names.
do_summary_prefix <- function(dataset, y, prefix, fun, ...) {
  y <- enquo(y)
  prefix <- quo_name(enquo(prefix))
  fun <- match.fun(fun)
  dataset %>%
    group_by(!!y) %>%
    # Hand the function over directly: wrapping a single function in the
    # deprecated funs() helper is not needed.
    summarise_at(vars(starts_with(prefix)), fun, ...) %>%
    ungroup()
}
# 25% quantile of every column whose name starts with "x", per level of y.
do_summary_prefix(example, y, x, 'quantile', probs = 0.25, na.rm = TRUE)
# Summarise one column `x` of `dataset` within each level of `y`, using the
# function named by `fun`. Anything passed through `...` is forwarded to that
# function. The result column is named "<x>_<fun>".
do_summary_x <- function(dataset, y, x, fun, ...) {
  group_col <- enquo(y)
  value_col <- enquo(x)
  result_name <- paste(quo_name(value_col), fun, sep = '_')

  by_group <- group_by(dataset, !!group_col)
  summarised <- summarise(
    by_group,
    !!result_name := do.call(match.fun(fun), list(x = !!value_col, ...))
  )
  ungroup(summarised)
}
# Per-group mean of x1; missing values are kept, so a group containing an NA
# yields NA.
do_summary_x(example, y, x1, fun = 'mean', na.rm = FALSE)
This works fine for me, and I could call do_summary_x in a loop over the variables that I want to summarise in order to do the job. But I would like to integrate the loop into a higher-level function, using ..., while still being able to pass various parameters to my summary functions.
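To do so, I capture the variables with ..., pass the extra arguments of the summary function as a list (other_args), and call the function with do.call. Here is my attempt: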
# Make plyr's join_all() callable by its bare name; only this one function is
# taken from plyr, via `::`.
join_all <- plyr::join_all
#' Summarise several columns per group with a single summary function
#'
#' Each column given in `...` is summarised separately within the groups of
#' `y`, and the per-column results are then left-joined on the grouping column.
#'
#' @param dataset A data frame.
#' @param y Unquoted grouping column.
#' @param ... Unquoted columns to summarise; each yields one result column.
#' @param fun Name of the summary function as a string (e.g. `'mean'`). It is
#'   called through `do.call()` with the column passed as argument `x`.
#' @param other_args Named list of extra arguments for `fun`. The default is a
#'   placeholder entry named `NULL`; such entries are dropped before the call.
#' @return A data frame with the grouping column and one column per summarised
#'   variable, named `<variable>_<fun>`.
do_summary <- function(dataset, y, ..., fun, other_args = list(NULL = NULL)) {
  y_quo <- enquo(y)
  y_name <- quo_name(y_quo)
  values <- quos(...)
  if (length(values) == 0L) {
    stop("do_summary() needs at least one column to summarise in `...`.",
         call. = FALSE)
  }

  # Drop the placeholder entry of the default so that functions without a
  # `...` parameter (e.g. sd) do not fail on an unused `NULL` argument.
  arg_names <- names(other_args)
  if (!is.null(arg_names)) {
    other_args <- other_args[arg_names != 'NULL']
  }

  datasets <- lapply(values, function(value) {
    # The suffix comes from `fun`, the function actually applied.
    result_name <- paste(quo_name(value), fun, sep = '_')
    dataset %>%
      group_by(!!y_quo) %>%
      summarise(
        !!result_name := do.call(fun, c(list(x = !!value), other_args))
      ) %>%
      ungroup()
  })

  join_all(datasets, by = y_name, type = 'left')
}
# 10% quantile of x1, x2 and z within each level of y, ignoring missing values.
# The summary function must be passed as `fun`: any other name would be
# captured by `...` and treated as a column to summarise.
do_summary(example, y,
           x1, x2, z,
           fun = 'quantile',
           other_args = list(probs = 0.1, na.rm = TRUE))
# Per-group mean of the same columns, with no extra arguments supplied.
do_summary(example, y,
           x1, x2, z,
           fun = 'mean')
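This does the job, but only as long as the main argument of the summary function is called x.
With another fun, the main argument may not be named x. How can I make the name of that argument configurable?
I have tried, without success, things along the lines of changing_arg = !!x and list(!!changing_arg := !!x).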