R: row-wise dplyr :: mutate using a function that takes a row of a data frame and returns an integer

I am trying to use mutate in a pipe with a custom function. I looked at this somewhat similar SO post, but in vain. Say I have a data frame like this (where blob is some variable that is unrelated to the specific task but is part of the data):

# Example data: `exclude` names the one column (B, C or D) to leave out of
# each row's sum; `blob` is an unrelated column carried along with the data.
df <-
  data.frame(
    exclude = c("B", "B", "D"),
    B = c(1, 0, 0),
    C = c(3, 4, 9),
    D = c(1, 1, 0),
    blob = c("fd", "fs", "sa"),
    # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
    stringsAsFactors = FALSE
  )

I have a function that uses the variable names to select some of them based on the value in the exclude column and, for example, calculates the sum of the variables not named in exclude (which is always a single character).

# Sum the B, C and D values of `df`, leaving out the column named in `exclude`.
# Intended to be called with a single row of the data frame.
# NOTE(review): `df["exclude"]` is a one-column data frame, not a vector, so
# the comparison relies on how `%in%` coerces it — confirm before calling this
# with more than one row.
FUN <- function(df) {
  value_cols <- c("B", "C", "D")
  values <- df[value_cols]
  keep <- !(names(values) %in% df["exclude"])
  sum(values[keep])
}

When I pass a single row (row 1) to FUN, I get the expected sum of C and D (the columns not named in exclude), namely 4:

# Row 1 has exclude = "B", so the expected result is C + D = 3 + 1 = 4.
FUN(df[1,])

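How can I do the same in a pipe with mutate (adding the result as a variable s)? These two attempts do not work: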

# Attempt 1: plain mutate. `.` is the pipe's left-hand side, i.e. all of df,
# so FUN is called once with every row rather than once per row.
df %>% mutate(s=FUN(.))
# Attempt 2: one group per row via group_by(1:n()).
# NOTE(review): `.` presumably still refers to the whole piped data frame
# rather than the current group — confirm.
df %>% group_by(1:n()) %>% mutate(s=FUN(.))

UPDATE: This does not work as intended either:

# Attempt 3: mark the data as row-wise, then mutate.
# NOTE(review): `.` inside mutate() presumably still stands for the whole
# piped data frame, not the current row — confirm.
df %>% rowwise(.) %>% mutate(s=FUN(.))

This works, of course, but it is not done within dplyr's mutate (and a pipe):

# Base-R fallback: call FUN on each one-row slice of df and store the sums.
# seq_len() copes with a zero-row df (1:nrow(df) would yield c(1, 0)), and
# vapply() guarantees a numeric vector with exactly one value per row.
df$s <- vapply(seq_len(nrow(df)), function(i) FUN(df[i, ]), numeric(1))
+7
2

If you want to use dplyr, you can do so with rowwise and your function FUN.

df %>%
    rowwise %>%
    do({
        # `.` is what do() hands over for the current row; turn it into a
        # tibble so FUN can index it by column name. as_tibble() replaces the
        # deprecated as_data_frame().
        result <- as_tibble(.)
        # Append the per-row sum as column `s` and return the enlarged row.
        result$s <- FUN(result)
        result
    })

The same can be achieved with group_by instead of rowwise (as you already tried), but with do instead of mutate:

df %>%
    group_by(1:n()) %>%
    do({
        # Each group is a single row because the grouping key is the row
        # number. as_tibble() replaces the deprecated as_data_frame().
        result <- as_tibble(.)
        # Append the per-row sum as column `s` and return the enlarged row.
        result$s <- FUN(result)
        result
    })

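The reason mutate does not work here is that the whole data frame is passed to the function, so it is like calling FUN(df).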

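A much more efficient way to do the same thing, though, is to build a matrix of the columns to include and then use rowSums.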

# Columns that may contribute to each row's sum.
cols <- c("B", "C", "D")
# Logical matrix, one row per row of df and one column per entry of `cols`:
# TRUE where the column is not the one named in that row's `exclude`.
include_mat <- outer(df$exclude, cols, FUN = "!=")
# Excluded cells are multiplied by FALSE (0), so they drop out of the sum.
df$s <- rowSums(df[cols] * include_mat)
+8

purrr

nest map_dbl :

# NOTE(review): assumes tidyverse attaches dplyr, tidyr and purrr, which
# supply rowwise/mutate, nest/unnest and map_dbl — confirm.
library(tidyverse)
df %>% 
  rowwise %>% 
  # Nest every column except blob; the result is used below as column `data`.
  # NOTE(review): `nest(-blob)` and a bare `unnest` look like older tidyr call
  # forms — confirm against the installed tidyr version.
  nest(-blob) %>% 
  # Call FUN on each nested piece and collect the results as doubles in `s`.
  mutate(s = map_dbl(data, FUN)) %>% 
  # Expand the nested column back into regular columns.
  unnest

. -, rowwise , .

, nest , , FUN ( tibbles vs data.frames!). rowwise, exclude:D

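Next, map_dbl applies FUN to each nested data frame. map_dbl belongs to the map_* family of functions and returns a numeric (i.e. double) vector.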

unnest .

purrrlyr

purrrlyr is a package that sits at the intersection of dplyr and purrr; it provides the by_row function.

df FUN :

# NOTE(review): by_row() comes from purrrlyr, which presumably has to be
# attached first — no library(purrrlyr) call is visible here.
# Apply FUN to each row of df and store the result in a new column "s".
df %>% 
  by_row(..f = FUN, .to = "s", .collate = "cols")

! :

# tibble [3 x 6]
  exclude     B     C     D  blob     s
    <chr> <dbl> <dbl> <dbl> <chr> <dbl>
1       B     1     3     1    fd     4
2       B     0     4     1    fs     5
3       D     0     9     0    sa     9

, , :

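  • ..f= the function to apply to each row,
  • .to= the name of the output column, here s
  • .collate= how the results are collated. Since FUN returns a single value per row, either "cols" or "rows" works here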

purrrlyr...


, by_row, ! purrr , . . microbenchmark:

library(microbenchmark)
# Time each row-wise approach on the same df, 1000 evaluations per entry.
mbm <- microbenchmark(
  # purrr: nest, map_dbl over the nested pieces, unnest.
  purrr.test = df %>% rowwise %>% nest(-blob) %>% 
    mutate(s = map_dbl(data, FUN)) %>% unnest,
  # purrrlyr: by_row applies FUN to each row directly.
  purrrlyr.test = df %>% by_row(..f = FUN, .to = "s", .collate = "cols"),
  # dplyr: rowwise + do.
  rowwise.test = df %>% 
    rowwise %>% 
    do({
      result = as_tibble(.)
      result$s = FUN(result)
      result
    }),
  # dplyr: one group per row + do.
  group_by.test = df %>% 
    group_by(1:n()) %>% 
    do({
      result = as_tibble(.)
      result$s = FUN(result)
      result
    }),
  # Base R: sapply over row indices.
  # NOTE(review): this entry assigns to df$s, so df may gain an `s` column
  # that the other entries then see — confirm this is intended.
  sapply.test = {df$s <- sapply(1:nrow(df), function(x) FUN(df[x,]))}, 
  times = 1000
)
# NOTE(review): autoplot() on a microbenchmark result presumably needs
# ggplot2 attached (e.g. via tidyverse) — confirm.
autoplot(mbm)

enter image description here

, purrrlyr , do rowwise group_by(1:n()) (. @konvas), sapply. , , . purrr , , , , . .

+5

Source: https://habr.com/ru/post/1678173/


All Articles