Tracking which group fails in the dplyr chain

Question

Tracking which group fails in the dplyr chain

How can I find out which group failed when using group_by in a dplyr-style chain? Take for example:

# Load dplyr for group_by(), do() and mutate(), plus the built-in iris data.
library(dplyr)

data(iris)

# Fit one linear model per species, then pull the Petal.Width slope out of
# each fitted model.
iris %>%
  group_by(Species) %>%
  do(mod = lm(Petal.Length ~ Petal.Width, data = .)) %>%
  # coef() is used rather than summary(mod)$coeff[2]: the latter relies on
  # partial matching of `$coefficients`, and summary() drops aliased terms,
  # so position 2 is not the slope when the fit is rank-deficient.
  mutate(Slope = coef(mod)[[2]])

It works great. Now, if I add some problem data to iris:

# Blank out Petal.Width for every versicolor row to create a group that
# cannot be modelled.
iris[iris$Species == "versicolor", "Petal.Width"] <- NA

Thus, when trying to run a linear model, it does not work:

# Isolate the versicolor rows and fit the same model on that subset alone.
is_versicolor <- iris$Species == "versicolor"
iris_sub <- iris[is_versicolor, , drop = FALSE]
lm(Petal.Length ~ Petal.Width, data = iris_sub)

But if I approached this blind with a massive dataset, if I did:

# Same per-species fit as before; the error is raised by the lm() call inside
# do(), and the message does not say which Species triggered it.
iris %>%
  group_by(Species) %>%
  do(mod = lm(Petal.Length ~ Petal.Width, data = .)) %>%
  # coef() avoids the partial matching in summary(mod)$coeff and always
  # addresses the Petal.Width coefficient, even for a rank-deficient fit.
  mutate(Slope = coef(mod)[[2]])

This error message will not help me find out at what level the model error is:

Error in lm.fit (x, y, offset = offset, singular.ok = singular.ok, ...): 0 (non-NA) cases

I could use a loop as shown below. This, at least, allows me to find out at which level of Species the function fails. However, I would prefer to use the dplyr setup:

# Fit the model one species at a time, printing each species name before its
# fit so that the last name printed identifies the group that raised an error.
species_names <- as.character(unique(iris$Species))
fits <- vector("list", length(species_names))
for (k in seq_along(species_names)) {
  cat(species_names[k], "\n")
  fits[[k]] <- iris %>%
    filter(Species == species_names[k]) %>%
    do(mod = lm(Petal.Length ~ Petal.Width, data = .))
}
# Combine once at the end instead of growing the result with rbind() on
# every iteration.
lmdf <- bind_rows(fits)

Any recommendations for doing this within a dplyr chain?

I also tried tryCatch, but that only produced this error:

tryCatch (lm (v3 ~ v4, df), error = if (e $message == all_na_msg) default else stop (e)): object 'e' not found

+4

r dplyr

boshek 16 . '17 18:02

2

Setting dplyr aside, you can use a split-apply approach with try in base R. For example:

# use split to make a list of data sets by group (here, species)
iris.split <- split(iris, iris$Species)

# iterate your modeling function over that list, using 'try' to let the
# process keep running when an error is thrown and logging an object of
# class "try-error" in that slot on the resulting list
iris.mods <- lapply(
  iris.split,
  function(group_data) try(lm(Petal.Length ~ Petal.Width, data = group_data))
)

# get a vector of slopes from those models with NA where any errors killed
# the modeling process; vapply() guarantees a numeric vector whatever the
# input, and a plain if/else replaces ifelse() on a single value
slopes <- vapply(
  iris.mods,
  function(fit) if (inherits(fit, "try-error")) NA_real_ else coef(fit)[[2]],
  numeric(1)
)

Result:

> slopes
    setosa versicolor  virginica 
 0.5464903         NA  0.6472593

+2

ulfelder 16 . '17 18:46

Axeman · Accepted Answer · 2017-02-16T19:23:05+0000

Use purrr::safely:

Prep

# Attach the tidyverse (dplyr, purrr, ...) and reload a fresh copy of iris.
library(tidyverse)
data(iris)
# Make every versicolor Petal.Width missing so that this group's fit fails.
iris[iris$Species == "versicolor", "Petal.Width"] <- NA

If we want to ignore the failed models (i.e. those with 0 (non-NA) cases) and just fit the ones that work:

# safely(lm) never throws: it returns list(result, error), and `result` is
# NULL for any group whose fit failed.
iris %>%
  group_by(Species) %>%
  do(mod = safely(lm)(Petal.Length ~ Petal.Width, data = .)$result) %>%
  # `mod` is a single model per row here, so use a plain if/else instead of
  # ifelse(); NA_real_ keeps Slope a double column.
  mutate(Slope = if (is.null(mod)) NA_real_ else coef(mod)[[2]])

!

Source: local data frame [3 x 3]
Groups: <by row>

# A tibble: 3 × 3
     Species      mod     Slope
      <fctr>   <list>     <dbl>
1     setosa <S3: lm> 0.5464903
2 versicolor   <NULL>        NA
3  virginica <S3: lm> 0.6472593

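We can see that the failed group has NULL as its model and NA as its Slope. Usually, though, we want to know more about the failure (for example, the error itself).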

,

# Keep the complete safely() output per species, then split it into separate
# list-columns: `err` for the captured error and `mod` for the fitted model.
safe_lm <- safely(lm)
step1 <- iris %>%
  group_by(Species) %>%
  do(res = safe_lm(Petal.Length ~ Petal.Width, data = .)) %>%
  mutate(
    err = map(list(res), "error"),
    mod = map(list(res), "result")
  )

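Note that the extra list() wrappers are needed here because the output of do() is processed row by row. Alternatively, you could call ungroup first.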

Then, to see which groups (if any) failed, we can do:

# Rows whose `err` is not NULL are the groups where lm() failed.
step1 %>%
  filter(!is.null(err))

Or, to keep only the models that worked, filter the other way:

# Keep only the groups whose fit succeeded (no captured error), then extract
# the slope from each model.
step1 %>%
  filter(is.null(err)) %>%
  # coef() avoids the partial matching in summary(mod)$coeff and always
  # addresses the Petal.Width coefficient, even for a rank-deficient fit.
  mutate(Slope = coef(mod)[[2]])

I would also recommend looking into broom for tidying model output.

Tracking which group fails in the dplyr chain

Prep

,

More articles: