Saving Regression Residuals in the Original Data Frame

I cannot comment directly on the page in question, but essentially I'm trying to get the code from "Combining dplyr::do() with dplyr::mutate?" to work.

# Use the mtcars data set under the name `dat`.
dat <- mtcars

# Attempt to add the per-gear residuals of mpg ~ disp as a new column `res`.
# The formula is handed to lm() as a deparsed string and no `data` argument
# is given; this is the call that raises the "object 'mpg' not found" error
# quoted below.
dat %>% 
    group_by(gear) %>% 
    mutate(res = residuals(lm(deparse(substitute(mpg ~ disp)))))

By running the above code, I get:

"Error in eval(substitute(expr), envir, enclos) : object 'mpg' not found"

Did I miss something?

+4
source share
2 answers

There are many options here, including modelr::add_residuals (see @LmW's answer), broom::augment, and plain old residuals. If you work with grouped models, nesting the models in a list column is convenient and leads naturally to iterating over the list of models to calculate residuals, etc.


residuals

Use plain base R residuals together with purrr (or lapply, if you prefer):

library(tidyverse)

# Fit one linear model (mpg ~ disp) per gear and attach each model's
# residuals to the rows it was fitted on.
# NOTE(review): nest(-gear) and unnest(data, resid) are older tidyr call
# forms; tidyr >= 1.0 is expected to prefer nest(data = -gear) and
# unnest(c(data, resid)) -- confirm against the installed version.
mtcars %>% 
    # Keep the car names, which mtcars stores as row names, as a column.
    rownames_to_column('car') %>% 
    # Collapse everything except gear into a list-column named `data`.
    nest(-gear) %>% 
    # `model` holds one lm fit per gear; `resid` holds its residual vector.
    mutate(model = map(data, ~lm(mpg ~ disp, data = .x)),
           resid = map(model, residuals)) %>%
    # Expand the nested data and residuals back to one row per car.
    unnest(data, resid)

#> # A tibble: 32 × 13
#>     gear       resid            car   mpg   cyl  disp    hp  drat    wt
#>    <dbl>       <dbl>          <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1      4  0.98649891      Mazda RX4  21.0     6 160.0   110  3.90 2.620
#> 2      4  0.98649891  Mazda RX4 Wag  21.0     6 160.0   110  3.90 2.875
#> 3      4 -3.56856040     Datsun 710  22.8     4 108.0    93  3.85 2.320
#> 4      4  2.76107028      Merc 240D  24.4     4 146.7    62  3.69 3.190
#> 5      4  0.44001547       Merc 230  22.8     4 140.8    95  3.92 3.150
#> 6      4  0.11531527       Merc 280  19.2     6 167.6   123  3.92 3.440
#> 7      4 -1.28468473      Merc 280C  17.8     6 167.6   123  3.92 3.440
#> 8      4  2.45060811       Fiat 128  32.4     4  78.7    66  4.08 2.200
#> 9      4  0.08397007    Honda Civic  30.4     4  75.7    52  4.93 1.615
#> 10     4  3.02179175 Toyota Corolla  33.9     4  71.1    65  4.22 1.835
#> # ... with 22 more rows, and 4 more variables: qsec <dbl>, vs <dbl>,
#> #   am <dbl>, carb <dbl>

Because lm and residuals also work directly on vectors, you can compute the residuals inside a grouped mutate:

# Compute the residuals directly inside a grouped mutate(): lm() is fitted
# on each gear group's mpg and disp columns and only the residuals are kept
# (the fitted model itself is not stored anywhere).
# The result is still grouped by gear, since no ungroup() is called.
mtcars %>% 
    rownames_to_column('car') %>% 
    group_by(gear) %>% 
    mutate(resid = residuals(lm(mpg ~ disp)))

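This approach is more direct, but the fitted model is not stored, so it is only suitable when the residuals are all you need.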


broom::augment

augment adds a number of useful columns, including the residuals, to the data the model was fitted on:

# Fit one lm (mpg ~ disp) per gear, then let broom::augment() build a data
# frame of per-observation model output for each fit and expand those data
# frames into rows.
mtcars %>% 
    rownames_to_column('car') %>%
    # Collapse everything except gear into a list-column named `data`.
    nest(-gear) %>% 
    # `model` holds one lm fit per gear; `model_data` holds augment()'s
    # output for that fit.
    mutate(model = map(data, ~lm(mpg ~ disp, data = .x)), 
           model_data = map(model, broom::augment)) %>% 
    unnest(model_data)

#> # A tibble: 32 × 10
#>     gear   mpg  disp  .fitted   .se.fit      .resid       .hat   .sigma
#>    <dbl> <dbl> <dbl>    <dbl>     <dbl>       <dbl>      <dbl>    <dbl>
#> 1      4  21.0 160.0 20.01350 0.9758770  0.98649891 0.16546553 2.503083
#> 2      4  21.0 160.0 20.01350 0.9758770  0.98649891 0.16546553 2.503083
#> 3      4  22.8 108.0 26.36856 0.7466989 -3.56856040 0.09687426 2.197330
#> 4      4  24.4 146.7 21.63893 0.8206560  2.76107028 0.11701449 2.331455
#> 5      4  22.8 140.8 22.35998 0.7674126  0.44001547 0.10232345 2.524090
#> 6      4  19.2 167.6 19.08468 1.0800836  0.11531527 0.20268993 2.528466
#> 7      4  17.8 167.6 19.08468 1.0800836 -1.28468473 0.20268993 2.482941
#> 8      4  32.4  78.7 29.94939 1.0762841  2.45060811 0.20126638 2.357875
#> 9      4  30.4  75.7 30.31603 1.1195513  0.08397007 0.21777368 2.528634
#> 10     4  33.9  71.1 30.87821 1.1879209  3.02179175 0.24518417 2.247410
#> # ... with 22 more rows, and 2 more variables: .cooksd <dbl>,
#> #   .std.resid <dbl>

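Note that if you want to keep the variables that were not passed to lm, change the model_data line to model_data = map2(model, data, broom::augment), since augment accepts a data argument; otherwise those columns are dropped.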

+6

modelr::add_residuals() can do this, too:

# Attach the packages with library() so that a missing package stops the
# script immediately; require() would only warn and return FALSE.
library(tidyverse)
library(modelr)

# Fit mpg ~ disp separately for each gear. For every gear, `model` holds the
# lm fit and `residuals` holds that group's data with the residual column
# added by modelr::add_residuals().
models <- mtcars %>% 
    group_by(gear) %>% 
    nest() %>%
    mutate(model = map(data, ~lm(mpg ~ disp, data = .)),
           residuals = map2(data, model, add_residuals))

# Expand the per-gear data frames back into one row per car.
models %>% unnest(residuals)

# A tibble: 32 × 12
    gear   mpg   cyl  disp    hp  drat    wt  qsec    vs    am  carb
   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1      4  21.0     6 160.0   110  3.90 2.620 16.46     0     1     4
2      4  21.0     6 160.0   110  3.90 2.875 17.02     0     1     4
3      4  22.8     4 108.0    93  3.85 2.320 18.61     1     1     1
4      4  24.4     4 146.7    62  3.69 3.190 20.00     1     0     2
5      4  22.8     4 140.8    95  3.92 3.150 22.90     1     0     2
6      4  19.2     6 167.6   123  3.92 3.440 18.30     1     0     4
7      4  17.8     6 167.6   123  3.92 3.440 18.90     1     0     4
8      4  32.4     4  78.7    66  4.08 2.200 19.47     1     1     1
9      4  30.4     4  75.7    52  4.93 1.615 18.52     1     1     2
10     4  33.9     4  71.1    65  4.22 1.835 19.90     1     1     1
# ... with 22 more rows, and 1 more variables: resid <dbl>

modelr; .

+4

Source: https://habr.com/ru/post/1665809/


All Articles