Using dplyr::group_by() to find the minimum dates with NAs

I'm finding the minimum date within a group. Many times, the group contains only missing dates (in which case I would prefer that something like NA be assigned).

The NAs appear to be displayed correctly, but they do not respond to is.na() as I expect. When a cell is displayed as NA, the output of is.na() is unexpectedly FALSE.

library(magrittr)
# Visit-level example data, two rows per subject:
#   subject 1       - both dates present
#   subject 2       - one date missing, one present
#   subjects 3 & 4  - every date missing
ds_visit <- tibble::tibble(
  subject_id = rep(1:4, each = 2L),
  date       = as.Date(c(
    "2017-01-01", "2017-02-01",
    NA,           "2017-01-02",
    NA,           NA,
    NA,           NA
  ))
)

# Per-subject minimum date, repeated on every visit row of that subject.
# NOTE: the surprising `date_min_na` values for all-NA groups are left in on
# purpose; they are the behaviour this example is meant to demonstrate.
ds_subject <- ds_visit %>%
  # as.data.frame() %>%
  dplyr::group_by(subject_id) %>%
  dplyr::mutate(
    date_na     = is.na(date),              # Works as expected
    # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
    date_min    = min(date, na.rm = TRUE),  # Works as expected

    date_min_na = is.na(date_min)           # Does NOT work as expected.
  ) %>%
  dplyr::ungroup() # %>% as.data.frame()

ds_visit looks right. ds_subject looks correct to me, except for the last column.

ds_subject (the last four rows of the last column are unexpected.)

# A tibble: 8 x 5
  subject_id date       date_na date_min   date_min_na
       <int> <date>     <lgl>   <date>     <lgl>      
1          1 2017-01-01 F       2017-01-01 F          
2          1 2017-02-01 F       2017-01-01 F          
3          2 NA         T       2017-01-02 F          
4          2 2017-01-02 F       2017-01-02 F          
5          3 NA         T       NA         F         # Should be 'T'?
6          3 NA         T       NA         F         # Should be 'T'?
7          4 NA         T       NA         F         # Should be 'T'?
8          4 NA         T       NA         F         # Should be 'T'?

, : (a) , (b) R ( 3.4.3 ), (c) dplyr rlang ( CRAN GitHub) (d) tibble data.frame. ( ), , , .

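For the groups that contain only NAs (subjects 3 and 4), I also see warnings saying that Inf is returned, not NA (the same as when running min(as.Date(NA), na.rm=T)).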

1: In min.default(c(NA_real_, NA_real_), na.rm = TRUE) :
  no non-missing arguments to min; returning Inf
2: In min.default(c(NA_real_, NA_real_), na.rm = TRUE) :
  no non-missing arguments to min; returning Inf

. - , NA, .

> str(ds_subject$date_min)
 Date[1:8], format: "2017-01-01" "2017-01-01" "2017-01-02" "2017-01-02" NA NA NA NA

, - ? NA ?

1

: @eipi10 @mtoto . . , "NA" "Inf", .

, , base::min()?

, dplyr::mutate()/dplyr::summarize(), SQL. ( dplyr is.na(), summarize() mutate()).

:

# Per-subject minimum date computed by SQLite (via sqldf) instead of base::min().
sql_date_min <- "
  SELECT 
    subject_id,
    MIN(date) AS date_min
    --MIN(date) OVER (PARTITION BY subject_id) AS date_min --`OVER` not supported by sqlite
  FROM ds_visit
  GROUP BY subject_id
"

sql_date_min %>%
  sqldf::sqldf() %>%
  tibble::as_tibble() %>%
  dplyr::mutate(
    # Checking is.na(date_min) before the conversion below (while the column
    # is still numeric) gives the same result:
    # date_min_na_1 = is.na(date_min),
    date_min    = as.Date(date_min, origin = "1970-01-01"),
    date_min_na = is.na(date_min)
  )

, NA, is.na():

# A tibble: 4 x 3
  subject_id date_min   date_min_na
       <int> <date>     <lgl>      
1          1 2017-01-01 F          
2          2 2017-01-02 F          
3          3 NA         T          
4          4 NA         T          

2

, R Inf, Date, NA. ( ), , .

NA, . base::min(). , base::min() /, SQL.

( @alistaire base:min() , / .)

+4
1

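Somewhat curiously, calling min with na.rm = TRUE on an all-NA vector returns Inf (max returns -Inf), and print.Date has no way to display such a value, so it prints NA, even though that is not the value actually stored.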

# With na.rm = TRUE there is nothing left to take the minimum of, so min()
# warns and returns Inf instead of NA.
min(NA, na.rm = TRUE)
#> Warning in min(NA, na.rm = TRUE): no non-missing arguments to min;
#> returning Inf
#> [1] Inf

# The same warning is raised when the only input is a missing Date.
x <- min(as.Date(NA), na.rm = TRUE)
#> Warning in min.default(structure(NA_real_, class = "Date"), na.rm = TRUE):
#> no non-missing arguments to min; returning Inf

# The result is printed as NA ...
x
#> [1] NA

# ... but it is not actually missing ...
is.na(x)
#> [1] FALSE

# ... because the stored value compares equal to Inf.
x == Inf
#> [1] TRUE

, , , , .

#' Print a Date vector without masking infinite values as NA
#'
#' Overrides the default Date print method: when `x` holds `Inf` or `-Inf`
#' the underlying numeric values are printed; otherwise printing is delegated
#' to `base::print.Date()`.
#'
#' @param x A Date vector (any length, may contain `NA`).
#' @param ... Further arguments forwarded to `base::print.Date()`.
#' @return `x`, invisibly.
print.Date <- function(x, ...) {
  # is.infinite() is FALSE for NA and any() collapses to a single value, so
  # the condition is always one TRUE/FALSE -- even for missing dates,
  # zero-length input, or vectors with more than one element.
  if (any(is.infinite(unclass(x)))) {
    print(as.numeric(x))
  } else {
    base::print.Date(x, ...)
  }
  invisible(x)
}

# With the print.Date override in place, auto-printing shows the stored value.
x
#> [1] Inf

, , , NA:

library(tidyverse)

# Visit data: subjects 3 and 4 have no non-missing dates.
# tibble() replaces the deprecated data_frame().
ds_visit <- tibble(
  subject_id = c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L),
  date = as.Date(c(
    "2017-01-01", "2017-02-01", NA, "2017-01-02", NA, NA, NA, NA
  ))
)

ds_visit %>%
  group_by(subject_id) %>%
  summarise(
    # Handle all-NA groups explicitly so min() is never called with nothing
    # to compare. The fallback is a typed missing Date (not a bare logical
    # NA) so every group yields a Date value.
    date_min = if (all(is.na(date))) as.Date(NA) else min(date, na.rm = TRUE),
    date_min_na = is.na(date_min)
  )
#> # A tibble: 4 x 3
#>   subject_id date_min   date_min_na
#>        <int> <date>     <lgl>      
#> 1          1 2017-01-01 F          
#> 2          2 2017-01-02 F          
#> 3          3 NA         T          
#> 4          4 NA         T

, .

+3

Source: https://habr.com/ru/post/1692791/


All Articles