Non-standard evaluation when subsetting a data frame.

One of the quirks of subsetting a data frame is that you have to type the name of the data frame repeatedly whenever you refer to its columns. For example, the data frame cars is mentioned 3 times here:

cars[cars$speed == 4 & cars$dist < 10, ]
##   speed dist
## 1     4    2

The data.table package solves this.

library(data.table)
dt_cars <- as.data.table(cars)
dt_cars[speed == 4 & dist < 10]

As well as dplyr.

library(dplyr)
cars %>% filter(speed == 4, dist < 10)

I would like to know whether there is a solution for standard data.frames (i.e. one that does not resort to data.table or dplyr).

I think I'm looking for something like

cars[MAGIC(speed == 4 & dist < 10), ]

or

MAGIC(cars[speed == 4 & dist < 10, ])

where MAGIC is to be determined.

I tried the following, but it gave me an error.

library(rlang)
cars[locally(speed == 4 & dist < 10), ]
# Error in locally(speed == 4 & dist < 10) : object 'speed' not found
+4
source share
5 answers

1) subset This only requires that cars be mentioned once. No packages are used.

subset(cars, speed == 4 & dist < 10)
##   speed dist
## 1     4    2

2) sqldf This uses a package, but neither dplyr nor data.table, which are the only two packages excluded by the question:

library(sqldf)

sqldf("select * from cars where speed = 4 and dist < 10")
##   speed dist
## 1     4    2

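3) assignment Not sure whether this counts, but you could assign cars to another variable name, such as ., and then use that. In that case cars is mentioned only once. No packages are used.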

. <- cars
.[.$speed == 4 & .$dist < 10, ]
##   speed dist
## 1     4    2

. <- cars
with(., .[speed == 4 & dist < 10, ])
##   speed dist
## 1     4    2

Bizarro Pipe: http://www.win-vector.com/blog/2017/01/using-the-bizarro-pipe-to-debug-magrittr-pipelines-in-r/

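4) magrittr This can also be expressed with magrittr, a package the question does not exclude. Note that it uses the magrittr %$% operator: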

library(magrittr)

cars %$% .[speed == 4 & dist < 10, ]
##   speed dist
## 1     4    2
+7

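subset is the base function that solves this problem. However, like all base R functions that use non-standard evaluation, subset does not perform fully hygienic evaluation. As a result, subset() evaluates the wrong variable when it is used in non-global scopes (for example, inside lapply loops).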

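As an example, we define the variable var in two places: first in the global scope with the value 40, then in a local scope with the value 30. local() is used here for simplicity; the behaviour would be the same inside a function. Intuitively, we would expect subset to use the value 30 in its evaluation. Running the following code, however, shows that the value 40 is used instead (so no rows are returned).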

var <- 40

local({
  var <- 30
  dfs <- list(mtcars, mtcars)
  lapply(dfs, subset, mpg > var)
})

#> [[1]]
#>  [1] mpg  cyl  disp hp   drat wt   qsec vs   am   gear carb
#> <0 rows> (or 0-length row.names)
#> 
#> [[2]]
#>  [1] mpg  cyl  disp hp   drat wt   qsec vs   am   gear carb
#> <0 rows> (or 0-length row.names)

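This happens because the parent.frame() used by subset() is the environment inside the body of lapply(), not the local block. Since all environments eventually inherit from the global environment, the variable var is found there with the value 40.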

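Hygienic variable substitution via quasiquotation (as implemented in the rlang package) solves this problem. Using tidy evaluation, we can define a variant of subset that works properly in all contexts. The code is derived from, and largely identical to, that of base::subset.data.frame().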

subset2 <- function (x, subset, select, drop = FALSE, ...) {
  # Tidy-evaluation variant of base::subset.data.frame().
  #
  # x      : data frame to filter.
  # subset : unquoted logical expression evaluated with the columns of `x` as
  #          data; rows where it is TRUE are kept, NA counts as FALSE.
  # select : unquoted column selection evaluated against a name -> position
  #          map of the columns, so ranges such as `disp:wt` work.
  # drop   : forwarded to `[`.
  # ...    : accepted for signature compatibility; not used.
  #
  # Both expressions are captured as quosures, so free variables are looked
  # up in the environment where the caller wrote them.

  # Row mask: keep everything when no filter is given.
  if (missing(subset)) {
    keep_rows <- rep_len(TRUE, nrow(x))
  } else {
    keep_rows <- rlang::eval_tidy(rlang::enquo(subset), x)
    if (!is.logical(keep_rows)) {
      stop("'subset' must be logical")
    }
    keep_rows <- keep_rows & !is.na(keep_rows)
  }

  # Column index: TRUE selects every column.
  if (missing(select)) {
    keep_cols <- TRUE
  } else {
    col_positions <- as.list(seq_along(x))
    names(col_positions) <- names(x)
    keep_cols <- rlang::eval_tidy(rlang::enquo(select), col_positions)
  }

  x[keep_rows, keep_cols, drop = drop]
}

This version of subset behaves identically to base::subset.data.frame().

subset2(mtcars, gear > 4, disp:wt)
#>                 disp  hp drat    wt
#> Porsche 914-2  120.3  91 4.43 2.140
#> Lotus Europa    95.1 113 3.77 1.513
#> Ford Pantera L 351.0 264 4.22 3.170
#> Ferrari Dino   145.0 175 3.62 2.770
#> Maserati Bora  301.0 335 3.54 3.570

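However, subset2() does not suffer from the problems of subset. In the previous example, the value 30 is used for var, as we would expect.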

local({
  var <- 30
  dfs <- list(mtcars, mtcars)
  lapply(dfs, subset2, mpg > var)
})

#> [[1]]
#>                 mpg cyl disp  hp drat    wt  qsec vs am gear carb
#> Fiat 128       32.4   4 78.7  66 4.08 2.200 19.47  1  1    4    1
#> Honda Civic    30.4   4 75.7  52 4.93 1.615 18.52  1  1    4    2
#> Toyota Corolla 33.9   4 71.1  65 4.22 1.835 19.90  1  1    4    1
#> Lotus Europa   30.4   4 95.1 113 3.77 1.513 16.90  1  1    5    2
#> 
#> [[2]]
#>                 mpg cyl disp  hp drat    wt  qsec vs am gear carb
#> Fiat 128       32.4   4 78.7  66 4.08 2.200 19.47  1  1    4    1
#> Honda Civic    30.4   4 75.7  52 4.93 1.615 18.52  1  1    4    2
#> Toyota Corolla 33.9   4 71.1  65 4.22 1.835 19.90  1  1    4    1
#> Lotus Europa   30.4   4 95.1 113 3.77 1.513 16.90  1  1    5    2

, , .

, , . , , . !

For more details, see the talk "Tidy evaluation (hygienic fexprs)", the rlang vignette on tidy evaluation, and the programming with dplyr vignette.

+3

, , :):

with(cars, data.frame(speed=speed,dist=dist)[speed == 4 & dist < 10,])
#   speed dist
# 1     4    2

:

# One-shot override of `[` for data frames: the row index `i` is captured
# unevaluated and evaluated with the columns of `x` as data, so
# `cars[speed == 4 & dist < 10, ]` works without repeating `cars$`.
#
# x : a data frame (the row expression is evaluated against its columns).
# i : unquoted row expression; when missing, all rows are kept.
# j : optional column index, forwarded as-is.
#
# The override removes itself from the caller's environment on first use, so
# every later `[` call gets the regular base behaviour again.
`[` <- function(x, i, j) {
  caller <- parent.frame()

  # Only remove the override where it actually lives; an unconditional rm()
  # warns when the call comes from any other frame.
  if (exists("[", envir = caller, inherits = FALSE)) {
    rm(list = "[", envir = caller)
  }

  # Evaluate the captured expression directly instead of round-tripping it
  # through deparse()/parse(): that breaks on expressions that deparse to
  # several lines, and `with(x, x[...])` picks up a column called `x`.
  # Names that are not columns are resolved in the caller's environment.
  rows <- if (missing(i)) TRUE else eval(substitute(i), x, caller)

  # Call the base primitive explicitly so this function can never re-enter
  # itself when the override is still visible further up the scope chain.
  if (missing(j)) {
    base::`[`(x, rows, )
  } else {
    base::`[`(x, rows, j)
  }
}
cars[speed == 4 & dist < 10, ]

#   speed dist
# 1     4    2
+2

You can override the [ method for data.frame: if i is a quoted expression or a formula, it is evaluated within the data.frame.

##### override subsetting method
# Override of the data.frame subsetting method that additionally accepts a
# quoted expression (quote(...)) or a one-sided formula (~ ...) as the row
# index `i`; such an index is evaluated with the columns of `x` as data.
#
# x   : data frame being subset.
# i   : row index. Language objects and formulas are evaluated against `x`;
#       anything else is passed to the base method untouched.
# j   : column index, forwarded as-is (may be missing).
# ... : further arguments for the base method (e.g. `drop`).
#
# Returns whatever base::`[.data.frame` returns for the resolved indices.
`[.data.frame` <- function(x, i, j, ...) {
  caller <- parent.frame()

  if (!missing(i) && (is.language(i) || inherits(i, "formula"))) {
    # Names that are not columns are looked up where the expression was
    # written: the formula's own environment, or the caller for quote().
    # (baseenv() would hide caller variables and every non-base function,
    # e.g. median().)
    lookup_env <- caller
    if (inherits(i, "formula")) {
      if (!is.null(environment(i))) {
        lookup_env <- environment(i)
      }
      i <- as.list(i)[[2]]
    }
    i <- eval(i, x, enclos = lookup_env)
    return(base::`[.data.frame`(x, i, j, ...))
  }

  # The base method tells `x[i]` (list-style column selection) apart from
  # `x[i, ]` by its argument count, so the single-index form must be
  # forwarded with a single index; otherwise `df["col"]` is treated as a
  # row lookup.
  if (nargs() - ...length() < 3L) {
    base::`[.data.frame`(x, i, ...)
  } else {
    base::`[.data.frame`(x, i, j, ...)
  }
}

#####

data(cars)
cars[cars$speed == 4 & cars$dist < 10, ]
#     speed dist
# 1     4    2

# cars[speed == 4 & dist < 10, ] # error

cars[quote(speed == 4 & dist < 10),] 
#     speed dist
# 1     4    2


# ,or
cars[~ speed == 4 & dist < 10,]
#     speed dist
# 1     4    2

. R, :

# Evaluate `expr` with the columns of the data frame that is currently being
# subset, so that `cars[locally(speed == 4 & dist < 10), ]` works.
#
# expr : unquoted expression used as an index inside `df[ ... ]`.
#
# Returns the value of `expr` evaluated against the data frame when the
# nearest enclosing `[` call is on a data frame; otherwise `expr` is
# evaluated normally in the caller's environment.
locally <- function(expr) {
  caller <- parent.frame()
  stack <- as.list(sys.calls())

  # Walk the call stack from the innermost frame outwards and stop at the
  # nearest `[` call. Looking only at frame 1 (the outermost call) restricts
  # the helper to expressions typed at top level.
  for (k in rev(seq_along(stack))) {
    cl <- stack[[k]]
    if (is.call(cl) && length(cl) >= 2L && identical(cl[[1L]], as.name("["))) {
      # Re-evaluate the subsetted object where the `[` expression was written.
      possibly_df <- eval(cl[[2L]], caller)
      if (is.data.frame(possibly_df)) {
        # Non-column names resolve in the caller's environment; baseenv()
        # would hide local variables and all non-base functions.
        return(eval(substitute(expr), possibly_df, enclos = caller))
      }
      break
    }
  }

  expr
}

cars[locally(speed == 4 & dist < 10), ]
#     speed dist
# 1     4    2
+2

attach()

attach(cars)
cars[speed == 4 & dist < 10,]
#   speed dist
# 1     4    2

R, attach(), , , , .

0

Source: https://habr.com/ru/post/1689090/


All Articles