3 , , , ( ) SO.
:
"" data.frame R/dplyr?
, "" data.frame, , -, -.
- "" data.frame R/dplyr?
- , ?
"" R/dplyr?
, , . , , , , .
:
- , ,
- ,
- ,
: , . , . , , "" (№ 2 № 3 ). , , .
data.frames. ( , dplyr, , ).
, dplyr::select_if:
# Keep only the columns that are not themselves data.frames, then filter the
# remaining flat columns as usual.
yelpBusinessData %>%
  dplyr::select_if(purrr::negate(is.data.frame)) %>%
  dplyr::filter(city == 'Phoenix')
dplyr , , data.frames (, attributes). ..
"" data.frame R/dplyr?
"" , flatten .
attributes, jsonlite::flatten - :
# Keep the flat columns, bind the flattened `attributes` columns back on, and
# then filter on both the original and the flattened columns.
yelpBusinessData %>%
  dplyr::select_if(purrr::negate(is.data.frame)) %>%
  dplyr::bind_cols(jsonlite::flatten(yelpBusinessData$attributes, recursive = TRUE)) %>%
  dplyr::filter(city == 'Phoenix') %>%
  dplyr::filter(grepl("Restaurants", categories)) %>%
  dplyr::filter(Price_Range == 4)
hours, , -. hours data.frame data.frame ("" ""). purrr::map , data.frame .
# For each per-day sub-frame of `hours`, collapse `open` and `close` into a
# single "open - close" string column, then collect the days into one tibble.
hours <-
  yelpBusinessData$hours %>%
  purrr::map(function(day_hours) {
    day_hours %>%
      dplyr::transmute(hours = stringr::str_c(open, close, sep = ' - ')) %>%
      unlist()
  }) %>%
  tibble::as_tibble()
data.frame data.frame :
> str(hours)
Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 61184 obs. of 7 variables:
$ Tuesday : chr "08:00 - 17:00" NA NA "10:00 - 21:00" ...
$ Friday : chr "08:00 - 17:00" NA NA "10:00 - 21:00" ...
$ Monday : chr "08:00 - 17:00" NA NA "10:00 - 21:00" ...
$ Wednesday: chr "08:00 - 17:00" NA NA "10:00 - 21:00" ...
$ Thursday : chr "08:00 - 17:00" NA NA "10:00 - 21:00" ...
$ Sunday : chr NA NA NA "11:00 - 18:00" ...
$ Saturday : chr NA NA NA "10:00 - 21:00" ...
map2_dfc ( bind_cols bind_cols ), :
# Bind the per-day sub-frames column-wise, prefixing every column with the
# name of its day (e.g. `Tuesday_open`). Plain functions replace the deprecated
# `funs()`, which was also the only call here that was not namespace-qualified.
hours <- yelpBusinessData$hours %>%
  purrr::map2_dfc(.x = .,
                  .y = names(.),
                  .f = function(day_hours, day) {
                    dplyr::rename_all(
                      day_hours,
                      function(column) stringr::str_c(day, column, sep = '_')
                    )
                  })
data.frame :
> str(hours)
'data.frame': 61184 obs. of 14 variables:
$ Tuesday_close : chr "17:00" NA NA "21:00" ...
$ Tuesday_open : chr "08:00" NA NA "10:00" ...
$ Friday_close : chr "17:00" NA NA "21:00" ...
$ Friday_open : chr "08:00" NA NA "10:00" ...
$ Monday_close : chr "17:00" NA NA "21:00" ...
$ Monday_open : chr "08:00" NA NA "10:00" ...
$ Wednesday_close: chr "17:00" NA NA "21:00" ...
$ Wednesday_open : chr "08:00" NA NA "10:00" ...
$ Thursday_close : chr "17:00" NA NA "21:00" ...
$ Thursday_open : chr "08:00" NA NA "10:00" ...
$ Sunday_close : chr NA NA NA "18:00" ...
$ Sunday_open : chr NA NA NA "11:00" ...
$ Saturday_close : chr NA NA NA "21:00" ...
$ Saturday_open : chr NA NA NA "10:00" ...
, , "" , :
# `.id = 'day'` requests an identifier column named `day`; the rows printed
# below all show 1 in it.
> purrr::flatten_dfr(yelpBusinessData$hours, .id = 'day')
# A tibble: 61,184 x 3
day close open
<chr> <chr> <chr>
1 1 NA NA
2 1 NA NA
3 1 NA NA
4 1 21:00 10:00
5 1 16:00 10:00
6 1 NA NA
7 1 NA NA
8 1 NA NA
9 1 NA NA
10 1 02:00 08:00
# ... with 61,174 more rows
, ?
, . data.frame R , data.frame of data.frames. .
, , data.frame . purrr, , / , data.frame .
, simplify ( ) data.frame, jsonlite. , . .
:
## read in yelpBusinessData without converting to data.frame
## (`simplifyVector = FALSE` also switches off data.frame and matrix
## simplification; a bare `simplify =` is ambiguous between jsonlite's
## simplifyVector / simplifyDataFrame / simplifyMatrix arguments)
yelpBusinessData2 <- jsonlite::fromJSON(
  sprintf("[%s]",
          paste(readLines(yelpBusinessDataFilePath), collapse = ",")),
  simplifyVector = FALSE,
  flatten = FALSE
)
# filter to Phoenix restaurants _before_ converting to a data.frame
# (`categories` may hold several entries per business, so the match is reduced
# with any(); `&&` needs a single TRUE/FALSE on each side)
> yelpBusinessData2 %>%
  purrr::keep(~ .$'city' == 'Phoenix'
              && any(stringr::str_detect(unlist(.$categories), pattern = 'Restaurants'))) %>%
  jsonlite:::simplify(., flatten = TRUE) %>%
  dplyr::select(business_id, full_address, contains('kids')) %>%
  str()
'data.frame': 8410 obs. of 5 variables:
$ business_id : chr "vcNAWiLM4dR7D2nwwJ7nCA" "x5Mv61CnZLohZWxfCVCPTQ" "2ZnCITVa0abGce4gZ6RhIw" "EmzaQR5hQlF0WIl24NxAZA" ...
$ full_address : chr "4840 E Indian School Rd\nSte 101\nPhoenix, AZ 85018" "2819 N Central Ave\nPhoenix, AZ 85004" "1850 N Central Ave\nPhoenix, AZ 85004" "132 E Washington St\nPhoenix, AZ 85004" ...
$ attributes.Good for Kids : logi NA FALSE TRUE FALSE NA NA ...
$ attributes.Good For Kids : logi NA NA NA NA NA NA ...
$ attributes.Hair Types Specialized In.kids: logi NA NA NA NA NA NA ...
, , - , R, clean_names() . , Excel.