R - reorder columns based on matching (pattern)

So, I have a large dataset that looks like this:

     V1       V2   V3         V4
1 Sleep Domestic  Eat Child Care
2 Sleep Domestic  Eat       Paid
3 Sleep Domestic  Eat Child Care
4 Sleep      Eat Paid       <NA>

What I would like to do is reorder the columns based on a "template":

["Sleep", "Eat", "Domestic", "Paid", "Child Care"] 

To get this output:

   V1    V2       V3      V4            V5
Sleep   Eat Domestic      NA    Child Care
Sleep   Eat Domestic    Paid            NA
Sleep   Eat Domestic      NA    Child Care
Sleep   Eat       NA    Paid            NA

So column 1 should hold Sleep, column 2 Eat, and so on, with NA wherever a row does not contain that activity.

I don’t know where to start. Any ideas?

Data:

# Example data: one row per respondent. Activities are packed left to right in
# V1..V4, with NA padding on the right when a row has fewer activities.
x <- structure(list(V1 = c("Sleep", "Sleep", "Sleep", "Sleep"), V2 = c("Domestic", 
"Domestic", "Domestic", "Eat"), V3 = c("Eat", "Eat", "Eat", "Paid"
), V4 = c("Child Care", "Paid", "Child Care", NA)), .Names = c("V1", 
"V2", "V3", "V4"), row.names = c(NA, 4L), class = "data.frame")

# Desired column order: the i-th output column holds template[i] or NA.
# The spelling must match the data exactly ("Child Care", capital C), because
# the values are compared with == / factor levels, which are case-sensitive.
template <- c("Sleep", "Eat", "Domestic", "Paid", "Child Care")
+4
source share
3 answers

Here is an option with tidyverse

library(dplyr)
library(tidyr)
library(tibble)

# Reshape to long form, key every cell by its own value (a factor whose levels
# follow `template`), then widen again so that each activity lands in the
# column given by its position in `template`.
# `x` and `template` are the objects defined in the question.
x %>%
  # Keep the row identity so the cells can be regrouped after reshaping.
  rownames_to_column("id") %>%
  # The padding NAs carry no activity, so drop them while lengthening.
  pivot_longer(
    -id,
    names_to = "Var",
    values_to = "Val",
    values_drop_na = TRUE
  ) %>%
  # A value that is not in `template` would become an NA key and produce a
  # stray extra column; such values have no target column, so discard them.
  filter(Val %in% template) %>%
  mutate(Var = factor(Val, levels = template)) %>%
  # names_expand = TRUE (tidyr >= 1.2.0) creates one column per factor level,
  # in level order, even for template entries that never occur in the data.
  # Without it the number of columns could differ from length(template) and
  # the renaming below would fail.
  pivot_wider(
    names_from = Var,
    values_from = Val,
    names_expand = TRUE
  ) %>%
  select(-id) %>%
  # Return a plain data.frame, as the gather()/spread() version did.
  as.data.frame() %>%
  setNames(paste0("V", seq_along(template)))
#     V1  V2       V3   V4         V5
#1 Sleep Eat Domestic <NA> Child Care
#2 Sleep Eat Domestic Paid       <NA>
#3 Sleep Eat Domestic <NA> Child Care
#4 Sleep Eat     <NA> Paid       <NA>
+2
source

Check `rowSums` for each value in `template`, and then combine the results back into a data frame:

# Template spelled with "Child Care" so that it matches the capitalisation
# used in the data.
template <- c("Sleep", "Eat", "Domestic", "Paid", "Child Care")

# For every template entry build one output column: the entry itself in the
# rows of `x` that contain it anywhere, NA in the rows that do not.
data.frame(
  setNames(
    lapply(template, function(activity) {
      # TRUE for the rows where at least one cell equals this activity;
      # NA cells are ignored by na.rm = TRUE.
      found <- rowSums(x == activity, na.rm = TRUE) > 0
      filled <- rep(NA_character_, length(found))
      filled[found] <- activity
      filled
    }),
    # Numeric names, which data.frame() turns into X1, X2, ...
    seq_along(template)
  )
)

#     X1  X2       X3   X4         X5
#1 Sleep Eat Domestic <NA> Child Care
#2 Sleep Eat Domestic Paid       <NA>
#3 Sleep Eat Domestic <NA> Child Care
#4 Sleep Eat     <NA> Paid       <NA>

Or an alternative with pmax:

# Alternative: for each template entry, blank out every cell that holds a
# different value, then collapse each row with pmax(), which (with
# na.rm = TRUE) returns the surviving value or NA when the row has none.
data.frame(
  setNames(
    lapply(template, function(activity) {
      only_activity <- lapply(x, function(column) {
        column[column != activity] <- NA
        column
      })
      do.call(pmax, c(only_activity, na.rm = TRUE))
    }),
    # Numeric names, which data.frame() turns into X1, X2, ...
    seq_along(template)
  )
)
+3
source

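Here is an approach with reshape2 and dplyr. Add an id column, melt the data into a tall format, turn the values into an ordered factor (using the template as its levels), and then cast the result back to a wide format.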

library(reshape2)
library(dplyr)

# Add an id column so each original row can be reassembled after reshaping.
# The id is a factor whose levels follow the current row order rather than a
# plain character vector: dcast() orders its output rows by id, and character
# ids sort as "1", "10", "11", "2", ... once there are more than nine rows.
x$id <- factor(row.names(x), levels = row.names(x))

# Tall result: one row per (id, value) cell; the source column name that
# melt() adds is not needed and is dropped.
tall <- x %>%
  melt(id.vars = "id") %>%
  select(id, value)

# Ordered factor so that the widened columns follow the order of `template`.
# Values that are not in `template` become NA here.
tall$value <- factor(tall$value, levels = template, ordered = TRUE)

# Wide result with dcast: one column per activity present in the data.
result <- tall %>%
  filter(!is.na(value)) %>%              # drop padding NAs and unknown values
  mutate(var = value) %>%                # name the column after the value
  dcast(id ~ var, value.var = "value")   # explicit value.var: no guessing

result
#  id Sleep Eat Domestic Paid Child Care
#1  1 Sleep Eat Domestic <NA> Child Care
#2  2 Sleep Eat Domestic Paid       <NA>
#3  3 Sleep Eat Domestic <NA> Child Care
#4  4 Sleep Eat     <NA> Paid       <NA>
+2

Source: https://habr.com/ru/post/1667505/


All Articles