Less clumsy restructuring of anscombe data

I tried to use ggplot2 to plot the built-in R dataset anscombe (which contains four different small data sets with the same correlations, but fundamentally different relationships between X and Y). My attempts to reshape the data properly were pretty ugly. I used a combination of reshape2 and base R; a Hadleyverse 2 (tidyr/dplyr) or data.table solution would be fine with me, but the ideal solution would be

  • short / non-repeating code
  • understandable (somewhat contrary to criterion No. 1)
  • involving as little hard-coding of column numbers, etc. as possible

Source format:

 anscombe
 ##     x1 x2 x3 x4    y1   y2   y3     y4
 ##  1  10 10 10  8  8.04 9.14  7.46  6.58
 ##  2   8  8  8  8  6.95 8.14  6.77  5.76
 ##  3  13 13 13  8  7.58 8.74 12.74  7.71
 ## ...
 ## 11   5  5  5  8  5.68 4.74  5.73  6.89

Desired format:

 ##    s  x    y
 ## 1  1 10 8.04
 ## 2  1  8 6.95
 ## ...
 ## 44 4  8 6.89

Here is my attempt:

 library("reshape2")
 # Melt one family of anscombe columns (x1..x4 or y1..y4) into long form.
 #   x: data frame whose column names are a one-letter stub followed by a
 #      one-character set id (e.g. "x1").
 #   v: name to give the column of stacked values.
 # Returns a data frame with the stacked values in column `v` and the set id
 # (second character of the source column name) in column `s`.
 ff <- function(x, v) {
   long <- transform(melt(as.matrix(x)), s = substr(Var2, 2, 2))
   # Select by name rather than by position so the result does not depend
   # on how many helper columns melt()/transform() produce.
   setNames(long[, c("value", "s")], c(v, "s"))
 }
 f1 <- ff(anscombe[, grep("^x", names(anscombe))], "x")
 f2 <- ff(anscombe[, grep("^y", names(anscombe))], "y")
 # Both pieces are stacked in the same set/row order, so they can be bound
 # side by side; only `y` is taken from f2 to avoid a duplicated `s` column.
 f12 <- cbind(f1[, c("s", "x")], y = f2$y)

Now the plot:

 library("ggplot2"); theme_set(theme_classic())
 # Stripped-down theme: no gap between facets, no axis ticks or tick labels.
 # `panel.spacing` is the current name of the deprecated `panel.margin`.
 th_clean <-
  theme(panel.spacing=grid::unit(0,"lines"),
    axis.ticks.x=element_blank(),
    axis.text.x=element_blank(),
    axis.ticks.y=element_blank(),
    axis.text.y=element_blank()
    )
# One scatterplot panel per set `s`, without axis titles.
ggplot(f12,aes(x,y))+geom_point()+
  facet_wrap(~s)+labs(x="",y="")+
  th_clean

enter image description here

+4
3

For the anscombe data specifically, @Thela's reshape solution is the most direct.

:

1: Base R

" ", , - :

# Stack groups of wide columns that share a name stub (x1..x4, y1..y4)
# into long format.
#
# indf:  a data.frame in wide format.
# stubs: character vector of regular expressions; each one selects, via
#        grep() on the column names, the group of columns that is stacked
#        into one output column.
#
# Returns a data.frame with column `s` (position of the source column within
# its group, 1..k) followed by one stacked column per stub.
# Stops with an error if the stubs match different numbers of columns.
myReshape <- function(indf = anscombe, stubs = c("x", "y")) {
  stopifnot(is.data.frame(indf), is.character(stubs), length(stubs) > 0L)
  groups <- lapply(stubs, function(stub) grep(stub, names(indf)))
  n_sets <- lengths(groups)
  # Unequal groups would otherwise be recycled silently by data.frame().
  if (length(unique(n_sets)) != 1L) {
    stop("every stub must match the same number of columns", call. = FALSE)
  }
  # lapply (not sapply) keeps each stacked column's own type instead of
  # coercing all stubs to the common type of a single matrix.
  stacked <- lapply(groups, function(idx) unlist(indf[idx], use.names = FALSE))
  names(stacked) <- stubs
  s <- rep(seq_len(n_sets[1L]), each = nrow(indf))
  data.frame(s, stacked)
}

:

  • , , , .
  • , " " (, "x", "y".)

2: "dplyr" + "tidyr"

, :

library(dplyr)
library(tidyr)

# Reshape anscombe (x1..x4, y1..y4) into one row per set `s` and
# observation `ind`, with the values in columns `x` and `y`.
anscombe %>%
  # Remember each observation's position before the columns are split up.
  mutate(ind = row_number()) %>%
  # Split every name into <stub><set>: the stub ("x"/"y") becomes an output
  # column through ".value", the set character goes to `s`.
  pivot_longer(
    -ind,
    names_to = c(".value", "s"),
    names_pattern = "(.)(.)"
  ) %>%
  arrange(s, ind) %>%
  select(s, ind, x, y)

:

  • , , , , .
  • .

3: "splitstackshape"

, @Arun melt.data.table, merged.stack splitstackshape. :

library(splitstackshape)

# Stack the "x" and "y" column families, then rename the generated
# ".time_1" column to "s". setnames() works by reference and returns
# invisibly, so the trailing [] is what makes the result print.
setnames(
  merged.stack(
    data.table(anscombe, keep.rownames = TRUE),
    sep = "var.stubs",
    var.stubs = c("x", "y")
  ),
  old = ".time_1",
  new = "s"
)[]

:

  • merged.stack needs an "id" column, hence data.table(anscombe, keep.rownames = TRUE), which adds the row names as a column named "rn"
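  • sep = "var.stubs" means there is no separator: the stub itself is stripped from each column name, and whatever remains becomes the "time" variable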
  • merged.stack , . , anscombe2 <- anscombe[1:7] "anscombe".
  • reshape, reshape, . , merged.stack. Reshape(data.table(anscombe, keep.rownames = TRUE), var.stubs = c("x", "y"), sep = ""), "time" setnames.

4: melt.data.table

, . R reshape , :

library(data.table)

# Melt both column families in one call: patterns() selects the measure
# columns for each family, value.name names the two stacked columns, and
# the set index ends up in column "s".
melt(
  as.data.table(anscombe),
  variable.name = "s",
  value.name = c("x", "y"),
  measure.vars = patterns(c("x", "y"))
)

:

  • .
  • , "splitstackshape" reshape; -)
  • .
+8

, : 1) 2) 3) . - .

# Base R: treat every column as varying; sep = "" splits each name into
# its stub ("x"/"y") and the set number, which is stored in column "s".
reshape(
  anscombe,
  direction = "long",
  varying = names(anscombe),
  sep = "",
  timevar = "s"
)

#     s  x     y id
#1.1  1 10  8.04  1
#...
#11.1 1  5  5.68 11
#1.2  2 10  9.14  1
#...
#11.2 2  5  4.74 11
#1.3  3 10  7.46  1
#...
#11.3 3  5  5.73 11
#1.4  4  8  6.58  1
#...
#11.4 4  8  6.89 11
+6

I don’t know whether this solution is acceptable as is, but here you go:

library(data.table)
# anscombe stores the x columns first and the matching y columns after
# them, so set i is the pair (column i, column i + number of sets).
# Derive the set indices from the data instead of hard-coding 1:4.
pattern <- seq_len(ncol(anscombe) / 2)
# Build one data.frame per set with its final column names already in
# place, then stack them with rbindlist. Naming the columns up front keeps
# the names identical across list items (the auto-generated names differ
# per item), so no positional rename is needed afterwards.
lists <- rbindlist(Map(data.frame,
                       s = pattern,
                       x = anscombe[pattern],
                       y = anscombe[pattern + length(pattern)]
                       )
                   )

Output:

> lists
    s  x     y
 1: 1 10  8.04
 2: 1  8  6.95
 3: 1 13  7.58
 4: 1  9  8.81
 5: 1 11  8.33
 6: 1 14  9.96
 7: 1  6  7.24
 8: 1  4  4.26
 9: 1 12 10.84
10: 1  7  4.82
....
+3
source

Source: https://habr.com/ru/post/1614964/


All Articles