ggplot2 stat_summary_bin bug?

I was pleased to discover that ggplot has binned scatterplots, which are useful for exploring and visualizing relationships in large data sets. However, the top bin seems to be misbehaving. Here's an example: all the binned means are roughly linearly aligned, as they should be, but the top one is off in both dimensions:

enter image description here

the code:

library(ggplot2)

# Build a reproducible toy data set with a linear trend: x is drawn with
# runif() and y is x plus rnorm() noise.
set.seed(1)
N <- 1e4
x <- runif(N)
y <- x + rnorm(N)
dt <- data.frame(x = x, y = y)

# Faint raw points overlaid with the per-bin mean of y (10 bins along x).
# NOTE(review): `fun.y` is the argument name used by the ggplot2 release this
# example was written for; newer releases may expect `fun` -- confirm against
# the installed version.
ggplot(data = dt, mapping = aes(x, y)) +
  geom_point(size = 0.01, alpha = 0.1) +
  stat_summary_bin(
    geom = "point",
    fun.y = "mean",
    bins = 10,
    size = 5,
    color = "orange"
  )

Is there an easy fix (and where should this be reported)?

+4
source share
2 answers

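stat_summary_bin appears to leave the two rows with the largest x values out of the regular bins, so they end up with bin = NA. The mean of those two left-over values is then plotted as a separate point just to the right of the regular bins, which is why the last orange point is off in both dimensions. Below I first show what goes wrong in the original plot, and then a workaround that gives the expected result.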

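To see what goes wrong in the original plot, let's look at the data that stat_summary_bin produces. We can get it with ggplot_build, which returns the data that ggplot generates internally to draw the plot.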

# Same scatter plot, but each bin is annotated with text instead of a point:
# the first text layer is labelled with the bin's mean of y, the second with
# the number of observations in the bin (fun.y = length).
p1 <- ggplot(data = dt, mapping = aes(x, y)) +
  geom_point(size = 0.01, alpha = 0.1) +
  stat_summary_bin(
    mapping = aes(label = ..y..),
    geom = "text", fun.y = mean, bins = 10, size = 5
  ) +
  stat_summary_bin(
    mapping = aes(label = ..y.., y = 0),
    geom = "text", fun.y = length, bins = 10, size = 5
  )

# Build the plot so the data computed for each layer can be inspected.
p1b <- ggplot_build(p1)

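Below is the built data for the mean and length layers. Only rows 9 to 11 are shown (the rightmost bins). Bin 11 is the "extra" bin: it contains only 2 values (its label in the length layer is 2), and the mean of those two values is -0.1309998, which is the value plotted for that bin.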

# Rows 9-11 of the data built for layer 2 of p1 (the text layer using
# fun.y=mean); columns are picked by position.
p1b$data[[2]][9:11,c(1,2,4,6,7)]
        label bin          y         x      width
9   0.8158320   9  0.8158320 0.8498505 0.09998242
10  0.9235531  10  0.9235531 0.9498329 0.09998242
11 -0.1309998  11 -0.1309998 1.0498154 0.09998244
# Rows 9-11 of the data built for layer 3 of p1 (the text layer using
# fun.y=length); columns are picked by position.
p1b$data[[3]][9:11,c(1,2,4,6,7)]
   label bin    y         x      width
9   1025   9 1025 0.8498505 0.09998242
10  1042  10 1042 0.9498329 0.09998242
11     2  11    2 1.0498154 0.09998244

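Where do those two values come from? It turns out they are the two rows with the largest x values in the data: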

# Mean of y over the two rows of dt that have the largest x values.
mean(dt$y[order(-dt$x)[1:2]])
[1] -0.1309998

So it appears that stat_summary_bin leaves the two rows with the largest x values out of the regular bins.

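I am not sure whether this is a bug or intended behaviour, so it is probably worth reporting to the ggplot2 developers. In the meantime, as a workaround, you can compute the bin summaries yourself. The example below uses dplyr and the pipe operator (%>%) to summarise the data "on the fly" inside the plot call: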

library(dplyr)

# The original binned means (orange) plus, in blue, bin means computed by
# hand: x is cut at 11 evenly spaced breaks from min(x) to max(x) (lowest
# break included) and x and y are averaged within each resulting interval.
ggplot(data = dt, mapping = aes(x, y)) +
  geom_point(size = 0.01, alpha = 0.1) +
  stat_summary_bin(
    geom = "point", fun.y = "mean", bins = 10, size = 5, color = "orange"
  ) +
  geom_point(
    mapping = aes(x, y),
    data = dt %>%
      group_by(
        bins = cut(
          x,
          breaks = seq(min(x), max(x), length.out = 11),
          include.lowest = TRUE
        )
      ) %>%
      summarise(x = mean(x), y = mean(y)),
    color = "blue",
    size = 3
  ) +
  theme_bw()

enter image description here

+3

@eipi10's answer explains well what is going wrong.

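However, it turns out that if you set the limits explicitly with scale_x_continuous, the "NA" bin disappears and those points fall into the regular bins.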

# Same plot as in the question, with the x scale limits set explicitly to
# the observed range of x.
ggplot(data = dt, mapping = aes(x, y)) +
  geom_point(size = 0.01, alpha = 0.1) +
  stat_summary_bin(
    geom = "point", fun.y = "mean", bins = 10, size = 5, color = "orange"
  ) +
  scale_x_continuous(limits = range(x))

enter image description here

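I do not know why setting the limits changes the binning, so this only works around the symptom rather than explaining it. If you need full control over how the bins are formed, computing the summaries yourself, as in @eipi10's answer, is the more robust option.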

+1

Source: https://habr.com/ru/post/1653611/


All Articles