How to create a plot that overlays data points with an unevenly distributed matrix?

I want to create a graph similar to the attached image, where the data points are overlaid on a color-coded matrix chart:

Excel plot overlaying point chart over colored cells

I have data x and y. Then I created a matrix xy_bincount by counting the number of points in x and y that fall into each combination of my x and y bins. The bin widths are uneven, as can be seen in the attached figure.

Would it be easier to create this graph in R, Matlab or Python?

Thanks for the help!

# Sample data: 18 (x, y) observations, one label per observation.
x <- c(
  2.56481, 2.11009, 1.72927, 1.47803, 1.74279, 3.29555,
  3.66061, 2.63349, 2.43808, 2.13, 3.09267, 2.3555,
  2.48811, 4.05344, 3.38401, 2.69907, 2.26378, 2.71978
)
y <- c(
  -1.26044, 13.6098, 0.710325, -4.27657, 11.1908, -7.2431,
  -3.19167, 20.7423, 10.009, 32.12, 42.6192, 13.9598,
  -0.412724, -20.3846, -6.97259, -14.2046, 8.30859, 0.0386572
)
xylabels <- LETTERS[seq_len(18)]

# Per-cell values of the 5 x 4 bin grid, entered row by row.
xy_bincount <- matrix(
  c(
    0, 0,  0,  6,
    0, 0,  6, 12,
    0, 0, 24,  6,
    0, 0, 29,  0,
    0, 0, 12,  6
  ),
  nrow = 5, ncol = 4, byrow = TRUE
)
+4
source share
1 answer

You can try

library(tidyverse)
# Bin edges for both axes; the bins are deliberately of unequal width.
y_breaks <- c(-25, -15, -5, 5, 15, 55)
x_breaks <- c(0, 0.5, 1.5, 3, 4.5)

# Convert a factor whose labels are numbers (here: each bin's upper edge)
# back to those numeric values.
foo <- function(x) as.numeric(as.character(x))

tibble(x, y) %>%
  # Label every bin with its upper edge so foo() can recover a plot position.
  mutate(
    y_bins = cut(y, breaks = y_breaks, labels = y_breaks[-1],
                 include.lowest = TRUE),
    x_bins = cut(x, breaks = x_breaks, labels = x_breaks[-1],
                 include.lowest = TRUE)
  ) %>%
  # n = number of points sharing this (y_bins, x_bins) cell.
  add_count(y_bins, x_bins) %>%
  # Column n divided by the total row count n() gives the cell's share.
  mutate(percent = n / n()) %>%
  ggplot(aes(x, y)) +
  geom_point() +
  geom_text(
    # One row per grid cell; cells without points get a share of 0.
    data = . %>%
      select(y_bins, x_bins, percent) %>%
      complete(y_bins, x_bins, fill = list(percent = 0)) %>%
      distinct(),
    # Nudge the label off the cell's upper-edge corner into the cell.
    aes(x = foo(x_bins) - 0.15, y = foo(y_bins) - 2,
        label = scales::percent(percent)),
    color = "red"
  ) +
  # Axis limits follow the outermost breaks instead of repeating literals.
  scale_x_continuous(breaks = x_breaks, limits = range(x_breaks),
                     expand = c(0, 0), minor_breaks = NULL,
                     position = "top") +
  scale_y_reverse(breaks = y_breaks, limits = rev(range(y_breaks)),
                  expand = c(0, 0), minor_breaks = NULL)

enter image description here

For rectangles, you can use this hard-coded solution.

# Binned points with each cell's share of all points. df1 is built here
# because the plot below needs it and nothing earlier creates it.
df1 <- tibble(x, y) %>%
  mutate(
    y_bins = cut(y, breaks = y_breaks, labels = y_breaks[-1],
                 include.lowest = TRUE),
    x_bins = cut(x, breaks = x_breaks, labels = x_breaks[-1],
                 include.lowest = TRUE)
  ) %>%
  add_count(y_bins, x_bins) %>%
  mutate(percent = n / n())

# calculate the positions for the rectangle, e.g. xmin, ymin and xmax, ymax
# One row per grid cell (empty cells get a share of 0). The corners are
# looked up from the break vectors through each bin's factor level index,
# so they do not depend on row order or on the number of bins.
df2 <- df1 %>%
  distinct(y_bins, x_bins, percent) %>%
  complete(y_bins, x_bins, fill = list(percent = 0)) %>%
  mutate(
    x_start = x_breaks[as.integer(x_bins)],
    x_end = x_breaks[as.integer(x_bins) + 1],
    y_start = y_breaks[as.integer(y_bins)],
    y_end = y_breaks[as.integer(y_bins) + 1],
    percent_gr = 100 * percent  # share expressed in percent, drives the fill
  )

# and the plot
df1 %>%
  ggplot(aes(x, y)) +
  geom_rect(
    data = df2,
    aes(xmin = x_start, xmax = x_end, ymin = y_start, ymax = y_end,
        fill = percent_gr),
    alpha = 0.8, inherit.aes = FALSE
  ) +
  geom_point() +
  geom_text(
    data = df2,
    # Nudge the label off the cell's upper-edge corner into the cell.
    aes(x = x_end - 0.15, y = y_end - 2, label = scales::percent(percent)),
    inherit.aes = FALSE
  ) +
  scale_x_continuous(breaks = x_breaks, limits = range(x_breaks),
                     expand = c(0, 0), minor_breaks = NULL,
                     position = "top") +
  scale_y_reverse(breaks = y_breaks, limits = rev(range(y_breaks)),
                  expand = c(0, 0), minor_breaks = NULL) +
  scale_fill_gradient(low = "white", high = "red") +
  theme_linedraw()

enter image description here

And finally, you can try everything in one solution using geom_tile

# Per-cell summary, applied by ggplot to the plot data: one row per grid cell
# with its share of points (0 for empty cells) plus the tile geometry.
# Bin labels are the upper edges, so width and height come straight from the
# break spacing and the tile centre sits half a tile below the upper edge.
bin_cells <- . %>%
  distinct(y_bins, x_bins, percent) %>%
  complete(y_bins, x_bins, fill = list(percent = 0)) %>%
  mutate(
    w = diff(x_breaks)[as.integer(x_bins)],
    h = diff(y_breaks)[as.integer(y_bins)],
    x = foo(x_bins) - w / 2,
    y = foo(y_bins) - h / 2,
    percent_gr = 100 * percent  # share expressed in percent, drives the fill
  )

tibble(x, y) %>%
  mutate(
    y_bins = cut(y, breaks = y_breaks, labels = y_breaks[-1],
                 include.lowest = TRUE),
    x_bins = cut(x, breaks = x_breaks, labels = x_breaks[-1],
                 include.lowest = TRUE)
  ) %>%
  add_count(y_bins, x_bins) %>%
  # Keep the share numeric; it is formatted only where it is displayed, so
  # the numeric fill value 0 for empty cells matches the column type.
  mutate(percent = n / n()) %>%
  ggplot(aes(x, y)) +
  geom_tile(
    data = bin_cells,
    aes(y = y, x = x, width = w, height = h, fill = percent_gr)
  ) +
  geom_point() +
  geom_text(
    data = bin_cells,
    # Nudge the label off the cell's upper-edge corner into the cell.
    aes(x = foo(x_bins) - 0.15, y = foo(y_bins) - 2,
        label = scales::percent(percent))
  ) +
  scale_x_continuous(breaks = x_breaks, limits = range(x_breaks),
                     expand = c(0, 0), minor_breaks = NULL,
                     position = "top") +
  scale_y_reverse(breaks = y_breaks, limits = rev(range(y_breaks)),
                  expand = c(0, 0), minor_breaks = NULL) +
  scale_fill_gradient(low = "white", high = "red") +
  theme_linedraw()
+5

Source: https://habr.com/ru/post/1695107/


All Articles