R - add significance level to the correlation map

Question

R - add significance level to the correlation map

I have the following df data frame (attached)

I wrote a short script to build a correlation heat map

library(ggplot2) library(plyr) library(reshape2) library(gridExtra) #Load data frame df <- data.frame(read.csv("~/Documents/wig_cor.csv",sep="\t")) c = cor(df[sapply(df,is.numeric)]) #Plot all data plots <- dlply(df, .(Method), function (x1) { ggplot(melt(cor(x1[sapply(x1,is.numeric)])), aes(x=Var1,y=Var2,fill=value)) + geom_tile(aes(fill = value),colour = "white") + geom_text(aes(label = sprintf("%1.2f",value)), vjust = 1) + theme_bw() + theme(legend.position = 'none') + scale_fill_gradient2(midpoint=0.8,low = "white", high = "steelblue")}) #Plot by EF Analysis Method plots <- dlply(df, .(Method), function (x1) { ggplot(subset(melt(cor(x1[sapply(x1,is.numeric)]))[lower.tri(c),],Var1 != Var2), aes(x=Var1,y=Var2,fill=value)) + geom_tile(aes(fill = value),colour = "white") + geom_text(aes(label = sprintf("%1.2f",value)), vjust = 1) + theme_bw() + scale_fill_gradient2(name="R^2",midpoint=0.7,low = "white", high = "red") + xlab(NULL)+ylab(NULL) + theme(axis.text.x=element_blank(),axis.text.y=element_blank(), axis.ticks=element_blank(),panel.border=element_blank()) + ggtitle(x1$Method) + theme(plot.title = element_text(lineheight=1,face="bold")) + geom_text(data = subset(melt(cor(x1[sapply(x1,is.numeric)])),Var1==Var2),aes(label=Var1),vjust=3 ) }) #Function to grab legend g_legend<-function(a.gplot){ tmp <- ggplot_gtable(ggplot_build(a.gplot)) leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box") legend <- tmp$grobs[[leg]] legend } legend <- g_legend(plots$WIG_Method) png(file = "/misc/croc_common/physics/jamie/Portfolio/WesternEF/EFCorrelations.png", width = 1200, height = 400) grid.arrange(legend,plots$Single_ROI+theme(legend.position='none'), plots$Simple_2_ROI+theme(legend.position='none'),plots$WIG_Method+theme(legend.position='none'), plots$WIG_drawn_bg+theme(legend.position='none'), ncol=5, nrow=1, widths=c(1/17,4/17,4/17,4/17,4/17)) dev.off()

However, I would like to use stars to emphasize the statistical significance of each correlation described here, but I completely lost it on how to do this. Any guidance

 structure(list(Study = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L), .Label = c("WCBP12236", "WCBP12241", "WCBP12242", "WCBP12243", "WCBP12245", "WCBP13001", "WCBP13002", "WCBP13003", "WCBP13004", "WCBP13005", "WCBP13006", "WCBP13007", "WCBP13008", "WCBP13009", "WCBP13010", "WCBP13011", "WCBP13012", "WCBP13013", "WCBP13014" ), class = "factor"), G1 = c(68, 68.6, 66.6, 73.1, 51.6, 50.1, 64.1, 73, 63.7, 43.2, 62.3, 59.2, 67.5, 68.2, 54.6, 67.9, 56.5, 54.2, 67.3, 68, 68.4, 67.9, 73.3, 51.7, 50.3, 63.9, 73.9, 64, 42.9, 62.5, 59.3, 66.7, 68.4, 54, 68.2, 56.8, 54.5, 67, 53.2, 41.4, 53, 52.3, 41, 37.4, 56.9, 65.3, 36.2, 35.3, 36.1, 32.5, 56.5, 47.7, 39.4, 59.6, 38.1, 24.2, 30.2, 68.5, 68.9, 70.7, 74.9, 53.4, 51.6, 65.9, 75.7, 64.7, 42.8, 61.4, 60.8, 69.5, 68.7, 55.9, 70.7, 59.5, 51.1, 69.5), G2 = c(79.8, 72.2, 73.5, 74.4, 50.4, 54.8, 63.1, 70.4, 63.6, 45.1, 65.3, 49.4, 65.3, 76.2, 51, 63.9, 58.7, 57.8, 67, 79.6, 72.1, 73.9, 74.7, 50.5, 55.1, 62.8, 70.5, 63.3, 44.6, 65.5, 48.9, 64.9, 76.3, 50.6, 64.8, 58.6, 58.3, 67.4, 51.2, 37.7, 49.1, 53.7, 44.6, 37.3, 54.9, 64.1, 33.8, 31.9, 34.2, 30.3, 56.2, 44.6, 38.2, 63.2, 35.8, 26.5, 27.6, 80.6, 71.6, 75.4, 77.1, 52.4, 56.3, 66, 72.3, 64.5, 38.2, 64.3, 49.2, 66.9, 77.1, 52.4, 67.5, 59.6, 55.6, 69.9), S1 = c(75.1, 65.9, 72.7, 68.8, 49, 57.5, 66.5, 74.1, 60.9, 51.8, 58, 64.3, 71.1, 71.4, 58.9, 62.2, 58, 57.7, 58.6, 75.2, 66, 73.2, 69.7, 48.9, 57.7, 66.5, 74.7, 60.8, 51.4, 58.9, 65.5, 70.5, 71.4, 58.9, 65.1, 60.8, 57.7, 58.4, 54.3, 40.2, 52.6, 60.5, 42.6, 34.1, 55, 64.7, 36.3, 32.5, 39, 38.8, 58.1, 48, 40.5, 61, 40, 26.4, 28.8, 76.4, 66.5, 73.9, 72, 50.7, 59.2, 69.9, 76.3, 62.4, 50, 58.5, 66.6, 73.7, 72.3, 62.6, 69.6, 62.7, 57.9, 61.1), S2 = c(76.6, 71.6, 71.2, 72.7, 51.6, 56.7, 65.9, 73.5, 63.6, 55.2, 62.6, 62.2, 69.1, 71.1, 56.8, 61, 61.7, 60, 55.7, 76.9, 71.6, 72.3, 73.2, 51.7, 56.8, 64.5, 74.9, 63.6, 51.3, 63, 62.8, 68.7, 71.3, 56.8, 64.2, 62.8, 60.4, 55.8, 53.6, 42.5, 50, 54.4, 42.2, 36.4, 57.7, 64.1, 35.1, 30.8, 39.1, 37.4, 58.7, 47.8, 42, 58.8, 39.4, 24.2, 28.2, 78.2, 73.3, 72.3, 75.6, 53.4, 57.8, 68.3, 76.6, 63.7, 51.7, 63.4, 63.3, 71.5, 72.3, 60.2, 67.1, 65.5, 58.2, 59.1), Method = structure(c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Simple_2_ROI", "Single_ROI", "WIG_drawn_bg", "WIG_Method"), class = "factor")), .Names = c("Study", "G1", "G2", "S1", "S2", "Method"), row.names = c(NA, -76L), class = "data.frame")

+4

r ggplot2

moadeep Jul 24 '13 at 10:51

source share

2 answers

Here is an example of one way to do this with ggplot. You essentially add stars of significance as symbols to the data frame and map them as text on the heat map: https://github.com/andrewheiss/Attitudes-in-the-Arab-World/blob/master/figure12.R

+6

Andrew Aug 17 '13 at 15:23

source share

jakub · Accepted Answer · 2013-08-17T13:40:08+0000

Useful function to get p values from the rcorr correlation rcorr from Hmisc . Using this, I got the following:

enter image description here

Each cell of the correlation matrix has a pair of numbers: the top represents the correlation coefficient (as well as the color gradient of the cell), and the bottom represents the p value. Is that what you wanted? (See the bottom of the answer for a better answer, whereby I convert p values to stars ...)

I have done the following:

Since your p values would be very small in this data frame, I used jitter and stripped the number of observations to reduce the statistical significance. The reason for this is that very low p values will be very difficult to read in this type of correlation matrix. Therefore, the result does not make much sense from a statistical point of view, but it perfectly demonstrates how significance levels can be added to the matrix.

First shiver and limit the number of observations:

 mydf=df mydf[,2:5] = sapply(mydf[,2:5],jitter,amount=20) mydf=mydf[c(1:5,20:24,39:43,58:62),]

Then calculate the coefficient r and p values:

 library(Hmisc) # calculate r c = rcorr(as.matrix(mydf[sapply(mydf,is.numeric)]))$r # calculate p values p = rcorr(as.matrix(mydf[sapply(mydf,is.numeric)]))$P

Make a graph based on both of these values:

 plots <- dlply(mydf, .(Method), function (x1) { ggplot(data.frame(subset(melt(rcorr(as.matrix(x1[sapply(x1,is.numeric)]))$r)[lower.tri(c),],Var1 != Var2), pvalue=subset(melt(rcorr(as.matrix(x1[sapply(x1,is.numeric)]))$P)[lower.tri(p),],Var1 != Var2)$value), aes(x=Var1,y=Var2,fill=value)) + geom_tile(aes(fill = value),colour = "white") + geom_text(aes(label = sprintf("%1.2f",value)), vjust = 0) + geom_text(aes(label = sprintf("%1.2f",pvalue)), vjust = 1) + theme_bw() + scale_fill_gradient2(name="R^2",midpoint=0.25,low = "blue", high = "red") + xlab(NULL) + ylab(NULL) + theme(axis.text.x=element_blank(), axis.text.y=element_blank(), axis.ticks=element_blank(), panel.border=element_blank()) + ggtitle(x1$Method) + theme(plot.title = element_text(lineheight=1,face="bold")) + geom_text(data = subset(melt(rcorr(as.matrix(x1[sapply(x1,is.numeric)]))$r),Var1==Var2), aes(label=Var1),vjust=1 ) })

Show graph.

 grid.arrange(plots$Single_ROI + theme(legend.position='none'), plots$Simple_2_ROI + theme(legend.position='none'), plots$WIG_Method + theme(legend.position='none'), plots$WIG_drawn_bg + theme(legend.position='none'), ncol=2, nrow=2)

Stars instead of p values:

Change the data frame (this time I leave a few observations):

 library(Hmisc) library(car) mydf=df set.seed(12345) mydf[,2:5] = sapply(mydf[,2:5],jitter,amount=15) mydf=mydf[c(1:10,20:29,39:48,58:67),]

Calculate the values of r, p and encode the values of p into stars inside the graph function:

 # calculate r c = rcorr(as.matrix(mydf[sapply(mydf,is.numeric)]))$r # calculate p values p = rcorr(as.matrix(mydf[sapply(mydf,is.numeric)]))$P plots <- dlply(mydf, .(Method), function (x1) { ggplot(data.frame(subset(melt(rcorr(as.matrix(x1[sapply(x1,is.numeric)]))$r)[lower.tri(c),],Var1 != Var2), pvalue=Recode(subset(melt(rcorr(as.matrix(x1[sapply(x1,is.numeric)]))$P)[lower.tri(p),],Var1 != Var2)$value , "lo:0.01 = '***'; 0.01:0.05 = '*'; else = ' ';")), aes(x=Var1,y=Var2,fill=value)) + geom_tile(aes(fill = value),colour = "white") + geom_text(aes(label = sprintf("%1.2f",value)), vjust = 0) + geom_text(aes(label = pvalue), vjust = 1) + theme_bw() + scale_fill_gradient2(name="R^2",midpoint=0.25,low = "blue", high = "red") + xlab(NULL) + ylab(NULL) + theme(axis.text.x=element_blank(), axis.text.y=element_blank(), axis.ticks=element_blank(), panel.border=element_blank()) + ggtitle(x1$Method) + theme(plot.title = element_text(lineheight=1,face="bold")) + geom_text(data = subset(melt(rcorr(as.matrix(x1[sapply(x1,is.numeric)]))$r),Var1==Var2), aes(label=Var1),vjust=1 ) })

Show graph.

 grid.arrange(plots$Single_ROI + theme(legend.position='none'), plots$Simple_2_ROI + theme(legend.position='none'), plots$WIG_Method + theme(legend.position='none'), plots$WIG_drawn_bg + theme(legend.position='none'), ncol=2, nrow=2)

R - add significance level to the correlation map

More articles: