Algorithm for automating paired labels for grouping values ​​in R

After a long struggle with this problem, I hope to get some tips here. I am wondering if anyone knows of an automated method for determining paired grouping labels by value. The question does not depend on the significance test (for example, Tukey for parametric or Mann-Whitney for non-parametric) - in these pairwise comparisons, some figures of the boxplot type often represent these groups with a sub script:

enter image description here

, . , - , , , , , , 2 , 3 .., , .

, 1 3 5, 3 5 (.. ).

:

set.seed(1)
n <- 7
n2 <- 100
mu <- cumsum(runif(n, min=-3, max=3))
sigma <- runif(n, min=1, max=3)

dat <- vector(mode="list", n)
for(i in seq(dat)){
    dat[[i]] <- rnorm(n2, mean=mu[i], sd=sigma[i])
}

df <- data.frame(group=as.factor(rep(seq(n), each=n2)), y=unlist(dat))

bp <- boxplot(y ~ group, df, notch=TRUE)
kr <- kruskal.test(y ~ group, df)
kr
mw <- pairwise.wilcox.test(df$y, df$g)
mw
mw$p.value > 0.05 # TRUE means that the levels are not significantly different at the p=0.05 level

#      1     2     3     4     5     6
#2 FALSE    NA    NA    NA    NA    NA
#3  TRUE FALSE    NA    NA    NA    NA
#4 FALSE FALSE FALSE    NA    NA    NA
#5  TRUE FALSE FALSE FALSE    NA    NA
#6 FALSE FALSE FALSE  TRUE FALSE    NA
#7 FALSE FALSE FALSE FALSE FALSE FALSE

text(x=1:n, y=bp$stats[4,], labels=c("AB", "C", "A", "D", "B", "D", "E"), col=1, cex=1.5, pos=3, font=2)
+4
3

. . , . , , , , . , . , ( ? ?) . . ( , .) . , , .

, , , .

3---1---5       4--6

. Bron - Kerbosch, . {1, 3}, {1, 5} {4, 6}. , {1} , , {1, 3}. {1, 3, 5} , 3 5.

  1
 / \
3---5       4--6,

{1, 3, 5} {4, 6}.

. . , , , (1,2) < (1,5) (2,3) (2,5) (3,4). , , , , , . .

, . memoize: , . , R , . ( , .)

+3

, , :

set.seed(1)
n <- 7
n2 <- 100
mu <- cumsum(runif(n, min=-3, max=3))
sigma <- runif(n, min=1, max=3)

dat <- vector(mode="list", n)
for(i in seq(dat)){
    dat[[i]] <- rnorm(n2, mean=mu[i], sd=sigma[i])
}
df <- data.frame(group=as.factor(rep(seq(n), each=n2)), y=unlist(dat))
bp <- boxplot(y ~ group, df, notch=TRUE)


#significance test
kr <- kruskal.test(y ~ group, df)
mw <- pairwise.wilcox.test(df$y, df$g)

#matrix showing connections between levels
g <- as.matrix(mw$p.value > 0.05)
g <- cbind(rbind(NA, g), NA)
g <- replace(g, is.na(g), FALSE)
g <- g + t(g)
diag(g) <- 1
rownames(g) <- 1:n
colnames(g) <- 1:n
g

#install.packages("igraph")
library(igraph)

# Load data
same <- which(g==1)
topology <- data.frame(N1=((same-1) %% n) + 1, N2=((same-1) %/% n) + 1)
topology <- topology[order(topology[[1]]),] # Get rid of loops and ensure right naming of vertices
g3 <- simplify(graph.data.frame(topology,directed = FALSE))
get.data.frame(g3)

# Plot graph
plot(g3)

# Calcuate the maximal cliques
res <- maximal.cliques(g3)

# Reorder given the smallest level
res <- sapply(res, sort)
res <- res[order(sapply(res,function(x)paste0(sort(x),collapse=".")))]

ml<-max(sapply(res, length))
reord<-do.call(order, data.frame(
    do.call(rbind, 
        lapply(res, function(x) c(sort(x), rep.int(0, ml-length(x))))
    )
))
res <- res[reord]

lab.txt <- vector(mode="list", n)
lab <- letters[seq(res)]
for(i in seq(res)){
    for(j in res[[i]]){
        lab.txt[[j]] <- paste0(lab.txt[[j]], lab[i])
    }
}

bp <- boxplot(y ~ group, df, notch=TRUE, outline=FALSE, ylim=range(df$y)+c(0,1))
text(x=1:n, y=bp$stats[5,], labels=lab.txt, col=1, cex=1, pos=3, font=2)

enter image description here

+1

.

, order() do.call:

reord<-do.call("order", data.frame(
do.call(rbind, 
    lapply(res, function(x) c(sort(x), rep.int(0, ml-length(x))))
)
))
0

Source: https://habr.com/ru/post/1540722/


All Articles