Algorithm for automating paired labels for grouping values in R

Question

Algorithm for automating paired labels for grouping values in R

After a long struggle with this problem, I hope to get some tips here. I am wondering if anyone knows of an automated method for determining paired grouping labels by value. The question does not depend on the significance test (for example, Tukey for parametric or Mann-Whitney for non-parametric) - in these pairwise comparisons, some figures of the boxplot type often represent these groups with a sub script:

enter image description here

, . , - , , , , , , 2 , 3 .., , .

, 1 3 5, 3 5 (.. ).

:

set.seed(1)
n <- 7
n2 <- 100
mu <- cumsum(runif(n, min=-3, max=3))
sigma <- runif(n, min=1, max=3)

dat <- vector(mode="list", n)
for(i in seq(dat)){
    dat[[i]] <- rnorm(n2, mean=mu[i], sd=sigma[i])
}

df <- data.frame(group=as.factor(rep(seq(n), each=n2)), y=unlist(dat))

bp <- boxplot(y ~ group, df, notch=TRUE)
kr <- kruskal.test(y ~ group, df)
kr
mw <- pairwise.wilcox.test(df$y, df$g)
mw
mw$p.value > 0.05 # TRUE means that the levels are not significantly different at the p=0.05 level

#      1     2     3     4     5     6
#2 FALSE    NA    NA    NA    NA    NA
#3  TRUE FALSE    NA    NA    NA    NA
#4 FALSE FALSE FALSE    NA    NA    NA
#5  TRUE FALSE FALSE FALSE    NA    NA
#6 FALSE FALSE FALSE  TRUE FALSE    NA
#7 FALSE FALSE FALSE FALSE FALSE FALSE

text(x=1:n, y=bp$stats[4,], labels=c("AB", "C", "A", "D", "B", "D", "E"), col=1, cex=1.5, pos=3, font=2)

+4

algorithm r clique-problem

Marc in the box 15 '14 14:48

3

, , :

set.seed(1)
n <- 7
n2 <- 100
mu <- cumsum(runif(n, min=-3, max=3))
sigma <- runif(n, min=1, max=3)

dat <- vector(mode="list", n)
for(i in seq(dat)){
    dat[[i]] <- rnorm(n2, mean=mu[i], sd=sigma[i])
}
df <- data.frame(group=as.factor(rep(seq(n), each=n2)), y=unlist(dat))
bp <- boxplot(y ~ group, df, notch=TRUE)


#significance test
kr <- kruskal.test(y ~ group, df)
mw <- pairwise.wilcox.test(df$y, df$g)

#matrix showing connections between levels
g <- as.matrix(mw$p.value > 0.05)
g <- cbind(rbind(NA, g), NA)
g <- replace(g, is.na(g), FALSE)
g <- g + t(g)
diag(g) <- 1
rownames(g) <- 1:n
colnames(g) <- 1:n
g

#install.packages("igraph")
library(igraph)

# Load data
same <- which(g==1)
topology <- data.frame(N1=((same-1) %% n) + 1, N2=((same-1) %/% n) + 1)
topology <- topology[order(topology[[1]]),] # Get rid of loops and ensure right naming of vertices
g3 <- simplify(graph.data.frame(topology,directed = FALSE))
get.data.frame(g3)

# Plot graph
plot(g3)

# Calcuate the maximal cliques
res <- maximal.cliques(g3)

# Reorder given the smallest level
res <- sapply(res, sort)
res <- res[order(sapply(res,function(x)paste0(sort(x),collapse=".")))]

ml<-max(sapply(res, length))
reord<-do.call(order, data.frame(
    do.call(rbind, 
        lapply(res, function(x) c(sort(x), rep.int(0, ml-length(x))))
    )
))
res <- res[reord]

lab.txt <- vector(mode="list", n)
lab <- letters[seq(res)]
for(i in seq(res)){
    for(j in res[[i]]){
        lab.txt[[j]] <- paste0(lab.txt[[j]], lab[i])
    }
}

bp <- boxplot(y ~ group, df, notch=TRUE, outline=FALSE, ylim=range(df$y)+c(0,1))
text(x=1:n, y=bp$stats[5,], labels=lab.txt, col=1, cex=1, pos=3, font=2)

+1

Marc in the box 19 '14 4:29

.

, order() do.call:

reord<-do.call("order", data.frame(
do.call(rbind, 
    lapply(res, function(x) c(sort(x), rep.int(0, ml-length(x))))
)
))

0

Marcelo Araya-Salas 24 . '14 14:25

David Eisenstat · Accepted Answer · 2014-05-15T16:21:52+0000

. . , . , , , , . , . , ( ? ?) . . ( , .) . , , .

, , , .

3---1---5       4--6

. Bron - Kerbosch, . {1, 3}, {1, 5} {4, 6}. , {1} , , {1, 3}. {1, 3, 5} , 3 5.

  1
 / \
3---5       4--6,

{1, 3, 5} {4, 6}.

. . , , , (1,2) < (1,5) (2,3) (2,5) (3,4). , , , , , . .

, . memoize: , . , R , . ( , .)

Algorithm for automating paired labels for grouping values ​​in R

:

More articles:

Algorithm for automating paired labels for grouping values in R