Sort the output of dist()

I have a matrix m

# Build the 4 x 4 example matrix one labelled row at a time; rbind() takes
# the row names from the argument names and leaves the columns unnamed.
m <- rbind(
  A = c(2, 1, 8, 5),
  B = c(7, 6, 3, 4),
  C = c(9, 3, 2, 8),
  D = c(1, 3, 7, 4)
)

Now I would like to order the rows of m based on their pairwise distances, so I use dist():

# Euclidean distance between every pair of rows of m; the result is a
# "dist" object that stores (and prints) only the lower triangle.
dist_m <- dist(m)

Printing dist_m gives:

          A         B         C
B  8.717798
C  9.899495  5.477226
D  2.645751  7.810250 10.246951

Since I want it to be ordered, I try sort(dist_m), which prints

[1]  2.645751  5.477226  7.810250  8.717798  9.899495 10.246951

This is almost what I want. But I would be happier if it also printed the names of the two rows that each distance belongs to, something like

 2.645751  A  D
 5.477226  B  C
 7.810250  B  D
 8.717798  A  B
 9.899495  A  C
10.246951  C  D

This, of course, is possible, but I have no idea how I could achieve this.

+4
source share
3 answers

One option is to convert the dist object to a matrix, replace the values in the upper triangle with 0, melt the matrix, subset the non-zero values, and then order by the value column.

library(reshape2)

# Expand to the full symmetric matrix, then zero the upper triangle so that
# every pair of rows is represented by a single cell.
m1 <- as.matrix(dist_m)
m1[upper.tri(m1)] <- 0

# Long format (Var1, Var2, value). Dropping the zero cells removes the
# diagonal and the zeroed triangle -- and also any pair whose distance is
# exactly 0.
m2 <- melt(m1)
m2 <- m2[which(m2$value != 0), ]
m2[order(m2$value), c("value", "Var2", "Var1")]
#         value Var2 Var1
#4   2.645751    A    D
#7   5.477226    B    C
#8   7.810250    B    D
#2   8.717798    A    B
#3   9.899495    A    C
#12 10.246951    C    D

Or a base R option (credit to @David Arenburg), using 'm1' from above:

 # Locate the non-zero cells once, then pair their (row, col) positions with
 # the corresponding distances and sort the rows by distance.
 nonzero <- m1 != 0
 m2 <- cbind(which(nonzero, arr.ind = TRUE), value = m1[nonzero])
 m2[order(m2[, "value"]), ]
+3

Handling the case where a distance is exactly 0

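The answer from akrun breaks down when a distance is exactly 0, because subset then drops that pair as well. Instead, set the upper triangle to NA, and also set the diagonal to NA using diag(). Then, instead of subset after melt, use na.omit after order: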

library(reshape2)

# Example data in which rows A and B are exact duplicates.
m <- rbind(
  A = c(2, 1, 8, 5),
  B = c(2, 1, 8, 5),
  C = c(9, 3, 2, 8),
  D = c(1, 3, 7, 4)
)

# Pairwise row distances; the A-B entry comes out as 0.
dist_m <- dist(m)
dist_m

# A and B are identical
             A         B         C
B  0.000000                    
C  9.899495  9.899495          
D  2.645751  2.645751 10.246951

# Mask the upper triangle together with the diagonal in one step, leaving
# exactly one cell per pair of rows.
m1 <- as.matrix(dist_m)
m1[upper.tri(m1, diag = TRUE)] <- NA

# Long format sorted by distance (NA rows sort last), then drop the masked
# cells; genuine zero distances survive because only NA rows are removed.
m2 <- melt(m1)
na.omit(m2[order(m2$value), c("value", "Var2", "Var1")])

The result, with the identical pair A and B retained:

       value Var2 Var1
2   0.000000    A    B
4   2.645751    A    D
8   2.645751    B    D
3   9.899495    A    C
7   9.899495    B    C
12 10.246951    C    D
+1

Using base R:

# Expand the "dist" object into a full symmetric matrix; its dimnames carry
# the labels of the original rows.
dm <- as.matrix(dist_m)

# One record per matrix cell: the distance plus its column and row index.
df <- data.frame(data = c(dm),
                 column = c(col(dm)),
                 row = c(row(dm)))

# Keep only the strictly lower triangle so each pair appears exactly once
# (this also drops the diagonal, but keeps genuine zero distances).
df <- df[df$row > df$column, ]

# Replace the numeric indices with the actual row labels. Taking them from
# the matrix rather than from LETTERS keeps this correct for arbitrary row
# names and for more than 26 rows.
df$row <- rownames(dm)[df$row]
df$column <- rownames(dm)[df$column]

# Sort by distance and keep the result, so that the labelled frame itself is
# ordered rather than only a printed copy of it.
df <- df[order(df$data), ]
df
0
source

Source: https://habr.com/ru/post/1599348/


All Articles