Just for the record: after adjusting your myMatrix matrix, I was able to reproduce the example almost exactly. The only difference (perhaps explainable?) lies in some opposite signs in Figure 2 (for example, u[1,1] is -0.22 instead of 0.22, as in W[1,1] of Figure 2). The correlation matrix is identical.
It should be noted that, contrary to the statement in the article (p. 13) that Spearman correlation is used, the exact result is obtained with the Pearson correlation method (the default of cor()).
Here is the code:
> # term document Matrix
> myMatrix <- TermDocumentMatrix(corpus)
>
> ## reorder rows
> myMatrix <- mm[match(c("human","interface","computer","user","system","response","time","eps","survey","trees","graph","minors"), rownames(mm)), ]
>
> # Delete terms that only appear in a document
> rowTotals <- apply(myMatrix, 1, sum)
> myMatrix.new <- myMatrix[rowTotals > 1, ]
>
> mySVD <- svd(myMatrix.new)
>
> ## Figure 1
> myMatrix.new
           Docs
Terms       1 2 3 4 5 6 7 8 9
  human     1 0 0 1 0 0 0 0 0
  interface 1 0 1 0 0 0 0 0 0
  computer  1 1 0 0 0 0 0 0 0
  user      0 1 1 0 1 0 0 0 0
  system    0 1 1 2 0 0 0 0 0
  response  0 1 0 0 1 0 0 0 0
  time      0 1 0 0 1 0 0 0 0
  eps       0 0 1 1 0 0 0 0 0
  survey    0 1 0 0 0 0 0 0 1
  trees     0 0 0 0 0 1 1 1 0
  graph     0 0 0 0 0 0 1 1 1
  minors    0 0 0 0 0 0 0 1 1
>
> ## mySVD Figure 2
> lapply(mySVD,round,2)
$d
[1] 3.34 2.54 2.35 1.64 1.50 1.31 0.85 0.56 0.36

$u
       [,1]  [,2]  [,3]  [,4]  [,5]  [,6]  [,7]  [,8]  [,9]
 [1,] -0.22 -0.11  0.29 -0.41 -0.11 -0.34 -0.52  0.06  0.41
 [2,] -0.20 -0.07  0.14 -0.55  0.28  0.50  0.07  0.01  0.11
 [3,] -0.24  0.04 -0.16 -0.59 -0.11 -0.25  0.30 -0.06 -0.49
 [4,] -0.40  0.06 -0.34  0.10  0.33  0.38  0.00  0.00 -0.01
 [5,] -0.64 -0.17  0.36  0.33 -0.16 -0.21  0.17 -0.03 -0.27
 [6,] -0.27  0.11 -0.43  0.07  0.08 -0.17 -0.28  0.02  0.05
 [7,] -0.27  0.11 -0.43  0.07  0.08 -0.17 -0.28  0.02  0.05
 [8,] -0.30 -0.14  0.33  0.19  0.11  0.27 -0.03  0.02  0.17
 [9,] -0.21  0.27 -0.18 -0.03 -0.54  0.08  0.47  0.04  0.58
[10,] -0.01  0.49  0.23  0.02  0.59 -0.39  0.29 -0.25  0.23
[11,] -0.04  0.62  0.22  0.00 -0.07  0.11 -0.16  0.68 -0.23
[12,] -0.03  0.45  0.14 -0.01 -0.30  0.28 -0.34 -0.68 -0.18

$v
       [,1]  [,2]  [,3]  [,4]  [,5]  [,6]  [,7]  [,8]  [,9]
 [1,] -0.20 -0.06  0.11 -0.95  0.05 -0.08 -0.18  0.01  0.06
 [2,] -0.61  0.17 -0.50 -0.03 -0.21 -0.26  0.43 -0.05 -0.24
 [3,] -0.46 -0.13  0.21  0.04  0.38  0.72  0.24 -0.01 -0.02
 [4,] -0.54 -0.23  0.57  0.27 -0.21 -0.37 -0.26  0.02  0.08
 [5,] -0.28  0.11 -0.51  0.15  0.33  0.03 -0.67  0.06  0.26
 [6,]  0.00  0.19  0.10  0.02  0.39 -0.30  0.34 -0.45  0.62
 [7,] -0.01  0.44  0.19  0.02  0.35 -0.21  0.15  0.76 -0.02
 [8,] -0.02  0.62  0.25  0.01  0.15  0.00 -0.25 -0.45 -0.52
 [9,] -0.08  0.53  0.08 -0.02 -0.60  0.36 -0.04  0.07  0.45

>
> Mp <- mySVD$u[, c(1,2)] %*% diag(mySVD$d)[c(1, 2), c(1, 2)] %*% t(mySVD$v[, c(1, 2)])
> rownames(Mp) <- rownames(myMatrix.new)
>
> ## Figure 3
> round(Mp,2)
           [,1] [,2]  [,3]  [,4] [,5]  [,6]  [,7]  [,8]  [,9]
human      0.16 0.40  0.38  0.47 0.18 -0.05 -0.12 -0.16 -0.09
interface  0.14 0.37  0.33  0.40 0.16 -0.03 -0.07 -0.10 -0.04
computer   0.15 0.51  0.36  0.41 0.24  0.02  0.06  0.09  0.12
user       0.26 0.84  0.61  0.70 0.39  0.03  0.08  0.12  0.19
system     0.45 1.23  1.05  1.27 0.56 -0.07 -0.15 -0.21 -0.05
response   0.16 0.58  0.38  0.42 0.28  0.06  0.13  0.19  0.22
time       0.16 0.58  0.38  0.42 0.28  0.06  0.13  0.19  0.22
eps        0.22 0.55  0.51  0.63 0.24 -0.07 -0.14 -0.20 -0.11
survey     0.10 0.53  0.23  0.21 0.27  0.14  0.31  0.44  0.42
trees     -0.06 0.23 -0.14 -0.27 0.14  0.24  0.55  0.77  0.66
graph     -0.06 0.34 -0.15 -0.30 0.20  0.31  0.69  0.98  0.85
minors    -0.04 0.25 -0.10 -0.21 0.15  0.22  0.50  0.71  0.62
>
> cor(Mp["human",],Mp["minors",])
[1] -0.83
>
> cor(Mp["human",],Mp["user",])
[1] 0.94
>
> ## Figure 4
> corMo <- cor(myMatrix.new)
> corMo[upper.tri(corMo,diag=TRUE)] <- 0
> corMo
      1     2     3     4     5     6    7    8 9
1  0.00  0.00  0.00  0.00  0.00  0.00 0.00 0.00 0
2 -0.19  0.00  0.00  0.00  0.00  0.00 0.00 0.00 0
3  0.00  0.00  0.00  0.00  0.00  0.00 0.00 0.00 0
4  0.00  0.00  0.47  0.00  0.00  0.00 0.00 0.00 0
5 -0.33  0.58  0.00 -0.31  0.00  0.00 0.00 0.00 0
6 -0.17 -0.30 -0.21 -0.16 -0.17  0.00 0.00 0.00 0
7 -0.26 -0.45 -0.32 -0.24 -0.26  0.67 0.00 0.00 0
8 -0.33 -0.58 -0.41 -0.31 -0.33  0.52 0.77 0.00 0
9 -0.33 -0.19 -0.41 -0.31 -0.33 -0.17 0.26 0.56 0
>
> corMp <- cor(Mp)
> corMp[upper.tri(corMp,diag=TRUE)] <- 0
> corMp
       [,1]  [,2]  [,3]  [,4]  [,5] [,6] [,7] [,8] [,9]
 [1,]  0.00  0.00  0.00  0.00  0.00    0    0    0    0
 [2,]  0.91  0.00  0.00  0.00  0.00    0    0    0    0
 [3,]  1.00  0.91  0.00  0.00  0.00    0    0    0    0
 [4,]  1.00  0.88  1.00  0.00  0.00    0    0    0    0
 [5,]  0.84  0.99  0.84  0.81  0.00    0    0    0    0
 [6,] -0.86 -0.57 -0.86 -0.89 -0.44    0    0    0    0
 [7,] -0.85 -0.56 -0.85 -0.88 -0.44    1    0    0    0
 [8,] -0.85 -0.56 -0.85 -0.88 -0.43    1    1    0    0
 [9,] -0.81 -0.50 -0.81 -0.84 -0.37    1    1    1    0
>