R, BrodieG.
, , , .
[Michael McCool .., - ].
. KNN.
1.
system.time (1e6), , , , 95% . , .
# Benchmark fixture: random cluster centres and random observations,
# both filled with draws from runif().
# Number of columns in each row of the two matrices below.
vals <- 50
# Number of cluster centres.
clusts <- 6
# clusts x vals matrix; each row is one cluster centre.
ClusterCenters <- matrix(runif(vals * clusts), nrow=clusts)
# Number of observations.
data.count <- 1e6
# data.count x vals matrix; each row is one observation.
calcData <- matrix(runif(data.count * vals), nrow=data.count)
# Baseline: Euclidean distance from every row of calcData to each cluster
# centre, handling one centre per loop iteration.
# dists is (clusters x observations). It has to exist before rows can be
# assigned into it, so it is created here, outside the timed region.
dists <- matrix(NA_real_, nrow = nrow(ClusterCenters), ncol = nrow(calcData))
system.time({
  for (i in seq_len(nrow(ClusterCenters))) {
    # apply() over rows yields the per-observation difference vectors; they
    # are flattened and rebuilt row-wise so that rowSums() sees one
    # observation per row.
    diffs <- matrix(
      unlist(apply(calcData, 1, function(x) {
        x - ClusterCenters[i, ]
      })),
      ncol = ncol(calcData),
      byrow = TRUE
    )
    dists[i, ] <- rowSums(diffs^2)^0.5
  }
})
user system elapsed
71.62 1.13 73.13
# Baseline assignment: column i of dists holds the distances from
# observation i to every cluster centre, so which.min() on that column is
# the index of the nearest centre (first one on ties).
# The result vector is created up front, outside the timed region: indexed
# assignment into an object that does not exist yet is an error.
ClusterMemberships <- integer(nrow(calcData))
system.time({
  for (i in seq_len(nrow(calcData))) {
    ClusterMemberships[i] <- which.min(dists[, i])
  }
})
user system elapsed
5.29 0.00 5.31
2.
- R, , @BrodieG. BTW, , , 3-5X, .
# Vectorised version: for each cluster centre (a row of ClusterCenters),
# sweep() subtracts it from every row of calcData and the row-wise Euclidean
# norm is taken. apply() binds the per-centre distance vectors as columns,
# producing an (observations x clusters) matrix, so nothing is preallocated.
system.time({
  dists1 <- apply(
    ClusterCenters, 1,
    function(x) rowSums(sweep(calcData, 2, x, "-")^2)^0.5
  )
  # max.col() gives the column of the row-wise maximum; negating the
  # distances turns that into the index of the nearest centre.
  # ties.method = "first" picks the first of any tied columns.
  min.dist.vec <- max.col(-dists1, ties.method = "first")
})
user system elapsed
16.13 1.42 17.61
# Compare the sweep()/apply() assignments with ClusterMemberships.
all.equal(ClusterMemberships, min.dist.vec)
[1] TRUE
3.
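The squared Euclidean distance between observation i and cluster centre j is the sum, over all columns, of (calcData[i,] - ClusterCenters[j,]) ^ 2.
Expanding the square gives, for each column:
calcData[i,] ^ 2 - 2 * calcData[i,] * ClusterCenters[j,] + ClusterCenters[j,] ^ 2
The first and last terms, summed over columns, are simply the row sums of
calcData * calcData
and of ClusterCenters * ClusterCenters, while the summed cross term is an ordinary matrix product (computed below in its transposed form, calcData %*% t(ClusterCenters)):
ClusterCenters %*% t(calcData)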
, , :
# Matrix-algebra version: uses |a - b|^2 = |a|^2 - 2 a.b + |b|^2 so that the
# bulk of the work is a single matrix product.
system.time({
  # Squared norm of every observation (length nrow(calcData)).
  data2 <- rowSums(calcData * calcData)
  # Squared norm of every cluster centre (length nrow(ClusterCenters)).
  clusters2 <- rowSums(ClusterCenters * ClusterCenters)
  # (observations x clusters) matrix of dot products.
  ClustersXdata <- calcData %*% t(ClusterCenters)
  # data2 recycles down the columns, i.e. one value per row; sweep() then
  # adds clusters2[j] to column j. Rounding in the expanded form can leave a
  # squared distance marginally below zero when an observation (nearly)
  # coincides with a centre, which would make ^0.5 return NaN, so the
  # values are clamped at 0 first. pmax() keeps the matrix dimensions.
  dists2 <- pmax(sweep(data2 - 2 * ClustersXdata, 2, clusters2, "+"), 0)^0.5
  # Row-wise index of the smallest distance; first column wins ties.
  min.dist.matrix <- max.col(-dists2, ties.method = "first")
})
user system elapsed
1.17 0.09 1.28
# Compare the matrix-algebra assignments with ClusterMemberships.
all.equal(ClusterMemberships, min.dist.matrix)
[1] TRUE
, . 10 ^ 3 10 ^ 7, 50X , 16 calcData.

4.
1- , KNN k = 1. knn , C. , . , :
# Pattern Representation 2: KNN
# The cluster centres serve as the training set, labelled 1..nrow by row
# index, and every row of calcData is classified by its single nearest
# neighbour (k = 1).
library("class")
system.time(
  min.dist.knn <- knn(
    train = ClusterCenters,
    test = calcData,
    cl = 1:nrow(ClusterCenters),
    k = 1
  )
)
user system elapsed
1.21 0.12 1.35
# Compare the knn() result, converted with as.integer(), with
# ClusterMemberships.
all.equal(ClusterMemberships, as.integer(min.dist.knn))
[1] TRUE
KNN 1e6, , , , 2X , KNN (15.9.vs. 29.1).
,, , , , c/++ . KNN NVIDIA, ParallelR