Get the distance from a data point to its centroid using Accord.NET

I am doing some clustering work with the Accord.NET library. Ultimately, I am trying to find the optimal number of clusters using the elbow method, which requires some relatively simple calculations. However, I am having a hard time getting the values I need to determine the best value of K to use in my KMeans model.

I have an example of data / code:

open Accord
open Accord.Math
open Accord.MachineLearning
open Accord.Statistics
open Accord.Statistics.Analysis

// Sample data set: 20 observations with 4 numeric features each.
let x =
    [|
        [| 4.0; 1.0; 1.0; 2.0 |]
        [| 2.0; 4.0; 1.0; 2.0 |]
        [| 2.0; 3.0; 1.0; 1.0 |]
        [| 3.0; 6.0; 2.0; 1.0 |]
        [| 4.0; 4.0; 1.0; 1.0 |]
        [| 5.0; 10.0; 1.0; 2.0 |]
        [| 7.0; 8.0; 1.0; 2.0 |]
        [| 6.0; 5.0; 1.0; 1.0 |]
        [| 7.0; 7.0; 2.0; 1.0 |]
        [| 5.0; 8.0; 1.0; 1.0 |]
        [| 4.0; 1.0; 1.0; 2.0 |]
        [| 3.0; 5.0; 0.0; 3.0 |]
        [| 1.0; 2.0; 0.0; 0.0 |]
        [| 4.0; 7.0; 1.0; 2.0 |]
        [| 5.0; 3.0; 2.0; 0.0 |]
        [| 4.0; 11.0; 0.0; 3.0 |]
        [| 8.0; 7.0; 2.0; 1.0 |]
        [| 5.0; 6.0; 0.0; 2.0 |]
        [| 8.0; 6.0; 3.0; 0.0 |]
        [| 4.0; 9.0; 0.0; 2.0 |]
    |]

and I can generate clusters quite easily with

// K-means learner configured with 5 clusters.
let kmeans = KMeans(5)

// Learn on `x` yields the cluster collection; Decide then gives one cluster
// index per row of `x`.
let kmeansMod = kmeans.Learn(x)
let clusters = kmeansMod.Decide(x)

x ? KMeans , , , .

, , . , -

// Pair every observation with its assigned cluster index: (cluster, row).
let dataAndClusters = x |> Array.zip clusters

/// Returns the centroid stored at index `i` of the cluster collection `m`.
let getCentroid (m: KMeansClusterCollection) (i: int) =
    let centroids = m.Centroids
    centroids.[i]

// For each (cluster, row) pair, subtract the cluster centroid from the row
// coordinate by coordinate and add the signed differences together.
// This is not a distance: positive and negative differences cancel out.
dataAndClusters
|> Array.map (fun (label, point) ->
    let centroid = getCentroid kmeansMod label
    let signedSum = Array.map2 (-) point centroid |> Array.sum
    label, signedSum)

val it : (int * float) [] =
  [|(1, 0.8); (0, -1.5); (1, -0.2); (0, 1.5); (0, -0.5); (4, 0.0); (2, 1.4);
    (2, -3.6); (2, 0.4); (3, 0.75); (1, 0.8); (0, 0.5); (1, -4.2); (3, -0.25);
    (1, 2.8); (4, 0.0); (2, 1.4); (3, -1.25); (2, 0.4); (3, 0.75)|]

? .

, , K KMeans. , , Stats.StackExchange.com. , " ", .

+4
1

, , .

, , R , , R .

:

1. From each data value, subtract the centroid values
2. Sum the differences for a given data/centroid pair
3. Square the differences
4. Find the square root of the differences.

:

/// Euclidean distance from every observation to the centroid of the cluster
/// it was assigned to, as (cluster index, distance) pairs.
let distances =
    dataAndClusters
    |> Array.map (fun (c, d) ->
        let centroid = getCentroid kmeansMod c
        // Square each coordinate difference BEFORE summing. Summing first and
        // squaring afterwards only gives the absolute value of the signed sum,
        // so differences of opposite sign cancel and the result is not a distance.
        let sumOfSquares =
            Array.map2 (fun p q -> (p - q) ** 2.0) d centroid
            |> Array.sum
        c, sqrt sumOfSquares)

,

|> float) ** 2.0 float , ( , x**y)

|> sqrt), .

, . .

0

Source: https://habr.com/ru/post/1663634/


All Articles