Range search in d-space with discrete coordinates

I would like to develop a range search algorithm that reports all points at a given distance from the query point.

Points are determined by d integer coordinates in a tiny range, say up to 6 bits per size (range 0..63), for a total number of bits of no more than 60 bits.

The distance metric is Manhattan or Euclid (to you), i.e. the sum of absolute or quadratic differences of coordinates. In the particular case of one bit per measurement, it is equal to the Hamming distance.

There may be up to a million points.

Do you know about a practical data structure that supports fast queries, say O(Log²(n)+k)or similar (with a space O(n)) in such conditions? Reasonable preprocessing time (sub-squared) is also required.

k-D trees are the first option, but they don’t use finiteness of coordinates and probably work poorly in high dimensions.

The case of one bit per coordinate is especially interesting. Even partial solutions are welcome.

+4
source share
2 answers

After some thought (and pushing @YvesDaoust) using the VP tree (Vantage Point Tree https://en.wikipedia.org/wiki/Vantage-point_tree ) is probably the best solution.

VP - BSP, , . ( . - / node. , node, .

JSFiddle http://jsfiddle.net/fgq1rfLk/

var DIMS = 16;
var BITS = 1;
var MASK = (Math.pow(2, BITS) - 1)|0;
var SIZE = DIMS * BITS;
var list = [];
var tree = null;
//
// set bit count (population count)
function popCnt(x) {
    x = x - ((x >> 1) & 0x55555555);
    x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
    x = (x + (x >> 4)) & 0x0F0F0F0F;
    x = x + (x >> 8);
    x = x + (x >> 16);
    return x & 0x0000003F;
}
//
// manhattan distance
function dist(a, b) {
    if(BITS == 1) {
        return popCnt(a ^ b);
    }
    var result = 0;
    for(var i=0; i<DIMS; i++) {
        var shr = i * BITS;
        result += Math.abs(((a >> shr) & MASK) - ((b >> shr) & MASK));
    }
    return result;
}
//
// Vantage point tree
// max size of tree leaf nodes
VP_LEAF_SIZE = 32;
// need to choose a reasonable maximum distance
VP_DISTANCE = (BITS === 1) ? SIZE : 32;
function VPTree(data) {
    this.radius = null;
    this.center = null;
    this.values = null;
    this.inside = null;
    this.outside = null;
    // 
    var n = data.length;
    var r = data[0];
    // leaf node?
    if(n <= VP_LEAF_SIZE || n <= 1) {
        this.values = [].concat(data);
        return this;
    }
    this.center = r;
    // process data for counts at all possible distances
    var buckets = Array(VP_DISTANCE + 1);
    for(var i=0; i<=VP_DISTANCE; i++) { 
        buckets[i] = 0; 
    }
    // distance counts
    for(var i=0; i<n; i++) {
        var v = data[i];
        var d = dist(r, v);
        if(d < VP_DISTANCE) {
            buckets[d]++;
        } else {
            buckets[VP_DISTANCE]++;
        }
    }
    // distance offsets
    var sum = 0;
    for(var i=0; i<=VP_DISTANCE; i++) { 
        buckets[i] = (sum += buckets[i]); 
    }
    // pivot index
    var median = n >> 1;
    var pivot = 1;
    for(var i=1; i<=VP_DISTANCE; i++) {
        if(buckets[i] > median) {
            pivot = (i > 1 && median - buckets[i - 1] <= buckets[i] - median) ? i - 1 : i;
            break; 
        }
    }
    this.radius = pivot;
    // parition data into inside and outside
    var iCount = buckets[pivot] - buckets[0];
    var oCount = (n - buckets[pivot]) - buckets[0];
    var iData = Array(iCount);
    var oData = Array(oCount);
    iCount = oCount = 0;
    for(var i=0; i<n; i++) {
        var v = data[i];
        if(v === r) { continue; };
        if(dist(r, v) <= pivot) {
            iData[iCount++] = v;
        } else {
            oData[oCount++] = v;
        }
    }
    // recursively create the rest of the tree
    if(iCount > 0) {
        this.inside = new VPTree(iData);
    }
    if(oCount > 0) {
        this.outside = new VPTree(oData);
    }
    return this;
}
VPTree.prototype.query = function(value, distance, result) {
    if(result === undefined) {
        return this.query(value, distance, []);
    }
    // leaf node, test all values
    if(this.values !== null) {
        for(var i=0; i<this.values.length; i++) {
            var v = this.values[i];
            if(dist(value, v) <= distance) {
                result.push(v);
            }
        }
        return result;
    }
    // recursively test the rest of the tree
    var tmpDistance = dist(value, this.center);
    // inside
    if(tmpDistance <= distance + this.radius) {
        if(tmpDistance <= distance) {
            result.push(this.center);
        }
        if(this.inside !== null) {
            this.inside.query(value, distance, result);
        }
    }
    // outside
    if(tmpDistance + distance > this.radius && this.outside !== null) {
        this.outside.query(value, distance, result);
    }
    return result;
}

EDIT JSFiddle 2d (x, y) (8, 8) http://jsfiddle.net/fgq1rfLk/1/

0

, d , , , , (, , , ), Kd- , VP- , (), VP-tree "" .

ANN [1] Kd- (L2 Manathan) ( , , , " " ) .

KdTree Geogram [2], ( ​​ANN) , , ( k L2)

Referencecs:

[1] https://www.cs.umd.edu/~mount/ANN/

[2] http://alice.loria.fr/software/geogram/doc/html/classGEO_1_1KdTree.html

0

Source: https://habr.com/ru/post/1608786/


All Articles