How can I efficiently calculate ranges that span a certain range in Perl?

I have a database of 30k ranges, each of which is defined as a pair of start and end points:

[12,80],[34,60],[34,9000],[76,743],...

I would like to write a Perl routine that range (not from the database) and returns the number of ranges in the database that completely "include" the given range.

For example, if we had only these 4 ranges in the database, and the range of queries [38,70], the subroutine should return 2, since the first and third ranges completely contain the range of queries.

Problem: I want to make requests as cheap as possible, I don't mind doing a lot of preprocessing if that helps.

A few notes:

  • I freely used the word "database", I do not mean the actual database (for example, SQL); this is just a long list of ranges.

  • My world is circular ... There is a given max_length(for example, 9999), and ranges, such as [8541,6], are legal (you can consider it as a single range, which is a union of [8541,9999]and [1,6]).

Thanks Dave

UPDATE This was my original code:

use strict;
use warnings;

my $max_length = 200;
my @ranges     = (
    { START => 10,   END => 100 },
    { START => 30,   END => 90 },
    { START => 50, END => 80 },
    { START => 180,  END => 30 }
);

sub n_covering_ranges($) {
    my ($query_h) = shift;
    my $start     = $query_h->{START};
    my $end       = $query_h->{END};
    my $count     = 0;
    if ( $end >= $start ) {

        # query range is normal
        foreach my $range_h (@ranges) {
            if (( $start >= $range_h->{START} and $end <= $range_h->{END} )
                or (    $range_h->{END} <= $range_h->{START} and  $range_h->{START} <= $end )
                or ( $range_h->{END} <= $range_h->{START} and  $range_h->{END} >= $end)
                )
            {
                $count++;
            }
        }

    }

    else {

        # query range is hanging over edge
        # only other hanging over edges can contain it
        foreach my $range_h (@ranges) {
            if ( $start >= $range_h->{START} and $end <= $range_h->{END} ) {
                $count++;
            }
        }

    }

    return $count;
}

print n_covering_ranges( { START => 1, END => 10 } ), "\n";
print n_covering_ranges( { START => 30, END => 70 } ), "\n";

and, yes, I know that they ifare ugly and can be done much better and more efficiently.

UPDATE 2 - SUGGESTED VENTILATION SOLUTIONS

: , cjm, , , , . !

, , :

use strict;
use warnings;

package RangeMap;

sub new {
    my $class      = shift;
    my $max_length = shift;
    my @lookup;
    for (@_) {
        my ( $start, $end ) = @$_;
        my @idx
            = $end >= $start
            ? $start .. $end
            : ( $start .. $max_length, 0 .. $end );
        for my $i (@idx) { $lookup[$i] .= pack 'L', $end }
    }
    bless \@lookup, $class;
}

sub num_ranges_containing {
    my $self = shift;
    my ( $start, $end ) = @_;
    return 0 unless defined $self->[$start];
    return 0 + grep { $end <= $_ } unpack 'L*', $self->[$start];
}

1;

use strict;
use warnings;

package cjm;

sub new {
    my $class      = shift;
    my $max_length = shift;

    my $self = {};
    bless $self, $class;

    $self->{MAX_LENGTH} = $max_length;

    my @normal  = ();
    my @wrapped = ();

    foreach my $r (@_) {
        if ( $r->[0] <= $r->[1] ) {
            push @normal, $r;
        }
        else {
            push @wrapped, $r;
        }
    }

    $self->{NORMAL}  = \@normal;
    $self->{WRAPPED} = \@wrapped;
    return $self;
}

sub num_ranges_containing {
    my $self = shift;
    my ( $start, $end ) = @_;

    if ( $start <= $end ) {

        # This is a normal range
        return ( grep { $_->[0] <= $start and $_->[1] >= $end }
                @{ $self->{NORMAL} } )
            + ( grep { $end <= $_->[1] or $_->[0] <= $start }
                @{ $self->{WRAPPED} } );
    }
    else {

        # This is a wrapped range
        return ( grep { $_->[0] <= $start and $_->[1] >= $end }
                @{ $self->{WRAPPED} } )

            # This part should probably be calculated only once:
            + ( grep { $_->[0] == 1 and $_->[1] == $self->{MAX_LENGTH} }
                @{ $self->{NORMAL} } );
    }
}

1;

: $max_length=3150000, 17000 , , 10000 . ( ) . :

cjm creation done in 0.0082 seconds
cjm querying done in 21.209857 seconds
RangeMap creation done in 45.840982 seconds
RangeMap querying done in 0.04941 seconds

! -! , , , , () . (nstore) ? . retrieve? - ? , , .

3

nstore RangeMap. , . , 1 , 1000 . , , - , . . : http://www.perlmonks.org/?node_id=861961.

4 - RangeMap

, RangeMap . BrowserUK PerlMonks . , $max_lenght=10 [6,2]. [7,8]. 1, 0.

, :

use strict;
use warnings;

package FastRanges;

sub new($$$) {
    my $class      = shift;
    my $max_length = shift;
    my $ranges_a   = shift;
    my @lookup;
    for ( @{$ranges_a} ) {
        my ( $start, $end ) = @$_;
        my @idx
            = $end >= $start
            ? $start .. $end
            : ( $start .. $max_length, 1 .. $end );
        for my $i (@idx) { $lookup[$i] .= pack 'L', $end }
    }
    bless \@lookup, $class;
}

sub num_ranges_containing($$$) {
    my $self = shift;
    my ( $start, $end ) = @_;    # query range coordinates

    return 0
        unless ( defined $self->[$start] )
        ;    # no ranges overlap the start position of the query

    if ( $end >= $start ) {

        # query range is simple
        # any inverted range in {LOOKUP}[$start] must contain it,
        # and so does any simple range which ends at or after $end
        return 0 + grep { $_ < $start or $end <= $_ } unpack 'L*',
            $self->[$start];
    }
    else {

        # query range is inverted
        # only inverted ranges in {LOOKUP}[$start] which also end
        # at of after $end contain it. simple ranges can't contain
        # the query range
        return 0 + grep { $_ < $start and $end <= $_ } unpack 'L*',
            $self->[$start];
    }
}

1;

.

+3
6

?

my $max_length = 9999;
my @range = ( [12,80],[34,60],[34,9000] );

my @lookup;

for ( @range ) {
    my ( $start, $end ) = @$_;
    my @idx = $end >= $start ? $start .. $end : ( $start .. $max_length, 0 .. $end );
    for my $i ( @idx ) { $lookup[$i] .= pack "L", $end }
}

@lookup, , . , , , , . O (n) , ( , ), .

sub num_ranges_containing {
    my ( $start, $end ) = @_;

    return 0 unless defined $lookup[$start];

    # simple ranges can be contained in inverted ranges,
    # but inverted ranges can only be contained in inverted ranges
    my $counter = ( $start <= $end )
        ? sub { 0 + grep { $_ < $start or  $end <= $_ } }
        : sub { 0 + grep { $_ < $start and $end <= $_ } };

    return $counter->( unpack 'L*', $lookup[$start] );
}

.

package RangeMap;

sub new {
    my $class = shift;
    my $max_length = shift;
    my @lookup;
    for ( @_ ) {
        my ( $start, $end ) = @$_;
        my @idx = $end >= $start ? $start .. $end : ( $start .. $max_length, 0 .. $end );
        for my $i ( @idx ) { $lookup[$i] .= pack 'L', $end }
    }
    bless \@lookup, $class;
}

sub num_ranges_containing {
    my $self = shift;
    my ( $start, $end ) = @_;

    return 0 unless defined $self->[$start];

    # simple ranges can be contained in inverted ranges,
    # but inverted ranges can only be contained in inverted ranges
    my $counter = ( $start <= $end )
        ? sub { 0 + grep { $_ < $start or  $end <= $_ } }
        : sub { 0 + grep { $_ < $start and $end <= $_ } };

    return $counter->( unpack 'L*', $self->[$start] );
}

package main;
my $rm = RangeMap->new( 9999, [12,80],[34,60],[34,9000] );

, .

.

+2

:

use strict;
use warnings;

my @ranges = ([12,80],[34,60],[34,9000],[76,743]);

# Split ranges between normal & wrapped:
my (@normal, @wrapped);

foreach my $r (@ranges) {
  if ($r->[0] <= $r->[1]) {
    push @normal, $r;
  } else {
    push @wrapped, $r;
  }
}

sub count_matches
{
  my ($start, $end, $max_length, $normal, $wrapped) = @_;

  if ($start <= $end) {
    # This is a normal range
    return (grep { $_->[0] <= $start and $_->[1] >= $end } @$normal)
        +  (grep { $end <= $_->[1] or $_->[0] <= $start } @$wrapped);
  } else {
    # This is a wrapped range
    return (grep { $_->[0] <= $start and $_->[1] >= $end } @$wrapped)
        # This part should probably be calculated only once:
        +  (grep { $_->[0] == 1 and $_->[1] == $max_length } @$normal);
  }
} # end count_matches

print count_matches(38,70, 9999, \@normal, \@wrapped)."\n";
+2

, : Number::Interval:

my @ranges     = (
    { START => 10,   END => 100 },
    { START => 30,   END => 90 },
    { START => 50, END => 80 },
    { START => 180,  END => 30 }
);
my @intervals;
for my $range ( @ranges ) {
  my $int = new Number::Interval( Min => $range->{START},
                                  Max => $range->{END} );
  push @intervals, $int;
}

intersection(), , :

my $num_overlap = 0;
my $checkinterval = new Number::Interval( Min => $min, Max => $max );
for my $int ( @intervals ) {
  $num_overlap++ if $checkinterval->intersection( $int );
}

, "" ( "" Number::Interval), .

.

: , , intersection() , (, ). , contains() , , .

, Number::Interval, ...: -)

+2

? ? :

  * Iterate through the ranges
  * Foreach range, check if the test range is in it.
  * Profile and benchmark

Perl:

 my $test = [ $n, $m ];
 my @contains = map { 
      $test->[0] >= $_->[0] 
         and 
      $test->[1] <= $_->[1]
      } @ranges

, , . .

, , : , -, . , Stackoverflow , , . . , , . , , , , , .

+1

, , :

:

  • , , .

:

  • ,
  • ,
  • , (@start [0.. $start_index] @end [$ end_index.. $# end]).
+1

, , , ( , ).

.

package SimpleRange;

sub new {
    my $class = shift;
    my ($m, $n) = @_;
    bless { start => $m, end => $n }, $class;
}

sub start { shift->{start} }
sub end   { shift->{end}   }

sub covers {
    # Returns true if the range covers some other range.
    my ($self, $other) = @_;
    return 1 if $self->start <= $other->start
            and $self->end   >= $other->end;
    return;
}

Using this building block, we can create a transport range class that consists of 1 or 2 simple ranges (2 if the range spans around the edge of the universe). Like the class for simple ranges, this class defines a method covers. The logic in this method is pretty intuitive, because we can use the method coversprovided by our objects SimpleRange.

package WrappingRange;

sub new {
    my $class = shift;
    my ($raw_range, $MIN, $MAX) = @_;
    my ($m, $n) = @$raw_range;

    # Handle special case: a range that wraps all the way around.
    ($m, $n) = ($MIN, $MAX) if $m == $n + 1;

    my $self = {min => $MIN, max => $MAX};
    if ($m <= $n){
        $self->{top}  = SimpleRange->new($m, $n);
        $self->{wrap} = undef;
    }
    else {
        $self->{top}  = SimpleRange->new($m, $MAX);
        $self->{wrap} = SimpleRange->new($MIN, $n);    
    }
    bless $self, $class;
}

sub top  { shift->{top}  }
sub wrap { shift->{wrap} }
sub is_simple { ! shift->{wrap} }

sub simple_ranges {
    my $self = shift;
    return $self->is_simple ? $self->top : ($self->top, $self->wrap);
}

sub covers {
    my @selfR  = shift->simple_ranges;
    my @otherR = shift->simple_ranges;
    while (@selfR and @otherR){
        if ( $selfR[0]->covers($otherR[0]) ){
            shift @otherR;
        }
        else {
            shift @selfR;
        }
    }
    return if @otherR;
    return 1;
}

Run some tests:

package main;
main();

sub main {
    my ($MIN, $MAX) = (0, 200);

    my @raw_ranges = (
        [10, 100], [30, 90], [50, 80], [$MIN, $MAX],
        [180, 30], 
        [$MAX, $MAX - 1], [$MAX, $MAX - 2],
        [50, 49], [50, 48],
    );
    my @wrapping_ranges = map WrappingRange->new($_, $MIN, $MAX), @raw_ranges;

    my @tests = ( [1, 10], [30, 70], [160, 10], [190, 5] );
    for my $t (@tests){
        $t = WrappingRange->new($t, $MIN, $MAX);

        my @covers = map $_->covers($t) ? 1 : 0, @wrapping_ranges;

        my $n;
        $n += $_ for @covers;
        print "@covers  N=$n\n";
    }
}

Conclusion:

0 0 0 1 1 1 1 1 1  N=6
1 1 0 1 0 1 1 1 0  N=6
0 0 0 1 0 1 0 1 1  N=4
0 0 0 1 1 1 0 1 1  N=5
+1
source

Source: https://habr.com/ru/post/1766459/


All Articles