I have a database of 30k ranges, each of which is defined as a pair of start and end points:
[12,80],[34,60],[34,9000],[76,743],...
I would like to write a Perl routine that range (not from the database) and returns the number of ranges in the database that completely "include" the given range.
For example, if we had only these 4 ranges in the database, and the range of queries [38,70], the subroutine should return 2, since the first and third ranges completely contain the range of queries.
Problem: I want to make requests as cheap as possible, I don't mind doing a lot of preprocessing if that helps.
A few notes:
I freely used the word "database", I do not mean the actual database (for example, SQL); this is just a long list of ranges.
My world is circular ... There is a given max_length(for example, 9999), and ranges, such as [8541,6], are legal (you can consider it as a single range, which is a union of [8541,9999]and [1,6]).
Thanks Dave
UPDATE
This was my original code:
use strict;
use warnings;
my $max_length = 200;
my @ranges = (
{ START => 10, END => 100 },
{ START => 30, END => 90 },
{ START => 50, END => 80 },
{ START => 180, END => 30 }
);
sub n_covering_ranges($) {
my ($query_h) = shift;
my $start = $query_h->{START};
my $end = $query_h->{END};
my $count = 0;
if ( $end >= $start ) {
foreach my $range_h (@ranges) {
if (( $start >= $range_h->{START} and $end <= $range_h->{END} )
or ( $range_h->{END} <= $range_h->{START} and $range_h->{START} <= $end )
or ( $range_h->{END} <= $range_h->{START} and $range_h->{END} >= $end)
)
{
$count++;
}
}
}
else {
foreach my $range_h (@ranges) {
if ( $start >= $range_h->{START} and $end <= $range_h->{END} ) {
$count++;
}
}
}
return $count;
}
print n_covering_ranges( { START => 1, END => 10 } ), "\n";
print n_covering_ranges( { START => 30, END => 70 } ), "\n";
and, yes, I know that they ifare ugly and can be done much better and more efficiently.
UPDATE 2 - SUGGESTED VENTILATION SOLUTIONS
: , cjm, , , , . !
, , :
use strict;
use warnings;
package RangeMap;
sub new {
my $class = shift;
my $max_length = shift;
my @lookup;
for (@_) {
my ( $start, $end ) = @$_;
my @idx
= $end >= $start
? $start .. $end
: ( $start .. $max_length, 0 .. $end );
for my $i (@idx) { $lookup[$i] .= pack 'L', $end }
}
bless \@lookup, $class;
}
sub num_ranges_containing {
my $self = shift;
my ( $start, $end ) = @_;
return 0 unless defined $self->[$start];
return 0 + grep { $end <= $_ } unpack 'L*', $self->[$start];
}
1;
use strict;
use warnings;
package cjm;
sub new {
my $class = shift;
my $max_length = shift;
my $self = {};
bless $self, $class;
$self->{MAX_LENGTH} = $max_length;
my @normal = ();
my @wrapped = ();
foreach my $r (@_) {
if ( $r->[0] <= $r->[1] ) {
push @normal, $r;
}
else {
push @wrapped, $r;
}
}
$self->{NORMAL} = \@normal;
$self->{WRAPPED} = \@wrapped;
return $self;
}
sub num_ranges_containing {
my $self = shift;
my ( $start, $end ) = @_;
if ( $start <= $end ) {
return ( grep { $_->[0] <= $start and $_->[1] >= $end }
@{ $self->{NORMAL} } )
+ ( grep { $end <= $_->[1] or $_->[0] <= $start }
@{ $self->{WRAPPED} } );
}
else {
return ( grep { $_->[0] <= $start and $_->[1] >= $end }
@{ $self->{WRAPPED} } )
+ ( grep { $_->[0] == 1 and $_->[1] == $self->{MAX_LENGTH} }
@{ $self->{NORMAL} } );
}
}
1;
: $max_length=3150000, 17000 , , 10000 . ( ) . :
cjm creation done in 0.0082 seconds
cjm querying done in 21.209857 seconds
RangeMap creation done in 45.840982 seconds
RangeMap querying done in 0.04941 seconds
! -!
, , , , () . (nstore) ? . retrieve? - ? , , .
3
nstore RangeMap. , . , 1 , 1000 . , , - , . . : http://www.perlmonks.org/?node_id=861961.
4 - RangeMap
, RangeMap . BrowserUK PerlMonks . , $max_lenght=10 [6,2]. [7,8]. 1, 0.
, :
use strict;
use warnings;
package FastRanges;
sub new($$$) {
my $class = shift;
my $max_length = shift;
my $ranges_a = shift;
my @lookup;
for ( @{$ranges_a} ) {
my ( $start, $end ) = @$_;
my @idx
= $end >= $start
? $start .. $end
: ( $start .. $max_length, 1 .. $end );
for my $i (@idx) { $lookup[$i] .= pack 'L', $end }
}
bless \@lookup, $class;
}
sub num_ranges_containing($$$) {
my $self = shift;
my ( $start, $end ) = @_;
return 0
unless ( defined $self->[$start] )
;
if ( $end >= $start ) {
return 0 + grep { $_ < $start or $end <= $_ } unpack 'L*',
$self->[$start];
}
else {
return 0 + grep { $_ < $start and $end <= $_ } unpack 'L*',
$self->[$start];
}
}
1;
.