An SQL query to search for all timestamps covered by an interval in A but not covered by an interval in B ("subtract" or "exclude" between multiple intervals)

I have several tables in a PostgreSQL 9.4 database, where each row contains an interval in the form of two columns "start" (inclusive) and "stop" (exclusive).

Consider the following pseudocode (tables are more complicated).

CREATE TABLE left (   
    start TIMESTAMP,   
    stop TIMESTAMP,   
    [...] 
);

CREATE TABLE right (
    start TIMESTAMP,   
    stop TIMESTAMP,   
    [...] 
);

Intervals include but are not limited to stop.

Now I need a query to find all possible time intervals when the interval on the left is covered, but not at the same time on the right, covering the same interval.

"" , . , , :

left     [-----row 1------------------)   [--row 2--)    [--row 3----)
right  [--row1--)    [--row2--)    [--row3--)         
result          [----)        [----)        [-------)    [-----------)

"left" , , "right" , .

, , / "" , / "", .

, , - , "start" "stop".

( , ), , ...

, .

+4
4

tsrange ( ):

CREATE TABLE leftr (
    duration tsrange
);

CREATE TABLE rightr (
    duration tsrange
);

insert into leftr values
('[2015-01-03, 2015-01-20)'),
('[2015-01-25, 2015-02-01)'),
('[2015-02-08, 2015-02-15)');

insert into rightr values
('[2015-01-01, 2015-01-06)'),
('[2015-01-10, 2015-01-15)'),
('[2015-01-18, 2015-01-26)');

:

select duration* gap result
from (
    select tsrange(upper(duration), lower(lead(duration) over (order by duration))) gap
    from rightr
    ) inv
join leftr
on duration && gap

                    result                     
-----------------------------------------------
 ["2015-01-06 00:00:00","2015-01-10 00:00:00")
 ["2015-01-15 00:00:00","2015-01-18 00:00:00")
 ["2015-01-26 00:00:00","2015-02-01 00:00:00")
 ["2015-02-08 00:00:00","2015-02-15 00:00:00")
(4 rows)    

:

l          [-----row 1------------------)   [--row 2--)    [--row 3----)
r        [--row1--)    [--row2--)    [--row3--)
inv(r)            [----)        [----)        [------------------------->
l*inv(r)          [----)        [----)        [-------)    [-----------)
+2

tsrange , , .

, , . . ( , ).

 insert into t_left 
 select 1,4 from dual union all
 select 6,9 from dual union all
 select 12,13 from dual    
 ;

 insert into t_right 
 select 2,3 from dual union all
 select 5,7 from dual union all
 select 8,10 from dual union all
 select 11,14 from dual    
 ;

 with event as  (
 select  i_start tst, 1 left_change, 0 right_change from t_left union all
 select  i_stop tst, -1 left_change, 0 right_change from t_left union all
 select  i_start  tst, 0 left_change, 1 right_change from t_right  union all
 select  i_stop tst, 0 left_change, -1 right_change from t_right
 )
 select tst, left_change, right_change,
 sum(left_change) over (order by tst) as is_left,
 sum(right_change) over (order by tst) as is_right,
 '['||tst||','||lead(tst) over (order by tst) ||')' intrvl
 from event
 order by tst;

(+1) (-1 CHANGE).

   TST LEFT_CHANGE RIGHT_CHANGE    IS_LEFT   IS_RIGHT INTRVL         

     1           1            0          1          0 [1,2)     
     2           0            1          1          1 [2,3)     
     3           0           -1          1          0 [3,4)    
     4          -1            0          0          0 [4,5)    
     5           0            1          0          1 [5,6)    
     6           1            0          1          1 [6,7)      
     7           0           -1          1          0 [7,8)    
     8           0            1          1          1 [8,9)     
     9          -1            0          0          1 [9,10)     
    10           0           -1          0          0 [10,11)   
    11           0            1          0          1 [11,12)   
    12           1            0          1          1 [12,13)   
    13          -1            0          0          1 [13,14)   
    14           0           -1          0          0 [14,) 

SUM

 sum(left_change) over (order by tst) 

, 1 beeing 0 beeing .

, () ,

is_left = 1 and is_right = 0

() .

:

  • , 0
  • Oracle, PLS Postgres
+1

For completeness: naive method, without the use of interval types. [I used the same sample data as @klin]

CREATE TABLE tleft (
    start TIMESTAMP,
    stop TIMESTAMP,
    payload text
);

INSERT INTO tleft(start,stop) VALUES
-- ('2015-01-08', '2015-03-07'),  ('2015-03-21', '2015-04-14'), ('2015-05-01', '2015-05-15') ;
('2015-01-03', '2015-01-20'), ('2015-01-25', '2015-02-01'), ('2015-02-08', '2015-02-15');

CREATE TABLE tright (
    start TIMESTAMP,
    stop TIMESTAMP,
    payload text
);
INSERT INTO tright(start,stop) VALUES
 -- ('2015-01-01', '2015-01-15'),  ('2015-02-01', '2015-02-14'), ('2015-03-01', '2015-04-07') ;
('2015-01-01', '2015-01-06'), ('2015-01-10', '2015-01-15'), ('2015-01-18', '2015-01-26');

        -- Combine all {start,stop} events into one time series
        -- , encoding the event-type into a state change.
        -- Note: this assumes non-overlapping intervals in both
        -- left and right tables.
WITH zzz AS (
    SELECT stamp, SUM(state) AS state
    FROM (
    SELECT 1 AS state, start AS stamp FROM tleft
        UNION ALL
        SELECT -1 AS state, stop AS stamp FROM tleft
    UNION ALL
        SELECT 2 AS state, start AS stamp FROM tright
        UNION ALL
        SELECT -2 AS state, stop AS stamp FROM tright
        ) zz
    GROUP BY stamp
    )
    -- Reconstruct *all* (sub)intervals
    -- , and calculate a "running sum" over the state variable
SELECT * FROM (
    SELECT zzz.stamp AS zstart
        , LEAD(zzz.stamp) OVER (www) AS zstop
        , zzz.state
        , row_number() OVER(www) AS rn
        , SUM(state) OVER(www) AS sstate
FROM zzz
        WINDOW www AS (ORDER BY stamp)
        ) sub
        -- extract only the (starting) state we are interested in
WHERE sub.sstate = 1
ORDER BY sub.zstart
        ;

Result:

DROP SCHEMA
CREATE SCHEMA
SET
CREATE TABLE
INSERT 0 3
CREATE TABLE
INSERT 0 3
       zstart        |        zstop        | state | rn | sstate 
---------------------+---------------------+-------+----+--------
 2015-01-06 00:00:00 | 2015-01-10 00:00:00 |    -2 |  3 |      1
 2015-01-15 00:00:00 | 2015-01-18 00:00:00 |    -2 |  5 |      1
 2015-01-26 00:00:00 | 2015-02-01 00:00:00 |    -2 |  9 |      1
 2015-02-08 00:00:00 | 2015-02-15 00:00:00 |     1 | 11 |      1
(4 rows)
0
source

If tsrangenot an option, maybe there is a stored procedure? Something like that:

--create tables
drop table if exists tdate1;
drop table if exists tdate2;

create table tdate1(start timestamp, stop timestamp);
create table tdate2(start timestamp, stop timestamp);

--populate tables
insert into tdate1(start, stop) values('2015-01-01 00:10', '2015-01-01 01:00');
insert into tdate2(start, stop) values('2015-01-01 00:00', '2015-01-01 00:20');
insert into tdate2(start, stop) values('2015-01-01 00:30', '2015-01-01 00:40');
insert into tdate2(start, stop) values('2015-01-01 00:50', '2015-01-01 01:20');
insert into tdate1(start, stop) values('2015-01-01 01:10', '2015-01-01 02:00');
insert into tdate1(start, stop) values('2015-01-01 02:10', '2015-01-01 03:00');

--stored procedure itself
create or replace function tdate_periods(out start timestamp, out stop timestamp)
    returns setof record as
$$
declare
    rec record;
    laststart timestamp = null;
    startdt timestamp = null;
    stopdt timestamp = null;
begin
    for rec in
        select
                t1.start as t1start,
                t1.stop as t1stop,
                t2.start as t2start,
                t2.stop as t2stop
            from tdate1 t1
            left join tdate2 t2 on t2.stop > t1.start or t2.start > t1.stop
    loop
        if laststart <> rec.t1start or laststart is null then
            if laststart is not null then
                if startdt < stopdt then
                    start = startdt;
                    stop = stopdt;
                    return next;

                    startdt = stopdt;
                end if;
            end if;

            startdt = rec.t1start;
            stopdt = rec.t1stop;

            laststart = startdt;
        end if;

        if rec.t2start is not null then
            if startdt < rec.t2start then
                start = startdt;
                stop = rec.t2start;
                return next;
            end if;

            startdt = rec.t2stop;
        end if;
    end loop;

    if startdt is not null and startdt < stopdt then
        start = startdt;
        stop = stopdt;
        return next;
    end if;
end
$$ language plpgsql;

--call
select * from tdate_periods();
0
source

Source: https://habr.com/ru/post/1613621/


All Articles