, , . , SQL, .
Step 1 - Recombine contiguous date ranges within each table. For example, given these records -
ID, Start_Date, End_Date
1, 2010-01-01, 2010-01-31
1, 2010-02-01, 2010-02-28
the recombined result is -
ID, Start_Date, End_Date
1, 2010-01-01, 2010-02-28.
Here is the query I used for this step:
-- Merge contiguous date ranges per Id in Table1.
--
-- Cte_recomb: starts from every Table1 row (Hopcount = 1) and repeatedly
--   extends a chain by appending any row of the same Id whose Start_date is
--   the day after the chain's current End_date. Hopcount is the number of
--   rows in the chain.
-- Cte_maxenddate: for each (Id, Start_date) keeps the furthest End_date
--   reached by any chain beginning at that Start_date.
-- Final SELECT: anti-join that drops every range contained in a range of the
--   same Id that starts earlier and ends on or after it, leaving only the
--   fully merged ranges.
--
-- NOTE(review): "+ 1 day" is DB2-style labeled-duration arithmetic; adjust
-- the date arithmetic if this is run on another engine.
WITH Cte_recomb (Id, Start_date, End_date, Hopcount) AS
(SELECT Id,
        Start_date,
        End_date,
        1 AS Hopcount
   FROM Table1
 UNION ALL
 -- Recursive member. The comma join is kept as written.
 -- NOTE(review): some engines restrict explicit JOIN syntax against the
 -- recursive reference -- confirm before converting this to INNER JOIN.
 SELECT Cte_recomb.Id,
        Cte_recomb.Start_date,
        Table1.End_date,
        -- Fixed: the original referenced "Recomb.Hopcount", but no
        -- correlation name "Recomb" is defined in this query.
        (Cte_recomb.Hopcount + 1) AS Hopcount
   FROM Cte_recomb, Table1
  WHERE (Cte_recomb.Id = Table1.Id) AND
        (Cte_recomb.End_date + 1 day = Table1.Start_date)),
Cte_maxenddate AS
(SELECT Id,
        Start_date,
        Max (End_date) AS End_date
   FROM Cte_recomb
  GROUP BY Id, Start_date)
SELECT Maxend.*
  FROM Cte_maxenddate AS Maxend
  LEFT JOIN
       Cte_recomb AS Nextrec
    ON (Nextrec.Id = Maxend.Id) AND
       (Nextrec.Start_date < Maxend.Start_date) AND
       (Nextrec.End_date >= Maxend.End_date)
 WHERE Nextrec.Id IS NULL
 -- Ordering moved here from inside Cte_maxenddate: an ORDER BY inside a CTE
 -- does not determine the order of the final result set.
 ORDER BY Maxend.Id, Maxend.Start_date;
Step 2 -
I then created another dataset containing one record for each overlap between the two tables. An additional step is needed to find the cases where a record in table 1 has no corresponding record in table 2 at all.
-- One row per pair of overlapping Table1 / Table2 date ranges for the same Id.
-- Pairs whose start and end dates are both identical are excluded.
-- Table1 rows with no overlapping Table2 row are NOT returned (INNER JOIN);
-- those must be found by a separate step.
SELECT Table1.Id,
       Table1.Start_date AS Table1_start_date,
       Table1.End_date   AS Table1_end_date,
       Table2.Start_date AS Table2_start_date,
       Table2.End_date   AS Table2_end_date
  FROM Table1
 INNER JOIN
       Table2
       -- Fixed join key: the original compared Table1.Plcy_id_sk to an
       -- unqualified Id, which is ambiguous or never correlates Table2.
       -- NOTE(review): assumes Table2 exposes the key as Id, like Table1 -- confirm.
    ON (Table1.Id = Table2.Id) AND
       -- Two closed ranges overlap when either one starts inside the other.
       ( (Table1.Start_date BETWEEN Table2.Start_date AND Table2.End_date) OR
         (Table2.Start_date BETWEEN Table1.Start_date AND Table1.End_date)) AND
       -- Skip exact matches: at least one boundary must differ.
       ( (Table1.Start_date <> Table2.Start_date) OR
         (Table1.End_date <> Table2.End_date))
 ORDER BY Table1.Id, Table1.Start_date, Table2.Start_date;
Step 3 -
I take the dataset above and run the following SAS job. I tried to do this in pure SQL with recursive queries, but it got uglier every time I looked at it.
/* Builds Table1_Gaps: for each Table1 date range in Table1_Compare, writes   */
/* one row per sub-range (Gap_Start_Date .. Gap_End_Date) that the matching   */
/* Table2 ranges do not cover.                                                */
/* BY processing requires the input to be sorted by ID, Table1_Start_Date,    */
/* Table2_Start_Date.                                                         */
Data Table1_Gaps;
Set Table1_Compare;
By ID Table1_Start_Date Table2_Start_Date;
format Gap_Start_Date yymmdd10.;
format Gap_End_Date yymmdd10.;
format Old_Start_Date yymmdd10.;
format Old_End_Date yymmdd10.;
/* Carry the previous row's Table2 dates across iterations. */
Retain Old_Start_Date Old_End_Date;
/* Missing Table2_End_Date: the whole Table1 range is written as one gap.     */
/* NOTE(review): presumably these rows come from a separate no-match step;    */
/* confirm that Table1_Compare actually contains them.                        */
IF (Table2_End_Date = .) then do;
Gap_Start_Date = Table1_Start_Date;
Gap_End_Date = Table1_End_Date;
output;
end;
else do;
/* Table2 range starts after the Table1 range starts. */
If (Table2_Start_Date > Table1_Start_Date) then do;
/* First row of this Table1 range: leading gap from the Table1 start up to    */
/* the day before the Table2 range starts.                                    */
if first.Table1_Start_Date then do;
Gap_Start_Date = Table1_Start_Date;
Gap_End_Date = Table2_Start_Date - 1;
output;
end;
/* Later row: gap from the day after the previous row's Table2 end up to the  */
/* day before this Table2 range starts.                                       */
/* NOTE(review): if the previous Table2 range ends on or after the day before */
/* this one starts, the row written has Gap_Start_Date > Gap_End_Date.        */
else do;
Gap_Start_Date = Old_End_Date + 1;
Gap_End_Date = Table2_Start_Date - 1;
output;
end;
end;
/* Table2 range ends before the Table1 range ends: on the last row of this    */
/* Table1 range, write the trailing gap through the Table1 end date.          */
If (Table2_End_Date < Table1_End_Date) then do;
if Last.Table1_Start_Date then do;
Gap_Start_Date = Table2_End_Date + 1;
Gap_End_Date = Table1_End_Date;
output;
end;
end;
end;
/* Remember this row's Table2 dates for the next iteration.                   */
/* NOTE(review): Old_Start_Date is assigned but never read in this step.      */
Old_Start_Date = Table2_Start_Date;
Old_End_Date = Table2_End_Date;
/* The helper variables are not written to the output dataset. */
drop Old_Start_Date Old_End_Date;
run;
I have not yet fully confirmed this, but this approach seems to have given me the results I wanted. Any thoughts?