Date range with minimum and maximum dates from a dataset with records with a continuous date range

I have a data set with an identifier, status and date range for employees.
The introductory dataset below is the details of one employee.
The date ranges in the records are continuous (in exact order), so the start line of the second line will be the next end date of the first line.

If an employee leaves continuously for several months, then the table stores information with a date range divided into different months.
For example: in the input set, the employee took the leave from 16 to 10th 2016 to "31-12-2016" and joined the "1-1-2017".
So there are 3 entries for this item, but the dates are continuous. On output, I need this as a single record, as shown in the expected output dataset.

INPUT

Id  Status   StartDate   EndDate

1   Active   1-9-2007    15-10-2016
1   Sick     16-10-2016  31-10-2016
1   Sick     1-11-2016   30-11-2016
1   Sick     1-12-2016   31-12-2016
1   Active   1-1-2017    4-2-2017  
1   Unpaid   5-2-2017    9-2-2017  
1   Active   10-2-2017   11-2-2017 
1   Unpaid   12-2-2017   28-2-2017 
1   Unpaid   1-3-2017    31-3-2017 
1   Unpaid   1-4-2017    30-4-2017 
1   Active   1-5-2017    13-10-2017
1   Sick     14-10-2017  11-11-2017
1   Active   12-11-2017  NULL   

EXPECTED EXIT

Id   Status    StartDate    EndDate

1    Active    1-9-2007     15-10-2016
1    Sick      16-10-2016   31-12-2016
1    Active    1-1-2017     4-2-2017  
1    Unpaid    5-2-2017     9-2-2017  
1    Active    10-2-2017    11-2-2017 
1    Unpaid    12-2-2017    30-4-2017 
1    Active    1-5-2017     13-10-2017
1    Sick      14-10-2017   11-11-2017
1    Active    12-11-2017   NULL  

I can’t take the min (startdate) and max (EndDate) groups by id, status, because if the same employee took another sick leave, then the end date ('11 -11-2017 'in the example) comes as a date graduation.

can someone help me with a query on SQL Server 2014?

+4
source
4

GROUPING AND WINDOW.

  • reset
  • / .

;with x as
(
    select Id, Status, StartDate,  EndDate,
           iif (lag(Status) over (order by Id, StartDate) = Status, null, 1) rst
    from   emp
), y as
 (
    select Id, Status, StartDate, EndDate,
           sum(rst) over (order by Id, StartDate) grp
    from   x
 )

 select Id, 
        MIN(Status) as Status, 
        MIN(StartDate) StartDate, 
        MAX(EndDate) EndDate
 from   y
 group by Id, grp
 order by Id, grp

GO
Id | Status | StartDate           | EndDate            
-: | :----- | :------------------ | :------------------
 1 | Active | 01/09/2007 00:00:00 | 15/10/2016 00:00:00
 1 | Sick   | 16/10/2016 00:00:00 | 31/12/2016 00:00:00
 1 | Active | 01/01/2017 00:00:00 | 04/02/2017 00:00:00
 1 | Unpaid | 05/02/2017 00:00:00 | 09/02/2017 00:00:00
 1 | Active | 10/02/2017 00:00:00 | 11/02/2017 00:00:00
 1 | Unpaid | 12/02/2017 00:00:00 | 30/04/2017 00:00:00
 1 | Active | 01/05/2017 00:00:00 | 13/10/2017 00:00:00
 1 | Sick   | 14/10/2017 00:00:00 | 11/11/2017 00:00:00
 1 | Active | 12/11/2017 00:00:00 | null               

dbfiddle

+1

, , . , .

( ):

DECLARE @T AS TABLE
(
    Id int,
    Status varchar(10),
    StartDate date,
    EndDate date
);

SET DATEFORMAT DMY; -- This is needed because how you specified your dates.

INSERT INTO @T (Id, Status, StartDate, EndDate) VALUES
(1, 'Active', '1-9-2007', '15-10-2016'),
(1, 'Sick', '16-10-2016', '31-10-2016'),
(1, 'Sick', '1-11-2016', '30-11-2016'),
(1, 'Sick', '1-12-2016', '31-12-2016'),
(1, 'Active', '1-1-2017', '4-2-2017'),
(1, 'Unpaid', '5-2-2017', '9-2-2017'),
(1, 'Active', '10-2-2017', '11-2-2017'),
(1, 'Unpaid', '12-2-2017', '28-2-2017'),
(1, 'Unpaid', '1-3-2017', '31-3-2017'),
(1, 'Unpaid', '1-4-2017', '30-4-2017'),
(1, 'Active', '1-5-2017', '13-10-2017'),
(1, 'Sick', '14-10-2017', '11-11-2017'),
(1, 'Active', '12-11-2017',  NULL);

() :

;WITH CTE AS
(
SELECT  Id, 
        Status, 
        StartDate, 
        EndDate,
        ROW_NUMBER() OVER(PARTITION BY Id ORDER BY StartDate)
        - ROW_NUMBER() OVER(PARTITION BY Id, Status ORDER BY StartDate) As IslandId,
        ROW_NUMBER() OVER(PARTITION BY Id ORDER BY StartDate DESC)
        - ROW_NUMBER() OVER(PARTITION BY Id, Status ORDER BY StartDate DESC) As ReverseIslandId
FROM @T
)

():

SELECT  DISTINCT Id,
        Status,
        MIN(StartDate) OVER(PARTITION BY IslandId, ReverseIslandId) As StartDate,
        NULLIF(MAX(ISNULL(EndDate, '9999-12-31')) OVER(PARTITION BY IslandId, ReverseIslandId), '9999-12-31') As EndDate

FROM CTE 
ORDER BY StartDate

() :

Id  Status  StartDate   EndDate
1   Active  01.09.2007  15.10.2016
1   Sick    16.10.2016  31.12.2016
1   Active  01.01.2017  04.02.2017
1   Unpaid  05.02.2017  09.02.2017
1   Active  10.02.2017  11.02.2017
1   Unpaid  12.02.2017  30.04.2017
1   Active  01.05.2017  13.10.2017
1   Sick    14.10.2017  11.11.2017
1   Active  12.11.2017  NULL

.

, SQL ISO 8601 - yyyy-MM-dd, yyyyMMdd, SQL Server.

+3

, LAG.

:

DECLARE @table TABLE (Id INT, [Status] VARCHAR(50), StartDate DATE, EndDate DATE);
INSERT INTO @table SELECT 1, 'Active', '20070901', '20161015';
INSERT INTO @table SELECT 1, 'Sick', '20161016', '20161031';
INSERT INTO @table SELECT 1, 'Sick', '20161101', '20161130';
INSERT INTO @table SELECT 1, 'Sick', '20161201', '20161231';
INSERT INTO @table SELECT 1, 'Active', '20170101', '20170204';
INSERT INTO @table SELECT 1, 'Unpaid', '20170205', '20170209';
INSERT INTO @table SELECT 1, 'Active', '20170210', '20170211';
INSERT INTO @table SELECT 1, 'Unpaid', '20170212', '20170228';
INSERT INTO @table SELECT 1, 'Unpaid', '20170301', '20170331';
INSERT INTO @table SELECT 1, 'Unpaid', '20170401', '20170430';
INSERT INTO @table SELECT 1, 'Active', '20170501', '20171013';
INSERT INTO @table SELECT 1, 'Sick', '20171014', '20171111';
INSERT INTO @table SELECT 1, 'Active', '20171112', NULL;

:

WITH add_order AS (
    SELECT
        *,
        ROW_NUMBER() OVER (ORDER BY StartDate) AS order_id
    FROM
        @table),
links AS (
    SELECT
        a1.Id,
        a1.[Status],
        a1.order_id,
        MIN(a1.order_id) AS start_order_id,
        MAX(ISNULL(a2.order_id, a1.order_id)) AS end_order_id,
        MIN(a1.StartDate) AS StartDate,
        MAX(ISNULL(a2.EndDate, a1.EndDate)) AS EndDate
    FROM
        add_order a1
        LEFT JOIN add_order a2 ON a2.Id = a1.Id AND a2.[Status] = a1.[Status] AND a2.order_id = a1.order_id + 1 AND a2.StartDate = DATEADD(DAY, 1, a1.EndDate)
    GROUP BY
        a1.Id,
        a1.[Status],
        a1.order_id),
merged AS (
    SELECT
        l1.Id,
        l1.[Status],
        l1.[StartDate],
        ISNULL(l2.EndDate, l1.EndDate) AS EndDate,
        ROW_NUMBER() OVER (PARTITION BY l1.Id, l1.[Status], ISNULL(l2.EndDate, l1.EndDate) ORDER BY l1.order_id) AS link_id
    FROM
        links l1
        LEFT JOIN links l2 ON l2.order_id = l1.end_order_id)
SELECT
    Id,
    [Status],
    StartDate,
    EndDate
FROM
    merged
WHERE
    link_id = 1
ORDER BY
    StartDate;

:

Id  Status  StartDate   EndDate
1   Active  2007-09-01  2016-10-15
1   Sick    2016-10-16  2016-12-31
1   Active  2017-01-01  2017-02-04
1   Unpaid  2017-02-05  2017-02-09
1   Active  2017-02-10  2017-02-11
1   Unpaid  2017-02-12  2017-04-30
1   Active  2017-05-01  2017-10-13
1   Sick    2017-10-14  2017-11-11
1   Active  2017-11-12  NULL

? , . , , , , , , , CTE. , , , , LEFT JOIN ISNULL.

, , CTE, , link_id 1:

Id  Status  StartDate   EndDate link_id
1   Active  2007-09-01  2016-10-15  1
1   Sick    2016-10-16  2016-12-31  1
1   Sick    2016-11-01  2016-12-31  2
1   Sick    2016-12-01  2016-12-31  3
1   Active  2017-01-01  2017-02-04  1
1   Unpaid  2017-02-05  2017-02-09  1
1   Active  2017-02-10  2017-02-11  1
1   Unpaid  2017-02-12  2017-04-30  1
1   Unpaid  2017-03-01  2017-04-30  2
1   Unpaid  2017-04-01  2017-04-30  3
1   Active  2017-05-01  2017-10-13  1
1   Sick    2017-10-14  2017-11-11  1
1   Active  2017-11-12  NULL        1
+1

lag() lead() ,

WITH CTE AS
(
    select  *, 
            COALESCE(LEAD(status) OVER(ORDER BY (select 1)), '0') Nstatus,
            COALESCE(LAG(status) OVER(ORDER BY (select 1)), '0') Pstatus
   from table
)

SELECT * FROM CTE
WHERE (status <> Nstatus AND status <> Pstatus) OR
      (status <> Pstatus) 
0

Source: https://habr.com/ru/post/1694221/


All Articles