What is the best way to summarize interval data?

Data in a table with arbitrary intervals (NOT date / time !!) is defined as follows:

START float
END float
VALUE varchar(40)

eg.

 START    END    VALUE
 -----    ---    ------
 0        1      Banana
 1        3      Banana
 3        4      Orange
 4        7      Orange
 7        8      Apple
 8        9      Apple
 9       10      Apple
10       15      Apple
20       22      Apple
22       23      Apple
23       28      Banana
28       30      Banana
etc..

How can I summarize the data so that each run of adjoining intervals with the same value is merged into a single row? That is, the query result should look like this:

START     END    VALUE
-----     ---    ------
 0        3      Banana
 3        7      Orange
 7       15      Apple
20       23      Apple
23       30      Banana

Pay attention to the gap between 15 and 20 above. I deal with a fairly large amount of data (~500 thousand rows), but the query is not run often, so efficiency would be nice but is not critical. Can this be done without using a cursor?

(Note: using SQL2008R2 therefore cannot use new features if they exist)

Thanks!

+4
source share
4 answers
-- Merge runs of touching intervals that share a [Value] into one row per run.
--
-- Approach: keep only the first and last row of every run, number the kept
-- rows per [Value] in [Start] order, and pair them up (1,2), (3,4), ... so
-- that each pair describes one merged interval.
-- NOTE(review): assumes intervals with the same [Value] never overlap - confirm.
WITH TableWithPreviousAndNext AS (
    SELECT CA1.[Previous]
          ,Table1.[Start]
          ,Table1.[End]
          ,CA2.[Next]
          ,Table1.[Value]
          -- Window functions are evaluated after WHERE, so the numbering only
          -- sees the surviving edge rows; integer division maps row numbers
          -- 1,2 -> 1 and 3,4 -> 2, i.e. one [Group] per run.
          ,(1 + ROW_NUMBER() OVER (PARTITION BY Table1.[Value] ORDER BY Table1.[Start])) / 2 AS [Group]
    FROM Table1
         -- Largest [End] among rows of the same value that start earlier.
         CROSS APPLY (
             SELECT MAX(InnerTable1.[End]) AS [Previous]
             FROM Table1 AS InnerTable1
             WHERE InnerTable1.[Value] = Table1.[Value]
                   AND InnerTable1.[Start] < Table1.[Start]
         ) AS CA1
         -- Smallest [Start] among rows of the same value that start later.
         CROSS APPLY (
             SELECT MIN(InnerTable1.[Start]) AS [Next]
             FROM Table1 AS InnerTable1
             WHERE InnerTable1.[Value] = Table1.[Value]
                   AND InnerTable1.[Start] > Table1.[Start]
         ) AS CA2
         -- A run consisting of a single row is both its first and its last
         -- row, so it is emitted twice to form a complete pair on its own.
         CROSS APPLY (
             SELECT 1 AS Dummy
           UNION ALL
             SELECT 1
             WHERE (CA1.[Previous] IS NULL OR CA1.[Previous] <> Table1.[Start])
                   AND (CA2.[Next] IS NULL OR CA2.[Next] <> Table1.[End])
         ) AS CA3
    -- Drop rows in the middle of a run (touching a neighbour on both sides).
    WHERE CA1.[Previous] IS NULL
          OR CA2.[Next] IS NULL
          OR CA1.[Previous] <> Table1.[Start]
          OR Table1.[End] <> CA2.[Next]
)
SELECT MIN(Edges.[Start]) AS [Start]
      ,MAX(Edges.[End])   AS [End]
      ,Edges.[Value]
FROM TableWithPreviousAndNext AS Edges
GROUP BY Edges.[Value]
        ,Edges.[Group]
ORDER BY MIN(Edges.[Start]);
+1

:

-- Sample data from the question, held in a table variable so the script is
-- self-contained. Identifiers are written with one consistent casing so the
-- script also binds under a case-sensitive collation.
DECLARE @T TABLE ([Start] INT, [End] INT, [Value] VARCHAR(100));
INSERT @T ([Start], [End], [Value])
VALUES
    (0, 1, 'Banana'), (1, 3, 'Banana'), (3, 4, 'Orange'), (4, 7, 'Orange'),
    (7, 8, 'Apple'), (8, 9, 'Apple'), (9, 10, 'Apple'), (10, 15, 'Apple'),
    (20, 22, 'Apple'), (22, 23, 'Apple'), (23, 28, 'Banana'), (28, 30, 'Banana');

-- Step 1 (CTE): flag every row that opens a new range. A row continues a
-- range (IsStart = 0) when some row with the same value ends exactly where it
-- starts; otherwise the APPLY returns no row and ISNULL yields IsStart = 1.
WITH CTE AS
(   SELECT  T.[Start],
            T.[End],
            T.[Value],
            IsStart = ISNULL(c.IsStart, 1)
    FROM    @T AS T
            OUTER APPLY
            (   SELECT  TOP 1 IsStart = 0
                FROM    @T AS T2
                WHERE   T2.[Value] = T.[Value]
                AND     T2.[End] = T.[Start]
            ) AS c
)
-- Step 2: the running total of IsStart per value (all rows starting at or
-- before the current one) numbers the ranges; grouping on that number gives
-- one output row per merged range.
SELECT  T.[Value], [Start] = MIN(T.[Start]), [End] = MAX(T.[End])
FROM    CTE AS T
        OUTER APPLY
        (   SELECT  SUM(T2.IsStart)
            FROM    CTE AS T2
            WHERE   T2.[Value] = T.[Value]
            AND     T2.[Start] <= T.[Start]
        ) g (GroupingSet)
GROUP BY T.[Value], g.GroupingSet
ORDER BY MIN(T.[Start]);

The first part of the query flags, for each row, whether it starts a new range:

-- Flags each row of @T with IsStart = 1 when no row with the same value ends
-- exactly at this row's start, and IsStart = 0 otherwise.
-- Requires the @T table variable to be declared and filled in the same batch.
SELECT  T.[Start],
        T.[End],
        T.[Value],
        IsStart = ISNULL(c.IsStart, 1)   -- no matching predecessor => starts a range
FROM    @T AS T
        OUTER APPLY
        (   SELECT  TOP 1 IsStart = 0
            FROM    @T AS T2
            WHERE   T2.[Value] = T.[Value]
            AND     T2.[End] = T.[Start]
        ) AS c;

:

Start   End value   IsStart
0       1   Banana  1
1       3   Banana  0
3       4   Orange  1
4       7   Orange  0
7       8   Apple   1
8       9   Apple   0
9       10  Apple   0
10      15  Apple   0
20      22  Apple   1

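Then, for each row, the IsStart flags of all rows with the same value that start at or before it are summed, which numbers the ranges: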

-- Shows every CTE row together with its range number: the sum of IsStart over
-- all rows with the same value that start at or before the current row.
-- CTE is not defined here; this SELECT has to follow the WITH clause that
-- declares it.
SELECT  cur.*,
        running.GroupingSet
FROM    CTE AS cur
        OUTER APPLY
        (   SELECT  SUM(earlier.IsStart) AS GroupingSet
            FROM    CTE AS earlier
            WHERE   earlier.Value = cur.Value
            AND     earlier.Start <= cur.Start
        ) AS running;

:

Start   End value   IsStart GroupingSet
0       1   Banana  1       1
1       3   Banana  0       1
3       4   Orange  1       1
4       7   Orange  0       1
7       8   Apple   1       1
8       9   Apple   0       1
9       10  Apple   0       1
10      15  Apple   0       1
20      22  Apple   1       2   -- SECOND NON CONTINUOUS RANGE FOR APPLES
22      23  Apple   0       2
23      28  Banana  1       2   -- SECOND NON CONTINUOUS RANGE FOR BANANAS
28      30  Banana  0       2

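Finally, grouping by Value and GroupingSet and taking MIN(Start) and MAX([End]) gives one row per range.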

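Alternatively, when the boundaries are whole numbers, the intervals can be expanded into unit steps with a numbers table (here master..spt_values):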

-- Alternative for integer boundaries: expand every interval into the unit
-- steps it covers, then merge consecutive steps per value.
-- Requires the @T table variable to be declared and filled in the same batch.
-- NOTE(review): master.dbo.spt_values is an undocumented system table; its
-- type = 'P' rows are commonly 0..2047, which would cap the interval length
-- this can expand - confirm on the target server.
WITH CTE AS
(   SELECT  T.[Value],
            -- Start point of each unit step inside the interval.
            Number = T.[Start] + v.number,
            -- Consecutive step numbers minus a consecutive row number is
            -- constant within an unbroken run and changes after every gap.
            GroupingSet = T.[Start] + v.number - ROW_NUMBER() OVER (PARTITION BY T.[Value] ORDER BY T.[Start] + v.number)
    FROM    @T AS T
            INNER JOIN master.dbo.spt_values AS v
                ON v.[type] = 'P'
                AND v.number < (T.[End] - T.[Start])
)
-- Number holds the START of each unit step, so the range ends one unit after
-- the largest step start (e.g. steps 0, 1, 2 describe the range 0-3).
SELECT  [Value], [Start] = MIN(Number), [End] = MAX(Number) + 1
FROM    CTE
GROUP BY GroupingSet, [Value]
ORDER BY [Start];

, , , , / . , , -

+3

...

/, .

:

-- Sample data from the question.
DECLARE @Fruits TABLE ([Start] FLOAT, [End] FLOAT, [Value] NVARCHAR(MAX));
INSERT INTO @Fruits ([Start], [End], [Value])
SELECT 0,1,'Banana' UNION ALL
SELECT 1,3,'Banana' UNION ALL
SELECT 3,4,'Orange' UNION ALL
SELECT 4,7,'Orange' UNION ALL
SELECT 7,8,'Apple' UNION ALL
SELECT 8,9,'Apple' UNION ALL
SELECT 9,10,'Apple' UNION ALL
SELECT 10,15,'Apple' UNION ALL
SELECT 20,22,'Apple' UNION ALL
SELECT 22,23,'Apple' UNION ALL
SELECT 23,28,'Banana' UNION ALL
SELECT 28,30,'Banana';

-- Split every interval into unit-length pieces with a recursive CTE, then
-- merge consecutive pieces per value.
-- NOTE(review): the unit-step expansion only reproduces the original
-- boundaries when Start and End are whole numbers - confirm for real data.
WITH ExpandCTE AS
(
    -- Anchor: the first unit piece of each interval.
    SELECT 1 AS SplitNum,
           [End] - [Start] AS Duration,
           [Start],
           [Start] + 1 AS [End],
           [Value]
    FROM @Fruits
    UNION ALL
    -- Recursive step: the next unit piece, until Duration pieces exist.
    SELECT SplitNum + 1,
           Duration,
           [Start] + 1 AS [Start],
           [Start] + 2 AS [End],
           [Value]
    FROM ExpandCTE
    WHERE SplitNum < Duration
),
t1 AS
(
    -- [Start] minus a consecutive row number is constant within an unbroken
    -- run of unit pieces and changes after every gap.
    SELECT [Start],
           [End],
           [Value],
           [Start] - ROW_NUMBER() OVER (PARTITION BY [Value] ORDER BY [Start]) AS X
    FROM ExpandCTE
)
SELECT MIN([Start]) AS [Start], MAX([End]) AS [End], [Value]
FROM t1
GROUP BY [Value], X
ORDER BY [Start]
-- The default recursion limit is 100 levels, which aborts the statement for
-- any interval longer than 100 units; 0 removes the limit. The recursion
-- still terminates because SplitNum grows until it reaches Duration.
OPTION (MAXRECURSION 0);
+1

SQLServer 2008

-- Merge adjoining intervals with the same Value without LAG or a windowed
-- running total, neither of which exists in SQL Server 2008.
-- NOTE(review): numbering is ordered by [Start] only, so it presumably relies
-- on [Start] being unique across the table - confirm.
WITH I AS (
  -- Number all rows in [Start] order so each row can find its predecessor.
  SELECT ID = ROW_NUMBER() OVER (ORDER BY [Start])
       , _Start = [Start]
       , _End = [End]
       , Value
  FROM   Data
), D AS (
  -- R = 1 marks a row that opens a new range: the value differs from the
  -- previous row, or the previous row does not end where this one starts.
  SELECT cur.ID, cur._Start, cur._End, cur.Value
       , R = CASE WHEN cur.Value <> prev.Value THEN 1
                  WHEN prev._End <> cur._Start THEN 1
                  ELSE 0
             END
  FROM   I AS cur
         -- Previous row by ID; the very first row has none and is matched
         -- with itself (TOP 1 ... ORDER BY ID picks the lower of the two IDs).
         CROSS APPLY (SELECT TOP 1
                             p._End, p.Value
                      FROM   I AS p
                      WHERE  p.ID IN (cur.ID, cur.ID - 1)
                      ORDER BY p.ID) AS prev
), B AS (
  -- Running total of R up to and including the current row = range number.
  -- Every row matches at least itself, so an inner join loses nothing.
  -- The join pairs each row with all rows before it, so the row count grows
  -- quadratically with the size of Data.
  SELECT cur.ID, cur._Start, cur._End, cur.Value
       , R = SUM(earlier.R)
  FROM   D AS cur
         INNER JOIN D AS earlier ON cur.ID >= earlier.ID
  GROUP BY cur.ID, cur._Start, cur._End, cur.Value
)
SELECT [START] = MIN(_Start)
     , [END] = MAX(_End)
     , Value
FROM   B
GROUP BY R, Value
ORDER BY [START];

SQLFiddle Demo

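CTE I adds a sequential row number (ID), ordered by Start, which the later steps use to relate each row to its neighbours (in the CROSS APPLY and the JOIN).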

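CTE D uses CROSS APPLY to fetch the previous row (a substitute for LAG, which SQL Server 2008 does not have) and sets R to 1 whenever the Value changes or the previous row's [END] differs from the current row's [START].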

CTE B joins D to itself to build a running total of R, which gives every row the number of the range it belongs to.

.

+1

Source: https://habr.com/ru/post/1545702/


All Articles