Find duplicate series in SQL

I have a 3-column table containing a variable number of records for each value of the first column, which is a foreign key. I am trying to determine whether I can detect when an entire series (all the rows for one key) is duplicated under another key.

-- Sample data for the duplicate-series problem. Each row is
-- (portid, asset_id, allocation); portid is the foreign key that groups
-- rows into a series.
DECLARE @finddupseries TABLE
(
    portid     int,
    asset_id   int,
    allocation float
)
;
-- The column list is explicit so the insert does not depend on the
-- declared column order.
INSERT INTO @finddupseries (portid, asset_id, allocation)
SELECT 250, 6, 0.05 UNION ALL
SELECT 250, 66, 0.8 UNION ALL
SELECT 250, 2, 0.105 UNION ALL
SELECT 250, 4, 0.0225 UNION ALL
SELECT 250, 5, 0.0225 UNION ALL
SELECT 251, 13, 0.6 UNION ALL
SELECT 251, 2, 0.3 UNION ALL
SELECT 251, 5, 0.1 UNION ALL
SELECT 252, 13, 0.8 UNION ALL
SELECT 252, 2, 0.15 UNION ALL
SELECT 252, 5, 0.05 UNION ALL
SELECT 253, 13, 0.4 UNION ALL
SELECT 253, 2, 0.45 UNION ALL
SELECT 253, 5, 0.15 UNION ALL
SELECT 254, 6, 0.05 UNION ALL
SELECT 254, 66, 0.8 UNION ALL
SELECT 254, 2, 0.105 UNION ALL
SELECT 254, 4, 0.0225 UNION ALL
SELECT 254, 5, 0.0225

-- Show the raw rows; columns are named rather than using SELECT *.
SELECT portid, asset_id, allocation
FROM @finddupseries;

Entries for portid 250 and 254 match.

Is there a way to write a query to detect this? Edit: yes, the whole series should match. In addition, if there were a way to determine which ones did match, that would be useful, since the actual table has about 10 thousand records.

Thanks!

+3
source share
2 answers

This query gives you, for each portid, all of its values converted to a single string:

-- Builds one comparison key per portid by concatenating that portid's rows,
-- sorted by asset_id then allocation, as "<asset_id>_<allocation>,".
-- The '_' separator keeps the key unambiguous: without it, (asset_id 1,
-- allocation 10.5) and (asset_id 11, allocation 0.5) would both render as
-- "110.5". With the separator, the key for portid 250 reads
-- "2_0.105,4_0.0225,5_0.0225,6_0.05,66_0.8," instead of the undelimited
-- "20.105,40.0225,50.0225,60.05,660.8,".
SELECT
    fus1.portid,
    (
        -- The expression is deliberately left unaliased: a column alias
        -- would make FOR XML PATH ('') wrap each value in an element.
        SELECT CONVERT (VARCHAR(30), fus2.asset_id) + '_' + CONVERT (VARCHAR(30), fus2.allocation) + ','
        FROM @finddupseries AS fus2
        WHERE fus2.portid = fus1.portid
        -- A fixed order makes identical series produce identical strings.
        ORDER BY fus2.asset_id, fus2.allocation
        FOR XML PATH ('')
    ) AS AllValuesFromAllRows
FROM @finddupseries AS fus1
GROUP BY fus1.portid

the output should look like this:

portid      AllValuesFromAllRows
----------- ------------------------------------------------------
250         20.105,40.0225,50.0225,60.05,660.8,
251         20.3,50.1,130.6,
252         20.15,50.05,130.8,
253         20.45,50.15,130.4,
254         20.105,40.0225,50.0225,60.05,660.8,

You can then group on that string to find the series that occur more than once:

-- Reports every series (set of asset_id/allocation rows) that is shared by
-- more than one portid, together with how many portids share it.
;WITH DuplicateFinder AS
(
    -- One key string per portid: its rows, sorted by asset_id then
    -- allocation, rendered as "<asset_id>_<allocation>,".
    -- The '_' separator prevents false duplicates: without it, (asset_id 1,
    -- allocation 10.5) and (asset_id 11, allocation 0.5) would both render
    -- as "110.5". The reported key therefore reads
    -- "2_0.105,4_0.0225,..." rather than the undelimited "20.105,40.0225,...".
    SELECT
        fus1.portid,
        (
            -- Left unaliased on purpose: a column alias would make
            -- FOR XML PATH ('') wrap each value in an element.
            SELECT CONVERT (VARCHAR(30), fus2.asset_id) + '_' + CONVERT (VARCHAR(30), fus2.allocation) + ','
            FROM @finddupseries AS fus2
            WHERE fus2.portid = fus1.portid
            ORDER BY fus2.asset_id, fus2.allocation
            FOR XML PATH ('')
        ) AS AllValuesFromAllRows
    FROM @finddupseries AS fus1
    GROUP BY fus1.portid
)
SELECT
    AllValuesFromAllRows,
    COUNT (*) AS NumDups
FROM DuplicateFinder
GROUP BY AllValuesFromAllRows
HAVING COUNT (*) > 1

AllValuesFromAllRows                           NumDups
----------------------------------------------- -----------
20.105,40.0225,50.0225,60.05,660.8,             2

To also see which portids share each duplicated series, here is a complete script:

SET NOCOUNT ON

-- Sample data for the duplicate-series problem. Each row is
-- (portid, asset_id, allocation); rows sharing a portid form one series.
DECLARE @finddupseries TABLE
(
    portid     int,
    asset_id   int,
    allocation float
)
;
-- The column list is explicit so the insert does not depend on the
-- declared column order.
INSERT INTO @finddupseries (portid, asset_id, allocation)
SELECT 250, 6, 0.05 UNION ALL
SELECT 250, 66, 0.8 UNION ALL
SELECT 250, 2, 0.105 UNION ALL
SELECT 250, 4, 0.0225 UNION ALL
SELECT 250, 5, 0.0225 UNION ALL
SELECT 251, 13, 0.6 UNION ALL
SELECT 251, 2, 0.3 UNION ALL
SELECT 251, 5, 0.1 UNION ALL
SELECT 252, 13, 0.8 UNION ALL
SELECT 252, 2, 0.15 UNION ALL
SELECT 252, 5, 0.05 UNION ALL
SELECT 253, 13, 0.4 UNION ALL
SELECT 253, 2, 0.45 UNION ALL
SELECT 253, 5, 0.15 UNION ALL
SELECT 254, 6, 0.05 UNION ALL
SELECT 254, 66, 0.8 UNION ALL
SELECT 254, 2, 0.105 UNION ALL
SELECT 254, 4, 0.0225 UNION ALL
SELECT 254, 5, 0.0225

-- Lists every portid whose whole series also occurs under at least one
-- other portid, together with the key string that identifies the series.
-- Rows are ordered by key and then portid so matching portids are adjacent.
;WITH PivotAssetIdAndAllocation AS
(
    -- One key string per portid: its rows, sorted by asset_id then
    -- allocation, rendered as "<asset_id>_<allocation>~~".
    SELECT
        fus1.portid,
        (
            -- Left unaliased on purpose: a column alias would make
            -- FOR XML PATH ('') wrap each value in an element.
            -- NOTE(review): per the CONVERT documentation, a float rendered
            -- with the default style keeps at most 6 digits, so allocations
            -- that differ only beyond that would yield the same key --
            -- confirm this tolerance is acceptable for the real data.
            SELECT CONVERT (VARCHAR(30), fus2.asset_id) + '_' + CONVERT (VARCHAR(30), fus2.allocation) + '~~'
            FROM @finddupseries AS fus2
            WHERE fus2.portid = fus1.portid
            ORDER BY fus2.asset_id, fus2.allocation
            FOR XML PATH ('')
        ) AS AllValuesFromAllRows
    FROM @finddupseries AS fus1
    GROUP BY fus1.portid
)
,
ListOfDuplicates AS
(
    -- Key strings produced by more than one portid.
    SELECT AllValuesFromAllRows
    FROM PivotAssetIdAndAllocation
    GROUP BY AllValuesFromAllRows
    HAVING COUNT (*) > 1
)
SELECT
    pivoted.portid,
    pivoted.AllValuesFromAllRows
FROM PivotAssetIdAndAllocation AS pivoted
WHERE EXISTS
(
    SELECT 1
    FROM ListOfDuplicates AS dup
    WHERE dup.AllValuesFromAllRows = pivoted.AllValuesFromAllRows
)
ORDER BY pivoted.AllValuesFromAllRows, pivoted.portid

Output:

portid      AllValuesFromAllRows
----------- ----------------------------------------------------------------------
250         2_0.105~~4_0.0225~~5_0.0225~~6_0.05~~66_0.8~~
254         2_0.105~~4_0.0225~~5_0.0225~~6_0.05~~66_0.8~~
+4

Another approach:

-- Sample data for the duplicate-series problem. Each row is
-- (portid, asset_id, allocation); rows sharing a portid form one series.
-- This copy also contains portid 255, whose rows repeat those of portid 251.
DECLARE @finddupseries TABLE
(
    portid     int,
    asset_id   int,
    allocation float
)
;
-- The column list is explicit so the insert does not depend on the
-- declared column order.
INSERT INTO @finddupseries (portid, asset_id, allocation)
SELECT 250, 6, 0.05 UNION ALL
SELECT 250, 66, 0.8 UNION ALL
SELECT 250, 2, 0.105 UNION ALL
SELECT 250, 4, 0.0225 UNION ALL
SELECT 250, 5, 0.0225 UNION ALL
SELECT 251, 13, 0.6 UNION ALL
SELECT 251, 2, 0.3 UNION ALL
SELECT 251, 5, 0.1 UNION ALL
SELECT 252, 13, 0.8 UNION ALL
SELECT 252, 2, 0.15 UNION ALL
SELECT 252, 5, 0.05 UNION ALL
SELECT 253, 13, 0.4 UNION ALL
SELECT 253, 2, 0.45 UNION ALL
SELECT 253, 5, 0.15 UNION ALL
SELECT 254, 6, 0.05 UNION ALL
SELECT 254, 66, 0.8 UNION ALL
SELECT 254, 2, 0.105 UNION ALL
SELECT 254, 4, 0.0225 UNION ALL
SELECT 254, 5, 0.0225 UNION ALL
SELECT 255, 13, 0.6 UNION ALL
SELECT 255, 2, 0.3 UNION ALL
SELECT 255, 5, 0.1

-- Returns every row of each portid whose WHOLE series also exists under
-- another portid, ordered by portid, asset_id, allocation.
--
-- Two portids match only when they have exactly the same set of
-- (asset_id, allocation) rows. A single shared row is not enough, and a
-- series that matches two or more others is returned in full.
-- NOTE(review): rows are compared with "=", so a NULL allocation never
-- matches anything -- confirm the real column cannot be NULL.
;WITH series_rows AS
(
    -- Collapse repeated rows inside one portid, remembering how often each
    -- occurs, so the self-join below pairs each row at most once per
    -- counterpart portid.
    SELECT
        portid,
        asset_id,
        allocation,
        COUNT(*) AS occurrences
    FROM @finddupseries
    GROUP BY portid, asset_id, allocation
),
series_size AS
(
    -- Number of distinct (asset_id, allocation) rows in each series.
    SELECT
        portid,
        COUNT(*) AS distinct_rows
    FROM series_rows
    GROUP BY portid
),
matched_portids AS
(
    -- For each pair of different portids with equally sized series, count
    -- the rows they have in common. If every row of one side is matched,
    -- the two series are identical.
    SELECT a.portid
    FROM series_rows AS a
    INNER JOIN series_rows AS b
        ON  b.asset_id    = a.asset_id
        AND b.allocation  = a.allocation
        AND b.occurrences = a.occurrences
        AND b.portid     <> a.portid
    INNER JOIN series_size AS size_a
        ON size_a.portid = a.portid
    INNER JOIN series_size AS size_b
        ON  size_b.portid        = b.portid
        AND size_b.distinct_rows = size_a.distinct_rows
    GROUP BY a.portid, b.portid, size_a.distinct_rows
    HAVING COUNT(*) = size_a.distinct_rows
)
SELECT
    f.portid,
    f.asset_id,
    f.allocation
FROM @finddupseries AS f
WHERE EXISTS
(
    SELECT 1
    FROM matched_portids AS m
    WHERE m.portid = f.portid
)
ORDER BY f.portid, f.asset_id, f.allocation
0

Source: https://habr.com/ru/post/1783888/


All Articles