Identify Logically Duplicate Rows in a Table

Here is my input table.

   ID   | Player1 | Player2 | Player3
  ===================================
   101  |   Tom   |  Dick   | Harry
   102  |  Jack   | Martin  | Anna
   103  |  Harry  |  Tom    | Dick

Here we see that for IDs 101 and 103 the players are essentially the same, just in a different order. Therefore, these two rows should be considered duplicates.

How can I write a query that identifies such duplicate rows? Something that reports that "ID 101 and ID 103 are duplicates"?

+4
source share
3 answers
/*
Sample fixture for trying the query below:

CREATE  TABLE myTest
    (ID INT,
    Player1 VARCHAR(50),
    Player2 VARCHAR(50),
    Player3 VARCHAR(50));
GO

INSERT INTO myTest
VALUES
    (101, 'Tom', 'Dick', 'Harry'),
    (102, 'Jack', 'Martin', 'Anna'),
    (103, 'Harry', 'Tom', 'Dick');
GO
*/
-- Returns each ID in myTest whose three players also appear, in any column
-- order, on at least one other row.
WITH
    -- One row per (ID, player): turns the three player columns into rows.
    Players AS (
        SELECT
            ID,
            Player1 AS Player
        FROM
            myTest
        UNION ALL
        SELECT
            ID,
            Player2 AS Player
        FROM
            myTest
        UNION ALL
        SELECT
            ID,
            Player3 AS Player
        FROM
            myTest),
    -- Numbers each ID's players in alphabetical order, so two rows holding the
    -- same names get the same name at the same position regardless of the
    -- column each name was stored in.
    PlayersSorted AS (
        SELECT
            ID,
            Player,
            ROW_NUMBER() OVER (PARTITION BY ID ORDER BY Player) AS RowNo
        FROM
            Players)
SELECT DISTINCT
    a.ID
FROM
    PlayersSorted a
JOIN
    PlayersSorted b
        ON a.ID <> b.ID
        AND a.Player = b.Player
        AND a.RowNo = b.RowNo
-- A single matching position is not enough: two rows that merely share one
-- player would otherwise be reported. Require all three sorted positions of
-- the pair (a.ID, b.ID) to match. Rows containing a NULL player can never
-- reach three matches because the equality join does not match NULLs.
GROUP BY
    a.ID,
    b.ID
HAVING
    COUNT(*) = 3;
+3
source

This should work:

-- Lists pairs (ID, DuplicateID), with ID < DuplicateID, whose rows hold the
-- same player names in any column order.
WITH Src AS
(
    -- Inline sample data; replace with the real table when adapting the query.
    SELECT ID, Player1, Player2, Player3
    FROM (VALUES
    (101, 'Tom', 'Dick', 'Harry'),
    (102, 'Jack', 'Martin', 'Anna'),
    (103, 'Harry', 'Tom', 'Dick'),
    (104, 'Dick', 'Tom', 'Harry'),
    (105, 'Anna', 'Martin', 'Jack'),
    (106, 'Anna', 'Martin', 'Joe')
    ) T(ID, Player1, Player2, Player3)
), Numbered AS
(
    -- One row per (ID, name). N is the name's alphabetical position within
    -- its ID; NameCount is how many names that ID has after unpivoting.
    SELECT
        ID,
        Name,
        ROW_NUMBER() OVER (PARTITION BY ID ORDER BY Name) AS N,
        COUNT(*) OVER (PARTITION BY ID) AS NameCount
    FROM
    (
        -- The player columns are converted to one common type before they
        -- are fed to UNPIVOT.
        SELECT ID,
            CONVERT(nvarchar(MAX), Player1) Player1,
            CONVERT(nvarchar(MAX), Player2) Player2,
            CONVERT(nvarchar(MAX), Player3) Player3
        FROM Src
    ) T1
    UNPIVOT
    (Name FOR Player IN (Player1, Player2, Player3)) PV
), Grouped AS
(
    -- Match names position by position instead of cross joining every row
    -- pair. Both sides must have the same number of names, and every position
    -- must match; otherwise a row with fewer names (UNPIVOT drops NULLs)
    -- could be reported as a duplicate of a longer row it is a prefix of.
    SELECT N1.ID, N2.ID AS DuplicateID
    FROM Numbered N1
    INNER JOIN Numbered N2
        ON N1.ID < N2.ID
        AND N1.NameCount = N2.NameCount
        AND N1.N = N2.N
        AND N1.Name = N2.Name
    GROUP BY N1.ID, N2.ID, N1.NameCount
    HAVING COUNT(*) = N1.NameCount
)
SELECT ID, DuplicateID
FROM Grouped
+1
source

3

player1+player2+player3
player2+player3+player1
player3+player1+player2

,

-- Builds one canonical key per row of t: the alphabetically smallest
-- concatenation of its three player names. Rows with the same players in any
-- order get the same key.
-- All six orderings are generated; the three cyclic rotations alone give
-- different keys for e.g. (Tom, Dick, Harry) and (Harry, Dick, Tom).
-- NOTE(review): names are concatenated without a separator, so ('ab', 'c', 'd')
-- and ('a', 'bc', 'd') produce the same key -- add a delimiter if names can
-- collide this way.
SELECT
    sub.id,
    MIN(sub.name) AS concat_names
FROM (
    SELECT
        t.id,
        CASE perm.n
            WHEN 1 THEN t.player1 + t.player2 + t.player3
            WHEN 2 THEN t.player1 + t.player3 + t.player2
            WHEN 3 THEN t.player2 + t.player1 + t.player3
            WHEN 4 THEN t.player2 + t.player3 + t.player1
            WHEN 5 THEN t.player3 + t.player1 + t.player2
            WHEN 6 THEN t.player3 + t.player2 + t.player1
        END AS name
    FROM t
    -- One helper row per ordering of the three columns.
    CROSS JOIN (VALUES (1), (2), (3), (4), (5), (6)) AS perm(n)
) sub
GROUP BY sub.id

This returns:

ID concat_names
1 DickHarryTom
2 AnnaJackMartin
3 DickHarryTom

Rows that share the same concat_names value are duplicates.

0

Source: https://habr.com/ru/post/1653658/


All Articles