Group by sql query by comma in column

My table structure is similar below, the Mail column may contain multiple e-mail messages, connected by comma

Data (Intermediate)

Mail (VARCHAR (200))

[Data] [Mail]


1 m1 @ gmail.com, m2 @ hotmail.com

2 m2 @ hotmail.com, @ m3 test.com

& I need to create a report as shown below, counting each row for each email address.

[Mail]                       [Count]


m1@gmail.com         1

m2@hotmail.com     2

m3@test.com           1

So what will be the sql (server) query to create as above? Also I can not change the structure of the table.

+2
6

, CHARINDEX XML CTE.

create table #tmp ([Data] int, [Mail] varchar(200))
insert #tmp SELECT 1,'m1@gmail.com,m2@hotmail.com,other, longer@test, fifth'
UNION ALL   SELECT 2,'m2@hotmail.com,m3@test.com'
UNION ALL   SELECT 3,'m3@single.com'
UNION ALL   SELECT 4,''
UNION ALL   SELECT 5,null

select single, count(*) [Count]
from
(
    select ltrim(rtrim(substring(t.mail, v.number+1,
        isnull(nullif(charindex(',',t.mail,v.number+1),0)-v.number-1,200)))) single
    from #tmp t
    inner join master..spt_values v on v.type='p'
        and v.number <= len(t.Mail)
        and (substring(t.mail,v.number,1) = ',' or v.number=0)
) X
group by single

, ,

  • #tmp:
  • #mail:
+1

SQL Server

WITH T ([Data], [Mail])
     AS (SELECT 1,'m1@gmail.com,m2@hotmail.com' UNION ALL
         SELECT 2,'m2@hotmail.com,m3@test.com')
SELECT address  AS Mail,
       COUNT(*) AS [Count]
FROM   T
       CROSS APPLY (SELECT CAST('<m>' + REPLACE([Mail], ',', '</m><m>') + '</m>'
                                AS XML
                           ) AS x) ca1
       CROSS APPLY (SELECT T.split.value('.', 'varchar(200)') AS address
                    FROM   x.nodes('/m') T(split)) ca
GROUP  BY address  
+3

SQL Server cte.

declare @Mail table (ID int, Mail varchar(200))

insert into @Mail values
(1, 'm1@gmail.com,m2@hotmail.com'),
(2, 'm2@hotmail.com,m3@test.com'),
(3, 'm2@hotmail.com')

;with cte1 as
(
  select Mail+',' as Mail
  from @Mail
),
cte2
as
(
  select
    left(Mail, charindex(',', Mail)-1) as Mail1,
    right(Mail, len(Mail)-charindex(',', Mail)) as Mail
  from cte1
  union all
  select
    left(Mail, charindex(',', Mail)-1) as Mail1,
    right(Mail, len(Mail)-charindex(',', Mail)) as Mail
  from cte2
  where charindex(',', Mail) > 1
)
select
  Mail1 as Mail,
  count(*) as [Count]
from cte2
group by Mail1

1 , , , Mail

+2

...
- "" LEN,
- UNION , 0 CHARINDEX NULL

, .


CROSS APPLY , SELECT , NULLIF (CHARINDEX) .


WITH
  source (
    Data,
    Mail
  )
AS
(
  SELECT 1,'m1@gmail.com,m2@hotmail.com' UNION ALL
  SELECT 2,'m2@hotmail.com,m3@test.com'
)
,
  split_cte
AS
(
  SELECT
    LEFT (mail, ISNULL(comma - 1, LEN(mail)))     AS "current_mail",
    RIGHT(mail, ISNULL(LEN(mail) - comma, 0))     AS "mail_data",
    ISNULL(LEN(mail) - comma, 0)                  AS "chars"
  FROM
    source
  CROSS APPLY
    (SELECT NULLIF(CHARINDEX(',', mail), 0) AS "comma") AS search

  UNION ALL

  SELECT
    LEFT (mail_data, ISNULL(comma - 1, chars))    AS "current_mail",
    RIGHT(mail_data, ISNULL(chars - comma, 0))    AS "mail_data",
    ISNULL(chars - comma, 0)                      AS "chars"
  FROM
    split_cte
  CROSS APPLY
    (SELECT NULLIF(CHARINDEX(',', mail_data), 0) AS "comma") AS search
  WHERE
    chars > 0
)

SELECT
  current_mail     AS "Mail",
  COUNT(*)         AS "Count"
FROM
  split_cte
GROUP BY
  current_mail
+1

. -, . , , " ". , , , .

. , , , , . - , , , . , , , , , , . , , , , .

+1

, :

Fill in the table with some data

create table tmp1 ([Data] int, [Mail] varchar(200))
insert tmp1 SELECT 1,'m1@gmail.com,m2@hotmail.com,other, longer@test, fifth'
UNION ALL   SELECT 2,'m2@hotmail.com,m3@test.com'
UNION ALL   SELECT 3,'m3@single.com'
UNION ALL   SELECT 4,''
UNION ALL   SELECT 5,null

insert tmp1
select data*10000 + number, mail
from tmp1, master..spt_values v
where v.type='P'

-- total rows: 10245

Test request:

set statistics io on
set statistics time on

dbcc dropcleanbuffers dbcc freeproccache

select single, count(*) [Count]
from
(
    select ltrim(rtrim(substring(t.mail, v.number+1,
        isnull(nullif(charindex(',',t.mail,v.number+1),0)-v.number-1,200)))) single
    from tmp1 t
    inner join master..spt_values v on v.type='p'
        and v.number <= len(t.Mail)
        and (substring(t.mail,v.number,1) = ',' or v.number=0)
) X
group by single

dbcc dropcleanbuffers dbcc freeproccache

;with cte1 as
(
  select Mail+',' as Mail
  from tmp1
),
cte2
as
(
  select
    left(Mail, charindex(',', Mail)-1) as Mail1,
    right(Mail, len(Mail)-charindex(',', Mail)) as Mail
  from cte1
  union all
  select
    left(Mail, charindex(',', Mail)-1) as Mail1,
    right(Mail, len(Mail)-charindex(',', Mail)) as Mail
  from cte2
  where charindex(',', Mail) > 1
)
select
  Mail1 as Mail,
  count(*) as [Count]
from cte2
group by Mail1

dbcc dropcleanbuffers dbcc freeproccache

--SET ANSI_DEFAULTS ON
--SET ANSI_NULLS ON
;
SELECT address  AS Mail,
       COUNT(*) AS [Count]
FROM   tmp1
       CROSS APPLY (SELECT CAST('<m>' + REPLACE([Mail], ',', '</m><m>') + '</m>'
                                AS XML
                           ) AS x) ca1
       CROSS APPLY (SELECT T.split.value('.', 'varchar(200)') AS address
                    FROM   x.nodes('/m') T(split)) ca
GROUP  BY address  

Statistics

Run a few times to feel the average

Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'spt_values'. Scan count 8196, logical reads 26637, physical reads 2, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'tmp1'. Scan count 3, logical reads 43, physical reads 0, read-ahead reads 14, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

 SQL Server Execution Times:
   CPU time = 641 ms,  elapsed time = 412 ms.

Table 'Worktable'. Scan count 2, logical reads 103271, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'tmp1'. Scan count 1, logical reads 43, physical reads 0, read-ahead reads 14, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

 SQL Server Execution Times:
   CPU time = 609 ms,  elapsed time = 614 ms.

Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'tmp1'. Scan count 3, logical reads 43, physical reads 0, read-ahead reads 14, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

 SQL Server Execution Times:
   CPU time = 2798 ms,  elapsed time = 1421 ms.

Table 'Worktable'. Scan count 2, logical reads 103334, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'tmp1'. Scan count 1, logical reads 43, physical reads 0, read-ahead reads 14, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

 SQL Server Execution Times:
   CPU time = 734 ms,  elapsed time = 742 ms.

Summary

First (CHARINDEX) : processor time = 344 ms, elapsed time = 198 ms.
Second (CTE) : processor time = 594 ms, elapsed time = 613 ms.
Third (XML) : processor time = 2812 ms, elapsed time = 1418 ms.
Fourth (CTE2) : processor time = 719 ms, elapsed time = 750 ms.

0
source

Source: https://habr.com/ru/post/1795534/


All Articles