Removing duplicate words from a column cell using SQL

Remove duplicate words from a column cell, for example:

A data column is one field: ' BLACKHEATH 0AA BLACKHEATH COLCHESTER CO2 0AA'

But I want this: "BLACKHEATH" should be only once, if any other entry is found, it should be deleted

BLACKHEATH COLCHESTER CO2 0AA

I can do this with code, but I don't know how to remove duplicate words through SQL in SQL Server. I was instructed to do this through SQL. Any help would be appreciated.

+5
source share
3 answers
-- Removes repeated words from a space-separated string, keeping the first
-- occurrence of each word in its original position.
-- Returns a single result set with one row: the de-duplicated string.
declare @text varchar(max) = 'BLACKHEATH 0AA BLACKHEATH COLCHESTER CO2 0AA'
declare @i int = 1;
declare @nextCharacter varchar(1)
declare @word varchar(max) = ''
-- len() ignores trailing spaces, so the loop ends on the last non-space character
declare @length int = len(@text)

-- one row per word; id is the character position at which the word ended
create table #tmp (id int, word varchar(max))

while (@i <= @length)
begin
    set @nextCharacter = substring(@text, @i, 1)

    if (@nextCharacter <> ' ')
        set @word = @word + @nextCharacter

    -- A word ends at a space or at the end of the text. Empty words (produced
    -- by leading or repeated spaces) are skipped so they cannot leak extra
    -- spaces into the result.
    if ((@nextCharacter = ' ' or @i = @length) and @word <> '')
    begin
        insert into #tmp (id, word)
        values (@i, @word)
        set @word = ''
    end

    set @i = @i + 1
end;

-- Number the occurrences of each word, keep only the first one, and join the
-- survivors in their original order. FOR XML PATH with an explicit ORDER BY is
-- used instead of "select @var = @var + word ... order by id", whose result
-- SQL Server does not guarantee. TYPE + .value() returns the raw text, so
-- characters such as & < > are not entity-encoded.
with ranked_words as
(
    select id,
           word,
           row_number() over (partition by word order by id) as RowNumber
    from #tmp
)
select coalesce(stuff((
            select ' ' + word
            from ranked_words
            where RowNumber = 1
            order by id
            for xml path(''), type
        ).value('.', 'varchar(max)'), 1, 1, ''), '')

drop table #tmp
+2
source

If the order doesn't matter, you can do it pretty easily:

-- Builds a space-separated list of the distinct words in @string.
-- Word order is NOT preserved: DISTINCT leaves the ordering to the engine.
DECLARE @string VARCHAR(100) = 'BLACKHEATH 0AA  BLACKHEATH COLCHESTER CO2 0AA';

SELECT @string AS Source
    , LTRIM((
            -- dbo.SplitString is a user-supplied split function; it is assumed
            -- to return one row per word in a column named column1.
            SELECT DISTINCT ' ' + column1 AS [text()]
            FROM dbo.SplitString(@string, ' ')
            -- TYPE + .value() returns the raw text; plain FOR XML PATH('')
            -- would entity-encode characters such as & < > (e.g. '&amp;').
            FOR XML PATH(''), TYPE
            ).value('.', 'NVARCHAR(MAX)')) AS UniqueWords;

What is the idea here?

  1. dbo.SplitString (space ). . , SplitString, .
  2. DISTINCT
  3. FOR XML PATH('') .

:

╔═══════════════════════════════════════════════╦═══════════════════════════════╗
β•‘                    Source                     β•‘          UniqueWords          β•‘
╠═══════════════════════════════════════════════╬═══════════════════════════════╣
β•‘ BLACKHEATH 0AA  BLACKHEATH COLCHESTER CO2 0AA β•‘ 0AA BLACKHEATH CO2 COLCHESTER β•‘
β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•©β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•

, , (, ROW_NUMBER()) ( ), , . , :

╔═══════════╦═══════════╦════════════╗
β•‘ WordOrder β•‘ TermOrder β•‘    Term    β•‘
╠═══════════╬═══════════╬════════════╣
β•‘         1 β•‘         1 β•‘ BLACKHEATH β•‘
β•‘         2 β•‘         1 β•‘ 0AA        β•‘
β•‘         3 β•‘         2 β•‘ BLACKHEATH β•‘
β•‘         4 β•‘         1 β•‘ COLCHESTER β•‘
β•‘         5 β•‘         1 β•‘ CO2        β•‘
β•‘         6 β•‘         2 β•‘ 0AA        β•‘
β•šβ•β•β•β•β•β•β•β•β•β•β•β•©β•β•β•β•β•β•β•β•β•β•β•β•©β•β•β•β•β•β•β•β•β•β•β•β•β•

(@Splitted - ):

-- Joins the first occurrence of each word back into one string, in original
-- word order. Relies on @string and on the @Splitted table (columns WordOrder,
-- TermOrder, Term) being declared earlier in the same batch.
SELECT @string AS Source
    , LTRIM((
            SELECT ' ' + Term AS [text()]
            FROM @Splitted
            -- TermOrder = 1 keeps only the first occurrence of each word
            WHERE TermOrder = 1
            ORDER BY WordOrder
            -- TYPE + .value() returns the raw text; plain FOR XML PATH('')
            -- would entity-encode characters such as & < > (e.g. '&amp;').
            FOR XML PATH(''), TYPE
            ).value('.', 'NVARCHAR(MAX)')) AS UniqueWords;

:

╔═══════════════════════════════════════════════╦═══════════════════════════════╗
β•‘                    Source                     β•‘          UniqueWords          β•‘
╠═══════════════════════════════════════════════╬═══════════════════════════════╣
β•‘ BLACKHEATH 0AA  BLACKHEATH COLCHESTER CO2 0AA β•‘ BLACKHEATH 0AA COLCHESTER CO2 β•‘
β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•©β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•

PS SQL? SQL Server , , .

+4
-- Removes repeated words from a space-separated string, keeping the first
-- occurrence of each word in its original position.
-- Returns a single result set with one column, FromAddress.
declare @text varchar(max) = 'BLACKHEATH 0AA BLACKHEATH COLCHESTER CO2 0AA'
declare @i int = 1;
declare @nextCharacter varchar(1)
declare @word varchar(max) = ''
-- len() ignores trailing spaces, so the loop ends on the last non-space character
declare @length int = len(@text)

-- one row per word; id is the character position at which the word ended
create table #tmp (id int, word varchar(max))

while (@i <= @length)
begin
    set @nextCharacter = substring(@text, @i, 1)

    if (@nextCharacter <> ' ')
        set @word = @word + @nextCharacter

    -- A word ends at a space or at the end of the text. Empty words (produced
    -- by leading or repeated spaces) are skipped so they cannot leak extra
    -- spaces into the result.
    if ((@nextCharacter = ' ' or @i = @length) and @word <> '')
    begin
        insert into #tmp (id, word)
        values (@i, @word)
        set @word = ''
    end

    set @i = @i + 1
end;

-- Number the occurrences of each word, keep only the first one, and join the
-- survivors in their original order. FOR XML PATH with an explicit ORDER BY is
-- used instead of "select @var = @var + word ... order by id", whose result
-- SQL Server does not guarantee. TYPE + .value() returns the raw text, so
-- characters such as & < > are not entity-encoded.
with ranked_words as
(
    select id,
           word,
           row_number() over (partition by word order by id) as RowNumber
    from #tmp
)
select coalesce(stuff((
            select ' ' + word
            from ranked_words
            where RowNumber = 1
            order by id
            for xml path(''), type
        ).value('.', 'varchar(max)'), 1, 1, ''), '') as FromAddress

drop table #tmp

Remove the identifier in the select clause.

-2
source

Source: https://habr.com/ru/post/1612452/


All Articles