Select rows that exist in another table - how to improve performance?

I have three tables in SQL Server 2008 R2:

PRODUCTS (id int, title varchar(100), ....)  
WORDS (id int,word varchar(100) )  
WORDS_IN_TITLES (product_id int, word_id int)  

Now I want to select all products whose titles contain all of the specified words.

Now I do this:

-- Word ids to search for; tp_intList is a user-defined table type
-- (its definition is not shown here; the query below reads its "id" column).
DECLARE @words tp_intList;
INSERT INTO @words (id) VALUES (154), (172);

-- Number of requested words: a product qualifies only when it matches this many.
DECLARE @wordsCnt int;
SELECT @wordsCnt = COUNT(*) FROM @words;

-- Products whose title is linked to every requested word.
SELECT *
FROM products
WHERE id IN (
    SELECT wit.product_id
    FROM WORDS_IN_TITLES AS wit
    INNER JOIN @words AS wrds
        ON wrds.id = wit.word_id
    GROUP BY wit.product_id
    HAVING COUNT(*) = @wordsCnt
);

It works, but it is slow. The table contains 600 thousand rows, and returning 3.5 thousand rows takes about 4 seconds. I need it to run in well under 1 second. How can I improve performance?

+3
source share
2 answers

It doesn't look like your query can be improved significantly.

, 600 . 600 . word_in_titles. 2 , , 3 10 , .

You do need an index on words_in_titles (word_id).

-- Sample data for the benchmark.
--
-- master..spt_values is used purely as a row generator: the type = 'P' rows are
-- assumed to carry the consecutive numbers 0..N (undocumented system table --
-- verify on your instance). 601 x 1000 numbers give roughly 600 thousand rows.

-- products: one row per generated pair, with a random GUID string as the title.
CREATE TABLE products (
    id    int IDENTITY PRIMARY KEY CLUSTERED,
    title varchar(100)
);

INSERT INTO products (title)
SELECT CONVERT(varchar(100), NEWID())
FROM master..spt_values AS a
INNER JOIN master..spt_values AS b
    -- 'P' in upper case on both sides so the filter also works under a
    -- case-sensitive collation.
    ON b.type = 'P' AND b.number BETWEEN 0 AND 999
WHERE a.type = 'P' AND a.number BETWEEN 0 AND 600;

-- words_in_titles: random (product_id, word_id) pairs, de-duplicated so the
-- composite primary key holds.
CREATE TABLE words_in_titles (
    product_id int,
    word_id    int,
    PRIMARY KEY CLUSTERED (product_id, word_id)
);

INSERT INTO words_in_titles (product_id, word_id)
SELECT DISTINCT x.product_id, x.word_id
FROM (
    SELECT
        -- The previous CONVERT(bigint, <binary NEWID()>) % n expression is signed,
        -- so it could produce negative (or zero) ids that match no product row.
        -- CHECKSUM(NEWID()) % n lies in -(n-1)..(n-1); ABS of that cannot overflow.
        ABS(CHECKSUM(NEWID()) % 60000) + 1 AS product_id,  -- 1..60000, valid identity values
        ABS(CHECKSUM(NEWID()) % 1000)      AS word_id      -- 0..999
    FROM master..spt_values AS a
    INNER JOIN master..spt_values AS b
        ON b.type = 'P' AND b.number BETWEEN 0 AND 999
    WHERE a.type = 'P' AND a.number BETWEEN 0 AND 600
) AS x;

-- Lets the lookups by word_id seek instead of scanning the clustered index.
CREATE INDEX ix_words_in_titles ON words_in_titles (word_id);

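The test script follows. SET STATISTICS is used to report I/O and timing for each query. The DBCC commands clear the caches before each query; set @clean to 1 or 0 to turn that on or off.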

-- Benchmark harness setup.
-- @clean = 1 runs the DBCC commands below (here and before each test query).
DECLARE @clean bit = 1;
IF (@clean = 1)
    EXEC ('dbcc dropcleanbuffers dbcc freeproccache');

-- Keep the statistics output quiet while the test inputs are prepared.
SET STATISTICS IO OFF;
SET STATISTICS TIME OFF;

-- Pick two distinct random word ids from the generated data
-- (@word1 and @word2 are used by the test queries that follow).
DECLARE @word1 int, @word2 int;

SELECT TOP (1) @word1 = word_id
FROM words_in_titles
ORDER BY NEWID();

SELECT TOP (1) @word2 = word_id
FROM words_in_titles
WHERE word_id <> @word1
ORDER BY NEWID();

-- The same two ids as a table variable, for the query that joins to a word list.
DECLARE @words TABLE (id int);
INSERT INTO @words (id) VALUES (@word1), (@word2);
DECLARE @wordsCnt int = (SELECT COUNT(*) FROM @words);

-- From here on, report I/O and timing for every statement.
SET STATISTICS IO ON;
SET STATISTICS TIME ON;

-- Variant 1: seek the first word, then probe for the second word with EXISTS.
IF (@clean = 1)
    EXEC ('dbcc dropcleanbuffers dbcc freeproccache');

WITH matched AS (
    SELECT first_word.product_id
    FROM words_in_titles AS first_word
    WHERE first_word.word_id = @word1
      AND EXISTS (
              SELECT *
              FROM words_in_titles AS other_word
              WHERE other_word.word_id = @word2
                AND other_word.product_id = first_word.product_id
          )
      -- expand with more EXISTS clauses
)
SELECT matched.product_id, p.*
FROM matched
INNER JOIN products AS p
    ON p.id = matched.product_id;

-- Variant 2: INTERSECT the product ids of the two words.
IF (@clean = 1)
    EXEC ('dbcc dropcleanbuffers dbcc freeproccache');

WITH matched AS (
    SELECT first_word.product_id
    FROM words_in_titles AS first_word
    WHERE first_word.word_id = @word1

    INTERSECT

    SELECT second_word.product_id
    FROM words_in_titles AS second_word
    WHERE second_word.word_id = @word2
)
SELECT matched.product_id, p.*
FROM matched
INNER JOIN products AS p
    ON p.id = matched.product_id;

-- Variant 3: the query from the question -- join to the @words list and keep
-- products whose match count equals the number of requested words.
IF (@clean = 1)
    EXEC ('dbcc dropcleanbuffers dbcc freeproccache');

SELECT *
FROM products
WHERE id IN (
    SELECT wit.product_id
    FROM WORDS_IN_TITLES AS wit
    INNER JOIN @words AS wrds
        ON wrds.id = wit.word_id
    GROUP BY wit.product_id
    HAVING COUNT(*) = @wordsCnt
);

-- Variant 4: filter on both word ids, group per product, and keep products
-- that matched two distinct words.
IF (@clean = 1)
    EXEC ('dbcc dropcleanbuffers dbcc freeproccache');

SELECT prod.*
FROM products AS prod
INNER JOIN (
    SELECT p.id
    FROM products AS p
    INNER JOIN words_in_titles AS wit
        ON wit.product_id = p.id
    WHERE wit.word_id IN (@word1, @word2)
    GROUP BY p.id
    HAVING COUNT(DISTINCT wit.word_id) = 2
) AS q
    ON q.id = prod.id;
  • , , .
  • words_in_titles , _,
  • words_in_titles , tempdb /.
  • Joe .

. , Table SQL Server Execution Times . 4 , .

Table 'products'. Scan count 0, logical reads 30, physical reads 0, read-ahead reads 51, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'words_in_titles'. Scan count 2, logical reads 8, physical reads 2, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table '#4D5F7D71'. Scan count 1, logical reads 1, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

 SQL Server Execution Times:
   CPU time = 0 ms,  elapsed time = 47 ms.

( , ).
, , , - , , () .

, .

+1
-- Return every product whose title is linked to both word 154 and word 172.
SELECT prod.*
FROM products AS prod
INNER JOIN (
    -- Product ids that matched two distinct ids out of the listed words.
    SELECT p.id
    FROM products AS p
    INNER JOIN words_in_titles AS wit
        ON wit.product_id = p.id
    WHERE wit.word_id IN (154, 172)
    GROUP BY p.id
    HAVING COUNT(DISTINCT wit.word_id) = 2
) AS q
    ON q.id = prod.id
+2

Source: https://habr.com/ru/post/1792333/


All Articles