How do I stop the reactor after running several spiders in the same Scrapy process?

I have several different spiders and want to run them all at once. Based on this and this, I can run multiple spiders in the same process. However, I do not know how to set up a signal system that stops the reactor once all the spiders have finished.

I tried:

crawler.signals.connect(reactor.stop, signal=signals.spider_closed)

and

crawler.signals.connect(reactor.stop, signal=signals.spider_idle)

In both cases, the reactor stops as soon as the first crawler closes. What I want, of course, is for the reactor to stop only after all the spiders have finished.

Can someone show me how to do the trick?

+4
source share
1 answer

Keep a count of the running crawlers and stop the reactor only when the last one has closed, for example:

from twisted.internet import reactor
from scrapy.crawler import Crawler
from scrapy import log, signals
from scrapy.utils.project import get_project_settings

class ReactorControl:
    """Count the crawlers still running and stop the reactor after the last one.

    Call ``add_crawler`` once per crawler that is started and arrange for
    ``remove_crawler`` to be called once per crawler that finishes.
    """

    def __init__(self):
        # Number of crawlers registered and not yet removed.
        self.crawlers_running = 0

    def add_crawler(self):
        """Register one more running crawler."""
        self.crawlers_running = self.crawlers_running + 1

    def remove_crawler(self):
        """Unregister one crawler; stop the reactor when none remain."""
        self.crawlers_running = self.crawlers_running - 1
        if self.crawlers_running != 0:
            return
        reactor.stop()

def setup_crawler(spider_name):
    """Create, configure and start a crawler for the spider named *spider_name*.

    Uses the module-level ``settings`` and registers the crawler with the
    module-level ``reactor_control``, whose ``remove_crawler`` is connected to
    this crawler's ``spider_closed`` signal.
    """
    spider_crawler = Crawler(settings)
    spider_crawler.configure()
    # Hook up the shutdown bookkeeping before the crawl is scheduled.
    spider_crawler.signals.connect(
        reactor_control.remove_crawler,
        signal=signals.spider_closed,
    )
    spider_crawler.crawl(spider_crawler.spiders.create(spider_name))
    reactor_control.add_crawler()
    spider_crawler.start()

# Shared counter that stops the reactor once every registered crawler has closed.
reactor_control = ReactorControl()
log.start()
settings = get_project_settings()
# This crawler is only used to list the project's spider names.
crawler = Crawler(settings)

for spider_name in crawler.spiders.list():
    setup_crawler(spider_name)

# With no spiders, nothing would ever fire spider_closed and reactor.stop()
# would never be called, so only enter the reactor loop when at least one
# crawler was registered.
if reactor_control.crawlers_running:
    reactor.run()

, Scrapy .

, , !

: . . @Jean-Robert.

+6

Source: https://habr.com/ru/post/1534747/


All Articles