Source: http://crawl.blog/scrapy-loop/ (running a Scrapy spider in a repeating loop)
# NOTE(review): MySpider is referenced below but never imported or defined in
# this file — it must be imported from the project's spiders module before
# this script can run, otherwise _crawl(None, MySpider) raises NameError.
import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from twisted.internet import reactor
from twisted.internet.task import deferLater


def sleep(result, *args, seconds):
    """Non-blocking sleep: return a Deferred that fires after *seconds*.

    The first positional parameter receives the previous callback's result
    when this function is chained via ``Deferred.addCallback``; it is
    intentionally ignored.  (Renamed from the original, misleading ``self``
    — this is a module-level function, not a method.)
    """
    return deferLater(reactor, seconds, lambda: None)


process = CrawlerProcess(get_project_settings())


def _crawl(result, spider):
    """Run *spider* once, wait 100 seconds, then schedule another run.

    Re-chaining ``_crawl`` onto the crawl's Deferred restarts the spider
    indefinitely without ever blocking the Twisted reactor.  *result* is
    the prior callback's value (unused); *spider* is the spider class to
    crawl.  Returns the Deferred so callers may extend the chain.
    """
    deferred = process.crawl(spider)
    deferred.addCallback(
        lambda results: print('waiting 100 seconds before restart...'))
    # sleep() receives the print()'s None result positionally; seconds is
    # keyword-only, so the delay is explicit at the call site.
    deferred.addCallback(sleep, seconds=100)
    # Schedule the next crawl after the pause, forming the infinite loop.
    deferred.addCallback(_crawl, spider)
    return deferred


_crawl(None, MySpider)  # TODO(review): import MySpider before running
process.start()  # starts the reactor; blocks until the process is stopped
(Snippet shared from the source article above.)