You mean scrapy.spiders.Rule, which is most often used in scrapy.CrawlSpider.
They do pretty much what their names say; in other words, they act as a kind of middleware between the moment a link is extracted and the moment it is processed/downloaded.
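For context, here is a minimal sketch of how both hooks plug into a Rule inside a CrawlSpider; the spider name, start URL, and parse_item callback are placeholders, and the two hook methods referenced by name are the ones discussed below. Both parameters accept either a callable or the name of a spider method:

from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule

class MySpider(CrawlSpider):
    name = 'my_spider'  # placeholder name
    start_urls = ['https://example.com']  # placeholder url

    rules = (
        Rule(
            LinkExtractor(),
            callback='parse_item',
            process_links='process_links',  # hook defined below
            process_request='process_req',  # hook defined below
        ),
    )

    def parse_item(self, response):
        # placeholder callback for pages matched by the rule
        yield {'url': response.url}

(Note that in Scrapy 2.0+ the process_request callable also receives the response that originated the request as a second argument.)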
process_links sits between the moment the links are extracted and the moment they are turned into requests. There are a few use cases here, such as filtering out links you don't want to follow or fixing extracted URLs manually. For example:
def process_links(self, links):
    for link in links:
        if 'foo' in link.text:
            continue  # skip every link whose text contains "foo"
        link.url = link.url + '/'  # fix the url to avoid an unnecessary redirect
        yield link
process_request sits between the moment a request is created and the moment it is downloaded. It shares some use cases with process_links, but it can also do other things, such as modifying headers (e.g. cookies) or changing the callback depending on the URL. For example:
def process_req(self, req):
    # example 1: attach a cookie header to every request
    req = req.replace(headers={'Cookie': 'foobar'})
    return req

    # example 2 (alternative to the above): route to a different
    # callback depending on the url
    if 'foo' in req.url:
        return req.replace(callback=self.parse_foo)
    elif 'bar' in req.url:
        return req.replace(callback=self.parse_bar)
    return req
You probably won't use them often, but these two can be really convenient shortcuts on some occasions.