@paul trmbrth, thanks. Scrapy cannot follow this link as-is, because the link in the 'a' tag of the html is a javascript: call:
<a href='javascript:gtm.traceProductClick("/en-sa/mobiles/smartphones/samsung-galaxy-s7-32gb-dual-sim-lte-gold-188024")'>
It can be handled like this:
def _process_value(value):
m = re.search('javascript:gtm.traceProductClick\("(.*?)"', value)
if m:
return m.group(1)
# Crawl rules (presumably the ``rules`` attribute of a CrawlSpider
# subclass -- the enclosing class is not visible here; confirm placement).
rules = (
    # Links inside elements matching the ``.resultspagenum`` CSS selector.
    # No callback is given for this rule.
    Rule(LinkExtractor(restrict_css='.resultspagenum')),
    # Product links: ``_process_value`` first unwraps the real path from the
    # ``javascript:gtm.traceProductClick("...")`` href; links matching the
    # ``allow`` pattern are handed to ``parse_product``.
    Rule(
        LinkExtractor(
            # Raw string; "/" needs no escaping in a Python regex, so this
            # matches the same URLs as the former '\/mobiles\/...' pattern.
            allow=(r'/mobiles/smartphones/[a-zA-Z0-9_.-]*',),
            process_value=_process_value,
        ),
        callback='parse_product',
    ),
)
The real URL is extracted from the 'href' attribute of the 'a' tag, and the link is then followed as usual.
@paul trmbrth