Option 1 - pass the ItemLoader itself to the next callback and point it at the new page by replacing loader.selector.
def parse_video_page(self, response):
    """Continue filling a loader that was handed over via the request meta.

    The loader stored under ``response.meta['loader']`` is re-pointed at a
    selector built from this response, so the XPath added below is
    evaluated against this page.

    NOTE(review): this snippet does not call ``load_item()`` or return
    anything; presumably the rest of the callback was elided - confirm.
    """
    item_loader = response.meta['loader']
    # Swap the loader's selector so subsequent XPaths run on this response.
    item_loader.selector = Selector(response)
    item_loader.add_xpath(
        'original_description',
        '//*[@id="videoInfo"]//td[@class="desc"]/h2/text()',
    )
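Option 2 - load the fields available on the listing page into an item, pass that item through the request meta, and wrap it in a new ItemLoader built from the next response - no selector swapping needed.
(full example):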
def parse(self, response):
    """Parse a listing page.

    For every ``div.video`` block that contains an ``a.hRotator`` link,
    load ``original_title`` and ``original_id`` into a ``VideoItem`` and
    yield a ``Request`` for the linked page. The partially loaded item
    travels in ``meta['item']`` and the request is handled by
    ``parse_video_page``.

    After the videos, yield one ``Request`` per pager link, handled by
    this same method.
    """
    sel = Selector(response)

    for video in sel.xpath('//div[@class="video"]'):
        hrefs = video.xpath('.//a[@class="hRotator"]/@href').extract()
        if not hrefs:
            # No detail link in this block, so there is nothing to follow.
            continue

        # Scope the loader to this single video node. Passing the whole
        # list of video nodes would make the relative XPaths below match
        # every video on the page for every item.
        loader = ItemLoader(VideoItem(), video)
        loader.add_xpath('original_title', './/u/text()')
        loader.add_xpath(
            'original_id',
            './/a[@class="hRotator"]/@href',
            re=r'movies/(\d+)/.+\.html'
        )

        yield Request(
            urlparse.urljoin(response.url, hrefs[0]),
            callback=self.parse_video_page,
            meta={'item': loader.load_item()}
        )

    # Resolve pager links against the current page, the same way the video
    # links above are resolved, rather than against a hard-coded host.
    for page in sel.xpath('//div[@class="pager"]//a/@href').extract():
        yield Request(
            urlparse.urljoin(response.url, page),
            callback=self.parse
        )
def parse_video_page(self, response):
    """Finish the item started on the listing page.

    Takes the item stored under ``response.meta['item']``, wraps it in a
    new ``ItemLoader`` bound to this response, adds the fields below by
    XPath, and returns the loaded item.
    """
    detail_loader = ItemLoader(response.meta['item'], response=response)

    # (field name, XPath) pairs, added in this order.
    field_xpaths = (
        (
            'original_description',
            '//*[@id="videoInfo"]//td[@class="desc"]/h2/text()',
        ),
        ('duration', '//*[@id="video-info"]/div[2]/text()'),
        ('tags', '//*[@id="tags"]//a/text()'),
    )
    for field_name, xpath in field_xpaths:
        detail_loader.add_xpath(field_name, xpath)

    return detail_loader.load_item()