You can avoid the need to use Splash
in the first place and make the corresponding GET request for the phone number yourself. Working spider:
import json
import re

import scrapy


class OlxSpider(scrapy.Spider):
    """Crawl olx.pt property listings and print each ad's phone number.

    Instead of rendering the page's JavaScript, the spider pulls the ad ID
    out of the ad URL and requests the site's AJAX phone endpoint directly.
    """

    name = "olx"
    # NOTE(review): presumably consumed by a user-agent rotation middleware
    # configured in the project settings -- confirm.
    rotate_user_agent = True
    allowed_domains = ["olx.pt"]
    start_urls = [
        "https://olx.pt/imoveis/"
    ]

    def parse(self, response):
        """Yield a request for every ad link and every pager link on the page."""
        for href in response.css('.link.linkWithHash.detailsLink::attr(href)'):
            url = response.urljoin(href.extract())
            yield scrapy.Request(url, callback=self.parse_house_contents)

        for next_page in response.css('.pager .br3.brc8::attr(href)'):
            url = response.urljoin(next_page.extract())
            yield scrapy.Request(url, self.parse)

    def parse_house_contents(self, response):
        """Build the phone endpoint URL from the ad URL and request it.

        Ads whose URL does not contain an ``ID<token>.`` segment are logged
        and skipped instead of raising ``AttributeError``.
        """
        match = re.search(r"ID(\w+)\.", response.url)
        if match is None:
            self.logger.warning("No property ID found in URL: %s", response.url)
            return

        property_id = match.group(1)
        phone_url = "https://olx.pt/ajax/misc/contact/phone/%s/" % property_id
        yield scrapy.Request(phone_url, callback=self.parse_phone)

    def parse_phone(self, response):
        """Print the ``"value"`` field of the JSON phone response.

        A body that is not valid JSON, or that lacks the ``"value"`` key, is
        logged and skipped rather than aborting the callback with a traceback.
        """
        try:
            phone_number = json.loads(response.body)["value"]
        except (ValueError, KeyError, TypeError):
            # ValueError covers json.JSONDecodeError; KeyError/TypeError cover
            # a payload that is valid JSON but not a mapping with "value".
            self.logger.warning(
                "No phone number in response from %s", response.url
            )
            return

        print(phone_number)
If there is anything else on this “dynamic” website, see if Splash is really enough, and if not, check out browser automation and Selenium.
source share