Unable to add custom request headers in PyQt4

I am writing a Python program that uses PyQt4 to load a URL and process its contents / DOM (after changing it using JavaScript). I also need to send special headers when requesting this page.

The code below works, except that it cannot get the URL using my custom headers, which I define using QNetworkRequest.

import sys import signal from optparse import OptionParser from PyQt4.QtCore import * from PyQt4.QtGui import * from PyQt4.QtWebKit import QWebPage from PyQt4.QtNetwork import QNetworkAccessManager, QNetworkRequest, QNetworkReply class MyNetworkAccessManager(QNetworkAccessManager): def __init__(self, url): QNetworkAccessManager.__init__(self) self.request = QNetworkRequest(QUrl(url)) self.request.setRawHeader('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US)') self.request.setRawHeader("Accept-Language","en-us,en;q=0.5"); self.request.setRawHeader("Accept-Charset","ISO-8859-1,utf-8;q=0.7,*;q=0.7"); self.request.setRawHeader("Connection","keep-alive"); self.reply = self.get(self.request) def createRequest(self, operation, request, data): print "mymanager handles ", request.url() return QNetworkAccessManager.createRequest( self, operation, request, data ) class Crawler( QWebPage ): def __init__(self, url, file): QWebPage.__init__( self ) self._url = url self._file = file manager = MyNetworkAccessManager(url) self.setNetworkAccessManager(manager) def userAgentForUrl(self, url): return "Mozilla/122.0 (X11; Linux x86_64; rv:7.0.1) Gecko/20100101 Firefox/7.0.1" def crawl( self ): signal.signal( signal.SIGINT, signal.SIG_DFL ) self.connect( self, SIGNAL( 'loadFinished(bool)' ), self._finished_loading ) self.mainFrame().load( QUrl( self._url ) ) def _finished_loading( self, result ): file = open( self._file, 'w' ) file.write( self.mainFrame().toHtml() ) file.close() sys.exit( 0 ) def main(): app = QApplication( sys.argv ) options = get_cmd_options() crawler = Crawler( options.url, options.file ) crawler.crawl() sys.exit( app.exec_() ) def get_cmd_options(): """ gets and validates the input from the command line """ usage = "usage: %prog [options] args" parser = OptionParser(usage) parser.add_option('-u', '--url', dest = 'url', help = 'URL to fetch data from') parser.add_option('-f', '--file', dest = 'file', help = 'Local file path to save data to') 
(options,args) = parser.parse_args() if not options.url: print 'You must specify an URL.',sys.argv[0],'--help for more details' exit(1) if not options.file: print 'You must specify a destination file.',sys.argv[0],'--help for more details' exit(1) return options if __name__ == '__main__': main() 

Can someone tell me why it is not picking up my header settings?

+6
source share
1 answer

Move the setRawHeader calls inside createRequest and it will work. You can send a request here for testing.

 def __init__(self, url):
     """Initialise the manager and issue a plain GET for *url*.

     No headers are set here; the reply is kept on ``self.reply``.
     """
     QNetworkAccessManager.__init__(self)
     self.reply = self.get(QNetworkRequest(QUrl(url)))

 def createRequest(self, operation, request, data):
     """Print the request URL, set the custom raw headers on *request*,
     then defer to the base implementation."""
     print("mymanager handles ", request.url())
     custom_headers = (
         ('User-Agent', 'Mozilla/5.001 (windows; U; NT4.0; en-US; rv:1.0) Gecko/25250101'),
         ("Accept-Language", "en-us,en;q=0.5"),
         ("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.7"),
         ("Connection", "keep-alive"),
     )
     # Headers are applied to the request passed to this call.
     for header_name, header_value in custom_headers:
         request.setRawHeader(header_name, header_value)
     return QNetworkAccessManager.createRequest(self, operation, request, data)

Note that I also changed User-agent to User-Agent.

+4
source

Source: https://habr.com/ru/post/955190/


All Articles