How to deal with urllib2 deflated answer?

I am currently using the following code to unpack urllib2 gzip response:

# Fetch the response and read the entire body into memory.
# `req` is not defined in this snippet; it must be built beforehand.
opener = urllib2.build_opener()
response = opener.open(req)
data = response.read()
# Only a gzip Content-Encoding is handled here; for any other value this
# snippet leaves `data` as read and never assigns `html`.
if response.headers.get('content-encoding', '') == 'gzip':
    # GzipFile reads from a file-like object, so wrap the raw body first.
    data = StringIO.StringIO(data)
    gzipper = gzip.GzipFile(fileobj=data)
    html = gzipper.read()

Does it also handle a deflated response, or do I need to write separate code to handle a deflated response?

+3
source share
4 answers

There is a better way described below:

The author explains how to decompress the response chunk by chunk rather than loading it into memory all at once. This is the preferred method when larger files are involved.

Also found this useful site for testing:

+4
source

# With its default settings zlib.decompress() expects a zlib-wrapped stream
# (header plus checksum trailer) and raises zlib.error for anything else.
if response.headers.get('content-encoding', '') == 'deflate':
    html = zlib.decompress(response.read())

If that fails, here is another way:

# A negative wbits value tells zlib to treat the input as a raw deflate
# stream, i.e. one without the zlib header and trailer.
if response.headers.get('content-encoding', '') == 'deflate':
    html = zlib.decompressobj(-zlib.MAX_WBITS).decompress(response.read())
+4

This is how urllib3 handles it:

class DeflateDecoder(object):
    """Incremental decoder for ``deflate``-encoded bodies.

    The data is first fed to a zlib-format decompressor. If that raises
    ``zlib.error`` before any output has been produced, everything received
    so far is replayed through a raw-deflate decompressor (negative
    ``wbits``), which is then used for the rest of the stream.

    Input is buffered only until the format is settled: as soon as the zlib
    decompressor yields output, the buffer is dropped so that large bodies
    are not retained in memory and a later error is not mistaken for a
    format mismatch.
    """

    def __init__(self):
        # True while it is still undecided which format the stream uses.
        self._first_try = True
        # Raw input received so far; kept only while the format is undecided
        # so it can be replayed through the raw-deflate decompressor.
        self._data = b""
        self._obj = zlib.decompressobj()

    def __getattr__(self, name):
        # Invoked only for attributes missing on the decoder itself; forwards
        # them (e.g. ``flush``, ``unused_data``) to the active decompressor.
        return getattr(self._obj, name)

    def decompress(self, data):
        """Decompress one chunk of input and return the decoded bytes.

        Empty input is returned unchanged. Raises ``zlib.error`` if the data
        is valid in neither the zlib nor the raw-deflate format.
        """
        if not data:
            return data

        if not self._first_try:
            return self._obj.decompress(data)

        self._data += data
        try:
            decompressed = self._obj.decompress(data)
        except zlib.error:
            # Not a zlib stream: switch to raw deflate and replay the buffer.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            # Release the buffer even if the replay itself fails.
            buffered, self._data = self._data, None
            return self._obj.decompress(buffered)

        if decompressed:
            # Output was produced, so the zlib format is confirmed; stop
            # buffering and use the fast path from now on.
            self._first_try = False
            self._data = None
        return decompressed


class GzipDecoder(object):
    """Incremental decoder for gzip-encoded bodies.

    Wraps a ``zlib`` decompression object created with
    ``16 + zlib.MAX_WBITS`` and forwards unknown attributes to it.
    """

    def __init__(self):
        gzip_wbits = 16 + zlib.MAX_WBITS
        self._obj = zlib.decompressobj(gzip_wbits)

    def __getattr__(self, name):
        # Reached only for attributes not defined on the decoder itself.
        wrapped = self._obj
        return getattr(wrapped, name)

    def decompress(self, data):
        """Return the decoded bytes for *data*; empty input is passed through."""
        if data:
            return self._obj.decompress(data)
        return data
+1

According to the HTTP specification (http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.3):

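If no Accept-Encoding field is present in a request, the server MAY assume that the client will accept any content coding. In this case, if "identity" is one of the available content-codings, then the server SHOULD use the "identity" content-coding, unless it has additional information that a different content-coding is meaningful to the client.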

, , . , .

0

Source: https://habr.com/ru/post/1725011/


All Articles