From 9c6ffc530319d3b6d75bb160267d76f0224b0743 Mon Sep 17 00:00:00 2001 From: Chase Sterling Date: Fri, 30 Dec 2011 22:18:08 -0500 Subject: [PATCH] Refactor stream_decode_gzip and stream_decode_deflate into one function, stream_decompress. stream_decompress will now iterate over the raw data if there is a problem with decompression. Remove gzip decoding from Response.content, as urllib3 was doing it anyway. --- requests/models.py | 17 +++++------------ requests/utils.py | 40 +++++++++++++++++++++------------------- 2 files changed, 26 insertions(+), 31 deletions(-) diff --git a/requests/models.py b/requests/models.py index 49664963..17403a69 100644 --- a/requests/models.py +++ b/requests/models.py @@ -29,8 +29,8 @@ from .exceptions import ( ConnectionError, HTTPError, RequestException, Timeout, TooManyRedirects, URLRequired, SSLError) from .utils import ( - get_encoding_from_headers, stream_decode_response_unicode, decode_gzip, - stream_decode_gzip, stream_decode_deflate, guess_filename, requote_path) + get_encoding_from_headers, stream_decode_response_unicode, + stream_decompress, guess_filename, requote_path) REDIRECT_STATI = (codes.moved, codes.found, codes.other, codes.temporary_moved) @@ -488,7 +488,7 @@ class Request(object): redirect=False, assert_same_host=False, preload_content=False, - decode_content=False, + decode_content=True, retries=self.config.get('max_retries', 0), timeout=self.timeout, ) @@ -614,9 +614,9 @@ class Response(object): gen = generate() if 'gzip' in self.headers.get('content-encoding', ''): - gen = stream_decode_gzip(gen) + gen = stream_decompress(gen, mode='gzip') elif 'deflate' in self.headers.get('content-encoding', ''): - gen = stream_decode_deflate(gen) + gen = stream_decompress(gen, mode='deflate') if decode_unicode is None: decode_unicode = self.config.get('decode_unicode') @@ -675,13 +675,6 @@ class Response(object): content = self._content - # Decode GZip'd content. 
- if 'gzip' in self.headers.get('content-encoding', ''): - try: - content = decode_gzip(self._content) - except zlib.error: - pass - # Decode unicode content. if self.config.get('decode_unicode'): diff --git a/requests/utils.py b/requests/utils.py index 28b198ad..c7ab0a4e 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -354,36 +354,38 @@ def decode_gzip(content): return zlib.decompress(content, 16 + zlib.MAX_WBITS) -def stream_decode_gzip(iterator): - """Stream decodes a gzip-encoded iterator""" +def stream_decompress(iterator, mode='gzip'): + """ + Stream decodes an iterator over compressed data + + :param iterator: An iterator over compressed data + :param mode: 'gzip' or 'deflate' + :return: An iterator over decompressed data + """ + + if mode not in ['gzip', 'deflate']: + raise ValueError('stream_decompress mode must be gzip or deflate') + + zlib_mode = 16 + zlib.MAX_WBITS if mode == 'gzip' else -zlib.MAX_WBITS + dec = zlib.decompressobj(zlib_mode) try: - dec = zlib.decompressobj(16 + zlib.MAX_WBITS) for chunk in iterator: rv = dec.decompress(chunk) if rv: yield rv - buf = dec.decompress('') - rv = buf + dec.flush() - if rv: - yield rv except zlib.error: - pass - - -def stream_decode_deflate(iterator): - """Stream decodes a deflate-encoded iterator""" - try: - dec = zlib.decompressobj(-zlib.MAX_WBITS) + # If there was an error decompressing, just return the raw chunk + yield chunk + # Continue to return the rest of the raw data for chunk in iterator: - rv = dec.decompress(chunk) - if rv: - yield rv + yield chunk + else: + # Make sure everything has been returned from the decompression object buf = dec.decompress('') rv = buf + dec.flush() if rv: yield rv - except zlib.error: - pass + def requote_path(path): """Re-quote the given URL path component.