Refactor stream_decode_gzip and stream_decode_deflate into one function, stream_decompress.

stream_decompress now falls back to yielding the raw, undecoded data if decompression fails.
Remove gzip decoding from Response.content, as urllib3 was doing it anyway.
This commit is contained in:
Chase Sterling
2011-12-30 22:18:08 -05:00
parent f80984f377
commit 9c6ffc5303
2 changed files with 26 additions and 31 deletions
+5 -12
View File
@@ -29,8 +29,8 @@ from .exceptions import (
ConnectionError, HTTPError, RequestException, Timeout, TooManyRedirects,
URLRequired, SSLError)
from .utils import (
get_encoding_from_headers, stream_decode_response_unicode, decode_gzip,
stream_decode_gzip, stream_decode_deflate, guess_filename, requote_path)
get_encoding_from_headers, stream_decode_response_unicode,
stream_decompress, guess_filename, requote_path)
REDIRECT_STATI = (codes.moved, codes.found, codes.other, codes.temporary_moved)
@@ -488,7 +488,7 @@ class Request(object):
redirect=False,
assert_same_host=False,
preload_content=False,
decode_content=False,
decode_content=True,
retries=self.config.get('max_retries', 0),
timeout=self.timeout,
)
@@ -614,9 +614,9 @@ class Response(object):
gen = generate()
if 'gzip' in self.headers.get('content-encoding', ''):
gen = stream_decode_gzip(gen)
gen = stream_decompress(gen, mode='gzip')
elif 'deflate' in self.headers.get('content-encoding', ''):
gen = stream_decode_deflate(gen)
gen = stream_decompress(gen, mode='deflate')
if decode_unicode is None:
decode_unicode = self.config.get('decode_unicode')
@@ -675,13 +675,6 @@ class Response(object):
content = self._content
# Decode GZip'd content.
if 'gzip' in self.headers.get('content-encoding', ''):
try:
content = decode_gzip(self._content)
except zlib.error:
pass
# Decode unicode content.
if self.config.get('decode_unicode'):
+21 -19
View File
@@ -354,36 +354,38 @@ def decode_gzip(content):
return zlib.decompress(content, 16 + zlib.MAX_WBITS)
def stream_decode_gzip(iterator):
"""Stream decodes a gzip-encoded iterator"""
def stream_decompress(iterator, mode='gzip'):
"""
Stream decodes an iterator over compressed data
:param iterator: An iterator over compressed data
:param mode: 'gzip' or 'deflate'
:return: An iterator over decompressed data
"""
if mode not in ['gzip', 'deflate']:
raise ValueError('stream_decompress mode must be gzip or deflate')
zlib_mode = 16 + zlib.MAX_WBITS if mode == 'gzip' else -zlib.MAX_WBITS
dec = zlib.decompressobj(zlib_mode)
try:
dec = zlib.decompressobj(16 + zlib.MAX_WBITS)
for chunk in iterator:
rv = dec.decompress(chunk)
if rv:
yield rv
buf = dec.decompress('')
rv = buf + dec.flush()
if rv:
yield rv
except zlib.error:
pass
def stream_decode_deflate(iterator):
"""Stream decodes a deflate-encoded iterator"""
try:
dec = zlib.decompressobj(-zlib.MAX_WBITS)
# If there was an error decompressing, just return the raw chunk
yield chunk
# Continue to return the rest of the raw data
for chunk in iterator:
rv = dec.decompress(chunk)
if rv:
yield rv
yield chunk
else:
# Make sure everything has been returned from the decompression object
buf = dec.decompress('')
rv = buf + dec.flush()
if rv:
yield rv
except zlib.error:
pass
def requote_path(path):
"""Re-quote the given URL path component.