From e752455b6f1b5913305a9014f3c0b535ba054069 Mon Sep 17 00:00:00 2001 From: Ovidiu Negrut Date: Mon, 11 Mar 2013 10:28:37 +0200 Subject: [PATCH 1/7] Digest auth: case insensitive replacement of 'digest ' string with '' from WWW-Authenticate --- requests/auth.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requests/auth.py b/requests/auth.py index 805f2400..16546f29 100644 --- a/requests/auth.py +++ b/requests/auth.py @@ -8,6 +8,7 @@ This module contains the authentication handlers for Requests. """ import os +import re import time import hashlib import logging @@ -151,7 +152,7 @@ class HTTPDigestAuth(AuthBase): if 'digest' in s_auth.lower() and num_401_calls < 2: setattr(self, 'num_401_calls', num_401_calls + 1) - self.chal = parse_dict_header(s_auth.replace('Digest ', '')) + self.chal = parse_dict_header(re.sub(r'digest ', '', s_auth, flags=re.IGNORECASE)) # Consume content and release the original connection # to allow our new request to reuse the same one. From 9d16c7276791fb7e5e75f2fbcddec95fa8f15b2b Mon Sep 17 00:00:00 2001 From: Ovidiu Negrut Date: Mon, 25 Mar 2013 12:28:25 +0200 Subject: [PATCH 2/7] compiled regex expression in digest auth, this also works in python 2.6.x --- requests/auth.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requests/auth.py b/requests/auth.py index 16546f29..a6405d32 100644 --- a/requests/auth.py +++ b/requests/auth.py @@ -152,7 +152,8 @@ class HTTPDigestAuth(AuthBase): if 'digest' in s_auth.lower() and num_401_calls < 2: setattr(self, 'num_401_calls', num_401_calls + 1) - self.chal = parse_dict_header(re.sub(r'digest ', '', s_auth, flags=re.IGNORECASE)) + pat = re.compile(r'digest ', flags=re.IGNORECASE) + self.chal = parse_dict_header(pat.sub('', s_auth)) # Consume content and release the original connection # to allow our new request to reuse the same one. 
From 6d6252aa9fc4bb38d7f68073b41092c31ddb146b Mon Sep 17 00:00:00 2001 From: schlamar Date: Wed, 10 Apr 2013 08:00:36 +0200 Subject: [PATCH 3/7] Update urllib3 to 71f84f9. --- requests/packages/urllib3/filepost.py | 2 +- requests/packages/urllib3/response.py | 79 ++++++++++++++++++--------- 2 files changed, 54 insertions(+), 27 deletions(-) diff --git a/requests/packages/urllib3/filepost.py b/requests/packages/urllib3/filepost.py index 8d900bd8..470309a0 100644 --- a/requests/packages/urllib3/filepost.py +++ b/requests/packages/urllib3/filepost.py @@ -93,6 +93,6 @@ def encode_multipart_formdata(fields, boundary=None): body.write(b('--%s--\r\n' % (boundary))) - content_type = b('multipart/form-data; boundary=%s' % boundary) + content_type = str('multipart/form-data; boundary=%s' % boundary) return body.getvalue(), content_type diff --git a/requests/packages/urllib3/response.py b/requests/packages/urllib3/response.py index 0761dc03..2fa40788 100644 --- a/requests/packages/urllib3/response.py +++ b/requests/packages/urllib3/response.py @@ -4,29 +4,48 @@ # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php -import gzip + import logging import zlib -from io import BytesIO - from .exceptions import DecodeError -from .packages.six import string_types as basestring +from .packages.six import string_types as basestring, binary_type log = logging.getLogger(__name__) -def decode_gzip(data): - gzipper = gzip.GzipFile(fileobj=BytesIO(data)) - return gzipper.read() +class DeflateDecoder(object): + + def __init__(self): + self._first_try = True + self._data = binary_type() + self._obj = zlib.decompressobj() + + def __getattr__(self, name): + return getattr(self._obj, name) + + def decompress(self, data): + if not self._first_try: + return self._obj.decompress(data) + + self._data += data + try: + return self._obj.decompress(data) + except zlib.error: + self._first_try = False + self._obj = 
zlib.decompressobj(-zlib.MAX_WBITS) + try: + return self.decompress(self._data) + finally: + self._data = None -def decode_deflate(data): - try: - return zlib.decompress(data) - except zlib.error: - return zlib.decompress(data, -zlib.MAX_WBITS) +def _get_decoder(mode): + if mode == 'gzip': + return zlib.decompressobj(16 + zlib.MAX_WBITS) + + return DeflateDecoder() class HTTPResponse(object): @@ -52,10 +71,7 @@ class HTTPResponse(object): otherwise unused. """ - CONTENT_DECODERS = { - 'gzip': decode_gzip, - 'deflate': decode_deflate, - } + CONTENT_DECODERS = ['gzip', 'deflate'] def __init__(self, body='', headers=None, status=0, version=0, reason=None, strict=0, preload_content=True, decode_content=True, @@ -65,8 +81,9 @@ class HTTPResponse(object): self.version = version self.reason = reason self.strict = strict + self.decode_content = decode_content - self._decode_content = decode_content + self._decoder = None self._body = body if body and isinstance(body, basestring) else None self._fp = None self._original_response = original_response @@ -115,13 +132,13 @@ class HTTPResponse(object): parameters: ``decode_content`` and ``cache_content``. :param amt: - How much of the content to read. If specified, decoding and caching - is skipped because we can't decode partial content nor does it make - sense to cache partial content as the full response. + How much of the content to read. If specified, caching is skipped + because it doesn't make sense to cache partial content as the full + response. :param decode_content: If True, will attempt to decode the body based on the - 'content-encoding' header. (Overridden if ``amt`` is set.) + 'content-encoding' header. 
:param cache_content: If True, will save the returned data such that the same result is @@ -133,18 +150,24 @@ class HTTPResponse(object): # Note: content-encoding value should be case-insensitive, per RFC 2616 # Section 3.5 content_encoding = self.headers.get('content-encoding', '').lower() - decoder = self.CONTENT_DECODERS.get(content_encoding) + if self._decoder is None: + if content_encoding in self.CONTENT_DECODERS: + self._decoder = _get_decoder(content_encoding) if decode_content is None: - decode_content = self._decode_content + decode_content = self.decode_content if self._fp is None: return + flush_decoder = False + try: if amt is None: # cStringIO doesn't like amt=None data = self._fp.read() + flush_decoder = True else: + cache_content = False data = self._fp.read(amt) if amt != 0 and not data: # Platform-specific: Buggy versions of Python. # Close the connection when no data is returned @@ -155,15 +178,19 @@ class HTTPResponse(object): # properly close the connection in all cases. There is no harm # in redundantly calling close. self._fp.close() - return data + flush_decoder = True try: - if decode_content and decoder: - data = decoder(data) + if decode_content and self._decoder: + data = self._decoder.decompress(data) except (IOError, zlib.error): raise DecodeError("Received response with content-encoding: %s, but " "failed to decode it." % content_encoding) + if flush_decoder and self._decoder: + buf = self._decoder.decompress(binary_type()) + data += buf + self._decoder.flush() + if cache_content: self._body = data From 59f916ca4a6a8e55528988cfc1bee445e71c4602 Mon Sep 17 00:00:00 2001 From: schlamar Date: Wed, 10 Apr 2013 08:08:33 +0200 Subject: [PATCH 4/7] Use streaming decompression feature of urllib3. 
--- requests/models.py | 6 +++--- requests/utils.py | 42 ------------------------------------------ 2 files changed, 3 insertions(+), 45 deletions(-) diff --git a/requests/models.py b/requests/models.py index 10528775..4cf69b57 100644 --- a/requests/models.py +++ b/requests/models.py @@ -20,7 +20,7 @@ from .cookies import cookiejar_from_dict, get_cookie_header from .packages.urllib3.filepost import encode_multipart_formdata from .exceptions import HTTPError, RequestException, MissingSchema, InvalidURL from .utils import ( - stream_untransfer, guess_filename, get_auth_from_url, requote_uri, + guess_filename, get_auth_from_url, requote_uri, stream_decode_response_unicode, to_key_val_list, parse_header_links, iter_slices, guess_json_utf, super_len) from .compat import ( @@ -528,13 +528,13 @@ class Response(object): def generate(): while 1: - chunk = self.raw.read(chunk_size) + chunk = self.raw.read(chunk_size, decode_content=True) if not chunk: break yield chunk self._content_consumed = True - gen = stream_untransfer(generate(), self) + gen = generate() if decode_unicode: gen = stream_decode_response_unicode(gen, self) diff --git a/requests/utils.py b/requests/utils.py index a2d434ef..68f3e625 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -346,48 +346,6 @@ def get_unicode_from_response(r): return r.content -def stream_decompress(iterator, mode='gzip'): - """Stream decodes an iterator over compressed data - - :param iterator: An iterator over compressed data - :param mode: 'gzip' or 'deflate' - :return: An iterator over decompressed data - """ - - if mode not in ['gzip', 'deflate']: - raise ValueError('stream_decompress mode must be gzip or deflate') - - zlib_mode = 16 + zlib.MAX_WBITS if mode == 'gzip' else -zlib.MAX_WBITS - dec = zlib.decompressobj(zlib_mode) - try: - for chunk in iterator: - rv = dec.decompress(chunk) - if rv: - yield rv - except zlib.error: - # If there was an error decompressing, just return the raw chunk - yield chunk - # Continue to 
return the rest of the raw data - for chunk in iterator: - yield chunk - else: - # Make sure everything has been returned from the decompression object - buf = dec.decompress(bytes()) - rv = buf + dec.flush() - if rv: - yield rv - - -def stream_untransfer(gen, resp): - ce = resp.headers.get('content-encoding', '').lower() - if 'gzip' in ce: - gen = stream_decompress(gen, mode='gzip') - elif 'deflate' in ce: - gen = stream_decompress(gen, mode='deflate') - - return gen - - # The unreserved URI characters (RFC 3986) UNRESERVED_SET = frozenset( "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" From 4c3432b759ccd58ebdaf443167cca07ff17c63ea Mon Sep 17 00:00:00 2001 From: schlamar Date: Wed, 10 Apr 2013 08:20:34 +0200 Subject: [PATCH 5/7] Fix test with StringIO. --- test_requests.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test_requests.py b/test_requests.py index 5a0ed980..5440347a 100644 --- a/test_requests.py +++ b/test_requests.py @@ -380,6 +380,11 @@ class RequestsTestCase(unittest.TestCase): def test_response_is_iterable(self): r = requests.Response() io = StringIO.StringIO('abc') + read_ = io.read + + def read_mock(amt, decode_content=None): + return read_(amt) + setattr(io, 'read', read_mock) r.raw = io self.assertTrue(next(iter(r))) io.close() From 17ecb6891c515511ce33250a3f39f752a9c59b55 Mon Sep 17 00:00:00 2001 From: Sorin Sbarnea Date: Wed, 10 Apr 2013 16:15:28 +0100 Subject: [PATCH 6/7] * Documented the logging, requested in #1297 * Added build directory and *.egg to .gitignore * Added sphinx as setup requirement in order to be able to build documentation with `python setup.py build_sphinx` modified: .gitignore modified: docs/api.rst modified: setup.py --- .gitignore | 2 ++ docs/api.rst | 20 +++++++++++++++++--- setup.py | 1 + 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index a88418db..e1ea2a60 100644 --- a/.gitignore +++ b/.gitignore @@ -7,10 +7,12 @@ pylint.txt toy.py violations.pyflakes.txt 
cover/ +build/ docs/_build requests.egg-info/ *.pyc *.swp +*.egg env/ .workon diff --git a/docs/api.rst b/docs/api.rst index 771d7a82..dce19d3e 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -165,9 +165,23 @@ API Changes :: - # Verbosity should now be configured with logging - my_config = {'verbose': sys.stderr} - requests.get('http://httpbin.org/headers', config=my_config) # bad! + import requests + import logging + + # these two lines enable debugging at httplib level (requests->urllib3->httplib) + # you will see the REQUEST, including HEADERS and DATA, and RESPONSE with HEADERS but without DATA. + # the only thing missing will be the response.body which is not logged. + import httplib + httplib.HTTPConnection.debuglevel = 1 + + logging.basicConfig() # you need to initialize logging, otherwise you will not see anything from requests + logging.getLogger().setLevel(logging.DEBUG) + requests_log = logging.getLogger("requests") + requests_log.setLevel(logging.DEBUG) + requests_log.propagate = True + + requests.get('http://httpbin.org/headers') + Licensing diff --git a/setup.py b/setup.py index 52dfe025..64e305bc 100755 --- a/setup.py +++ b/setup.py @@ -39,6 +39,7 @@ setup( package_dir={'requests': 'requests'}, include_package_data=True, install_requires=requires, + setup_requires=['sphinx'], license=open('LICENSE').read(), zip_safe=False, classifiers=( From fb49481ddfae75840271bbeff5987a791170d76d Mon Sep 17 00:00:00 2001 From: Sorin Sbarnea Date: Wed, 10 Apr 2013 16:46:05 +0100 Subject: [PATCH 7/7] Updated documentation indicating that logging is done via requests.packages.urllib3 instead of requests. 
modified: docs/api.rst --- docs/api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api.rst b/docs/api.rst index dce19d3e..89733a29 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -176,7 +176,7 @@ API Changes logging.basicConfig() # you need to initialize logging, otherwise you will not see anything from requests logging.getLogger().setLevel(logging.DEBUG) - requests_log = logging.getLogger("requests") + requests_log = logging.getLogger("requests.packages.urllib3") requests_log.setLevel(logging.DEBUG) requests_log.propagate = True