From 713f56ea5325396e62ff41378f3b21037e1ecf67 Mon Sep 17 00:00:00 2001 From: Michael Hunsinger Date: Sun, 11 Sep 2016 15:50:37 -0600 Subject: [PATCH 1/2] Decode response requires encoding to be set --- requests/models.py | 18 +++++++++++++++--- requests/utils.py | 5 ----- tests/test_requests.py | 16 ++++++++++++++++ 3 files changed, 31 insertions(+), 8 deletions(-) diff --git a/requests/models.py b/requests/models.py index 072336b2..c70b56f5 100644 --- a/requests/models.py +++ b/requests/models.py @@ -9,6 +9,7 @@ This module contains the primary objects that power Requests. import collections import datetime +import codecs from io import BytesIO, UnsupportedOperation from .hooks import default_hooks @@ -580,7 +581,8 @@ class Response(object): #: Final URL location of Response. self.url = None - #: Encoding to decode with when accessing r.text. + #: Encoding to decode with when accessing r.text or + #: r.iter_content(decode_unicode=True) self.encoding = None #: A list of :class:`Response ` objects from @@ -670,8 +672,8 @@ class Response(object): chunks are received. If stream=False, data is returned as a single chunk. - If decode_unicode is True, content will be decoded using the best - available encoding based on the response. + If decode_unicode is True, the encoding must be set to a valid codec + name before iter_content is called. 
""" def generate(): @@ -708,6 +710,16 @@ class Response(object): chunks = reused_chunks if self._content_consumed else stream_chunks if decode_unicode: + if self.encoding is None: + raise TypeError( + 'encoding must be set before consuming streaming ' + 'responses' + ) + + # check encoding value here, don't wait for the generator to be + # consumed before raising an exception + codecs.lookup(self.encoding) + chunks = stream_decode_response_unicode(chunks, self) return chunks diff --git a/requests/utils.py b/requests/utils.py index 6a3ed90e..7d7a516b 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -368,11 +368,6 @@ def get_encoding_from_headers(headers): def stream_decode_response_unicode(iterator, r): """Stream decodes a iterator.""" - if r.encoding is None: - for item in iterator: - yield item - return - decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace') for chunk in iterator: rv = decoder.decode(chunk) diff --git a/tests/test_requests.py b/tests/test_requests.py index d8d7ea54..a458bf9d 100755 --- a/tests/test_requests.py +++ b/tests/test_requests.py @@ -1042,6 +1042,22 @@ class TestRequests: chunks = r.iter_content(decode_unicode=True) assert all(isinstance(chunk, str) for chunk in chunks) + # raise an exception if encoding isn't set + r = requests.Response() + r.raw = io.BytesIO(b'the content') + r.encoding = None + + with pytest.raises(TypeError): + chunks = r.iter_content(decode_unicode=True) + + # raises an exception if the encoding is garbage + r = requests.Response() + r.raw = io.BytesIO(b'the content') + r.encoding = 'invalid encoding' + + with pytest.raises(LookupError): + chunks = r.iter_content(decode_unicode=True) + def test_response_reason_unicode(self): # check for unicode HTTP status r = requests.Response() From 55e511dd10bc696fce683b4d507fb66d6ef09704 Mon Sep 17 00:00:00 2001 From: Michael Hunsinger Date: Thu, 15 Sep 2016 21:25:23 -0600 Subject: [PATCH 2/2] Parametrized decode response tests --- tests/test_requests.py | 
19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/tests/test_requests.py b/tests/test_requests.py index a458bf9d..cc621a20 100755 --- a/tests/test_requests.py +++ b/tests/test_requests.py @@ -1039,23 +1039,22 @@ class TestRequests: r = requests.Response() r.raw = io.BytesIO(b'the content') r.encoding = 'ascii' + chunks = r.iter_content(decode_unicode=True) assert all(isinstance(chunk, str) for chunk in chunks) + @pytest.mark.parametrize( + 'encoding, exception', ( + (None, TypeError), + ('invalid encoding', LookupError), + )) + def test_decode_unicode_encoding(self, encoding, exception): # raise an exception if encoding isn't set r = requests.Response() r.raw = io.BytesIO(b'the content') - r.encoding = None + r.encoding = encoding - with pytest.raises(TypeError): - chunks = r.iter_content(decode_unicode=True) - - # raises an exception if the encoding is garbage - r = requests.Response() - r.raw = io.BytesIO(b'the content') - r.encoding = 'invalid encoding' - - with pytest.raises(LookupError): + with pytest.raises(exception): chunks = r.iter_content(decode_unicode=True) def test_response_reason_unicode(self):