diff --git a/requests/models.py b/requests/models.py
index 072336b2..c70b56f5 100644
--- a/requests/models.py
+++ b/requests/models.py
@@ -9,6 +9,7 @@ This module contains the primary objects that power Requests.
 
 import collections
 import datetime
+import codecs
 
 from io import BytesIO, UnsupportedOperation
 from .hooks import default_hooks
@@ -580,7 +581,8 @@ class Response(object):
         #: Final URL location of Response.
         self.url = None
 
-        #: Encoding to decode with when accessing r.text.
+        #: Encoding to decode with when accessing r.text or
+        #: r.iter_content(decode_unicode=True)
         self.encoding = None
 
         #: A list of :class:`Response <Response>` objects from
@@ -670,8 +672,8 @@ class Response(object):
         chunks are received. If stream=False, data is returned as
         a single chunk.
 
-        If decode_unicode is True, content will be decoded using the best
-        available encoding based on the response.
+        If using decode_unicode, the encoding must be set to a valid encoding
+        enumeration before invoking iter_content.
         """
 
         def generate():
@@ -708,6 +710,16 @@ class Response(object):
         chunks = reused_chunks if self._content_consumed else stream_chunks
 
         if decode_unicode:
+            if self.encoding is None:
+                raise TypeError(
+                    'encoding must be set before consuming streaming '
+                    'responses'
+                )
+
+            # check encoding value here, don't wait for the generator to be
+            # consumed before raising an exception
+            codecs.lookup(self.encoding)
+
             chunks = stream_decode_response_unicode(chunks, self)
 
         return chunks
diff --git a/requests/utils.py b/requests/utils.py
index 6a3ed90e..7d7a516b 100644
--- a/requests/utils.py
+++ b/requests/utils.py
@@ -368,11 +368,6 @@ def get_encoding_from_headers(headers):
 def stream_decode_response_unicode(iterator, r):
     """Stream decodes a iterator."""
 
-    if r.encoding is None:
-        for item in iterator:
-            yield item
-        return
-
     decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace')
     for chunk in iterator:
         rv = decoder.decode(chunk)
diff --git a/tests/test_requests.py b/tests/test_requests.py
index d8d7ea54..cc621a20 100755
--- a/tests/test_requests.py
+++ b/tests/test_requests.py
@@ -1039,9 +1039,24 @@ class TestRequests:
         r = requests.Response()
         r.raw = io.BytesIO(b'the content')
         r.encoding = 'ascii'
+
         chunks = r.iter_content(decode_unicode=True)
         assert all(isinstance(chunk, str) for chunk in chunks)
 
+    @pytest.mark.parametrize(
+        'encoding, exception', (
+            (None, TypeError),
+            ('invalid encoding', LookupError),
+        ))
+    def test_decode_unicode_encoding(self, encoding, exception):
+        # raise an exception if encoding isn't set
+        r = requests.Response()
+        r.raw = io.BytesIO(b'the content')
+        r.encoding = encoding
+
+        with pytest.raises(exception):
+            chunks = r.iter_content(decode_unicode=True)
+
     def test_response_reason_unicode(self):
         # check for unicode HTTP status
         r = requests.Response()