diff --git a/requests/models.py b/requests/models.py index b4cdead4..fe554ffc 100644 --- a/requests/models.py +++ b/requests/models.py @@ -780,7 +780,6 @@ class Response(object): for chunk in self.iter_content(chunk_size=chunk_size, decode_unicode=decode_unicode): - # Skip any null responses if not chunk: continue @@ -796,15 +795,23 @@ class Response(object): else: lines = chunk.splitlines() - # The split(delimiter) will always end with whatever remains past - # the delimiter ('' if nothing more). However splitlines() will - # not end with a '' if the final text is a line delimiter. - - # Therefore, if we're in delimiter mode, always pop the final - # item to prepend to the next chunk. However, only do this for - # non-delimiter mode if the chunk does not match the end of the - # last line. - if delimiter or (lines[-1] and lines[-1][-1] == chunk[-1]): + # Calling `.split(delimiter)` will always end with whatever text + # remains beyond the delimiter, or '' if the delimiter is the end + # of the text. On the other hand, `.splitlines()` doesn't include + # a '' if the text ends in a line delimiter. + # + # For example: + # + # 'abc\ndef\n'.split('\n') ~> ['abc', 'def', ''] + # 'abc\ndef\n'.splitlines() ~> ['abc', 'def'] + # + # So if we have a specified delimiter, we always pop the final + # item and prepend it to the next chunk. + # + # If we're using `splitlines()`, we only do this if the chunk + # ended midway through a line. 
+ incomplete_line = (lines[-1] and lines[-1][-1:] == chunk[-1:]) + if delimiter or incomplete_line: pending = lines.pop() for line in lines: diff --git a/tests/test_requests.py b/tests/test_requests.py index d56e365a..76a2d45a 100755 --- a/tests/test_requests.py +++ b/tests/test_requests.py @@ -1296,10 +1296,11 @@ class TestRequests: assert r.request.url == pr.request.url assert r.request.headers == pr.request.headers + def test_response_lines(self): """ - iter_lines should be able to handle data dribbling in which might - not be lined up ideally. + iter_lines should be able to handle data dribbling in which delimiters + might not be lined up ideally. """ mock_chunks = [ 'This \r\n', @@ -1315,6 +1316,7 @@ class TestRequests: '\n', 'end.', ] + mock_data = ''.join(mock_chunks) def mock_iter_content(*args, **kwargs): '''Fake difficult data.''' @@ -1325,16 +1327,17 @@ class TestRequests: r._content_consumed = True r.iter_content = mock_iter_content - assert list(r.iter_lines(delimiter='\r\n')) == \ - ''.join(mock_chunks).split('\r\n') + assert list(r.iter_lines(delimiter='\r\n')) == mock_data.split('\r\n') - # This test can't pass because '\n' by itself is a single line-end, but - # '\r\n' is also a single line-end - assert not (list(r.iter_lines()) == ''.join(mock_chunks).splitlines()) + # Because '\n' is a single line-end, when `iter_lines()` receives + # the chunks containing a single '\n', it emits '' as a line -- whereas + # `.splitlines()` combines with the '\r' and splits on `\r\n`. + assert list(r.iter_lines()) != mock_data.splitlines() - # However, this should pass if everything is '\r' + # If we change all the line breaks to `\r`, we should be okay. 
mock_chunks = [chunk.replace('\n', '\r') for chunk in mock_chunks] - assert list(r.iter_lines()) == ''.join(mock_chunks).splitlines() + mock_data = ''.join(mock_chunks) + assert list(r.iter_lines()) == mock_data.splitlines() def test_prepared_request_is_pickleable(self, httpbin): p = requests.Request('GET', httpbin('get')).prepare()