mirror of
https://github.com/kennethreitz/requests.git
synced 2026-06-05 22:50:18 +00:00
Merge pull request #3923 from vbarbaresi/3.0.0-iter_lines
Rebase #3745 and add some tests
This commit is contained in:
+28
-5
@@ -776,23 +776,46 @@ class Response(object):
|
||||
|
||||
.. note:: This method is not reentrant safe.
|
||||
"""
|
||||
|
||||
pending = None
|
||||
|
||||
for chunk in self.iter_content(chunk_size=chunk_size, decode_unicode=decode_unicode):
|
||||
for chunk in self.iter_content(chunk_size=chunk_size,
|
||||
decode_unicode=decode_unicode):
|
||||
# Skip any null responses: if there is pending data it is necessarily an
|
||||
# incomplete chunk, so if we don't have more data we don't want to bother
|
||||
# trying to get it. Unconsumed pending data will be yielded anyway in the
|
||||
# end of the loop if the stream ends.
|
||||
if not chunk:
|
||||
continue
|
||||
|
||||
# Consume any pending data
|
||||
if pending is not None:
|
||||
chunk = pending + chunk
|
||||
pending = None
|
||||
|
||||
# Either split on a line, or split on a specified delimiter
|
||||
if delimiter:
|
||||
lines = chunk.split(delimiter)
|
||||
else:
|
||||
lines = chunk.splitlines()
|
||||
|
||||
if lines and lines[-1] and chunk and lines[-1][-1] == chunk[-1]:
|
||||
# Calling `.split(delimiter)` will always end with whatever text
|
||||
# remains beyond the delimiter, or '' if the delimiter is the end
|
||||
# of the text. On the other hand, `.splitlines()` doesn't include
|
||||
# a '' if the text ends in a line delimiter.
|
||||
#
|
||||
# For example:
|
||||
#
|
||||
# 'abc\ndef\n'.split('\n') ~> ['abc', 'def', '']
|
||||
# 'abc\ndef\n'.splitlines() ~> ['abc', 'def']
|
||||
#
|
||||
# So if we have a specified delimiter, we always pop the final
|
||||
# item and prepend it to the next chunk.
|
||||
#
|
||||
# If we're using `splitlines()`, we only do this if the chunk
|
||||
# ended midway through a line.
|
||||
incomplete_line = lines[-1] and lines[-1][-1] == chunk[-1]
|
||||
if delimiter or incomplete_line:
|
||||
pending = lines.pop()
|
||||
else:
|
||||
pending = None
|
||||
|
||||
for line in lines:
|
||||
yield line
|
||||
|
||||
+79
-3
@@ -1296,6 +1296,81 @@ class TestRequests:
|
||||
assert r.request.url == pr.request.url
|
||||
assert r.request.headers == pr.request.headers
|
||||
|
||||
|
||||
def test_response_lines(self):
    """
    Feed iter_lines a stream whose chunk boundaries fall in awkward
    places (empty chunks, a '\\r\\n' pair split across two chunks) and
    check the lines it produces.
    """
    chunks = [
        'This \r\n',
        '',
        'is\r',
        '\n',
        'a',
        ' ',
        '',
        '',
        'test.',
        '\r',
        '\n',
        'end.',
    ]
    joined = ''.join(chunks)

    def fake_iter_content(*args, **kwargs):
        # Looks up `chunks` at call time, so rebinding `chunks` further
        # down changes what the next iter_lines() call consumes.
        return iter(chunks)

    response = requests.Response()
    response._content_consumed = True
    response.iter_content = fake_iter_content

    # With an explicit delimiter the result must equal a plain split of
    # the concatenated stream.
    assert list(response.iter_lines(delimiter='\r\n')) == joined.split('\r\n')

    # Without a delimiter, a chunk that is just '\n' counts as a line end
    # on its own, so iter_lines() emits '' for it. `.splitlines()` on the
    # joined text instead sees the preceding '\r' and treats '\r\n' as a
    # single break, hence the two results differ.
    lines = list(response.iter_lines())
    assert lines != joined.splitlines()
    assert lines[2] == ''
    assert lines[4] == ''

    # Once every '\n' is turned into '\r' the two approaches agree.
    chunks = [piece.replace('\n', '\r') for piece in chunks]
    joined = ''.join(chunks)
    assert list(response.iter_lines()) == joined.splitlines()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'content, expected_no_delimiter, expected_delimiter', (
        ([''], [], []),
        (['line\n'], ['line'], ['line\n']),
        (['line', '\n'], ['line'], ['line\n']),
        (['line\r\n'], ['line'], ['line', '']),
        # Trailing empty chunk at end of stream: same result as the case above
        (['line\r\n', ''], ['line'], ['line', '']),
        (['line', '\r\n'], ['line'], ['line', '']),
        (['a\r', '\nb\r'], ['a', '', 'b'], ['a', 'b\r']),
        (['a\n', '\nb'], ['a', '', 'b'], ['a\n\nb']),
        (['a\r\n', '\rb\n'], ['a', '', 'b'], ['a', '\rb\n']),
        (['a\nb', 'c'], ['a', 'bc'], ['a\nbc']),
        (['a\n', '\rb', '\r\nc'], ['a', '', 'b', 'c'], ['a\n\rb', 'c']),
        # Empty chunk arriving while data is still pending
        (['a\r\nb', '', 'c'], ['a', 'bc'], ['a', 'bc']),
    ))
def test_response_lines_parametrized(
        self, content, expected_no_delimiter, expected_delimiter):
    """
    Run iter_lines over many different chunk splits, checking both
    iter_lines(delimiter='\\r\\n') and the legacy iter_lines() call
    without a delimiter against the expected line lists.

    https://github.com/kennethreitz/requests/pull/2431#issuecomment-72333964
    """
    def fake_iter_content(*args, **kwargs):
        # Fresh iterator on every call so both assertions below each
        # see the full chunk sequence.
        return iter(content)

    response = requests.Response()
    response._content_consumed = True
    response.iter_content = fake_iter_content

    without_delimiter = list(response.iter_lines())
    assert without_delimiter == expected_no_delimiter

    with_delimiter = list(response.iter_lines(delimiter='\r\n'))
    assert with_delimiter == expected_delimiter
|
||||
|
||||
def test_prepared_request_is_pickleable(self, httpbin):
|
||||
p = requests.Request('GET', httpbin('get')).prepare()
|
||||
|
||||
@@ -1741,11 +1816,12 @@ class TestRequests:
|
||||
prep = r.prepare()
|
||||
assert 'stuff=elixr' == prep.body
|
||||
|
||||
def test_response_iter_lines(self, httpbin):
|
||||
@pytest.mark.parametrize('decode_unicode', (True, False))
|
||||
def test_response_iter_lines(self, httpbin, decode_unicode):
|
||||
r = requests.get(httpbin('stream/4'), stream=True)
|
||||
assert r.status_code == 200
|
||||
|
||||
it = r.iter_lines()
|
||||
r.encoding = 'utf-8'
|
||||
it = r.iter_lines(decode_unicode=decode_unicode)
|
||||
next(it)
|
||||
assert len(list(it)) == 3
|
||||
|
||||
|
||||
Reference in New Issue
Block a user