From 02031e3e14eecab831c7aeb62befbe9ada3ad641 Mon Sep 17 00:00:00 2001
From: Ian Epperson <ian@epperson.com>
Date: Fri, 30 Jan 2015 19:22:19 -0800
Subject: [PATCH 1/8] Test to show bug when delimiter is split between reads

---
 tests/test_requests.py | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/tests/test_requests.py b/tests/test_requests.py
index 4d91dd20..d56e365a 100755
--- a/tests/test_requests.py
+++ b/tests/test_requests.py
@@ -1296,6 +1296,46 @@ class TestRequests:
         assert r.request.url == pr.request.url
         assert r.request.headers == pr.request.headers
 
+    def test_response_lines(self):
+        """
+        iter_lines should be able to handle data dribbling in which might
+        not be lined up ideally.
+        """
+        mock_chunks = [
+            'This \r\n',
+            '',
+            'is\r',
+            '\n',
+            'a',
+            ' ',
+            '',
+            '',
+            'test.',
+            '\r',
+            '\n',
+            'end.',
+        ]
+
+        def mock_iter_content(*args, **kwargs):
+            '''Fake difficult data.'''
+            for chunk in mock_chunks:
+                yield chunk
+
+        r = requests.Response()
+        r._content_consumed = True
+        r.iter_content = mock_iter_content
+
+        assert list(r.iter_lines(delimiter='\r\n')) == \
+            ''.join(mock_chunks).split('\r\n')
+
+        # This test can't pass because '\n' by itself is a single line-end, but
+        # '\r\n' is also a single line-end
+        assert not (list(r.iter_lines()) == ''.join(mock_chunks).splitlines())
+
+        # However, this should pass if everything is '\r'
+        mock_chunks = [chunk.replace('\n', '\r') for chunk in mock_chunks]
+        assert list(r.iter_lines()) == ''.join(mock_chunks).splitlines()
+
     def test_prepared_request_is_pickleable(self, httpbin):
         p = requests.Request('GET', httpbin('get')).prepare()
 

From 9174925916b797e30d1f402ced4d835149af9efe Mon Sep 17 00:00:00 2001
From: Ian Epperson <ian@epperson.com>
Date: Fri, 30 Jan 2015 19:23:04 -0800
Subject: [PATCH 2/8] Fix bug when delimiter is split between responses

---
 requests/models.py | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/requests/models.py b/requests/models.py
index ac348e9d..b4cdead4 100644
--- a/requests/models.py
+++ b/requests/models.py
@@ -776,23 +776,36 @@ class Response(object):
 
         .. note:: This method is not reentrant safe.
         """
-
         pending = None
 
-        for chunk in self.iter_content(chunk_size=chunk_size, decode_unicode=decode_unicode):
+        for chunk in self.iter_content(chunk_size=chunk_size,
+                                       decode_unicode=decode_unicode):
 
+            # Skip any null responses
+            if not chunk:
+                continue
+
+            # Consume any pending data
             if pending is not None:
                 chunk = pending + chunk
+                pending = None
 
+            # Either split on a line, or split on a specified delimiter
             if delimiter:
                 lines = chunk.split(delimiter)
             else:
                 lines = chunk.splitlines()
 
-            if lines and lines[-1] and chunk and lines[-1][-1] == chunk[-1]:
+            # The split(delimiter) will always end with whatever remains past
+            # the delimiter ('' if nothing more).  However splitlines() will
+            # not end with a '' if the final text is a line delimiter.
+
+            # Therefore, if we're in delimiter mode, always pop the final
+            # item to prepend to the next chunk. However, only do this for
+            # non-delimiter mode if the chunk does not match the end of the
+            # last line.
+            if delimiter or (lines[-1] and lines[-1][-1] == chunk[-1]):
                 pending = lines.pop()
-            else:
-                pending = None
 
             for line in lines:
                 yield line

From 9881be25f3d88e8cfcb1dd006cc7eeca37c08c83 Mon Sep 17 00:00:00 2001
From: Alex Chan <alex@alexwlchan.net>
Date: Sun, 4 Dec 2016 23:06:03 +0000
Subject: [PATCH 3/8] Review markups for @Lukasa

---
 requests/models.py     | 27 +++++++++++++++++----------
 tests/test_requests.py | 21 ++++++++++++---------
 2 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/requests/models.py b/requests/models.py
index b4cdead4..fe554ffc 100644
--- a/requests/models.py
+++ b/requests/models.py
@@ -780,7 +780,6 @@ class Response(object):
 
         for chunk in self.iter_content(chunk_size=chunk_size,
                                        decode_unicode=decode_unicode):
-
             # Skip any null responses
             if not chunk:
                 continue
@@ -796,15 +795,23 @@ class Response(object):
             else:
                 lines = chunk.splitlines()
 
-            # The split(delimiter) will always end with whatever remains past
-            # the delimiter ('' if nothing more).  However splitlines() will
-            # not end with a '' if the final text is a line delimiter.
-
-            # Therefore, if we're in delimiter mode, always pop the final
-            # item to prepend to the next chunk. However, only do this for
-            # non-delimiter mode if the chunk does not match the end of the
-            # last line.
-            if delimiter or (lines[-1] and lines[-1][-1] == chunk[-1]):
+            # Calling `.split(delimiter)` will always end with whatever text
+            # remains beyond the delimiter, or '' if the delimiter is the end
+            # of the text.  On the other hand, `.splitlines()` doesn't include
+            # a '' if the text ends in a line delimiter.
+            #
+            # For example:
+            #
+            #     'abc\ndef\n'.split('\n')  ~> ['abc', 'def', '']
+            #     'abc\ndef\n'.splitlines() ~> ['abc', 'def']
+            #
+            # So if we have a specified delimiter, we always pop the final
+            # item and prepend it to the next chunk.
+            #
+            # If we're using `splitlines()`, we only do this if the chunk
+            # ended midway through a line.
+            incomplete_line = (lines[-1] and lines[-1].endswith(chunk[-1]))
+            if delimiter or incomplete_line:
                 pending = lines.pop()
 
             for line in lines:
diff --git a/tests/test_requests.py b/tests/test_requests.py
index d56e365a..76a2d45a 100755
--- a/tests/test_requests.py
+++ b/tests/test_requests.py
@@ -1296,10 +1296,11 @@ class TestRequests:
         assert r.request.url == pr.request.url
         assert r.request.headers == pr.request.headers
 
+
     def test_response_lines(self):
         """
-        iter_lines should be able to handle data dribbling in which might
-        not be lined up ideally.
+        iter_lines should be able to handle data dribbling in which delimiters
+        might not be lined up ideally.
         """
         mock_chunks = [
             'This \r\n',
@@ -1315,6 +1316,7 @@ class TestRequests:
             '\n',
             'end.',
         ]
+        mock_data = ''.join(mock_chunks)
 
         def mock_iter_content(*args, **kwargs):
             '''Fake difficult data.'''
@@ -1325,16 +1327,17 @@ class TestRequests:
         r._content_consumed = True
         r.iter_content = mock_iter_content
 
-        assert list(r.iter_lines(delimiter='\r\n')) == \
-            ''.join(mock_chunks).split('\r\n')
+        assert list(r.iter_lines(delimiter='\r\n')) == mock_data.split('\r\n')
 
-        # This test can't pass because '\n' by itself is a single line-end, but
-        # '\r\n' is also a single line-end
-        assert not (list(r.iter_lines()) == ''.join(mock_chunks).splitlines())
+        # Because '\n' is a single line-end, when `iter_lines()` receives
+        # the chunks containing a single '\n', it emits '' as a line -- whereas
+        # `.splitlines()` combines with the '\r' and splits on `\r\n`.
+        assert list(r.iter_lines()) != mock_data.splitlines()
 
-        # However, this should pass if everything is '\r'
+        # If we change all the line breaks to `\r`, we should be okay.
         mock_chunks = [chunk.replace('\n', '\r') for chunk in mock_chunks]
-        assert list(r.iter_lines()) == ''.join(mock_chunks).splitlines()
+        mock_data = ''.join(mock_chunks)
+        assert list(r.iter_lines()) == mock_data.splitlines()
 
     def test_prepared_request_is_pickleable(self, httpbin):
         p = requests.Request('GET', httpbin('get')).prepare()

From 0380ac5893ffeb52e65ded5e22c785dd4f659496 Mon Sep 17 00:00:00 2001
From: Vincent Barbaresi <vincent.barbaresi@gmail.com>
Date: Tue, 14 Mar 2017 19:26:55 +0100
Subject: [PATCH 4/8] add some parametrized tests for iter_lines()

Write a list of different chunk splits and their expected results
to test against, using ianepperson's breakdown as specification:
https://github.com/kennethreitz/requests/pull/2431#issuecomment-72333964
---
 tests/test_requests.py | 41 +++++++++++++++++++++++++++++++++++------
 1 file changed, 35 insertions(+), 6 deletions(-)

diff --git a/tests/test_requests.py b/tests/test_requests.py
index 76a2d45a..96769830 100755
--- a/tests/test_requests.py
+++ b/tests/test_requests.py
@@ -1318,10 +1318,7 @@ class TestRequests:
         ]
         mock_data = ''.join(mock_chunks)
 
-        def mock_iter_content(*args, **kwargs):
-            '''Fake difficult data.'''
-            for chunk in mock_chunks:
-                yield chunk
+        mock_iter_content = lambda *args, **kwargs: (e for e in mock_chunks)
 
         r = requests.Response()
         r._content_consumed = True
@@ -1332,13 +1329,45 @@ class TestRequests:
         # Because '\n' is a single line-end, when `iter_lines()` receives
         # the chunks containing a single '\n', it emits '' as a line -- whereas
         # `.splitlines()` combines with the '\r' and splits on `\r\n`.
-        assert list(r.iter_lines()) != mock_data.splitlines()
-
+        result = list(r.iter_lines())
+        assert result != mock_data.splitlines()
+        assert result[2] == ''
+        assert result[4] == ''
         # If we change all the line breaks to `\r`, we should be okay.
         mock_chunks = [chunk.replace('\n', '\r') for chunk in mock_chunks]
         mock_data = ''.join(mock_chunks)
         assert list(r.iter_lines()) == mock_data.splitlines()
 
+
+    @pytest.mark.parametrize(
+        'content, expected_no_delimiter, expected_delimiter', (
+            ([''], [], []),
+            (['line\n'], ['line'], ['line\n']),
+            (['line', '\n'], ['line'], ['line\n']),
+            (['line\r\n'], ['line'], ['line', '']),
+            (['line', '\r\n'], ['line'], ['line', '']),
+            (['a\r', '\nb\r'], ['a', '', 'b'], ['a', 'b\r']),
+            (['a\n', '\nb'], ['a', '', 'b'], ['a\n\nb']),
+            (['a\r\n','\rb\n'], ['a', '', 'b'], ['a', '\rb\n']),
+            (['a\nb', 'c'], ['a', 'bc'], ['a\nbc']),
+            (['a\n', '\rb', '\r\nc'], ['a', '', 'b', 'c'], ['a\n\rb', 'c'])
+        ))
+    def test_response_lines_parametrized(self, content, expected_no_delimiter, expected_delimiter):
+        """
+        Test a lot of potential chunk splits to ensure consistency of
+        iter_lines(delimiter=x), as well as the legacy behavior of
+        iter_lines() without delimiter
+        https://github.com/kennethreitz/requests/pull/2431#issuecomment-72333964
+        """
+        mock_chunks = content
+        mock_iter_content = lambda *args, **kwargs: (e for e in mock_chunks)
+
+        r = requests.Response()
+        r._content_consumed = True
+        r.iter_content = mock_iter_content
+        assert list(r.iter_lines()) == expected_no_delimiter
+        assert list(r.iter_lines(delimiter='\r\n')) == expected_delimiter
+
     def test_prepared_request_is_pickleable(self, httpbin):
         p = requests.Request('GET', httpbin('get')).prepare()
 

From 5a8bc193844b1d46d89784c11e62b3a4882afa25 Mon Sep 17 00:00:00 2001
From: Vincent Barbaresi <vincent.barbaresi@gmail.com>
Date: Wed, 15 Mar 2017 00:52:32 +0100
Subject: [PATCH 5/8] add more tests for iter_lines()

check the case of an empty chunk somewhere in the stream
---
 tests/test_requests.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/test_requests.py b/tests/test_requests.py
index 96769830..ac4bc2d5 100755
--- a/tests/test_requests.py
+++ b/tests/test_requests.py
@@ -1345,12 +1345,15 @@ class TestRequests:
             (['line\n'], ['line'], ['line\n']),
             (['line', '\n'], ['line'], ['line\n']),
             (['line\r\n'], ['line'], ['line', '']),
+            # Empty chunk in the end of stream, same behavior as the previous
+            (['line\r\n', ''], ['line'], ['line', '']),
             (['line', '\r\n'], ['line'], ['line', '']),
             (['a\r', '\nb\r'], ['a', '', 'b'], ['a', 'b\r']),
             (['a\n', '\nb'], ['a', '', 'b'], ['a\n\nb']),
             (['a\r\n','\rb\n'], ['a', '', 'b'], ['a', '\rb\n']),
             (['a\nb', 'c'], ['a', 'bc'], ['a\nbc']),
-            (['a\n', '\rb', '\r\nc'], ['a', '', 'b', 'c'], ['a\n\rb', 'c'])
+            (['a\n', '\rb', '\r\nc'], ['a', '', 'b', 'c'], ['a\n\rb', 'c']),
+            (['a\r\nb', '', 'c'], ['a', 'bc'], ['a', 'bc'])  # Empty chunk with pending data
         ))
     def test_response_lines_parametrized(self, content, expected_no_delimiter, expected_delimiter):
         """

From cc2ac23c0d0253f10060c9bce3826b12122953ff Mon Sep 17 00:00:00 2001
From: Vincent Barbaresi <vincent.barbaresi@gmail.com>
Date: Wed, 15 Mar 2017 00:54:04 +0100
Subject: [PATCH 6/8] remove useless brackets in iter_lines boolean condition

---
 requests/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requests/models.py b/requests/models.py
index fe554ffc..a3958f73 100644
--- a/requests/models.py
+++ b/requests/models.py
@@ -810,7 +810,7 @@ class Response(object):
             #
             # If we're using `splitlines()`, we only do this if the chunk
             # ended midway through a line.
-            incomplete_line = (lines[-1] and lines[-1].endswith(chunk[-1]))
+            incomplete_line = lines[-1] and lines[-1].endswith(chunk[-1])
             if delimiter or incomplete_line:
                 pending = lines.pop()
 

From 052595ffbfd0464a5272a33927cd183c357beab0 Mon Sep 17 00:00:00 2001
From: Vincent Barbaresi <vincent.barbaresi@opendatasoft.com>
Date: Wed, 15 Mar 2017 12:16:32 +0100
Subject: [PATCH 7/8] add explanatory comment about skipping null chunks in
 iter_lines

---
 requests/models.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/requests/models.py b/requests/models.py
index a3958f73..375aae7a 100644
--- a/requests/models.py
+++ b/requests/models.py
@@ -780,7 +780,10 @@ class Response(object):
 
         for chunk in self.iter_content(chunk_size=chunk_size,
                                        decode_unicode=decode_unicode):
-            # Skip any null responses
+            # Skip any null responses: if there is pending data it is necessarily an
+            # incomplete chunk, so if we don't have more data we don't want to bother
+            # trying to get it. Unconsumed pending data will be yielded anyway in the
+            # end of the loop if the stream ends.
             if not chunk:
                 continue
 

From d491e9f9b28815343d4114be51832960907b12d8 Mon Sep 17 00:00:00 2001
From: Vincent Barbaresi <vincent.barbaresi@gmail.com>
Date: Wed, 15 Mar 2017 22:27:36 +0100
Subject: [PATCH 8/8] use [-1] instead of endswith() to work with bytes or
 string

Also add a parametrize on decode_unicode for iter_lines() test
to check with bytestrings and str content
---
 requests/models.py     | 2 +-
 tests/test_requests.py | 7 ++++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/requests/models.py b/requests/models.py
index 375aae7a..46a809da 100644
--- a/requests/models.py
+++ b/requests/models.py
@@ -813,7 +813,7 @@ class Response(object):
             #
             # If we're using `splitlines()`, we only do this if the chunk
             # ended midway through a line.
-            incomplete_line = lines[-1] and lines[-1].endswith(chunk[-1])
+            incomplete_line = lines[-1] and lines[-1][-1] == chunk[-1]
             if delimiter or incomplete_line:
                 pending = lines.pop()
 
diff --git a/tests/test_requests.py b/tests/test_requests.py
index ac4bc2d5..8f68e0c4 100755
--- a/tests/test_requests.py
+++ b/tests/test_requests.py
@@ -1816,11 +1816,12 @@ class TestRequests:
         prep = r.prepare()
         assert 'stuff=elixr' == prep.body
 
-    def test_response_iter_lines(self, httpbin):
+    @pytest.mark.parametrize('decode_unicode', (True, False))
+    def test_response_iter_lines(self, httpbin, decode_unicode):
         r = requests.get(httpbin('stream/4'), stream=True)
         assert r.status_code == 200
-
-        it = r.iter_lines()
+        r.encoding = 'utf-8'
+        it = r.iter_lines(decode_unicode=decode_unicode)
         next(it)
         assert len(list(it)) == 3