From 29271a4100b24008719cde605a4b641745dd4678 Mon Sep 17 00:00:00 2001 From: Ronny Pfannschmidt Date: Tue, 17 Jan 2012 12:47:22 +0100 Subject: [PATCH 1/5] make Response.iter_lines yield the pending buffer if it's actually a complete line --- requests/models.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/requests/models.py b/requests/models.py index c45b18a7..c3f1b432 100644 --- a/requests/models.py +++ b/requests/models.py @@ -633,7 +633,7 @@ class Response(object): avoids reading the content at once into memory for large responses. """ - + #XXX: why rstrip by default pending = None for chunk in self.iter_content(chunk_size, decode_unicode=decode_unicode): if pending is not None: @@ -643,6 +643,10 @@ class Response(object): yield line.rstrip() # Save the last part of the chunk for next iteration, to keep full line together pending = lines[-1] + #if pending is a complete line, give it baack + if pending[-1] == '\n': + yield pending.rstrip() + pending = None # Yield the last line if pending is not None: From 5cfbb0aaf50e451179ede8bcd43ca55fbdfebad5 Mon Sep 17 00:00:00 2001 From: Ronny Pfannschmidt Date: Tue, 17 Jan 2012 13:16:09 +0100 Subject: [PATCH 2/5] insanity for chunked iteration --- requests/models.py | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/requests/models.py b/requests/models.py index c3f1b432..728dbc90 100644 --- a/requests/models.py +++ b/requests/models.py @@ -611,8 +611,30 @@ class Response(object): break yield chunk self._content_consumed = True + + def generate_chunked(): + resp = self.raw._original_response + fp = resp.fp + yield fp.read(resp.chunk_left) + while 1: + #XXX correct line size + pending_bytes = fp.readline(80).strip() + if not pending_bytes: + break + pending_bytes = int(pending_bytes, 16) + if pending_bytes == 0: + break + while pending_bytes: + chunk = fp.read(min(chunk_size, pending_bytes)) + pending_bytes-=len(chunk) + yield chunk + 
self._content_consumed = True - gen = generate() + + if getattr(self.raw._original_response, 'chunked', False): + gen = generate_chunked() + else: + gen = generate() if 'gzip' in self.headers.get('content-encoding', ''): gen = stream_decompress(gen, mode='gzip') @@ -642,10 +664,14 @@ class Response(object): for line in lines[:-1]: yield line.rstrip() # Save the last part of the chunk for next iteration, to keep full line together - pending = lines[-1] - #if pending is a complete line, give it baack - if pending[-1] == '\n': - yield pending.rstrip() + # lines may be empty for the last chunk of a chunked response + if lines: + pending = lines[-1] + #if pending is a complete line, give it back + if pending[-1] == '\n': + yield pending.rstrip() + pending = None + else: pending = None # Yield the last line From 4d38790b5bd076ca38802147a2ad827a958eb767 Mon Sep 17 00:00:00 2001 From: Ronny Pfannschmidt Date: Tue, 17 Jan 2012 17:38:52 +0100 Subject: [PATCH 3/5] even more evil, fixes my mistakes with chunked reading --- requests/models.py | 17 ++++++++++++----- test_requests.py | 4 ++-- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/requests/models.py b/requests/models.py index 728dbc90..4196376b 100644 --- a/requests/models.py +++ b/requests/models.py @@ -615,12 +615,12 @@ class Response(object): def generate_chunked(): resp = self.raw._original_response fp = resp.fp - yield fp.read(resp.chunk_left) + if resp.chunk_left: + yield fp.read(resp.chunk_left) + fp.read(2) #throw away crlf while 1: #XXX correct line size - pending_bytes = fp.readline(80).strip() - if not pending_bytes: - break + pending_bytes = fp.readline(40).strip() pending_bytes = int(pending_bytes, 16) if pending_bytes == 0: break @@ -628,11 +628,18 @@ class Response(object): chunk = fp.read(min(chunk_size, pending_bytes)) pending_bytes-=len(chunk) yield chunk + fp.read(2) # throw away crlf self._content_consumed = True - if getattr(self.raw._original_response, 'chunked', False): + if 
getattr(getattr(self.raw, '_original_response', None), 'chunked', False): gen = generate_chunked() + + def hack_gen(gen=gen): + for item in gen: + print repr(item) + yield item + gen = hack_gen() else: gen = generate() diff --git a/test_requests.py b/test_requests.py index f6547de0..dab3dd43 100755 --- a/test_requests.py +++ b/test_requests.py @@ -608,9 +608,9 @@ class RequestsTestSuite(unittest.TestCase): lines = (0, 2, 10, 100) for i in lines: - r = requests.get(httpbin('stream', str(i)), prefetch=False) - len_lines = len([l for l in r.iter_lines()]) + lines = list(r.iter_lines()) + len_lines = len(lines) self.assertEqual(i, len_lines) From 1c8a1417115c31ead7891bf89951de194b20b5e1 Mon Sep 17 00:00:00 2001 From: Ronny Pfannschmidt Date: Tue, 17 Jan 2012 17:39:37 +0100 Subject: [PATCH 4/5] remove debug print --- requests/models.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/requests/models.py b/requests/models.py index 4196376b..5d45d7a4 100644 --- a/requests/models.py +++ b/requests/models.py @@ -634,12 +634,6 @@ class Response(object): if getattr(getattr(self.raw, '_original_response', None), 'chunked', False): gen = generate_chunked() - - def hack_gen(gen=gen): - for item in gen: - print repr(item) - yield item - gen = hack_gen() else: gen = generate() From 7f9cef011407cd38d618279af04632b982037a17 Mon Sep 17 00:00:00 2001 From: Ronny Pfannschmidt Date: Tue, 17 Jan 2012 18:14:47 +0100 Subject: [PATCH 5/5] close the chunked fd at the end and honor content chunksizes for a potential first chunk --- requests/models.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/requests/models.py b/requests/models.py index 5d45d7a4..2e97694d 100644 --- a/requests/models.py +++ b/requests/models.py @@ -615,11 +615,15 @@ class Response(object): def generate_chunked(): resp = self.raw._original_response fp = resp.fp - if resp.chunk_left: - yield fp.read(resp.chunk_left) - fp.read(2) #throw away crlf + if resp.chunk_left is not None: + 
pending_bytes = resp.chunk_left + while pending_bytes: + chunk = fp.read(min(chunk_size, pending_bytes)) + pending_bytes-=len(chunk) + yield chunk + fp.read(2) # throw away crlf while 1: - #XXX correct line size + #XXX correct line size? (httplib has 64kb, seems insane) pending_bytes = fp.readline(40).strip() pending_bytes = int(pending_bytes, 16) if pending_bytes == 0: @@ -630,6 +634,7 @@ class Response(object): yield chunk fp.read(2) # throw away crlf self._content_consumed = True + fp.close() if getattr(getattr(self.raw, '_original_response', None), 'chunked', False):