Implemented content streaming for responses.

This commit is contained in:
Armin Ronacher
2011-09-03 17:59:30 +02:00
parent 6f84ce72f2
commit 41876fd8c6
2 changed files with 64 additions and 2 deletions
+33 -1
View File
@@ -9,6 +9,7 @@ requests.models
import urllib
import urllib2
import socket
import codecs
import zlib
@@ -21,7 +22,7 @@ from .monkeys import Request as _Request, HTTPBasicAuthHandler, HTTPForcedBasicA
from .structures import CaseInsensitiveDict
from .packages.poster.encode import multipart_encode
from .packages.poster.streaminghttp import register_openers, get_handlers
from .utils import dict_from_cookiejar, get_unicode_from_response, decode_gzip
from .utils import dict_from_cookiejar, get_unicode_from_response, stream_decode_response_unicode, decode_gzip, stream_decode_gzip
from .status_codes import codes
from .exceptions import RequestException, AuthenticationError, Timeout, URLRequired, InvalidMethod, TooManyRedirects
@@ -393,6 +394,7 @@ class Response(object):
def __init__(self):
self._content = None
self._content_consumed = False
#: Integer Code of responded HTTP Status.
self.status_code = None
@@ -435,6 +437,31 @@ class Response(object):
return not self.error
def iter_content(self, chunk_size=10 * 1024, decode_unicode=None):
"""Iterates over the response data. This avoids reading the content
at once into memory for large responses. The chunk size is the number
of bytes it should read into memory. This is not necessarily the
length of each item returned as decoding can take place.
"""
if self._content_consumed:
raise RuntimeError('The content for this response was '
'already consumed')
def generate():
while 1:
chunk = self.fo.read(chunk_size)
if not chunk:
break
yield chunk
self._content_consumed = True
gen = generate()
if 'gzip' in self.headers.get('content-encoding', ''):
gen = stream_decode_gzip(gen)
if decode_unicode is None:
decode_unicode = settings.decode_unicode
if decode_unicode:
gen = stream_decode_response_unicode(gen, self)
return gen
@property
def content(self):
@@ -445,6 +472,10 @@ class Response(object):
if self._content is not None:
return self._content
if self._content_consumed:
raise RuntimeError('The content for this response was '
'already consumed')
# Read the contents.
self._content = self.fo.read()
@@ -459,6 +490,7 @@ class Response(object):
if settings.decode_unicode:
self._content = get_unicode_from_response(self)
self._content_consumed = True
return self._content
+31 -1
View File
@@ -10,6 +10,7 @@ that are also useful for external consumption.
"""
import cgi
import codecs
import cookielib
import re
import zlib
@@ -177,6 +178,19 @@ def unicode_from_html(content):
return content
def stream_decode_response_unicode(iterator, r):
"""Stream decodes a iterator."""
encoding = get_encoding_from_headers(r.headers)
decoder = codecs.getincrementaldecoder(encoding)(errors='replace')
for chunk in iterator:
rv = decoder.decode(chunk)
if rv:
yield rv
rv = decoder.decode('', final=True)
if rv:
yield rv
def get_unicode_from_response(r):
"""Returns the requested content back in unicode.
@@ -216,4 +230,20 @@ def decode_gzip(content):
:param content: bytestring to gzip-decode.
"""
return zlib.decompress(content, 16+zlib.MAX_WBITS)
return zlib.decompress(content, 16 + zlib.MAX_WBITS)
def stream_decode_gzip(iterator):
"""Stream decodes a gzip-encoded iterator"""
try:
dec = zlib.decompressobj(16 + zlib.MAX_WBITS)
for chunk in iterator:
rv = dec.decompress(chunk)
if rv:
yield rv
buf = dec.decompress('')
rv = buf + dec.flush()
if rv:
yield rv
except zlib.error:
pass