From 6d071f43312e6585c32208eeda6918be36d3649c Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 21 Jan 2012 01:23:35 -0500 Subject: [PATCH 1/6] kill decode_unicode config --- requests/defaults.py | 1 - 1 file changed, 1 deletion(-) diff --git a/requests/defaults.py b/requests/defaults.py index 1d6716df..f0516f72 100644 --- a/requests/defaults.py +++ b/requests/defaults.py @@ -35,7 +35,6 @@ defaults['base_headers'] = { defaults['verbose'] = None defaults['max_redirects'] = 30 -defaults['decode_unicode'] = True defaults['pool_connections'] = 10 defaults['pool_maxsize'] = 10 defaults['max_retries'] = 0 From 49a38ac89a7bc7cbbeaa2322d4a5d45fcd590054 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 21 Jan 2012 01:24:21 -0500 Subject: [PATCH 2/6] New bytes/unicode separation. Response.content for bytes. New Response.text for unicode. --- requests/models.py | 54 ++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/requests/models.py b/requests/models.py index f407751c..5da4d7e2 100644 --- a/requests/models.py +++ b/requests/models.py @@ -528,7 +528,7 @@ class Request(object): if prefetch: # Save the response. self.response.content - + if self.config.get('danger_mode'): self.response.raise_for_status() @@ -598,7 +598,7 @@ class Response(object): return True - def iter_content(self, chunk_size=10 * 1024, decode_unicode=None): + def iter_content(self, chunk_size=10 * 1024, decode_unicode=False): """Iterates over the response data. This avoids reading the content at once into memory for large responses. The chunk size is the number of bytes it should read into memory. 
This is not necessarily the @@ -616,7 +616,7 @@ class Response(object): break yield chunk self._content_consumed = True - + def generate_chunked(): resp = self.raw._original_response fp = resp.fp @@ -652,9 +652,6 @@ class Response(object): elif 'deflate' in self.headers.get('content-encoding', ''): gen = stream_decompress(gen, mode='deflate') - if decode_unicode is None: - decode_unicode = self.config.get('decode_unicode') - if decode_unicode: gen = stream_decode_response_unicode(gen, self) @@ -692,9 +689,7 @@ class Response(object): @property def content(self): - """Content of the response, in bytes or unicode - (if available). - """ + """Content of the response, in bytes.""" if self._content is None: # Read the contents. @@ -707,26 +702,29 @@ class Response(object): except AttributeError: self._content = None - content = self._content - - # Decode unicode content. - if self.config.get('decode_unicode'): - - # Try charset from content-type - - if self.encoding: - try: - content = unicode(content, self.encoding) - except UnicodeError: - pass - - # Fall back: - try: - content = unicode(content, self.encoding, errors='replace') - except TypeError: - pass - self._content_consumed = True + return self._content + + + @property + def text(self): + """Content of the response, in unicode.""" + + # Try charset from content-type + + content = u'' + + try: + content = unicode(self.content, self.encoding) + except UnicodeError: + pass + + # Try to fall back: + try: + content = unicode(content, self.encoding, errors='replace') + except TypeError: + pass + return content From f5a2b75924805f10f8c42945ad96f9e7d9a3f33e Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 21 Jan 2012 01:28:26 -0500 Subject: [PATCH 3/6] only try to decode if self.encoding is set --- requests/models.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/requests/models.py b/requests/models.py index 5da4d7e2..c116e36b 100644 --- a/requests/models.py +++ 
b/requests/models.py @@ -711,19 +711,19 @@ class Response(object): """Content of the response, in unicode.""" # Try charset from content-type - content = u'' - try: - content = unicode(self.content, self.encoding) - except UnicodeError: - pass + if self.encoding: + try: + content = unicode(self.content, self.encoding) + except UnicodeError: + pass - # Try to fall back: - try: - content = unicode(content, self.encoding, errors='replace') - except TypeError: - pass + # Try to fall back: + try: + content = unicode(content, self.encoding, errors='replace') + except TypeError: + pass return content From 47aba0cc20a3d44aa83bdde849ff39fc8b20d094 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 21 Jan 2012 01:31:23 -0500 Subject: [PATCH 4/6] remove is_error --- requests/models.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/requests/models.py b/requests/models.py index c116e36b..72564cad 100644 --- a/requests/models.py +++ b/requests/models.py @@ -144,7 +144,7 @@ class Request(object): return '' % (self.method) - def _build_response(self, resp, is_error=False): + def _build_response(self, resp): """Build internal :class:`Response ` object from given response. """ @@ -184,9 +184,6 @@ class Request(object): # Save original response for later. response.raw = resp - if is_error: - response.error = resp - response.url = self.full_url.decode('utf-8') return response From efd2b57b920ec5b3dab6eca506bcc82d5090ba08 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 21 Jan 2012 01:44:53 -0500 Subject: [PATCH 5/6] cleanup docs for request defaults --- requests/defaults.py | 5 +---- requests/models.py | 1 - 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/requests/defaults.py b/requests/defaults.py index f0516f72..424d3731 100644 --- a/requests/defaults.py +++ b/requests/defaults.py @@ -10,16 +10,13 @@ Configurations: :base_headers: Default HTTP headers. :verbose: Stream to write request logging to. 
-:timeout: Seconds until request timeout. -:max_redirects: Maximum number of redirects allowed within a request. -:decode_unicode: Decode unicode responses automatically? +:max_redirects: Maximum number of redirects allowed within a request. :keep_alive: Reuse HTTP Connections? :max_retries: The number of times a request should be retried in the event of a connection failure. :danger_mode: If true, Requests will raise errors immediately. :safe_mode: If true, Requests will catch all errors. :pool_maxsize: The maximium size of an HTTP connection pool. :pool_connections: The number of active HTTP connection pools to use. - """ from . import __version__ diff --git a/requests/models.py b/requests/models.py index 72564cad..9912d414 100644 --- a/requests/models.py +++ b/requests/models.py @@ -183,7 +183,6 @@ class Request(object): # Save original response for later. response.raw = resp - response.url = self.full_url.decode('utf-8') return response From a0ae2e6c7b46eaa93fa972623d0fdfc576de242b Mon Sep 17 00:00:00 2001 From: Johannes Gorset Date: Sat, 21 Jan 2012 11:01:45 +0100 Subject: [PATCH 6/6] Default the encoding of "text" media subtypes to "ISO-8859-1" Ref. RFC2616 (HyperText Transfer Protocol), section 3.7.1 (Canonicalization and Text Defaults). --- requests/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/requests/utils.py b/requests/utils.py index c7ab0a4e..95dea4bb 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -276,6 +276,9 @@ def get_encoding_from_headers(headers): if 'charset' in params: return params['charset'].strip("'\"") + if 'text' in content_type: + return 'ISO-8859-1' + def unicode_from_html(content): """Attempts to decode an HTML string into unicode.