From 5ee8b348ebab9a7c427a87355dd089c83ee74be9 Mon Sep 17 00:00:00 2001 From: Martijn Pieters Date: Mon, 3 Feb 2014 12:00:14 +0000 Subject: [PATCH] Reinstate falling back to self.text for JSON responses A JSON response that has no encoding specified will be decoded with a detected UTF codec (compliant with the JSON RFC), but if that fails, we guessed wrong and need to fall back to charade character detection (via `self.text`). Kenneth removed this functionality (by accident?) in 1451ba0c6d395c41f86da35036fa361c3a41bc90; this commit reinstates it and adds a log warning. Fixes #1674 --- requests/models.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/requests/models.py b/requests/models.py index 34dce181..2adc5492 100644 --- a/requests/models.py +++ b/requests/models.py @@ -725,11 +725,20 @@ class Response(object): if not self.encoding and len(self.content) > 3: # No encoding set. JSON RFC 4627 section 3 states we should expect # UTF-8, -16 or -32. Detect which one to use; If the detection or - # decoding fails, fall back to `self.text` (using chardet to make + # decoding fails, fall back to `self.text` (using charade to make # a best guess). encoding = guess_json_utf(self.content) if encoding is not None: - return json.loads(self.content.decode(encoding), **kwargs) + try: + return json.loads(self.content.decode(encoding), **kwargs) + except UnicodeDecodeError: + # Wrong UTF codec detected; usually because it's not UTF-8 + # but some other 8-bit codec. This is an RFC violation, + # and the server didn't bother to tell us what codec *was* + # used. + pass + log.warn('No encoding specified for JSON response, and no ' + 'UTF codec detected. Falling back to charade best guess.') return json.loads(self.text, **kwargs) @property