From 9a56bb8dd02ad121cc02c4beb1d8238f6eb749f1 Mon Sep 17 00:00:00 2001 From: Radomir Stevanovic Date: Tue, 14 Aug 2012 22:57:35 +0200 Subject: [PATCH] bugfix: JSON-safe encoding of raw binary data/files Encoding of the received files and/or body data was failing in case of raw binary data being POSTed/PUT/etc. (The exact trigger was actually a non-UTF-8 data stream, since Flask's `jsonify`, i.e. `simplejson.dumps`, assumes UTF-8.) Binary data in a JSON response are now encoded as "data" URLs, according to RFC 2397. The "data" URL scheme was chosen for its simplicity and clarity. The MIME type is included. Plain text is passed through unmodified, as before. Also, tests demonstrating the bug/fix were added to `test_httpbin.py`. --- AUTHORS | 3 ++- httpbin/helpers.py | 28 +++++++++++++++++++++++++--- test_httpbin.py | 16 ++++++++++++++++ 3 files changed, 43 insertions(+), 4 deletions(-) diff --git a/AUTHORS b/AUTHORS index d031220..2ca4276 100644 --- a/AUTHORS +++ b/AUTHORS @@ -14,4 +14,5 @@ Patches and Suggestions - Andrey Petrov - Lispython - Kyle Conroy -- Flavio Percoco \ No newline at end of file +- Flavio Percoco +- Radomir Stevanovic (http://github.com/randomir) \ No newline at end of file diff --git a/httpbin/helpers.py b/httpbin/helpers.py index 6bb5740..ea45924 100644 --- a/httpbin/helpers.py +++ b/httpbin/helpers.py @@ -7,7 +7,8 @@ httpbin.helpers This module provides helper functions for httpbin. """ -import json +import base64 +import simplejson as json from hashlib import md5 from werkzeug.http import parse_authorization_header @@ -62,13 +63,34 @@ ANGRY_ASCII =""" YOU SHOUDN'T BE HERE """ + +def json_safe(string, content_type='application/octet-stream'): + """Returns JSON-safe version of `string`. + + If `string` is a Unicode string or a valid UTF-8, it is returned unmodified, + as it can safely be encoded to JSON string. + + If `string` contains raw/binary data, it is Base64-encoded, formatted and + returned according to "data" URL scheme (RFC2397). 
Since JSON is not + suitable for binary data, some additional encoding was necessary; "data" + URL scheme was chosen for its simplicity. + """ + + try: + _encoded = json.dumps(string) + return string + except ValueError: + return ''.join(['data:%s;base64,' % content_type, + base64.b64encode(string)]) + + def get_files(): """Returns files dict from request context.""" files = dict() for k, v in request.files.items(): - files[k] = v.read() + files[k] = json_safe(v.read(), request.files[k].content_type) return files @@ -120,7 +142,7 @@ def get_dict(*keys, **extras): url=request.url, args=request.args, form=form, - data=data, + data=json_safe(data), origin=request.remote_addr, headers=get_headers(), files=get_files(), diff --git a/test_httpbin.py b/test_httpbin.py index ba39aac..8d7149c 100755 --- a/test_httpbin.py +++ b/test_httpbin.py @@ -25,6 +25,22 @@ class HttpbinTestCase(unittest.TestCase): content = response.data.decode('utf-8') self.assertEquals(greeting, content) + def test_post_binary(self): + response = self.app.post('/post', + data='\x01\x02\x03\x81\x82\x83', + content_type='application/octet-stream') + self.assertEquals(response.status_code, 200) + + def test_post_file_text(self): + with open('httpbin/core.py') as f: + response = self.app.post('/post', data={"file": f}) + self.assertEquals(response.status_code, 200) + + def test_post_file_binary(self): + with open('httpbin/core.pyc') as f: + response = self.app.post('/post', data={"file": f}) + self.assertEquals(response.status_code, 200) + if __name__ == '__main__': unittest.main()