From f507a3ef12ce6e100f68741f68d39bcaf8a70c7a Mon Sep 17 00:00:00 2001 From: Eduardo Rodrigues Date: Sat, 9 Feb 2019 11:56:06 +0100 Subject: [PATCH 1/3] add utf8 with bom to test --- .gitignore | 3 +++ tests/test_utils.py | 1 + 2 files changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index cd0c32e9..e127c60a 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,9 @@ requests.egg-info/ *.swp *.egg env/ +.venv/ +.eggs/ +.tox/ .workon diff --git a/tests/test_utils.py b/tests/test_utils.py index 59b0b0ef..ba0668f3 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -332,6 +332,7 @@ class TestGuessJSONUTF: @pytest.mark.parametrize( ('encoding', 'expected'), ( + ('utf-8-sig', 'utf-8'), ('utf-16-be', 'utf-16'), ('utf-16-le', 'utf-16'), ('utf-32-be', 'utf-32'), From 9e27326d6843e7c26e9847776159eeb68a999675 Mon Sep 17 00:00:00 2001 From: Eduardo Rodrigues Date: Sat, 9 Feb 2019 22:00:32 +0100 Subject: [PATCH 2/3] add failing tests for bom --- .gitignore | 2 ++ tests/test_testserver.py | 30 ++++++++++++++++++++++++++++++ tests/test_utils.py | 2 +- 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index e127c60a..e005aba4 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,8 @@ env/ .venv/ .eggs/ .tox/ +.pytest_cache/ +.vscode/ .workon diff --git a/tests/test_testserver.py b/tests/test_testserver.py index aac52926..9d9def2f 100644 --- a/tests/test_testserver.py +++ b/tests/test_testserver.py @@ -56,6 +56,36 @@ class TestTestServer: assert r.text == u'roflol' assert r.headers['Content-Length'] == '6' + def test_text_bom_response(self): + """the text_response_server sends the given text with UTF-8 BOM""" + server = Server.text_response_server( + "HTTP/1.1 200 OK\r\n" + + "Content-Type: text/html; charset=UTF-8\r\n" + + u'\r\n\ufeffジェーピーニック' + ) + + with server as (host, port): + r = requests.get('http://{}:{}'.format(host, port)) + + assert r.status_code == 200 + assert r.text == u'ジェーピーニック' + assert r.headers['Content-Type'] == 'text/html; charset=UTF-8' + + def test_json_bom_response(self): + """the text_response_server sends the given JSON with UTF-8 BOM""" + server = Server.text_response_server( + "HTTP/1.1 200 OK\r\n" + + "Content-Type: application/json; charset=utf-8\r\n" + + u'\r\n\ufeff{"success": true}' + ) + + with server as (host, port): + r = requests.get('http://{}:{}'.format(host, port)) + + assert r.status_code == 200 + assert r.json() == {'success': True} + assert r.headers['Content-Type'] == 'application/json; charset=utf-8' + def test_basic_response(self): """the basic response server returns an empty http response""" with Server.basic_response_server() as (host, port): diff --git a/tests/test_utils.py b/tests/test_utils.py index ba0668f3..027b26fb 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -332,7 +332,7 @@ class TestGuessJSONUTF: @pytest.mark.parametrize( ('encoding', 'expected'), ( - ('utf-8-sig', 'utf-8'), + ('utf-8-sig', 'utf-8-sig'), ('utf-16-be', 'utf-16'), ('utf-16-le', 'utf-16'), ('utf-32-be', 'utf-32'), From 19cff44ec1b8eeec99459b504a681aa440f6344c Mon Sep 17 00:00:00 2001 From: Eduardo Rodrigues Date: Sat, 9 Feb 2019 22:14:22 +0100 Subject: [PATCH 3/3] fix response with utf8 bom --- requests/models.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/requests/models.py b/requests/models.py index 62dcd0b7..4eb52d94 100644 --- a/requests/models.py +++ b/requests/models.py @@ -855,6 +855,9 @@ class Response(object): # Fallback to auto-detected encoding. if self.encoding is None: encoding = self.apparent_encoding + # Forcefully remove BOM from UTF-8 + elif self.encoding.lower() == 'utf-8': + encoding = 'utf-8-sig' # Decode unicode from given encoding. try: