diff --git a/.gitignore b/.gitignore
index 0f175468..dd9e006f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,10 @@ requests.egg-info/
 *.swp
 *.egg
 env/
+.venv/
+.tox/
+.pytest_cache/
+.vscode/
 
 .eggs/
 .workon
diff --git a/requests/models.py b/requests/models.py
index ea332e7e..429c30f4 100644
--- a/requests/models.py
+++ b/requests/models.py
@@ -873,6 +873,10 @@ class Response(object):
         # Fallback to auto-detected encoding.
         if self.encoding is None:
             encoding = self.apparent_encoding
+        # 'utf-8-sig' decodes like UTF-8 but drops a leading BOM, so a
+        # declared UTF-8 body (any spelling) never starts with U+FEFF.
+        elif self.encoding.lower().replace('_', '-') in ('utf-8', 'utf8'):
+            encoding = 'utf-8-sig'
 
         # Decode unicode from given encoding.
         try:
diff --git a/tests/test_testserver.py b/tests/test_testserver.py
index aac52926..9d9def2f 100644
--- a/tests/test_testserver.py
+++ b/tests/test_testserver.py
@@ -56,6 +56,36 @@ class TestTestServer:
             assert r.text == u'roflol'
             assert r.headers['Content-Length'] == '6'
 
+    def test_text_bom_response(self):
+        """r.text omits a leading UTF-8 BOM when the charset is UTF-8"""
+        server = Server.text_response_server(
+            "HTTP/1.1 200 OK\r\n" +
+            "Content-Type: text/html; charset=UTF-8\r\n" +
+            u'\r\n\ufeffジェーピーニック'
+        )
+
+        with server as (host, port):
+            r = requests.get('http://{}:{}'.format(host, port))
+
+            assert r.status_code == 200
+            assert r.text == u'ジェーピーニック'
+            assert r.headers['Content-Type'] == 'text/html; charset=UTF-8'
+
+    def test_json_bom_response(self):
+        """r.json() parses a UTF-8 JSON body that starts with a BOM"""
+        server = Server.text_response_server(
+            "HTTP/1.1 200 OK\r\n" +
+            "Content-Type: application/json; charset=utf-8\r\n" +
+            u'\r\n\ufeff{"success": true}'
+        )
+
+        with server as (host, port):
+            r = requests.get('http://{}:{}'.format(host, port))
+
+            assert r.status_code == 200
+            assert r.json() == {'success': True}
+            assert r.headers['Content-Type'] == 'application/json; charset=utf-8'
+
     def test_basic_response(self):
         """the basic response server returns an empty http response"""
         with Server.basic_response_server() as (host, port):
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 463516b2..ae517705 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -333,6 +333,7 @@ class TestGuessJSONUTF:
 
     @pytest.mark.parametrize(
         ('encoding', 'expected'), (
+            ('utf-8-sig', 'utf-8-sig'),
             ('utf-16-be', 'utf-16'),
             ('utf-16-le', 'utf-16'),
             ('utf-32-be', 'utf-32'),