Merge pull request #4976 from eduardomourar/feature/strip-utf8-bom

Fix for response with UTF-8 BOM
This commit is contained in:
2019-08-20 00:26:59 -04:00
committed by GitHub
4 changed files with 39 additions and 0 deletions
+5
View File
@@ -14,6 +14,11 @@ requests.egg-info/
*.swp
*.egg
env/
.venv/
.eggs/
.tox/
.pytest_cache/
.vscode/
.eggs/
.workon
+3
View File
@@ -873,6 +873,9 @@ class Response(object):
# Fallback to auto-detected encoding.
if self.encoding is None:
encoding = self.apparent_encoding
# Forcefully remove BOM from UTF-8
elif self.encoding.lower() == 'utf-8':
encoding = 'utf-8-sig'
# Decode unicode from given encoding.
try:
+30
View File
@@ -56,6 +56,36 @@ class TestTestServer:
assert r.text == u'roflol'
assert r.headers['Content-Length'] == '6'
def test_text_bom_response(self):
    """An HTML body sent with a leading UTF-8 BOM is exposed by ``r.text`` without it."""
    # Raw HTTP response: status line, UTF-8 content type, then a body that
    # begins with U+FEFF (the byte order mark) followed by the markup.
    raw_response = (
        "HTTP/1.1 200 OK\r\n" +
        "Content-Type: text/html; charset=UTF-8\r\n" +
        u'\r\n\ufeff<doctype html><html><body>ジェーピーニック</body></html>'
    )
    bom_server = Server.text_response_server(raw_response)

    with bom_server as (host, port):
        url = 'http://{}:{}'.format(host, port)
        r = requests.get(url)

        assert r.status_code == 200
        # The expected text is the body with the BOM removed.
        assert r.text == u'<doctype html><html><body>ジェーピーニック</body></html>'
        assert r.headers['Content-Type'] == 'text/html; charset=UTF-8'
def test_json_bom_response(self):
    """A JSON body sent with a leading UTF-8 BOM is still parsed by ``r.json()``."""
    # Raw HTTP response: status line, JSON content type with a utf-8
    # charset, then a body that begins with U+FEFF before the JSON document.
    raw_response = (
        "HTTP/1.1 200 OK\r\n" +
        "Content-Type: application/json; charset=utf-8\r\n" +
        u'\r\n\ufeff{"success": true}'
    )
    bom_server = Server.text_response_server(raw_response)

    with bom_server as (host, port):
        url = 'http://{}:{}'.format(host, port)
        r = requests.get(url)

        assert r.status_code == 200
        # Parsing must succeed despite the BOM in front of the document.
        assert r.json() == {'success': True}
        assert r.headers['Content-Type'] == 'application/json; charset=utf-8'
def test_basic_response(self):
"""the basic response server returns an empty http response"""
with Server.basic_response_server() as (host, port):
+1
View File
@@ -333,6 +333,7 @@ class TestGuessJSONUTF:
@pytest.mark.parametrize(
('encoding', 'expected'), (
('utf-8-sig', 'utf-8-sig'),
('utf-16-be', 'utf-16'),
('utf-16-le', 'utf-16'),
('utf-32-be', 'utf-32'),