Merge pull request #3791 from evgen231/master

Fixed detection of utf-32-be by BOM.
This commit is contained in:
Cory Benfield
2016-12-29 09:41:23 +00:00
committed by GitHub
2 changed files with 12 additions and 1 deletions
+1 -1
View File
@@ -714,7 +714,7 @@ def guess_json_utf(data):
# easy as counting the nulls and from their location and count
# determine the encoding. Also detect a BOM, if present.
sample = data[:4]
if sample in (codecs.BOM_UTF32_LE, codecs.BOM32_BE):
if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
return 'utf-32' # BOM included
if sample[:3] == codecs.BOM_UTF8:
return 'utf-8-sig' # BOM included, MS style (discouraged)
+11
View File
@@ -274,6 +274,17 @@ class TestGuessJSONUTF:
# Four NUL bytes carry no usable null-position pattern and no BOM;
# guess_json_utf is expected to return None for this input.
def test_bad_utf_like_encoding(self):
assert guess_json_utf(b'\x00\x00\x00\x00') is None
# BOM-based detection: each case encodes U+FEFF followed by '{}' with an
# endian-specific codec and expects guess_json_utf to report the generic
# codec name ('utf-16' / 'utf-32') for both byte orders.
@pytest.mark.parametrize(
('encoding', 'expected'), (
('utf-16-be', 'utf-16'),
('utf-16-le', 'utf-16'),
('utf-32-be', 'utf-32'),
('utf-32-le', 'utf-32')
))
def test_guess_by_bom(self, encoding, expected):
# Encoding u'\ufeff' explicitly places the BOM bytes at the start of data.
data = u'\ufeff{}'.encode(encoding)
assert guess_json_utf(data) == expected
# USER and PASSWORD are bound to the same string: '%', punctuation that is
# reserved in URLs, and a trailing space.
USER = PASSWORD = "%!*'();:@&=+$,/?#[] "
# USER passed through compat.quote with an empty `safe` argument --
# presumably the fully percent-encoded form; confirm against compat.quote.
ENCODED_USER = compat.quote(USER, '')