Merge pull request #3791 from evgen231/master

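Fixed detection of utf-32-be by BOM: the 4-byte sample was compared against `codecs.BOM32_BE` (a legacy alias of the 2-byte UTF-16 BE BOM, so it could never match) instead of `codecs.BOM_UTF32_BE`.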
2026-06-05 22:50:18 +00:00 · 2016-12-29 09:41:23 +00:00
parent 8e0a532cad 0cb6ddaecf
commit b9d282b6b7
2 changed files with 12 additions and 1 deletions
@@ -714,7 +714,7 @@ def guess_json_utf(data):
    # easy as counting the nulls and from their location and count
    # determine the encoding. Also detect a BOM, if present.
    sample = data[:4]
-    if sample in (codecs.BOM_UTF32_LE, codecs.BOM32_BE):
+    if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        return 'utf-32'     # BOM included
    if sample[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'  # BOM included, MS style (discouraged)
@@ -274,6 +274,17 @@ class TestGuessJSONUTF:
    def test_bad_utf_like_encoding(self):
        assert guess_json_utf(b'\x00\x00\x00\x00') is None

+    @pytest.mark.parametrize(
+        ('encoding', 'expected'), (
+            ('utf-16-be', 'utf-16'),
+            ('utf-16-le', 'utf-16'),
+            ('utf-32-be', 'utf-32'),
+            ('utf-32-le', 'utf-32')
+        ))
+    def test_guess_by_bom(self, encoding, expected):
+        data = u'\ufeff{}'.encode(encoding)
+        assert guess_json_utf(data) == expected
+

 USER = PASSWORD = "%!*'();:@&=+$,/?#[] "
 ENCODED_USER = compat.quote(USER, '')