requests3/tests/test_utils.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import sys
import unittest
import random

# Path hack.
sys.path.insert(0, os.path.abspath('..'))
import requests.utils
from requests.compat import is_py3, bytes


class GuessJSONUTFTests(unittest.TestCase):
    """Tests for the JSON UTF encoding guessing code."""

    codecs = (
        'utf-8', 'utf-8-sig',
        'utf-16', 'utf-16-le', 'utf-16-be',
        'utf-32', 'utf-32-le', 'utf-32-be'
    )

    def test_guess_encoding(self):
        # Throw 4-character ASCII strings (encoded to a UTF encoding)
        # at the guess routine; it should correctly guess all codecs.
        unichr = chr if is_py3 else __builtins__.unichr
        guess = requests.utils.guess_json_utf
        for c in range(33, 127):  # printable only
            sample = unichr(c) * 4
            for codec in self.codecs:
                res = guess(sample.encode(codec))
                self.assertEqual(res, codec)

    def test_smoke_encoding(self):
        # Throw random 4-byte strings at the guess function.
        # Any guess for a UTF encoding is verified, a decode exception
        # is a test failure.
        chr = (lambda c: bytes([c])) if is_py3 else __builtins__.chr
        guess = requests.utils.guess_json_utf
        for i in range(1000):
            sample = bytes().join(
                [chr(random.randrange(256)) for _ in range(4)])
            res = guess(sample)
            if res is not None and res != 'utf-8':
                # This should decode without errors if this is *really*
                # something in this encoding. Skip UTF-8, it is more
                # picky about valid data.
                sample.decode(res)


if __name__ == '__main__':
    unittest.main()