From f6707042d8b18d2a3737380bf58ff020872fb07b Mon Sep 17 00:00:00 2001
From: Bruce Adams
Date: Mon, 27 Nov 2023 17:27:43 -0500
Subject: [PATCH] Unit test for string containing multi-byte UTF-8

This adds two tests. The first demonstrates the existing, correct
behavior for `data=bytes`. The second, which currently fails, covers
the case where `data=string` and the string contains multi-byte UTF-8:
Content-Length must count the encoded bytes, not the characters.
---
 tests/test_requests.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/tests/test_requests.py b/tests/test_requests.py
index a71fe7d6..b6fb84d1 100644
--- a/tests/test_requests.py
+++ b/tests/test_requests.py
@@ -1808,6 +1808,23 @@ class TestRequests:
 
         assert p.headers["Content-Length"] == length
 
+    def test_content_length_for_bytes_data(self, httpbin):
+        data = "This is a string containing multi-byte UTF-8 ☃️"
+        encoded_data = data.encode("utf-8")
+        length = str(len(encoded_data))
+        req = requests.Request("POST", httpbin("post"), data=encoded_data)
+        p = req.prepare()
+
+        assert p.headers["Content-Length"] == length
+
+    def test_content_length_for_string_data_counts_bytes(self, httpbin):
+        data = "This is a string containing multi-byte UTF-8 ☃️"
+        length = str(len(data.encode("utf-8")))
+        req = requests.Request("POST", httpbin("post"), data=data)
+        p = req.prepare()
+
+        assert p.headers["Content-Length"] == length
+
     def test_nonhttp_schemes_dont_check_URLs(self):
         test_urls = (
             "data:image/gif;base64,R0lGODlhAQABAHAAACH5BAUAAAAALAAAAAABAAEAAAICRAEAOw==",