From 3ae0ff36853d14b3bb23f1344df4889f0f44ed4c Mon Sep 17 00:00:00 2001 From: Matt Giuca Date: Wed, 15 Feb 2012 11:47:02 +1100 Subject: [PATCH 1/4] Added test cases for URI cleaning in the query part (Issue #429). --- test_requests.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/test_requests.py b/test_requests.py index 2783a23b..0b5edcce 100644 --- a/test_requests.py +++ b/test_requests.py @@ -160,6 +160,29 @@ class RequestsTestSuite(TestSetup, unittest.TestCase): response = get(url) self.assertEqual(response.url, httpbin('get/' + path_unreserved)) + # Re-run all of the same tests on the query part of the URI + query_fully_escaped = '%3Ca%25b%23c%2Fd%3E=%C3%98%20%C3%A5' + url = httpbin('get/?' + query_fully_escaped) + response = get(url) + self.assertEqual(response.url, httpbin('get/?' + query_fully_escaped)) + + query = u' Date: Wed, 15 Feb 2012 12:00:47 +1100 Subject: [PATCH 2/4] Now requotes the entire URL, not just the path (Issue #429). --- requests/models.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/requests/models.py b/requests/models.py index dcc51b09..6b8743be 100644 --- a/requests/models.py +++ b/requests/models.py @@ -321,20 +321,30 @@ class Request(object): path = '/' if is_py2: + if isinstance(scheme, str): + scheme = scheme.encode('utf-8') + if isinstance(netloc, str): + netloc = netloc.encode('utf-8') if isinstance(path, str): path = path.encode('utf-8') - - path = requote_path(path) + if isinstance(params, str): + params = params.encode('utf-8') + if isinstance(query, str): + query = query.encode('utf-8') + if isinstance(fragment, str): + fragment = fragment.encode('utf-8') url = (urlunparse([ scheme, netloc, path, params, query, fragment ])) if self._enc_params: if urlparse(url).query: - return '%s&%s' % (url, self._enc_params) + url = '%s&%s' % (url, self._enc_params) else: - return '%s?%s' % (url, self._enc_params) - else: - return url + url = '%s?%s' % (url, self._enc_params) + + url = requote_path(url) + + return url @property def path_url(self): From b8298b071032e90dd9de3af0a96cbb9ec9cb2e13 Mon Sep 17 00:00:00 2001 From: Matt Giuca Date: Wed, 15 Feb 2012 12:03:08 +1100 Subject: [PATCH 3/4] Rename utils.requote_path to requote_uri. It is now applied to a whole URI, not just the path. --- requests/models.py | 4 ++-- requests/utils.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/requests/models.py b/requests/models.py index 6b8743be..ae64f29d 100644 --- a/requests/models.py +++ b/requests/models.py @@ -26,7 +26,7 @@ from .exceptions import ( URLRequired, SSLError) from .utils import ( get_encoding_from_headers, stream_decode_response_unicode, - stream_decompress, guess_filename, requote_path, dict_from_string) + stream_decompress, guess_filename, requote_uri, dict_from_string) from .compat import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, str, bytes, SimpleCookie, is_py3, is_py2 @@ -342,7 +342,7 @@ class Request(object): else: url = '%s?%s' % (url, self._enc_params) - url = requote_path(url) + url = requote_uri(url) return url diff --git a/requests/utils.py b/requests/utils.py index abbc752a..806d2d54 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -419,14 +419,14 @@ def unquote_unreserved(uri): parts[i] = '%' + parts[i] return ''.join(parts) -def requote_path(path): - """Re-quote the given URL path component. +def requote_uri(uri): + """Re-quote the given URI. - This function passes the given path through an unquote/quote cycle to + This function passes the given URI through an unquote/quote cycle to ensure that it is fully and consistently quoted. """ # Unquote only the unreserved characters # Then quote only illegal characters (do not quote reserved, unreserved, # or '%') - return quote(unquote_unreserved(path), safe="!#$%&'()*+,/:;=?@[]~") + return quote(unquote_unreserved(uri), safe="!#$%&'()*+,/:;=?@[]~") return "/".join(parts) From 690426ac86bb3423fcd6e57a1443c6d4ca3a8952 Mon Sep 17 00:00:00 2001 From: Matt Giuca Date: Wed, 15 Feb 2012 12:06:34 +1100 Subject: [PATCH 4/4] Added Matt Giuca to AUTHORS. --- AUTHORS.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.rst b/AUTHORS.rst index 8cf76e9d..363a74c7 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -81,3 +81,4 @@ Patches and Suggestions - Brendon Crawford - Denis (Telofy) - Cory Benfield (Lukasa) +- Matt Giuca