# The unreserved URI characters (RFC 3986)
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    + "0123456789-._~")

# ASCII hexadecimal digits. Checked explicitly because int(x, 16) also
# accepts signs, surrounding whitespace and non-ASCII digits.
_HEX_DIGITS = frozenset("0123456789abcdefABCDEF")


def unquote_unreserved(uri):
    """Un-escape any percent-escape sequences in a URI that are unreserved
    characters.

    This leaves all reserved, illegal and non-ASCII bytes encoded.
    Malformed escapes (a '%' that is not followed by two hexadecimal
    digits) are left untouched instead of raising ValueError.

    :param uri: the URI (or URI component) to process.
    :return: the URI with only escaped unreserved characters decoded.
    """
    segments = uri.split('%')
    # segments[0] is the text before the first '%'; every later segment
    # is the text that immediately followed a '%'.
    result = [segments[0]]
    for segment in segments[1:]:
        code = segment[:2]
        if len(code) == 2 and _HEX_DIGITS.issuperset(code):
            char = chr(int(code, 16))
            if char in UNRESERVED_SET:
                result.append(char + segment[2:])
                continue
        # Not an escaped unreserved character: put the '%' back unchanged.
        result.append('%' + segment)
    return ''.join(result)


def requote_path(path):
    """Re-quote the given URL path component.

    This function passes the given path through an unquote/quote cycle to
    ensure that it is fully and consistently quoted.

    :param path: the URL path to normalise.
    :return: the re-quoted path.
    """
    # Unquote only the unreserved characters
    # Then quote only illegal characters (do not quote reserved, unreserved,
    # or '%')
    return quote(unquote_unreserved(path), safe="!#$%&'()*+,/:;=?@[]~")