Merge branch 'py3-uri-encoding' of https://github.com/mgiuca/requests into develop

This commit is contained in:
Kenneth Reitz
2012-02-14 04:30:10 -05:00
3 changed files with 74 additions and 9 deletions
+1 -4
View File
@@ -324,7 +324,7 @@ class Request(object):
if isinstance(path, str):
path = path.encode('utf-8')
path = requote_path(path)
path = requote_path(path)
url = (urlunparse([ scheme, netloc, path, params, query, fragment ]))
@@ -352,9 +352,6 @@ class Request(object):
if not path:
path = '/'
if is_py3:
path = quote(path.encode('utf-8'))
url.append(path)
query = p.query
+27 -3
View File
@@ -396,6 +396,28 @@ def stream_decompress(iterator, mode='gzip'):
if rv:
yield rv
# The unreserved URI characters (RFC 3986)
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    + "0123456789-._~")

# Characters that may appear in the two-digit payload of a percent-escape.
_HEX_DIGITS = frozenset("0123456789ABCDEFabcdef")


def unquote_unreserved(uri):
    """Un-escape any percent-escape sequences in a URI that are unreserved
    characters.

    This leaves all reserved, illegal and non-ASCII bytes encoded.  A '%'
    that is not followed by two hexadecimal digits (for example "%zz" or a
    trailing "%") is not a valid escape; it is copied through unchanged
    rather than raising ValueError.

    :param uri: the URI (or URI component) text to normalise.
    :returns: the text with escapes of characters in UNRESERVED_SET replaced
        by the characters themselves; everything else is left as it was.
    """
    parts = uri.split('%')
    # parts[0] is the text before the first '%', so it never starts with an
    # escape payload; every later part begins right after a '%'.
    for i in range(1, len(parts)):
        h = parts[i][0:2]
        # Checking the digits explicitly (instead of trusting int()) also
        # rejects inputs int() would tolerate, such as "+7" or " 7".
        if len(h) == 2 and _HEX_DIGITS.issuperset(h):
            c = chr(int(h, 16))
            if c in UNRESERVED_SET:
                parts[i] = c + parts[i][2:]
                continue
        # Not an escape of an unreserved character: put back the '%' that
        # split() consumed and keep the rest verbatim.
        parts[i] = '%' + parts[i]
    return ''.join(parts)
def requote_path(path):
"""Re-quote the given URL path component.
@@ -403,6 +425,8 @@ def requote_path(path):
This function passes the given path through an unquote/quote cycle to
ensure that it is fully and consistently quoted.
"""
parts = path.split(b"/")
parts = (quote(unquote(part), safe=b"") for part in parts)
return b"/".join(parts)
# Unquote only the unreserved characters
# Then quote only illegal characters (do not quote reserved, unreserved,
# or '%')
return quote(unquote_unreserved(path), safe="!#$%&'()*+,/:;=?@[]~")
return "/".join(parts)
+46 -2
View File
@@ -74,9 +74,9 @@ class RequestsTestSuite(TestSetup, unittest.TestCase):
def test_path_is_not_double_encoded(self):
request = requests.Request("http://0.0.0.0/get/~test")
request = requests.Request("http://0.0.0.0/get/test case")
assert request.path_url == "/get/%7Etest"
assert request.path_url == "/get/test%20case"
def test_HTTP_200_OK_GET(self):
r = get(httpbin('/get'))
@@ -117,6 +117,50 @@ class RequestsTestSuite(TestSetup, unittest.TestCase):
requests.get(url=httpbin('get'), headers=heads)
def test_session_with_escaped_url(self):
    """Check how URL paths are percent-escaped before a request is sent.

    Each case requests ``httpbin('get/' + sent)`` and asserts that the
    response URL equals ``httpbin('get/' + expected)``.
    """
    # Already fully escaped: quoted illegal characters ("%20" (' '),
    # "%3C" ('<'), "%3E" ('>')), quoted reserved characters ("%25" ('%'),
    # "%23" ('#'), "%2F" ('/')) and quoted non-ASCII characters
    # ("%C3%98", "%C3%A5").
    fully_escaped = '%3Ca%25b%23c%2Fd%3E/%C3%98%20%C3%A5'
    # All reserved characters (RFC 3986), except '?', '#', '[' and ']',
    # which are not allowed in the path, and ';' which delimits
    # parameters, followed by the special unreserved characters.
    reserved = '!$&\'()*+,/:=@-._~'
    unreserved = 'ABCDwxyz1234-._~'
    unreserved_escaped = '%41%42%43%44%77%78%79%7A%31%32%33%34%2D%2E%5F%7E'

    # (path sent, path expected in the response URL)
    cases = [
        # An escaped URL should not be modified (double-escaped).
        (fully_escaped, fully_escaped),
        # Bare illegal characters (space, '<') and bare non-ASCII
        # characters ('\u00d8') get properly percent-escaped.
        (u'<a%25b%23c%2Fd%3E/\u00d8 %C3%A5', fully_escaped),
        # Reserved characters must be allowed bare in the path and must
        # not be encoded; special unreserved characters should not be
        # encoded either (even though it wouldn't hurt).
        (reserved, reserved),
        # Percent-encoded unreserved characters get normalised to their
        # un-encoded forms.
        (unreserved_escaped, unreserved),
    ]
    for sent, expected in cases:
        response = get(httpbin('get/' + sent))
        self.assertEqual(response.url, httpbin('get/' + expected))
def test_user_agent_transfers(self):
"""Issue XX"""