Rewrite unquote_unreserved based on urllib.unquote

This is almost entirely taken from the unquote implementation in urllib,
slightly modified for the case at hand. It now handles invalid percent-encodings
gracefully (leaving them untouched) instead of raising an exception, and the
code is somewhat simpler.
This commit is contained in:
Jonty Wareing
2012-04-04 09:34:39 +00:00
parent e28c1c9bfa
commit 06e4971fe7
2 changed files with 12 additions and 14 deletions
+2 -1
View File
@@ -93,4 +93,5 @@ Patches and Suggestions
- Jiri Machalek
- Steve Pulec
- Michael Kelly
- Michael Newman <newmaniese@gmail.com>
- Michael Newman <newmaniese@gmail.com>
- Jonty Wareing <jonty@jonty.co.uk>
+10 -13
View File
@@ -441,28 +441,25 @@ def stream_untransfer(gen, resp):
# The unreserved URI characters (RFC 3986)
_unreserved_set = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    + "0123456789-._~")

# Previous public name for the same set, kept as an alias so that existing
# importers keep working.
UNRESERVED_SET = _unreserved_set

# Lowercase two-digit hex code -> unreserved character, e.g. '7e' -> '~'.
# Built with '%02x' rather than str.encode('hex'), which exists only on
# Python 2.
_unreserved_hextochr = dict(('%02x' % ord(c), c) for c in _unreserved_set)


def unquote_unreserved(uri):
    """Un-escape any percent-escape sequences in a URI that are unreserved
    characters.

    This leaves all reserved, illegal and non-ASCII bytes encoded. Escape
    sequences that are malformed (truncated, or not valid hexadecimal) are
    passed through unchanged rather than raising an exception.

    :param uri: the URI string to process.
    :returns: ``uri`` with every ``%XX`` escape that denotes an unreserved
        character replaced by that character. Hex digits are matched
        case-insensitively.
    """
    # Everything before the first '%' is literal text; each later piece
    # starts with whatever followed a '%' in the input.
    pieces = uri.split('%')
    unquoted = [pieces[0]]
    for item in pieces[1:]:
        # Table keys are lowercase, so normalise the two candidate hex digits.
        char = _unreserved_hextochr.get(item[:2].lower())
        if char is None:
            # Reserved, non-ASCII, truncated or invalid escape: put the '%'
            # back and keep the text exactly as it was.
            unquoted.append('%' + item)
        else:
            unquoted.append(char + item[2:])
    return ''.join(unquoted)
def requote_uri(uri):