Rewrite unquote_unreserved based on urllib.unquote

This is almost entirely taken from the unquote implementation in urllib, slightly modified for the case in hand. It now leaves invalid percent-encodings (e.g. "%zz") in place unchanged rather than raising an exception, and the code is somewhat simpler.
2026-06-05 22:50:18 +00:00 · 2012-04-04 09:34:39 +00:00
parent e28c1c9bfa
commit 06e4971fe7
2 changed files with 12 additions and 14 deletions
@@ -93,4 +93,5 @@ Patches and Suggestions
 - Jiri Machalek
 - Steve Pulec
 - Michael Kelly
- Michael Newman <newmaniese@gmail.com>
+- Michael Newman <newmaniese@gmail.com>
+- Jonty Wareing <jonty@jonty.co.uk>
@@ -441,28 +441,25 @@ def stream_untransfer(gen, resp):


 # The unreserved URI characters (RFC 3986)
-UNRESERVED_SET = frozenset(
+_unreserved_set = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    + "0123456789-._~")

+_unreserved_hextochr = dict((c.encode('hex'), c) for c in _unreserved_set)

 def unquote_unreserved(uri):
    """Un-escape any percent-escape sequences in a URI that are unreserved
    characters.
    This leaves all reserved, illegal and non-ASCII bytes encoded.
    """
-    parts = uri.split('%')
-    for i in range(1, len(parts)):
-        h = parts[i][0:2]
-        if len(h) == 2:
-            c = chr(int(h, 16))
-            if c in UNRESERVED_SET:
-                parts[i] = c + parts[i][2:]
-            else:
-                parts[i] = '%' + parts[i]
-        else:
-            parts[i] = '%' + parts[i]
-    return ''.join(parts)
+    res = uri.split('%')
+    s = res[0]
+    for item in res[1:]:
+        try:
+            s += _unreserved_hextochr[item[:2].lower()] + item[2:]
+        except (KeyError, UnicodeDecodeError):
+            s += '%' + item
+    return s


 def requote_uri(uri):