# Post-patch form of the hunks changed in requests/utils.py
# (a773f109..f4f98c45), reconstructed from a collapsed unified diff.

# The unreserved URI characters (RFC 3986)
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    + "0123456789-._~")

# ASCII hex digits only.  int(s, 16) is more permissive than a URI
# percent-escape (it accepts signs, surrounding whitespace and non-ASCII
# digits), so escapes are validated against this set before decoding.
_HEX_DIGITS = frozenset("0123456789abcdefABCDEF")


def unquote_unreserved(uri):
    """Un-escape percent-escape sequences that encode unreserved characters.

    Only escapes whose decoded value is a member of ``UNRESERVED_SET`` are
    replaced by the literal character.  Escapes of reserved, illegal and
    non-ASCII bytes are left encoded.  A ``%`` that is not followed by two
    ASCII hex digits (for example ``%zz``, ``%4`` or a trailing ``%``) is
    not a valid escape and is passed through unchanged instead of raising
    ``ValueError``.

    :param uri: the string to process.
    :return: ``uri`` with unreserved characters un-escaped.
    """
    pieces = uri.split('%')
    # Everything before the first '%' can never contain an escape.
    result = [pieces[0]]
    for piece in pieces[1:]:
        hex_pair = piece[:2]
        if len(hex_pair) == 2 and _HEX_DIGITS.issuperset(hex_pair):
            char = chr(int(hex_pair, 16))
            if char in UNRESERVED_SET:
                result.append(char + piece[2:])
                continue
        # Not an escape of an unreserved character: restore the '%' that
        # split() consumed and keep the text as it was.
        result.append('%' + piece)
    return ''.join(result)


def requote_path(path):
    """Re-quote the given URL path component.

    Each ``/``-separated segment is first passed through
    ``unquote_unreserved`` and then through ``quote``, with the reserved
    characters, ``~`` and ``%`` itself marked as safe so that existing
    escapes are not quoted a second time.

    :param path: the URL path to normalise.
    :return: the re-quoted path, with segments joined by ``/`` again.
    """
    parts = path.split("/")
    # Unquote only the unreserved characters
    # Then quote only illegal characters (do not quote reserved, unreserved,
    # or '%')
    parts = (quote(unquote_unreserved(part), safe="!#$%&'()*+,/:;=?@[]~")
             for part in parts)
    return "/".join(parts)