From f7fdfe8b9ab1aec824c61ead321263d5c90fa599 Mon Sep 17 00:00:00 2001 From: Ryan Kelly Date: Thu, 17 Nov 2011 11:00:02 +1100 Subject: [PATCH 1/2] Correct handling of URLs with quoted slashes. --- requests/models.py | 6 +++--- requests/utils.py | 12 ++++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/requests/models.py b/requests/models.py index 97237e77..dd2dbc0a 100644 --- a/requests/models.py +++ b/requests/models.py @@ -27,7 +27,7 @@ from .exceptions import ( Timeout, URLRequired, TooManyRedirects, HTTPError, ConnectionError) from .utils import ( get_unicode_from_response, stream_decode_response_unicode, - decode_gzip, stream_decode_gzip, guess_filename) + decode_gzip, stream_decode_gzip, guess_filename, requote_path) REDIRECT_STATI = (codes.moved, codes.found, codes.other, codes.temporary_moved) @@ -214,7 +214,7 @@ class Request(object): # Facilitate non-RFC2616-compliant 'location' headers # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource') if not urlparse(url).netloc: - url = urljoin(r.url, urllib.quote(urllib.unquote(url))) + url = urljoin(r.url, requote_path(url)) # http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.4 if r.status_code is codes.see_other: @@ -299,7 +299,7 @@ class Request(object): if isinstance(path, unicode): path = path.encode('utf-8') - path = urllib.quote(urllib.unquote(path)) + path = requote_path(path) url = str(urlunparse([ scheme, netloc, path, params, query, fragment ])) diff --git a/requests/utils.py b/requests/utils.py index 0249e9d6..a24327e9 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -16,6 +16,7 @@ import os import random import re import zlib +import urllib from urllib2 import parse_http_list as _parse_list_header @@ -367,3 +368,14 @@ def stream_decode_gzip(iterator): yield rv except zlib.error: pass + + +def requote_path(path): + """Re-quote the given URL path component. 
+ + This function passes the given path through an unquote/quote cycle to + ensure that it is fully and consistently quoted. + """ + parts = path.split("/") + parts = (urllib.quote(urllib.unquote(part), safe="") for part in parts) + return "/".join(parts) From 390024329756db774d5b044ebb124f58ef2ba503 Mon Sep 17 00:00:00 2001 From: Ryan Kelly Date: Fri, 18 Nov 2011 09:03:28 +1100 Subject: [PATCH 2/2] Don't use requote_path when dealing with redirects. This was breaking redirect locations with a query string, by quoting the question-mark. Since the path will be passed through requote_path on the next request anyway, doing it here seems unnecessary. --- requests/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requests/models.py b/requests/models.py index dd2dbc0a..8d33c32b 100644 --- a/requests/models.py +++ b/requests/models.py @@ -214,7 +214,7 @@ class Request(object): # Facilitate non-RFC2616-compliant 'location' headers # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource') if not urlparse(url).netloc: - url = urljoin(r.url, requote_path(url)) + url = urljoin(r.url, url) # http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.4 if r.status_code is codes.see_other: