def cleanup_url(url, parent_url=None):
    """Encode and quote *url*, resolving it against *parent_url* if given.

    The network location is IDNA-encoded, and the path, params and query
    components are UTF-8 encoded (when unicode) and percent-quoted. The
    fragment is passed through untouched.

    :param url: the URL to clean up. May be absolute, scheme-relative
        (``//host/path``) or a bare path such as ``/path/to/resource``.
    :param parent_url: optional URL that *url* is relative to. It supplies
        the scheme for a scheme-relative *url* and the base for a *url*
        that has no network location. When omitted, *url* is only encoded
        and quoted.
    :returns: the cleaned-up URL.
    """
    safe = "%/:=&?~#+!$,;'@()*[]"

    def quote_component(component):
        # Python 2's urllib.quote cannot handle non-ASCII unicode input,
        # so every quoted component is handed over as UTF-8 bytes.
        if isinstance(component, unicode):
            component = component.encode('utf-8')
        return urllib.quote(component, safe=safe)

    # Handle redirection without scheme (see: RFC 1808 Section 4).
    # Without a parent there is no scheme to borrow, so the URL is left
    # scheme-relative instead of failing inside urlparse(None).
    if parent_url and url.startswith('//'):
        url = '%s:%s' % (urlparse(parent_url).scheme, url)

    scheme, netloc, path, params, query, fragment = urlparse(url)

    # Support for unicode domain names.
    if netloc:
        netloc = netloc.encode('idna')

    path = quote_component(path)
    params = quote_component(params)
    query = quote_component(query)

    url = str(urlunparse([scheme, netloc, path, params, query, fragment]))

    # Facilitate non-RFC2616-compliant 'location' headers
    # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource')
    if not netloc and parent_url:
        url = urljoin(parent_url, url)

    return url