From 62d29d510625aa6c5415f4ba35579059b1c842fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Je=CC=81re=CC=81my=20Bethmont?= Date: Mon, 26 Sep 2011 10:57:45 +0200 Subject: [PATCH 1/3] Fixed #174 and refactored urls quoting/concatenation in one function in utils.py. --- requests/models.py | 30 +++--------------------------- requests/utils.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/requests/models.py b/requests/models.py index f6d84ebd..18ae26ab 100644 --- a/requests/models.py +++ b/requests/models.py @@ -22,7 +22,7 @@ from .monkeys import Request as _Request, HTTPBasicAuthHandler, HTTPForcedBasicA from .structures import CaseInsensitiveDict from .packages.poster.encode import multipart_encode from .packages.poster.streaminghttp import register_openers, get_handlers -from .utils import dict_from_cookiejar, get_unicode_from_response, stream_decode_response_unicode, decode_gzip, stream_decode_gzip +from .utils import get_clean_url, dict_from_cookiejar, get_unicode_from_response, stream_decode_response_unicode, decode_gzip, stream_decode_gzip from .status_codes import codes from .exceptions import RequestException, AuthenticationError, Timeout, URLRequired, InvalidMethod, TooManyRedirects @@ -215,20 +215,7 @@ class Request(object): history.append(r) - url = r.headers['location'] - - # Handle redirection without scheme (see: RFC 1808 Section 4) - if url.startswith('//'): - parsed_rurl = urlparse(r.url) - url = '%s:%s' % (parsed_rurl.scheme, url) - - # Facilitate non-RFC2616-compliant 'location' headers - # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource') - parsed_url = urlparse(url) - if not parsed_url.netloc: - parsed_url = list(parsed_url) - parsed_url[2] = urllib.quote(parsed_url[2], safe="%/:=&?~#+!$,;'@()*[]") - url = urljoin(r.url, str(urlunparse(parsed_url))) + url = get_clean_url(r.headers['location'], parent_url=r.url) # http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.4 if r.status_code is codes.see_other: @@ -276,18 +263,7 @@ class Request(object): def _build_url(self): """Build the actual URL to use.""" - # Support for unicode domain names and paths. - scheme, netloc, path, params, query, fragment = urlparse(self.url) - netloc = netloc.encode('idna') - - if isinstance(path, unicode): - path = path.encode('utf-8') - - path = urllib.quote(path, safe="%/:=&?~#+!$,;'@()*[]") - - self.url = str(urlunparse( - [scheme, netloc, path, params, query, fragment] - )) + self.url = get_clean_url(self.url) if self._enc_params: if urlparse(self.url).query: diff --git a/requests/utils.py b/requests/utils.py index 75357ae7..e5036e44 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -13,8 +13,36 @@ import cgi import codecs import cookielib import re +import urllib import zlib +from urlparse import urlparse, urlunparse, urljoin + +def get_clean_url(url, parent_url=None): + # Handle redirection without scheme (see: RFC 1808 Section 4) + if url.startswith('//'): + parsed_rurl = urlparse(parent_url) + url = '%s:%s' % (parsed_rurl.scheme, url) + + scheme, netloc, path, params, query, fragment = urlparse(url) + if netloc: + netloc = netloc.encode('idna') + + if isinstance(path, unicode): + path = path.encode('utf-8') + + path = urllib.quote(path, safe="%/:=&?~#+!$,;'@()*[]") + params = urllib.quote(params, safe="%/:=&?~#+!$,;'@()*[]") + query = urllib.quote(query, safe="%/:=&?~#+!$,;'@()*[]") + + url = str(urlunparse([scheme, netloc, path, params, query, fragment])) + + # Facilitate non-RFC2616-compliant 'location' headers + # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource') + if not netloc and parent_url: + url = urljoin(parent_url, url) + + return url def header_expand(headers): """Returns an HTTP Header value string from a dictionary. From ffde764a910e64398d01a8058a540853b63d60d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Je=CC=81re=CC=81my=20Bethmont?= Date: Mon, 26 Sep 2011 11:00:25 +0200 Subject: [PATCH 2/3] Removed unused imports. --- requests/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requests/models.py b/requests/models.py index 18ae26ab..43b437ee 100644 --- a/requests/models.py +++ b/requests/models.py @@ -14,7 +14,7 @@ import zlib from urllib2 import HTTPError -from urlparse import urlparse, urlunparse, urljoin +from urlparse import urlparse from datetime import datetime from .config import settings From 2361a6fc79f3af84550478cd43e0f9ab26e714f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Je=CC=81re=CC=81my=20Bethmont?= Date: Mon, 3 Oct 2011 11:16:43 +0200 Subject: [PATCH 3/3] Renamed get_clean_url -> cleanup_url. --- requests/models.py | 2 +- requests/utils.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requests/models.py b/requests/models.py index a47a16b6..2ac8c33a 100644 --- a/requests/models.py +++ b/requests/models.py @@ -199,7 +199,7 @@ class Request(object): # Add the old request to the history collector. history.append(r) - url = get_clean_url(r.headers['location'], parent_url=self.url) + url = cleanup_url(r.headers['location'], parent_url=self.url) # If 303, convert to idempotent GET. # http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.4 diff --git a/requests/utils.py b/requests/utils.py index 1dcf3c2f..0a114768 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -44,7 +44,7 @@ def encode_params(params): else: return params -def get_clean_url(url, parent_url=None): +def cleanup_url(url, parent_url=None): # Handle redirection without scheme (see: RFC 1808 Section 4) if url.startswith('//'): parsed_rurl = urlparse(parent_url) @@ -73,7 +73,7 @@ def get_clean_url(url, parent_url=None): def build_url(url, query_params): """Build the actual URL to use.""" - url = get_clean_url(url) + url = cleanup_url(url) query_params = encode_params(query_params)