From 0b6dc091f50965250d9c421d8aadaa701186a1b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Je=CC=81re=CC=81my=20Bethmont?= Date: Mon, 8 Aug 2011 18:11:33 +0200 Subject: [PATCH 1/8] Added support for URLs with path not encoded. --- requests/models.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/requests/models.py b/requests/models.py index a67691dd..bb15dc9b 100644 --- a/requests/models.py +++ b/requests/models.py @@ -185,7 +185,7 @@ class Request(object): # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource') if not urlparse(url).netloc: parent_url_components = urlparse(self.url) - url = '%s://%s/%s' % (parent_url_components.scheme, parent_url_components.netloc, url) + url = '%s://%s/%s' % (parent_url_components.scheme, parent_url_components.netloc, urllib.quote(urllib.unquote(url))) # http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.4 if r.status_code is 303: @@ -231,9 +231,10 @@ class Request(object): def _build_url(self): """Build the actual URL to use""" - # Support for unicode domain names. + # Support for unicode domain names and paths. parsed_url = list(urlparse(self.url)) parsed_url[1] = parsed_url[1].encode('idna') + parsed_url[2] = urllib.quote(urllib.unquote(parsed_url[2])) self.url = urlunparse(parsed_url) if self._enc_params: From aa0f78b740b6098f29594242b6b923113d0659b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Je=CC=81re=CC=81my=20Bethmont?= Date: Tue, 9 Aug 2011 15:14:08 +0200 Subject: [PATCH 2/8] Better support of utf-8 paths. --- requests/models.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/requests/models.py b/requests/models.py index 619d099f..51100290 100644 --- a/requests/models.py +++ b/requests/models.py @@ -247,10 +247,12 @@ class Request(object): """Build the actual URL to use""" # Support for unicode domain names and paths. - parsed_url = list(urlparse(self.url)) - parsed_url[1] = parsed_url[1].encode('idna') - parsed_url[2] = urllib.quote(urllib.unquote(parsed_url[2])) - self.url = urlunparse(parsed_url) + scheme, netloc, path, params, query, fragment = urlparse(self.url) + netloc = netloc.encode('idna') + if isinstance(path, unicode): + path = path.encode('utf-8') + path = urllib.quote(urllib.unquote(path)) + self.url = str(urlunparse([ scheme, netloc, path, params, query, fragment ])) if self._enc_params: if urlparse(self.url).query: From 32a861b2eddcfd73049f245984a12b88b364dadd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Je=CC=81re=CC=81my=20Bethmont?= Date: Tue, 9 Aug 2011 15:34:44 +0200 Subject: [PATCH 3/8] Fixed memory leak (see http://bugs.python.org/issue1208304) --- requests/models.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/requests/models.py b/requests/models.py index 51100290..6d97e35e 100644 --- a/requests/models.py +++ b/requests/models.py @@ -169,7 +169,8 @@ class Request(object): try: response.headers = CaseInsensitiveDict(getattr(resp.info(), 'dict', None)) response.read = resp.read - response.close = resp.close + response._resp = resp + response._close = resp.close except AttributeError: pass @@ -397,6 +398,11 @@ class Response(object): raise self.error + def close(self): + if self._resp.fp is not None and hasattr(self._resp.fp, '_sock'): + self._resp.fp._sock.recv = None + self._close() + class AuthManager(object): """Requests Authentication Manager.""" From 7b79cea7384a2dd3df90ac748bac3c4a89ebce24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Je=CC=81re=CC=81my=20Bethmont?= Date: Tue, 9 Aug 2011 15:41:59 +0200 Subject: [PATCH 4/8] Use urljoin instead of manual concatenation. --- requests/models.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/requests/models.py b/requests/models.py index 51100290..57ee5dd8 100644 --- a/requests/models.py +++ b/requests/models.py @@ -12,7 +12,7 @@ import socket import zlib from urllib2 import HTTPError -from urlparse import urlparse, urlunparse +from urlparse import urlparse, urlunparse, urljoin from datetime import datetime from .config import settings @@ -198,8 +198,7 @@ class Request(object): # Facilitate non-RFC2616-compliant 'location' headers # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource') if not urlparse(url).netloc: - parent_url_components = urlparse(self.url) - url = '%s://%s/%s' % (parent_url_components.scheme, parent_url_components.netloc, urllib.quote(urllib.unquote(url))) + url = urljoin(r.url, urllib.quote(urllib.unquote(url))) # http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.4 if r.status_code is 303: From e7e395549f8b60777f00c244dabd2bd9f89271b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Je=CC=81re=CC=81my=20Bethmont?= Date: Tue, 9 Aug 2011 17:28:58 +0200 Subject: [PATCH 5/8] Handle too many redirects. --- requests/exceptions.py | 3 +++ requests/models.py | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/requests/exceptions.py b/requests/exceptions.py index eff7512a..c08c6148 100644 --- a/requests/exceptions.py +++ b/requests/exceptions.py @@ -21,3 +21,6 @@ class URLRequired(RequestException): class InvalidMethod(RequestException): """An inappropriate method was attempted.""" + +class TooManyRedirects(RequestException): + """Too many redirects.""" diff --git a/requests/models.py b/requests/models.py index ca72802f..e08bce25 100644 --- a/requests/models.py +++ b/requests/models.py @@ -20,7 +20,7 @@ from .monkeys import Request as _Request, HTTPBasicAuthHandler, HTTPForcedBasicA from .structures import CaseInsensitiveDict from .packages.poster.encode import multipart_encode from .packages.poster.streaminghttp import register_openers, get_handlers -from .exceptions import RequestException, AuthenticationError, Timeout, URLRequired, InvalidMethod +from .exceptions import RequestException, AuthenticationError, Timeout, URLRequired, InvalidMethod, TooManyRedirects REDIRECT_STATI = (301, 302, 303, 307) @@ -192,6 +192,9 @@ class Request(object): (self.allow_redirects)) ): + if not len(history) < 30: + raise TooManyRedirects() + history.append(r) url = r.headers['location'] From cf94c96a68904b0c541425bb2eb0ee816d6a4e4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Je=CC=81re=CC=81my=20Bethmont?= Date: Tue, 9 Aug 2011 17:30:19 +0200 Subject: [PATCH 6/8] Always close connection during redirections. --- requests/models.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requests/models.py b/requests/models.py index 6d97e35e..926bb94b 100644 --- a/requests/models.py +++ b/requests/models.py @@ -192,6 +192,8 @@ class Request(object): (self.allow_redirects)) ): + r.close() + history.append(r) url = r.headers['location'] From 571d808d25d3391664a69920d4e0a5cb1418af90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Je=CC=81re=CC=81my=20Bethmont?= Date: Wed, 17 Aug 2011 15:31:24 +0200 Subject: [PATCH 7/8] Fixed bad merge. --- requests/models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requests/models.py b/requests/models.py index 0d7b67cb..7690f16a 100644 --- a/requests/models.py +++ b/requests/models.py @@ -230,7 +230,8 @@ class Request(object): # Facilitate non-RFC2616-compliant 'location' headers # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource') - url = urljoin(r.url, urllib.quote(urllib.unquote(url))) + if not urlparse(url).netloc: + url = urljoin(r.url, urllib.quote(urllib.unquote(url))) # http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.4 if r.status_code is 303: From c22b71fc5b69736f40f8e491c5a6c4db461321f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Je=CC=81re=CC=81my=20Bethmont?= Date: Wed, 17 Aug 2011 16:04:53 +0200 Subject: [PATCH 8/8] Handle redirection without scheme. --- requests/models.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/requests/models.py b/requests/models.py index cc9cc6f0..931bafd8 100644 --- a/requests/models.py +++ b/requests/models.py @@ -225,6 +225,11 @@ class Request(object): url = r.headers['location'] + # Handle redirection without scheme (see: RFC 1808 Section 4) + if url.startswith('//'): + parsed_rurl = urlparse(r.url) + url = '%s:%s' % (parsed_rurl.scheme, url) + # Facilitate non-RFC2616-compliant 'location' headers # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource') if not urlparse(url).netloc: