diff --git a/requests/sessions.py b/requests/sessions.py index dde462f0..dc993d7f 100644 --- a/requests/sessions.py +++ b/requests/sessions.py @@ -13,7 +13,7 @@ from collections import Mapping from datetime import datetime from .auth import _basic_auth_str -from .compat import cookielib, OrderedDict, urljoin, urlparse +from .compat import cookielib, OrderedDict, urljoin, urlparse, is_py3, str from .cookies import ( cookiejar_from_dict, extract_cookies_to_jar, RequestsCookieJar, merge_cookies) from .models import Request, PreparedRequest, DEFAULT_REDIRECT_LIMIT @@ -132,6 +132,13 @@ class SessionRedirectMixin(object): parsed = urlparse(location_url) location_url = parsed.geturl() + # On Python 3, the location header was decoded using Latin 1, but + # urlparse in requote_uri will encode it with UTF-8 before quoting. + # Because of this insanity, we need to fix it up ourselves by + # sending the URL back to bytes ourselves. + if is_py3 and isinstance(location_url, str): + location_url = location_url.encode('latin1') + # Facilitate relative 'location' headers, as allowed by RFC 7231. # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource') # Compliant with RFC3986, we percent encode the url. diff --git a/requests/utils.py b/requests/utils.py index c5c3fd01..5c18e184 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -26,7 +26,7 @@ from . import certs from .compat import parse_http_list as _parse_list_header from .compat import (quote, urlparse, bytes, str, OrderedDict, unquote, is_py2, builtin_str, getproxies, proxy_bypass, urlunparse, - basestring) + basestring, is_py3) from .cookies import RequestsCookieJar, cookiejar_from_dict from .structures import CaseInsensitiveDict from .exceptions import InvalidURL, FileModeWarning @@ -422,7 +422,26 @@ def unquote_unreserved(uri): """Un-escape any percent-escape sequences in a URI that are unreserved characters. This leaves all reserved, illegal and non-ASCII bytes encoded. """ - parts = uri.split('%') + # This convert function is used to optionally convert the output of `chr`. + # In Python 3, `chr` returns a unicode string, while in Python 2 it returns + # a bytestring. Here we deal with that by optionally converting. + def convert(is_bytes, c): + if is_py2 and not is_bytes: + return c.decode('ascii') + elif is_py3 and is_bytes: + return c.encode('ascii') + else: + return c + + # Handle both bytestrings and unicode strings. + is_bytes = isinstance(uri, bytes) + splitchar = u'%' + base = u'' + if is_bytes: + splitchar = splitchar.encode('ascii') + base = base.encode('ascii') + + parts = uri.split(splitchar) for i in range(1, len(parts)): h = parts[i][0:2] if len(h) == 2 and h.isalnum(): @@ -432,12 +451,12 @@ def unquote_unreserved(uri): raise InvalidURL("Invalid percent-escape sequence: '%s'" % h) if c in UNRESERVED_SET: - parts[i] = c + parts[i][2:] + parts[i] = convert(is_bytes, c) + parts[i][2:] else: - parts[i] = '%' + parts[i] + parts[i] = splitchar + parts[i] else: - parts[i] = '%' + parts[i] - return ''.join(parts) + parts[i] = splitchar + parts[i] + return base.join(parts) def requote_uri(uri): diff --git a/test_requests.py b/test_requests.py index 89c2a47e..8e97fbba 100755 --- a/test_requests.py +++ b/test_requests.py @@ -17,7 +17,7 @@ from requests.adapters import HTTPAdapter from requests.auth import HTTPDigestAuth, _basic_auth_str from requests.compat import ( Morsel, cookielib, getproxies, str, urljoin, urlparse, is_py3, - builtin_str, OrderedDict) + builtin_str, OrderedDict, is_py2) from requests.cookies import cookiejar_from_dict, morsel_to_cookie from requests.exceptions import ( ConnectionError, ConnectTimeout, InvalidScheme, InvalidURL, MissingScheme, @@ -1520,6 +1520,20 @@ class TestUtils: quoted = 'http://example.com/fiz?buz=%25ppicture' assert quoted == requote_uri(quoted) + def test_unquote_unreserved_handles_unicode(self): + """Unicode strings can be passed to unquote_unreserved""" + from requests.utils import unquote_unreserved + uri = u'http://example.com/fizz?buzz=%41%2C' + unquoted = u'http://example.com/fizz?buzz=A%2C' + assert unquoted == unquote_unreserved(uri) + + def test_unquote_unreserved_handles_bytes(self): + """Bytestrings can be passed to unquote_unreserved""" + from requests.utils import unquote_unreserved + uri = b'http://example.com/fizz?buzz=%41%2C' + unquoted = b'http://example.com/fizz?buzz=A%2C' + assert unquoted == unquote_unreserved(uri) + class TestMorselToCookieExpires: """Tests for morsel_to_cookie when morsel contains expires.""" @@ -1641,6 +1655,7 @@ class RedirectSession(SessionRedirectMixin): self.max_redirects = 30 self.cookies = {} self.trust_env = False + self.location = '/' def send(self, *args, **kwargs): self.calls.append(SendCall(args, kwargs)) @@ -1655,7 +1670,7 @@ class RedirectSession(SessionRedirectMixin): except IndexError: r.status_code = 200 - r.headers = CaseInsensitiveDict({'Location': '/'}) + r.headers = CaseInsensitiveDict({'Location': self.location}) r.raw = self._build_raw() r.request = request return r @@ -1689,6 +1704,18 @@ class TestRedirects: TestRedirects.default_keyword_args) assert session.calls[-1] == send_call + @pytest.mark.skipif(is_py2, reason="requires python 3") + def test_redirects_with_latin1_header(self, httpbin): + """Test that redirect headers decoded with Latin 1 are correctly + followed""" + session = RedirectSession([303]) + session.location = u'http://xn--n8jyd3c767qtje.xn--q9jyb4c/ã\x83\x96ã\x83\xadã\x82°/' + prep = requests.Request('GET', httpbin('get')).prepare() + r0 = session.send(prep) + + responses = list(session.resolve_redirects(r0, prep)) + assert len(responses) == 1 + assert responses[0].request.url == u'http://xn--n8jyd3c767qtje.xn--q9jyb4c/%E3%83%96%E3%83%AD%E3%82%B0/' @pytest.fixture def list_of_tuples():