Merge pull request #2754 from Lukasa/python3-redirect-3.0.0

Fix unicode redirects on Python 3.
This commit is contained in:
Ian Cordasco
2016-04-15 16:39:18 -05:00
3 changed files with 62 additions and 9 deletions
+8 -1
View File
@@ -13,7 +13,7 @@ from collections import Mapping
from datetime import datetime
from .auth import _basic_auth_str
from .compat import cookielib, OrderedDict, urljoin, urlparse
from .compat import cookielib, OrderedDict, urljoin, urlparse, is_py3, str
from .cookies import (
cookiejar_from_dict, extract_cookies_to_jar, RequestsCookieJar, merge_cookies)
from .models import Request, PreparedRequest, DEFAULT_REDIRECT_LIMIT
@@ -132,6 +132,13 @@ class SessionRedirectMixin(object):
parsed = urlparse(location_url)
location_url = parsed.geturl()
# On Python 3, the location header was decoded using Latin 1, but
# urlparse in requote_uri will encode it with UTF-8 before quoting.
# Because of this insanity, we need to fix it up ourselves by
# sending the URL back to bytes.
if is_py3 and isinstance(location_url, str):
location_url = location_url.encode('latin1')
# Facilitate relative 'location' headers, as allowed by RFC 7231.
# (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource')
# Compliant with RFC3986, we percent encode the url.
+25 -6
View File
@@ -26,7 +26,7 @@ from . import certs
from .compat import parse_http_list as _parse_list_header
from .compat import (quote, urlparse, bytes, str, OrderedDict, unquote, is_py2,
builtin_str, getproxies, proxy_bypass, urlunparse,
basestring)
basestring, is_py3)
from .cookies import RequestsCookieJar, cookiejar_from_dict
from .structures import CaseInsensitiveDict
from .exceptions import InvalidURL, FileModeWarning
@@ -422,7 +422,26 @@ def unquote_unreserved(uri):
"""Un-escape any percent-escape sequences in a URI that are unreserved
characters. This leaves all reserved, illegal and non-ASCII bytes encoded.
"""
parts = uri.split('%')
# This convert function is used to optionally convert the output of `chr`.
# In Python 3, `chr` returns a unicode string, while in Python 2 it returns
# a bytestring. Here we deal with that by optionally converting.
def convert(is_bytes, c):
    """Coerce the character ``c`` to the string flavour of the input URI.

    :param is_bytes: true when the URI being processed is a bytestring.
    :param c: the single character to coerce.
    :returns: ``c`` ASCII-encoded when a bytestring is wanted on Python 3,
        ASCII-decoded when a unicode string is wanted on Python 2, and
        ``c`` unchanged in every other case.
    """
    if is_bytes:
        # Only Python 3 needs an explicit encode to obtain bytes.
        return c.encode('ascii') if is_py3 else c
    # Only Python 2 needs an explicit decode to obtain unicode.
    return c.decode('ascii') if is_py2 else c
# Handle both bytestrings and unicode strings.
is_bytes = isinstance(uri, bytes)
splitchar = u'%'
base = u''
if is_bytes:
splitchar = splitchar.encode('ascii')
base = base.encode('ascii')
parts = uri.split(splitchar)
for i in range(1, len(parts)):
h = parts[i][0:2]
if len(h) == 2 and h.isalnum():
@@ -432,12 +451,12 @@ def unquote_unreserved(uri):
raise InvalidURL("Invalid percent-escape sequence: '%s'" % h)
if c in UNRESERVED_SET:
parts[i] = c + parts[i][2:]
parts[i] = convert(is_bytes, c) + parts[i][2:]
else:
parts[i] = '%' + parts[i]
parts[i] = splitchar + parts[i]
else:
parts[i] = '%' + parts[i]
return ''.join(parts)
parts[i] = splitchar + parts[i]
return base.join(parts)
def requote_uri(uri):
+29 -2
View File
@@ -17,7 +17,7 @@ from requests.adapters import HTTPAdapter
from requests.auth import HTTPDigestAuth, _basic_auth_str
from requests.compat import (
Morsel, cookielib, getproxies, str, urljoin, urlparse, is_py3,
builtin_str, OrderedDict)
builtin_str, OrderedDict, is_py2)
from requests.cookies import cookiejar_from_dict, morsel_to_cookie
from requests.exceptions import (
ConnectionError, ConnectTimeout, InvalidScheme, InvalidURL, MissingScheme,
@@ -1520,6 +1520,20 @@ class TestUtils:
quoted = 'http://example.com/fiz?buz=%25ppicture'
assert quoted == requote_uri(quoted)
def test_unquote_unreserved_handles_unicode(self):
    """unquote_unreserved accepts a unicode string and returns the
    expected unicode result: %41 is unescaped to 'A', %2C stays escaped."""
    from requests.utils import unquote_unreserved
    expected = u'http://example.com/fizz?buzz=A%2C'
    result = unquote_unreserved(u'http://example.com/fizz?buzz=%41%2C')
    assert expected == result
def test_unquote_unreserved_handles_bytes(self):
    """unquote_unreserved accepts a bytestring and returns the expected
    bytestring result: %41 is unescaped to 'A', %2C stays escaped."""
    from requests.utils import unquote_unreserved
    expected = b'http://example.com/fizz?buzz=A%2C'
    result = unquote_unreserved(b'http://example.com/fizz?buzz=%41%2C')
    assert expected == result
class TestMorselToCookieExpires:
"""Tests for morsel_to_cookie when morsel contains expires."""
@@ -1641,6 +1655,7 @@ class RedirectSession(SessionRedirectMixin):
self.max_redirects = 30
self.cookies = {}
self.trust_env = False
self.location = '/'
def send(self, *args, **kwargs):
self.calls.append(SendCall(args, kwargs))
@@ -1655,7 +1670,7 @@ class RedirectSession(SessionRedirectMixin):
except IndexError:
r.status_code = 200
r.headers = CaseInsensitiveDict({'Location': '/'})
r.headers = CaseInsensitiveDict({'Location': self.location})
r.raw = self._build_raw()
r.request = request
return r
@@ -1689,6 +1704,18 @@ class TestRedirects:
TestRedirects.default_keyword_args)
assert session.calls[-1] == send_call
@pytest.mark.skipif(is_py2, reason="requires python 3")
def test_redirects_with_latin1_header(self, httpbin):
    """A Location header that was decoded as Latin 1 is followed, and the
    resulting request URL is the percent-encoded form asserted below."""
    expected_url = u'http://xn--n8jyd3c767qtje.xn--q9jyb4c/%E3%83%96%E3%83%AD%E3%82%B0/'

    session = RedirectSession([303])
    # The header value as it appears after Latin 1 decoding.
    session.location = u'http://xn--n8jyd3c767qtje.xn--q9jyb4c/ã\x83\x96ã\x83\xadã\x82°/'

    prepared = requests.Request('GET', httpbin('get')).prepare()
    initial = session.send(prepared)
    followed = list(session.resolve_redirects(initial, prepared))

    assert len(followed) == 1
    assert followed[0].request.url == expected_url
@pytest.fixture
def list_of_tuples():