Merge pull request #2754 from Lukasa/python3-redirect-3.0.0

Fix unicode redirects on Python 3.
2026-06-05 22:50:18 +00:00 · 2016-04-15 16:39:18 -05:00
parent 41fc9ebff7 eab12fa029
commit 9fc765638f
3 changed files with 62 additions and 9 deletions
@@ -13,7 +13,7 @@ from collections import Mapping
 from datetime import datetime

 from .auth import _basic_auth_str
-from .compat import cookielib, OrderedDict, urljoin, urlparse
+from .compat import cookielib, OrderedDict, urljoin, urlparse, is_py3, str
 from .cookies import (
    cookiejar_from_dict, extract_cookies_to_jar, RequestsCookieJar, merge_cookies)
 from .models import Request, PreparedRequest, DEFAULT_REDIRECT_LIMIT
@@ -132,6 +132,13 @@ class SessionRedirectMixin(object):
            parsed = urlparse(location_url)
            location_url = parsed.geturl()

+            # On Python 3, the location header was decoded using Latin 1, but
+            # urlparse in requote_uri will encode it with UTF-8 before quoting.
+            # To work around this, we convert the URL back to bytes here so
+            # that the original bytes are what end up being quoted.
+            if is_py3 and isinstance(location_url, str):
+                location_url = location_url.encode('latin1')
+
            # Facilitate relative 'location' headers, as allowed by RFC 7231.
            # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource')
            # Compliant with RFC3986, we percent encode the url.
@@ -26,7 +26,7 @@ from . import certs
 from .compat import parse_http_list as _parse_list_header
 from .compat import (quote, urlparse, bytes, str, OrderedDict, unquote, is_py2,
                     builtin_str, getproxies, proxy_bypass, urlunparse,
-                     basestring)
+                     basestring, is_py3)
 from .cookies import RequestsCookieJar, cookiejar_from_dict
 from .structures import CaseInsensitiveDict
 from .exceptions import InvalidURL, FileModeWarning
@@ -422,7 +422,26 @@ def unquote_unreserved(uri):
    """Un-escape any percent-escape sequences in a URI that are unreserved
    characters. This leaves all reserved, illegal and non-ASCII bytes encoded.
    """
-    parts = uri.split('%')
+    # `chr` returns a unicode string on Python 3 but a bytestring on Python 2.
+    # This helper converts its output, when necessary, so that it matches the
+    # type (bytes or unicode) of the URI being processed.
+    def convert(is_bytes, c):
+        if is_py2 and not is_bytes:
+            return c.decode('ascii')
+        elif is_py3 and is_bytes:
+            return c.encode('ascii')
+        else:
+            return c
+
+    # Handle both bytestrings and unicode strings.
+    is_bytes = isinstance(uri, bytes)
+    splitchar = u'%'
+    base = u''
+    if is_bytes:
+        splitchar = splitchar.encode('ascii')
+        base = base.encode('ascii')
+
+    parts = uri.split(splitchar)
    for i in range(1, len(parts)):
        h = parts[i][0:2]
        if len(h) == 2 and h.isalnum():
@@ -432,12 +451,12 @@ def unquote_unreserved(uri):
                raise InvalidURL("Invalid percent-escape sequence: '%s'" % h)

            if c in UNRESERVED_SET:
-                parts[i] = c + parts[i][2:]
+                parts[i] = convert(is_bytes, c) + parts[i][2:]
            else:
-                parts[i] = '%' + parts[i]
+                parts[i] = splitchar + parts[i]
        else:
-            parts[i] = '%' + parts[i]
-    return ''.join(parts)
+            parts[i] = splitchar + parts[i]
+    return base.join(parts)


 def requote_uri(uri):
@@ -17,7 +17,7 @@ from requests.adapters import HTTPAdapter
 from requests.auth import HTTPDigestAuth, _basic_auth_str
 from requests.compat import (
    Morsel, cookielib, getproxies, str, urljoin, urlparse, is_py3,
-    builtin_str, OrderedDict)
+    builtin_str, OrderedDict, is_py2)
 from requests.cookies import cookiejar_from_dict, morsel_to_cookie
 from requests.exceptions import (
    ConnectionError, ConnectTimeout, InvalidScheme, InvalidURL, MissingScheme,
@@ -1520,6 +1520,20 @@ class TestUtils:
        quoted = 'http://example.com/fiz?buz=%25ppicture'
        assert quoted == requote_uri(quoted)

+    def test_unquote_unreserved_handles_unicode(self):
+        """Unicode strings can be passed to unquote_unreserved"""
+        from requests.utils import unquote_unreserved
+        uri = u'http://example.com/fizz?buzz=%41%2C'
+        unquoted = u'http://example.com/fizz?buzz=A%2C'
+        assert unquoted == unquote_unreserved(uri)
+
+    def test_unquote_unreserved_handles_bytes(self):
+        """Bytestrings can be passed to unquote_unreserved"""
+        from requests.utils import unquote_unreserved
+        uri = b'http://example.com/fizz?buzz=%41%2C'
+        unquoted = b'http://example.com/fizz?buzz=A%2C'
+        assert unquoted == unquote_unreserved(uri)
+

 class TestMorselToCookieExpires:
    """Tests for morsel_to_cookie when morsel contains expires."""
@@ -1641,6 +1655,7 @@ class RedirectSession(SessionRedirectMixin):
        self.max_redirects = 30
        self.cookies = {}
        self.trust_env = False
+        self.location = '/'

    def send(self, *args, **kwargs):
        self.calls.append(SendCall(args, kwargs))
@@ -1655,7 +1670,7 @@ class RedirectSession(SessionRedirectMixin):
        except IndexError:
            r.status_code = 200

-        r.headers = CaseInsensitiveDict({'Location': '/'})
+        r.headers = CaseInsensitiveDict({'Location': self.location})
        r.raw = self._build_raw()
        r.request = request
        return r
@@ -1689,6 +1704,18 @@ class TestRedirects:
                                 TestRedirects.default_keyword_args)
            assert session.calls[-1] == send_call

+    @pytest.mark.skipif(is_py2, reason="requires python 3")
+    def test_redirects_with_latin1_header(self, httpbin):
+        """Test that redirect headers decoded with Latin 1 are correctly
+        followed"""
+        session = RedirectSession([303])
+        session.location = u'http://xn--n8jyd3c767qtje.xn--q9jyb4c/ã\x83\x96ã\x83\xadã\x82°/'
+        prep = requests.Request('GET', httpbin('get')).prepare()
+        r0 = session.send(prep)
+
+        responses = list(session.resolve_redirects(r0, prep))
+        assert len(responses) == 1
+        assert responses[0].request.url == u'http://xn--n8jyd3c767qtje.xn--q9jyb4c/%E3%83%96%E3%83%AD%E3%82%B0/'

@pytest.fixture
 def list_of_tuples():