Merge pull request #3620 from Lukasa/better-idna

Better support for internationalized domain names.
2026-06-05 22:50:18 +00:00 · 2016-10-21 13:09:04 +01:00
parent d14401f799 e7f5bd634d
commit 50486cf2be
15 changed files with 9580 additions and 4 deletions
@@ -3,6 +3,15 @@
 Release History
 ---------------

+dev
+++
+
+**Improvements**
+
+- Updated support for internationalized domain names from IDNA2003 to IDNA2008.
+  This updated support is required for several forms of IDNs and is mandatory
+  for .de domains.
+
 2.11.1 (2016-08-17)
 +++++++++++++++++++

@@ -17,7 +17,7 @@ ci: init
 certs:
 	curl http://ci.kennethreitz.org/job/ca-bundle/lastSuccessfulBuild/artifact/cacerts.pem -o requests/cacert.pem

-deps: urllib3 chardet
+deps: urllib3 chardet idna

 urllib3:
 	git clone https://github.com/shazow/urllib3.git && \
@@ -37,6 +37,16 @@ chardet:
 	    mv chardet/chardet requests/packages/ && \
 	    rm -fr chardet

+idna:
+	git clone https://github.com/kjd/idna.git && \
+	    rm -fr requests/packages/idna && \
+	    cd idna && \
+	    git checkout `git describe --abbrev=0 --tags` && \
+	    cd .. && \
+	    mv idna/idna requests/packages/ && \
+	    find requests/packages/idna -type f -exec sed -i "" "s/^from idna/from /" {} \; && \
+	    rm -fr idna
+
 publish:
 	python setup.py register
 	python setup.py sdist upload
@@ -52,3 +52,86 @@ CA Bundle License
 This Source Code Form is subject to the terms of the Mozilla Public
 License, v. 2.0. If a copy of the MPL was not distributed with this
 file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+Idna License
+============
+
+Copyright (c) 2013-2016, Kim Davies. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+#. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+#. Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following
+   disclaimer in the documentation and/or other materials provided with
+   the distribution.
+
+#. Neither the name of the copyright holder nor the names of the 
+   contributors may be used to endorse or promote products derived 
+   from this software without specific prior written permission.
+
+#. THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS "AS IS" AND ANY
+   EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+   PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR 
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+   USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+   DAMAGE.
+
+Portions of the codec implementation and unit tests are derived from the
+Python standard library, which carries the `Python Software Foundation
+License <https://docs.python.org/2/license.html>`_:
+
+   Copyright (c) 2001-2014 Python Software Foundation; All Rights Reserved
+
+Portions of the unit tests are derived from the Unicode standard, which 
+is subject to the Unicode, Inc. License Agreement:
+
+   Copyright (c) 1991-2014 Unicode, Inc. All rights reserved.
+   Distributed under the Terms of Use in 
+   <http://www.unicode.org/copyright.html>.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of the Unicode data files and any associated documentation
+   (the "Data Files") or Unicode software and any associated documentation
+   (the "Software") to deal in the Data Files or Software
+   without restriction, including without limitation the rights to use,
+   copy, modify, merge, publish, distribute, and/or sell copies of
+   the Data Files or Software, and to permit persons to whom the Data Files
+   or Software are furnished to do so, provided that
+   
+   (a) this copyright and permission notice appear with all copies 
+   of the Data Files or Software,
+
+   (b) this copyright and permission notice appear in associated 
+   documentation, and
+
+   (c) there is clear notice in each modified Data File or in the Software
+   as well as in the documentation associated with the Data File(s) or
+   Software that the data or software has been modified.
+
+   THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
+   ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+   WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+   IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
+   NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
+   DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+   DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+   TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+   PERFORMANCE OF THE DATA FILES OR SOFTWARE.
+
+   Except as contained in this notice, the name of a copyright holder
+   shall not be used in advertising or otherwise to promote the sale,
+   use or other dealings in these Data Files or Software without prior
+   written authorization of the copyright holder.
+
@@ -21,6 +21,7 @@ from .structures import CaseInsensitiveDict

 from .auth import HTTPBasicAuth
 from .cookies import cookiejar_from_dict, get_cookie_header, _copy_cookie_jar
+from .packages import idna
 from .packages.urllib3.fields import RequestField
 from .packages.urllib3.filepost import encode_multipart_formdata
 from .packages.urllib3.util import parse_url
@@ -363,8 +364,8 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin):

        # Only want to apply IDNA to the hostname
        try:
-            host = host.encode('idna').decode('utf-8')
-        except UnicodeError:
+            host = idna.encode(host, uts46=True).decode('utf-8')
+        except (UnicodeError, idna.IDNAError):
            raise InvalidURL('URL has an invalid label.')

        # Carefully reconstruct the network location
@@ -1,4 +1,4 @@
-If you are planning to submit a pull request to requests with any changes in 
+If you are planning to submit a pull request to requests with any changes in
 this library do not go any further. These are independent libraries which we
 vendor into requests. Any changes necessary to these libraries must be made in
 them and submitted as separate pull requests to those libraries.
@@ -7,5 +7,7 @@ urllib3 pull requests go here: https://github.com/shazow/urllib3

 chardet pull requests go here: https://github.com/chardet/chardet

+idna pull requests go here: https://github.com/kjd/idna
+
 See https://github.com/kennethreitz/requests/pull/1812#issuecomment-30854316
 for the reasoning behind this.
@@ -34,3 +34,9 @@ try:
 except ImportError:
    import chardet
    sys.modules['%s.chardet' % __name__] = chardet
+
+try:
+    from . import idna
+except ImportError:
+    import idna
+    sys.modules['%s.idna' % __name__] = idna
@@ -0,0 +1 @@
+from .core import *
@@ -0,0 +1,118 @@
+from .core import encode, decode, alabel, ulabel, IDNAError
+import codecs
+import re
+
+_unicode_dots_re = re.compile(u'[\u002e\u3002\uff0e\uff61]')
+
+class Codec(codecs.Codec):
+
+    def encode(self, data, errors='strict'):
+
+        if errors != 'strict':
+            raise IDNAError("Unsupported error handling \"{0}\"".format(errors))
+
+        if not data:
+            return "", 0
+
+        return encode(data), len(data)
+
+    def decode(self, data, errors='strict'):
+
+        if errors != 'strict':
+            raise IDNAError("Unsupported error handling \"{0}\"".format(errors))
+
+        if not data:
+            return u"", 0
+
+        return decode(data), len(data)
+
+class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
+    def _buffer_encode(self, data, errors, final):
+        if errors != 'strict':
+            raise IDNAError("Unsupported error handling \"{0}\"".format(errors))
+
+        if not data:
+            return ("", 0)
+
+        labels = _unicode_dots_re.split(data)
+        trailing_dot = u''
+        if labels:
+            if not labels[-1]:
+                trailing_dot = '.'
+                del labels[-1]
+            elif not final:
+                # Keep potentially unfinished label until the next call
+                del labels[-1]
+                if labels:
+                    trailing_dot = '.'
+
+        result = []
+        size = 0
+        for label in labels:
+            result.append(alabel(label))
+            if size:
+                size += 1
+            size += len(label)
+
+        # Join with U+002E
+        result = ".".join(result) + trailing_dot
+        size += len(trailing_dot)
+        return (result, size)
+
+class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
+    def _buffer_decode(self, data, errors, final):
+        if errors != 'strict':
+            raise IDNAError("Unsupported error handling \"{0}\"".format(errors))
+
+        if not data:
+            return (u"", 0)
+
+        # IDNA allows decoding to operate on Unicode strings, too.
+        if isinstance(data, unicode):
+            labels = _unicode_dots_re.split(data)
+        else:
+            # Must be ASCII string
+            data = str(data)
+            unicode(data, "ascii")
+            labels = data.split(".")
+
+        trailing_dot = u''
+        if labels:
+            if not labels[-1]:
+                trailing_dot = u'.'
+                del labels[-1]
+            elif not final:
+                # Keep potentially unfinished label until the next call
+                del labels[-1]
+                if labels:
+                    trailing_dot = u'.'
+
+        result = []
+        size = 0
+        for label in labels:
+            result.append(ulabel(label))
+            if size:
+                size += 1
+            size += len(label)
+
+        result = u".".join(result) + trailing_dot
+        size += len(trailing_dot)
+        return (result, size)
+
+
+class StreamWriter(Codec, codecs.StreamWriter):
+    pass
+
+class StreamReader(Codec, codecs.StreamReader):
+    pass
+
+def getregentry():
+    return codecs.CodecInfo(
+        name='idna',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamwriter=StreamWriter,
+        streamreader=StreamReader,
+    )
@@ -0,0 +1,12 @@
+from .core import *
+from .codec import *
+
+def ToASCII(label):
+    return encode(label)
+
+def ToUnicode(label):
+    return decode(label)
+
+def nameprep(s):
+    raise NotImplementedError("IDNA 2008 does not utilise nameprep protocol")
+
@@ -0,0 +1,387 @@
+from . import idnadata
+import bisect
+import unicodedata
+import re
+import sys
+from .intranges import intranges_contain
+
+_virama_combining_class = 9
+_alabel_prefix = b'xn--'
+_unicode_dots_re = re.compile(u'[\u002e\u3002\uff0e\uff61]')
+
+if sys.version_info[0] == 3:
+    unicode = str
+    unichr = chr
+
+class IDNAError(UnicodeError):
+    """ Base exception for all IDNA-encoding related problems """
+    pass
+
+
+class IDNABidiError(IDNAError):
+    """ Exception when bidirectional requirements are not satisfied """
+    pass
+
+
+class InvalidCodepoint(IDNAError):
+    """ Exception when a disallowed or unallocated codepoint is used """
+    pass
+
+
+class InvalidCodepointContext(IDNAError):
+    """ Exception when the codepoint is not valid in the context it is used """
+    pass
+
+
+def _combining_class(cp):
+    return unicodedata.combining(unichr(cp))
+
+def _is_script(cp, script):
+    return intranges_contain(ord(cp), idnadata.scripts[script])
+
+def _punycode(s):
+    return s.encode('punycode')
+
+def _unot(s):
+    return 'U+{0:04X}'.format(s)
+
+
+def valid_label_length(label):
+
+    if len(label) > 63:
+        return False
+    return True
+
+
+def valid_string_length(label, trailing_dot):
+
+    if len(label) > (254 if trailing_dot else 253):
+        return False
+    return True
+
+
+def check_bidi(label, check_ltr=False):
+
+    # Bidi rules should only be applied if string contains RTL characters
+    bidi_label = False
+    for (idx, cp) in enumerate(label, 1):
+        direction = unicodedata.bidirectional(cp)
+        if direction == '':
+            # String likely comes from a newer version of Unicode
+            raise IDNABidiError('Unknown directionality in label {0} at position {1}'.format(repr(label), idx))
+        if direction in ['R', 'AL', 'AN']:
+            bidi_label = True
+            break
+    if not bidi_label and not check_ltr:
+        return True
+
+    # Bidi rule 1
+    direction = unicodedata.bidirectional(label[0])
+    if direction in ['R', 'AL']:
+        rtl = True
+    elif direction == 'L':
+        rtl = False
+    else:
+        raise IDNABidiError('First codepoint in label {0} must be directionality L, R or AL'.format(repr(label)))
+
+    valid_ending = False
+    number_type = False
+    for (idx, cp) in enumerate(label, 1):
+        direction = unicodedata.bidirectional(cp)
+
+        if rtl:
+            # Bidi rule 2
+            if not direction in ['R', 'AL', 'AN', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM']:
+                raise IDNABidiError('Invalid direction for codepoint at position {0} in a right-to-left label'.format(idx))
+            # Bidi rule 3
+            if direction in ['R', 'AL', 'EN', 'AN']:
+                valid_ending = True
+            elif direction != 'NSM':
+                valid_ending = False
+            # Bidi rule 4
+            if direction in ['AN', 'EN']:
+                if not number_type:
+                    number_type = direction
+                else:
+                    if number_type != direction:
+                        raise IDNABidiError('Can not mix numeral types in a right-to-left label')
+        else:
+            # Bidi rule 5
+            if not direction in ['L', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM']:
+                raise IDNABidiError('Invalid direction for codepoint at position {0} in a left-to-right label'.format(idx))
+            # Bidi rule 6
+            if direction in ['L', 'EN']:
+                valid_ending = True
+            elif direction != 'NSM':
+                valid_ending = False
+
+    if not valid_ending:
+        raise IDNABidiError('Label ends with illegal codepoint directionality')
+
+    return True
+
+
+def check_initial_combiner(label):
+
+    if unicodedata.category(label[0])[0] == 'M':
+        raise IDNAError('Label begins with an illegal combining character')
+    return True
+
+
+def check_hyphen_ok(label):
+
+    if label[2:4] == '--':
+        raise IDNAError('Label has disallowed hyphens in 3rd and 4th position')
+    if label[0] == '-' or label[-1] == '-':
+        raise IDNAError('Label must not start or end with a hyphen')
+    return True
+
+
+def check_nfc(label):
+
+    if unicodedata.normalize('NFC', label) != label:
+        raise IDNAError('Label must be in Normalization Form C')
+
+
+def valid_contextj(label, pos):
+
+    cp_value = ord(label[pos])
+
+    if cp_value == 0x200c:
+
+        if pos > 0:
+            if _combining_class(ord(label[pos - 1])) == _virama_combining_class:
+                return True
+
+        ok = False
+        for i in range(pos-1, -1, -1):
+            joining_type = idnadata.joining_types.get(ord(label[i]))
+            if joining_type == 'T':
+                continue
+            if joining_type in ['L', 'D']:
+                ok = True
+                break
+
+        if not ok:
+            return False
+
+        ok = False
+        for i in range(pos+1, len(label)):
+            joining_type = idnadata.joining_types.get(ord(label[i]))
+            if joining_type == 'T':
+                continue
+            if joining_type in ['R', 'D']:
+                ok = True
+                break
+        return ok
+
+    if cp_value == 0x200d:
+
+        if pos > 0:
+            if _combining_class(ord(label[pos - 1])) == _virama_combining_class:
+                return True
+        return False
+
+    else:
+
+        return False
+
+
+def valid_contexto(label, pos, exception=False):
+
+    cp_value = ord(label[pos])
+
+    if cp_value == 0x00b7:
+        if 0 < pos < len(label)-1:
+            if ord(label[pos - 1]) == 0x006c and ord(label[pos + 1]) == 0x006c:
+                return True
+        return False
+
+    elif cp_value == 0x0375:
+        if pos < len(label)-1 and len(label) > 1:
+            return _is_script(label[pos + 1], 'Greek')
+        return False
+
+    elif cp_value == 0x05f3 or cp_value == 0x05f4:
+        if pos > 0:
+            return _is_script(label[pos - 1], 'Hebrew')
+        return False
+
+    elif cp_value == 0x30fb:
+        for cp in label:
+            if cp == u'\u30fb':
+                continue
+            if not _is_script(cp, 'Hiragana') and not _is_script(cp, 'Katakana') and not _is_script(cp, 'Han'):
+                return False
+        return True
+
+    elif 0x660 <= cp_value <= 0x669:
+        for cp in label:
+            if 0x6f0 <= ord(cp) <= 0x06f9:
+                return False
+        return True
+
+    elif 0x6f0 <= cp_value <= 0x6f9:
+        for cp in label:
+            if 0x660 <= ord(cp) <= 0x0669:
+                return False
+        return True
+
+
+def check_label(label):
+
+    if isinstance(label, (bytes, bytearray)):
+        label = label.decode('utf-8')
+    if len(label) == 0:
+        raise IDNAError('Empty Label')
+
+    check_nfc(label)
+    check_hyphen_ok(label)
+    check_initial_combiner(label)
+
+    for (pos, cp) in enumerate(label):
+        cp_value = ord(cp)
+        if intranges_contain(cp_value, idnadata.codepoint_classes['PVALID']):
+            continue
+        elif intranges_contain(cp_value, idnadata.codepoint_classes['CONTEXTJ']):
+            if not valid_contextj(label, pos):
+                raise InvalidCodepointContext('Joiner {0} not allowed at position {1} in {2}'.format(_unot(cp_value), pos+1, repr(label)))
+        elif intranges_contain(cp_value, idnadata.codepoint_classes['CONTEXTO']):
+            if not valid_contexto(label, pos):
+                raise InvalidCodepointContext('Codepoint {0} not allowed at position {1} in {2}'.format(_unot(cp_value), pos+1, repr(label)))
+        else:
+            raise InvalidCodepoint('Codepoint {0} at position {1} of {2} not allowed'.format(_unot(cp_value), pos+1, repr(label)))
+
+    check_bidi(label)
+
+
+def alabel(label):
+
+    try:
+        label = label.encode('ascii')
+        try:
+            ulabel(label)
+        except:
+            raise IDNAError('The label {0} is not a valid A-label'.format(label))
+        if not valid_label_length(label):
+            raise IDNAError('Label too long')
+        return label
+    except UnicodeError:
+        pass
+
+    if not label:
+        raise IDNAError('No Input')
+
+    label = unicode(label)
+    check_label(label)
+    label = _punycode(label)
+    label = _alabel_prefix + label
+
+    if not valid_label_length(label):
+        raise IDNAError('Label too long')
+
+    return label
+
+
+def ulabel(label):
+
+    if not isinstance(label, (bytes, bytearray)):
+        try:
+            label = label.encode('ascii')
+        except UnicodeError:
+            check_label(label)
+            return label
+
+    label = label.lower()
+    if label.startswith(_alabel_prefix):
+        label = label[len(_alabel_prefix):]
+    else:
+        check_label(label)
+        return label.decode('ascii')
+
+    label = label.decode('punycode')
+    check_label(label)
+    return label
+
+
+def uts46_remap(domain, std3_rules=True, transitional=False):
+    """Re-map the characters in the string according to UTS46 processing."""
+    from .uts46data import uts46data
+    output = u""
+    try:
+        for pos, char in enumerate(domain):
+            code_point = ord(char)
+            uts46row = uts46data[code_point if code_point < 256 else
+                bisect.bisect_left(uts46data, (code_point, "Z")) - 1]
+            status = uts46row[1]
+            replacement = uts46row[2] if len(uts46row) == 3 else None
+            if (status == "V" or
+                    (status == "D" and not transitional) or
+                    (status == "3" and std3_rules and replacement is None)):
+                output += char
+            elif replacement is not None and (status == "M" or
+                    (status == "3" and std3_rules) or
+                    (status == "D" and transitional)):
+                output += replacement
+            elif status != "I":
+                raise IndexError()
+        return unicodedata.normalize("NFC", output)
+    except IndexError:
+        raise InvalidCodepoint(
+            "Codepoint {0} not allowed at position {1} in {2}".format(
+            _unot(code_point), pos + 1, repr(domain)))
+
+
+def encode(s, strict=False, uts46=False, std3_rules=False, transitional=False):
+
+    if isinstance(s, (bytes, bytearray)):
+        s = s.decode("ascii")
+    if uts46:
+        s = uts46_remap(s, std3_rules, transitional)
+    trailing_dot = False
+    result = []
+    if strict:
+        labels = s.split('.')
+    else:
+        labels = _unicode_dots_re.split(s)
+    while labels and not labels[0]:
+        del labels[0]
+    if not labels:
+        raise IDNAError('Empty domain')
+    if labels[-1] == '':
+        del labels[-1]
+        trailing_dot = True
+    for label in labels:
+        result.append(alabel(label))
+    if trailing_dot:
+        result.append(b'')
+    s = b'.'.join(result)
+    if not valid_string_length(s, trailing_dot):
+        raise IDNAError('Domain too long')
+    return s
+
+
+def decode(s, strict=False, uts46=False, std3_rules=False):
+
+    if isinstance(s, (bytes, bytearray)):
+        s = s.decode("ascii")
+    if uts46:
+        s = uts46_remap(s, std3_rules, False)
+    trailing_dot = False
+    result = []
+    if not strict:
+        labels = _unicode_dots_re.split(s)
+    else:
+        labels = s.split(u'.')
+    while labels and not labels[0]:
+        del labels[0]
+    if not labels:
+        raise IDNAError('Empty domain')
+    if not labels[-1]:
+        del labels[-1]
+        trailing_dot = True
+    for label in labels:
+        result.append(ulabel(label))
+    if trailing_dot:
+        result.append(u'')
+    return u'.'.join(result)
@@ -0,0 +1,46 @@
+"""
+Given a list of integers, made up of (hopefully) a small number of long runs
+of consecutive integers, compute a representation of the form
+((start1, end1), (start2, end2) ...). Then answer the question "was x present
+in the original list?" in time O(log(# runs)).
+"""
+
+import bisect
+
+def intranges_from_list(list_):
+    """Represent a list of integers as a sequence of ranges:
+    ((start_0, end_0), (start_1, end_1), ...), such that the original
+    integers are exactly those x such that start_i <= x < end_i for some i.
+    """
+
+    sorted_list = sorted(list_)
+    ranges = []
+    last_write = -1
+    for i in range(len(sorted_list)):
+        if i+1 < len(sorted_list):
+            if sorted_list[i] == sorted_list[i+1]-1:
+                continue
+        current_range = sorted_list[last_write+1:i+1]
+        range_tuple = (current_range[0], current_range[-1] + 1)
+        ranges.append(range_tuple)
+        last_write = i
+
+    return tuple(ranges)
+
+
+def intranges_contain(int_, ranges):
+    """Determine if `int_` falls into one of the ranges in `ranges`."""
+    tuple_ = (int_, int_)
+    pos = bisect.bisect_left(ranges, tuple_)
+    # we could be immediately ahead of a tuple (start, end)
+    # with start < int_ <= end
+    if pos > 0:
+        left, right = ranges[pos-1]
+        if left <= int_ < right:
+            return True
+    # or we could be immediately behind a tuple (int_, end)
+    if pos < len(ranges):
+        left, _ = ranges[pos]
+        if left == int_:
+            return True
+    return False
@@ -37,6 +37,7 @@ packages = [
    'requests',
    'requests.packages',
    'requests.packages.chardet',
+    'requests.packages.idna',
    'requests.packages.urllib3',
    'requests.packages.urllib3.packages',
    'requests.packages.urllib3.contrib',
@@ -1884,6 +1884,55 @@ def test_urllib3_pool_connection_closed(httpbin):
 def test_vendor_aliases():
    from requests.packages import urllib3
    from requests.packages import chardet
+    from requests.packages import idna

    with pytest.raises(ImportError):
        from requests.packages import webbrowser
+
+
+class TestPreparingURLs(object):
+    @pytest.mark.parametrize(
+        'url,expected',
+        (
+            ('http://google.com', 'http://google.com/'),
+            (u'http://ジェーピーニック.jp', u'http://xn--hckqz9bzb1cyrb.jp/'),
+            (
+                u'http://ジェーピーニック.jp'.encode('utf-8'),
+                u'http://xn--hckqz9bzb1cyrb.jp/'
+            ),
+            (
+                u'http://straße.de/straße',
+                u'http://xn--strae-oqa.de/stra%C3%9Fe'
+            ),
+            (
+                u'http://straße.de/straße'.encode('utf-8'),
+                u'http://xn--strae-oqa.de/stra%C3%9Fe'
+            ),
+            (
+                u'http://Königsgäßchen.de/straße',
+                u'http://xn--knigsgchen-b4a3dun.de/stra%C3%9Fe'
+            ),
+            (
+                u'http://Königsgäßchen.de/straße'.encode('utf-8'),
+                u'http://xn--knigsgchen-b4a3dun.de/stra%C3%9Fe'
+            ),
+        )
+    )
+    def test_preparing_url(self, url, expected):
+        r = requests.Request(url=url)
+        p = r.prepare()
+        assert p.url == expected
+
+    @pytest.mark.parametrize(
+        'url',
+        (
+            b"http://*.google.com",
+            b"http://*",
+            u"http://*.google.com",
+            u"http://*",
+        )
+    )
+    def test_preparing_bad_url(self, url):
+        r = requests.Request(url=url)
+        with pytest.raises(requests.exceptions.InvalidURL):
+            r.prepare()