Merge pull request #2142 from sigmavirus24/urllib3-1.9

Upgrade urllib3 to 1.9.x
This commit is contained in:
2014-07-22 16:21:05 -04:00
21 changed files with 820 additions and 546 deletions
+2 -1
View File
@@ -11,6 +11,7 @@ and maintain connections.
import socket
from .models import Response
from .packages.urllib3 import Retry
from .packages.urllib3.poolmanager import PoolManager, proxy_from_url
from .packages.urllib3.response import HTTPResponse
from .packages.urllib3.util import Timeout as TimeoutSauce
@@ -340,7 +341,7 @@ class HTTPAdapter(BaseAdapter):
assert_same_host=False,
preload_content=False,
decode_content=False,
retries=self.max_retries,
retries=Retry(self.max_retries, read=False),
timeout=timeout
)
+16 -8
View File
@@ -1,9 +1,3 @@
# urllib3/__init__.py
# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
"""
urllib3 - Thread-safe connection pooling and re-using.
"""
@@ -23,7 +17,10 @@ from . import exceptions
from .filepost import encode_multipart_formdata
from .poolmanager import PoolManager, ProxyManager, proxy_from_url
from .response import HTTPResponse
from .util import make_headers, get_host, Timeout
from .util.request import make_headers
from .util.url import get_host
from .util.timeout import Timeout
from .util.retry import Retry
# Set default logging handler to avoid "No handler found" warnings.
@@ -51,8 +48,19 @@ def add_stderr_logger(level=logging.DEBUG):
handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
logger.addHandler(handler)
logger.setLevel(level)
logger.debug('Added an stderr logging handler to logger: %s' % __name__)
logger.debug('Added a stderr logging handler to logger: %s' % __name__)
return handler
# ... Clean up.
del NullHandler
# Set security warning to only go off once by default.
import warnings
warnings.simplefilter('module', exceptions.InsecureRequestWarning)
def disable_warnings(category=exceptions.HTTPWarning):
"""
Helper for quickly disabling all urllib3 warnings.
"""
warnings.simplefilter('ignore', category)
+1 -7
View File
@@ -1,9 +1,3 @@
# urllib3/_collections.py
# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
from collections import Mapping, MutableMapping
try:
from threading import RLock
@@ -116,7 +110,7 @@ class HTTPHeaderDict(MutableMapping):
A ``dict`` like container for storing HTTP Headers.
Field names are stored and compared case-insensitively in compliance with
RFC 2616. Iteration provides the first case-sensitive key seen for each
RFC 7230. Iteration provides the first case-sensitive key seen for each
case-insensitive pair.
Using ``__setitem__`` syntax overwrites fields that compare equal
+76 -49
View File
@@ -1,52 +1,44 @@
# urllib3/connection.py
# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
import sys
import socket
from socket import timeout as SocketTimeout
try: # Python 3
try: # Python 3
from http.client import HTTPConnection as _HTTPConnection, HTTPException
except ImportError:
from httplib import HTTPConnection as _HTTPConnection, HTTPException
class DummyConnection(object):
"Used to detect a failed ConnectionCls import."
pass
try: # Compiled with SSL?
ssl = None
try: # Compiled with SSL?
HTTPSConnection = DummyConnection
import ssl
BaseSSLError = ssl.SSLError
except (ImportError, AttributeError): # Platform-specific: No SSL.
ssl = None
class BaseSSLError(BaseException):
pass
try: # Python 3
from http.client import HTTPSConnection as _HTTPSConnection
except ImportError:
from httplib import HTTPSConnection as _HTTPSConnection
import ssl
BaseSSLError = ssl.SSLError
except (ImportError, AttributeError): # Platform-specific: No SSL.
pass
from .exceptions import (
ConnectTimeoutError,
)
from .packages.ssl_match_hostname import match_hostname
from .packages import six
from .util import (
assert_fingerprint,
from .util.ssl_ import (
resolve_cert_reqs,
resolve_ssl_version,
ssl_wrap_socket,
assert_fingerprint,
)
from .util import connection
port_by_scheme = {
'http': 80,
@@ -58,38 +50,79 @@ class HTTPConnection(_HTTPConnection, object):
"""
Based on httplib.HTTPConnection but provides an extra constructor
backwards-compatibility layer between older and newer Pythons.
Additional keyword parameters are used to configure attributes of the connection.
Accepted parameters include:
- ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool`
- ``source_address``: Set the source address for the current connection.
.. note:: This is ignored for Python 2.6. It is only applied for 2.7 and 3.x
- ``socket_options``: Set specific options on the underlying socket. If not specified, then
defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling
Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy.
For example, if you wish to enable TCP Keep Alive in addition to the defaults,
you might pass::
HTTPConnection.default_socket_options + [
(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
]
Or you may want to disable the defaults by passing an empty list (e.g., ``[]``).
"""
default_port = port_by_scheme['http']
# By default, disable Nagle's Algorithm.
tcp_nodelay = 1
#: Disable Nagle's algorithm by default.
#: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]``
default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]
#: Whether this connection verifies the host's certificate.
is_verified = False
def __init__(self, *args, **kw):
if six.PY3: # Python 3
kw.pop('strict', None)
if sys.version_info < (2, 7): # Python 2.6 and older
kw.pop('source_address', None)
# Pre-set source_address in case we have an older Python like 2.6.
self.source_address = kw.get('source_address')
if sys.version_info < (2, 7): # Python 2.6
# _HTTPConnection on Python 2.6 will balk at this keyword arg, but
# not newer versions. We can still use it when creating a
# connection though, so we pop it *after* we have saved it as
# self.source_address.
kw.pop('source_address', None)
#: The socket options provided by the user. If no options are
#: provided, we use the default options.
self.socket_options = kw.pop('socket_options', self.default_socket_options)
# Superclass also sets self.source_address in Python 2.7+.
_HTTPConnection.__init__(self, *args, **kw)
_HTTPConnection.__init__(self, *args, **kw)
def _new_conn(self):
""" Establish a socket connection and set nodelay settings on it.
:return: a new socket connection
:return: New socket connection.
"""
extra_args = []
if self.source_address: # Python 2.7+
extra_args.append(self.source_address)
extra_kw = {}
if self.source_address:
extra_kw['source_address'] = self.source_address
conn = socket.create_connection(
(self.host, self.port), self.timeout, *extra_args)
conn.setsockopt(
socket.IPPROTO_TCP, socket.TCP_NODELAY, self.tcp_nodelay)
if self.socket_options:
extra_kw['socket_options'] = self.socket_options
try:
conn = connection.create_connection(
(self.host, self.port), self.timeout, **extra_kw)
except SocketTimeout:
raise ConnectTimeoutError(
self, "Connection to %s timed out. (connect timeout=%s)" %
(self.host, self.timeout))
return conn
@@ -101,6 +134,8 @@ class HTTPConnection(_HTTPConnection, object):
if getattr(self, '_tunnel_host', None):
# TODO: Fix tunnel so it doesn't depend on self.sock state.
self._tunnel()
# Mark this connection as not reusable
self.auto_open = 0
def connect(self):
conn = self._new_conn()
@@ -137,7 +172,6 @@ class VerifiedHTTPSConnection(HTTPSConnection):
cert_reqs = None
ca_certs = None
ssl_version = None
conn_kw = {}
def set_cert(self, key_file=None, cert_file=None,
cert_reqs=None, ca_certs=None,
@@ -152,18 +186,7 @@ class VerifiedHTTPSConnection(HTTPSConnection):
def connect(self):
# Add certificate verification
try:
sock = socket.create_connection(
address=(self.host, self.port), timeout=self.timeout,
**self.conn_kw)
except SocketTimeout:
raise ConnectTimeoutError(
self, "Connection to %s timed out. (connect timeout=%s)" %
(self.host, self.timeout))
sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY,
self.tcp_nodelay)
conn = self._new_conn()
resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs)
resolved_ssl_version = resolve_ssl_version(self.ssl_version)
@@ -173,17 +196,19 @@ class VerifiedHTTPSConnection(HTTPSConnection):
# _tunnel_host was added in Python 2.6.3
# (See: http://hg.python.org/cpython/rev/0f57b30a152f)
self.sock = sock
self.sock = conn
# Calls self._set_hostport(), so self.host is
# self._tunnel_host below.
self._tunnel()
# Mark this connection as not reusable
self.auto_open = 0
# Override the host with the one we're requesting data from.
hostname = self._tunnel_host
# Wrap socket using verification with the root certs in
# trusted_root_certs
self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file,
self.sock = ssl_wrap_socket(conn, self.key_file, self.cert_file,
cert_reqs=resolved_cert_reqs,
ca_certs=self.ca_certs,
server_hostname=hostname,
@@ -197,6 +222,8 @@ class VerifiedHTTPSConnection(HTTPSConnection):
match_hostname(self.sock.getpeercert(),
self.assert_hostname or hostname)
self.is_verified = resolved_cert_reqs == ssl.CERT_REQUIRED
if ssl:
# Make a copy for testing.
+153 -106
View File
@@ -1,17 +1,12 @@
# urllib3/connectionpool.py
# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
import sys
import errno
import logging
import sys
import warnings
from socket import error as SocketError, timeout as SocketTimeout
import socket
try: # Python 3
try: # Python 3
from queue import LifoQueue, Empty, Full
except ImportError:
from Queue import LifoQueue, Empty, Full
@@ -20,16 +15,16 @@ except ImportError:
from .exceptions import (
ClosedPoolError,
ConnectionError,
ConnectTimeoutError,
ProtocolError,
EmptyPoolError,
HostChangedError,
LocationParseError,
LocationValueError,
MaxRetryError,
ProxyError,
ReadTimeoutError,
SSLError,
TimeoutError,
ReadTimeoutError,
ProxyError,
InsecureRequestWarning,
)
from .packages.ssl_match_hostname import CertificateError
from .packages import six
@@ -41,11 +36,11 @@ from .connection import (
)
from .request import RequestMethods
from .response import HTTPResponse
from .util import (
get_host,
is_connection_dropped,
Timeout,
)
from .util.connection import is_connection_dropped
from .util.retry import Retry
from .util.timeout import Timeout
from .util.url import get_host
xrange = six.moves.xrange
@@ -54,8 +49,8 @@ log = logging.getLogger(__name__)
_Default = object()
## Pool objects
## Pool objects
class ConnectionPool(object):
"""
Base class for all connection pools, such as
@@ -66,13 +61,11 @@ class ConnectionPool(object):
QueueCls = LifoQueue
def __init__(self, host, port=None):
if host is None:
raise LocationParseError(host)
if not host:
raise LocationValueError("No host specified.")
# httplib doesn't like it when we include brackets in ipv6 addresses
host = host.strip('[]')
self.host = host
self.host = host.strip('[]')
self.port = port
def __str__(self):
@@ -82,6 +75,7 @@ class ConnectionPool(object):
# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252
_blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK])
class HTTPConnectionPool(ConnectionPool, RequestMethods):
"""
Thread-safe connection pool for one host.
@@ -126,6 +120,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
Headers to include with all requests, unless other headers are given
explicitly.
:param retries:
Retry configuration to use by default with requests in this pool.
:param _proxy:
Parsed proxy URL, should not be used directly, instead, see
:class:`urllib3.connectionpool.ProxyManager`"
@@ -133,6 +130,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
:param _proxy_headers:
A dictionary with proxy headers, should not be used directly,
instead, see :class:`urllib3.connectionpool.ProxyManager`"
:param \**conn_kw:
Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`,
:class:`urllib3.connection.HTTPSConnection` instances.
"""
scheme = 'http'
@@ -140,18 +141,22 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
def __init__(self, host, port=None, strict=False,
timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False,
headers=None, _proxy=None, _proxy_headers=None, **conn_kw):
headers=None, retries=None,
_proxy=None, _proxy_headers=None,
**conn_kw):
ConnectionPool.__init__(self, host, port)
RequestMethods.__init__(self, headers)
self.strict = strict
# This is for backwards compatibility and can be removed once a timeout
# can only be set to a Timeout object
if not isinstance(timeout, Timeout):
timeout = Timeout.from_float(timeout)
if retries is None:
retries = Retry.DEFAULT
self.timeout = timeout
self.retries = retries
self.pool = self.QueueCls(maxsize)
self.block = block
@@ -166,11 +171,14 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
# These are mostly for testing and debugging purposes.
self.num_connections = 0
self.num_requests = 0
if sys.version_info < (2, 7): # Python 2.6 and older
conn_kw.pop('source_address', None)
self.conn_kw = conn_kw
if self.proxy:
# Enable Nagle's algorithm for proxies, to avoid packet fragmentation.
# We cannot know if the user has added default socket options, so we cannot replace the
# list.
self.conn_kw.setdefault('socket_options', [])
def _new_conn(self):
"""
Return a fresh :class:`HTTPConnection`.
@@ -182,10 +190,6 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
conn = self.ConnectionCls(host=self.host, port=self.port,
timeout=self.timeout.connect_timeout,
strict=self.strict, **self.conn_kw)
if self.proxy is not None:
# Enable Nagle's algorithm for proxies, to avoid packet
# fragmentation.
conn.tcp_nodelay = 0
return conn
def _get_conn(self, timeout=None):
@@ -204,7 +208,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
try:
conn = self.pool.get(block=self.block, timeout=timeout)
except AttributeError: # self.pool is None
except AttributeError: # self.pool is None
raise ClosedPoolError(self, "Pool is closed.")
except Empty:
@@ -218,6 +222,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
if conn and is_connection_dropped(conn):
log.info("Resetting dropped connection: %s" % self.host)
conn.close()
if getattr(conn, 'auto_open', 1) == 0:
# This is a proxied connection that has been mutated by
# httplib._tunnel() and cannot be reused (since it would
# attempt to bypass the proxy)
conn = None
return conn or self._new_conn()
@@ -237,7 +246,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
"""
try:
self.pool.put(conn, block=False)
return # Everything is dandy, done.
return # Everything is dandy, done.
except AttributeError:
# self.pool is None.
pass
@@ -251,6 +260,12 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
if conn:
conn.close()
def _validate_conn(self, conn):
"""
Called right before a request is made, after the socket is created.
"""
pass
def _get_timeout(self, timeout):
""" Helper that always returns a :class:`urllib3.util.Timeout` """
if timeout is _Default:
@@ -282,23 +297,21 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
self.num_requests += 1
timeout_obj = self._get_timeout(timeout)
timeout_obj.start_connect()
conn.timeout = timeout_obj.connect_timeout
try:
timeout_obj.start_connect()
conn.timeout = timeout_obj.connect_timeout
# conn.request() calls httplib.*.request, not the method in
# urllib3.request. It also calls makefile (recv) on the socket.
conn.request(method, url, **httplib_request_kw)
except SocketTimeout:
raise ConnectTimeoutError(
self, "Connection to %s timed out. (connect timeout=%s)" %
(self.host, timeout_obj.connect_timeout))
# Trigger any extra validation we need to do.
self._validate_conn(conn)
# conn.request() calls httplib.*.request, not the method in
# urllib3.request. It also calls makefile (recv) on the socket.
conn.request(method, url, **httplib_request_kw)
# Reset the timeout for the recv() on the socket
read_timeout = timeout_obj.read_timeout
# App Engine doesn't have a sock attr
if hasattr(conn, 'sock'):
if getattr(conn, 'sock', None):
# In Python 3 socket.py will catch EAGAIN and return None when you
# try and read into the file pointer created by http.client, which
# instead raises a BadStatusLine exception. Instead of catching
@@ -306,18 +319,17 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
# timeouts, check for a zero timeout before making the request.
if read_timeout == 0:
raise ReadTimeoutError(
self, url,
"Read timed out. (read timeout=%s)" % read_timeout)
self, url, "Read timed out. (read timeout=%s)" % read_timeout)
if read_timeout is Timeout.DEFAULT_TIMEOUT:
conn.sock.settimeout(socket.getdefaulttimeout())
else: # None or a value
else: # None or a value
conn.sock.settimeout(read_timeout)
# Receive the response from the server
try:
try: # Python 2.7+, use buffering of HTTP responses
try: # Python 2.7+, use buffering of HTTP responses
httplib_response = conn.getresponse(buffering=True)
except TypeError: # Python 2.6 and older
except TypeError: # Python 2.6 and older
httplib_response = conn.getresponse()
except SocketTimeout:
raise ReadTimeoutError(
@@ -329,17 +341,17 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
# http://bugs.python.org/issue10272
if 'timed out' in str(e) or \
'did not complete (read)' in str(e): # Python 2.6
raise ReadTimeoutError(self, url, "Read timed out.")
raise ReadTimeoutError(
self, url, "Read timed out. (read timeout=%s)" % read_timeout)
raise
except SocketError as e: # Platform-specific: Python 2
except SocketError as e: # Platform-specific: Python 2
# See the above comment about EAGAIN in Python 3. In Python 2 we
# have to specifically catch it and throw the timeout error
if e.errno in _blocking_errnos:
raise ReadTimeoutError(
self, url,
"Read timed out. (read timeout=%s)" % read_timeout)
self, url, "Read timed out. (read timeout=%s)" % read_timeout)
raise
@@ -364,7 +376,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
conn.close()
except Empty:
pass # Done.
pass # Done.
def is_same_host(self, url):
"""
@@ -385,7 +397,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
return (scheme, host, port) == (self.scheme, self.host, self.port)
def urlopen(self, method, url, body=None, headers=None, retries=3,
def urlopen(self, method, url, body=None, headers=None, retries=None,
redirect=True, assert_same_host=True, timeout=_Default,
pool_timeout=None, release_conn=None, **response_kw):
"""
@@ -419,9 +431,20 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
these headers completely replace any pool-specific headers.
:param retries:
Number of retries to allow before raising a MaxRetryError exception.
If `False`, then retries are disabled and any exception is raised
immediately.
Configure the number of retries to allow before raising a
:class:`~urllib3.exceptions.MaxRetryError` exception.
Pass ``None`` to retry until you receive a response. Pass a
:class:`~urllib3.util.retry.Retry` object for fine-grained control
over different types of retries.
Pass an integer number to retry connection errors that many times,
but no other types of errors. Pass zero to never retry.
If ``False``, then retries are disabled and any exception is raised
immediately. Also, instead of raising a MaxRetryError on redirects,
the redirect response will be returned.
:type retries: :class:`~urllib3.util.retry.Retry`, False, or an int.
:param redirect:
If True, automatically handle redirects (status codes 301, 302,
@@ -460,15 +483,15 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
if headers is None:
headers = self.headers
if retries < 0 and retries is not False:
raise MaxRetryError(self, url)
if not isinstance(retries, Retry):
retries = Retry.from_int(retries, redirect=redirect, default=self.retries)
if release_conn is None:
release_conn = response_kw.get('preload_content', True)
# Check host
if assert_same_host and not self.is_same_host(url):
raise HostChangedError(self, url, retries - 1)
raise HostChangedError(self, url, retries)
conn = None
@@ -484,10 +507,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
err = None
try:
# Request a connection from the queue
# Request a connection from the queue.
conn = self._get_conn(timeout=pool_timeout)
# Make the request on the httplib connection object
# Make the request on the httplib connection object.
httplib_response = self._make_request(conn, method, url,
timeout=timeout,
body=body, headers=headers)
@@ -526,21 +549,15 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
conn.close()
conn = None
if not retries:
if isinstance(e, TimeoutError):
# TimeoutError is exempt from MaxRetryError-wrapping.
# FIXME: ... Not sure why. Add a reason here.
raise
stacktrace = sys.exc_info()[2]
if isinstance(e, SocketError) and self.proxy:
e = ProxyError('Cannot connect to proxy.', e)
elif isinstance(e, (SocketError, HTTPException)):
e = ProtocolError('Connection aborted.', e)
# Wrap unexpected exceptions with the most appropriate
# module-level exception and re-raise.
if isinstance(e, SocketError) and self.proxy:
raise ProxyError('Cannot connect to proxy.', e)
if retries is False:
raise ConnectionError('Connection failed.', e)
raise MaxRetryError(self, url, e)
retries = retries.increment(method, url, error=e,
_pool=self, _stacktrace=stacktrace)
retries.sleep()
# Keep track of the error for the retry warning.
err = e
@@ -554,23 +571,43 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
if not conn:
# Try again
log.warning("Retrying (%d attempts remain) after connection "
log.warning("Retrying (%r) after connection "
"broken by '%r': %s" % (retries, err, url))
return self.urlopen(method, url, body, headers, retries - 1,
return self.urlopen(method, url, body, headers, retries,
redirect, assert_same_host,
timeout=timeout, pool_timeout=pool_timeout,
release_conn=release_conn, **response_kw)
# Handle redirect?
redirect_location = redirect and response.get_redirect_location()
if redirect_location and retries is not False:
if redirect_location:
if response.status == 303:
method = 'GET'
try:
retries = retries.increment(method, url, response=response, _pool=self)
except MaxRetryError:
if retries.raise_on_redirect:
raise
return response
log.info("Redirecting %s -> %s" % (url, redirect_location))
return self.urlopen(method, redirect_location, body, headers,
retries - 1, redirect, assert_same_host,
timeout=timeout, pool_timeout=pool_timeout,
release_conn=release_conn, **response_kw)
retries=retries, redirect=redirect,
assert_same_host=assert_same_host,
timeout=timeout, pool_timeout=pool_timeout,
release_conn=release_conn, **response_kw)
# Check if we should retry the HTTP response.
if retries.is_forced_retry(method, status_code=response.status):
retries = retries.increment(method, url, response=response, _pool=self)
retries.sleep()
log.info("Forced retry: %s" % url)
return self.urlopen(method, url, body, headers,
retries=retries, redirect=redirect,
assert_same_host=assert_same_host,
timeout=timeout, pool_timeout=pool_timeout,
release_conn=release_conn, **response_kw)
return response
@@ -597,19 +634,17 @@ class HTTPSConnectionPool(HTTPConnectionPool):
ConnectionCls = HTTPSConnection
def __init__(self, host, port=None,
strict=False, timeout=None, maxsize=1,
block=False, headers=None,
strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1,
block=False, headers=None, retries=None,
_proxy=None, _proxy_headers=None,
key_file=None, cert_file=None, cert_reqs=None,
ca_certs=None, ssl_version=None,
assert_hostname=None, assert_fingerprint=None,
**conn_kw):
if sys.version_info < (2, 7): # Python 2.6 or older
conn_kw.pop('source_address', None)
HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize,
block, headers, _proxy, _proxy_headers, **conn_kw)
block, headers, retries, _proxy, _proxy_headers,
**conn_kw)
self.key_file = key_file
self.cert_file = cert_file
self.cert_reqs = cert_reqs
@@ -617,7 +652,6 @@ class HTTPSConnectionPool(HTTPConnectionPool):
self.ssl_version = ssl_version
self.assert_hostname = assert_hostname
self.assert_fingerprint = assert_fingerprint
self.conn_kw = conn_kw
def _prepare_conn(self, conn):
"""
@@ -633,7 +667,6 @@ class HTTPSConnectionPool(HTTPConnectionPool):
assert_hostname=self.assert_hostname,
assert_fingerprint=self.assert_fingerprint)
conn.ssl_version = self.ssl_version
conn.conn_kw = self.conn_kw
if self.proxy is not None:
# Python 2.7+
@@ -641,7 +674,12 @@ class HTTPSConnectionPool(HTTPConnectionPool):
set_tunnel = conn.set_tunnel
except AttributeError: # Platform-specific: Python 2.6
set_tunnel = conn._set_tunnel
set_tunnel(self.host, self.port, self.proxy_headers)
if sys.version_info <= (2, 6, 4) and not self.proxy_headers: # Python 2.6.4 and older
set_tunnel(self.host, self.port)
else:
set_tunnel(self.host, self.port, self.proxy_headers)
# Establish tunnel connection early, because otherwise httplib
# would improperly set Host: header to proxy's IP:port.
conn.connect()
@@ -667,21 +705,30 @@ class HTTPSConnectionPool(HTTPConnectionPool):
actual_host = self.proxy.host
actual_port = self.proxy.port
extra_params = {}
if not six.PY3: # Python 2
extra_params['strict'] = self.strict
extra_params.update(self.conn_kw)
conn = self.ConnectionCls(host=actual_host, port=actual_port,
timeout=self.timeout.connect_timeout,
**extra_params)
if self.proxy is not None:
# Enable Nagle's algorithm for proxies, to avoid packet
# fragmentation.
conn.tcp_nodelay = 0
strict=self.strict, **self.conn_kw)
return self._prepare_conn(conn)
def _validate_conn(self, conn):
"""
Called right before a request is made, after the socket is created.
"""
super(HTTPSConnectionPool, self)._validate_conn(conn)
# Force connect early to allow us to validate the connection.
if not conn.sock:
conn.connect()
if not conn.is_verified:
warnings.warn((
'Unverified HTTPS request is being made. '
'Adding certificate verification is strongly advised. See: '
'https://urllib3.readthedocs.org/en/latest/security.html '
'(This warning will only appear once by default.)'),
InsecureRequestWarning)
def connection_from_url(url, **kw):
"""
@@ -698,7 +745,7 @@ def connection_from_url(url, **kw):
:class:`.ConnectionPool`. Useful for specifying things like
timeout, maxsize, headers, etc.
Example: ::
Example::
>>> conn = connection_from_url('http://google.com/')
>>> r = conn.request('GET', '/')
@@ -1,9 +1,3 @@
# urllib3/contrib/ntlmpool.py
# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
"""
NTLM authenticating pool, contributed by erikcederstran
+21 -181
View File
@@ -54,7 +54,6 @@ from pyasn1.type import univ, constraint
from socket import _fileobject, timeout
import ssl
import select
from cStringIO import StringIO
from .. import connection
from .. import util
@@ -155,196 +154,37 @@ def get_subj_alt_name(peer_cert):
return dns_name
class fileobject(_fileobject):
def _wait_for_sock(self):
rd, wd, ed = select.select([self._sock], [], [],
self._sock.gettimeout())
if not rd:
raise timeout()
def read(self, size=-1):
# Use max, disallow tiny reads in a loop as they are very inefficient.
# We never leave read() with any leftover data from a new recv() call
# in our internal buffer.
rbufsize = max(self._rbufsize, self.default_bufsize)
# Our use of StringIO rather than lists of string objects returned by
# recv() minimizes memory usage and fragmentation that occurs when
# rbufsize is large compared to the typical return value of recv().
buf = self._rbuf
buf.seek(0, 2) # seek end
if size < 0:
# Read until EOF
self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
while True:
try:
data = self._sock.recv(rbufsize)
except OpenSSL.SSL.WantReadError:
self._wait_for_sock()
continue
if not data:
break
buf.write(data)
return buf.getvalue()
else:
# Read until size bytes or EOF seen, whichever comes first
buf_len = buf.tell()
if buf_len >= size:
# Already have size bytes in our buffer? Extract and return.
buf.seek(0)
rv = buf.read(size)
self._rbuf = StringIO()
self._rbuf.write(buf.read())
return rv
self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
while True:
left = size - buf_len
# recv() will malloc the amount of memory given as its
# parameter even though it often returns much less data
# than that. The returned data string is short lived
# as we copy it into a StringIO and free it. This avoids
# fragmentation issues on many platforms.
try:
data = self._sock.recv(left)
except OpenSSL.SSL.WantReadError:
self._wait_for_sock()
continue
if not data:
break
n = len(data)
if n == size and not buf_len:
# Shortcut. Avoid buffer data copies when:
# - We have no data in our buffer.
# AND
# - Our call to recv returned exactly the
# number of bytes we were asked to read.
return data
if n == left:
buf.write(data)
del data # explicit free
break
assert n <= left, "recv(%d) returned %d bytes" % (left, n)
buf.write(data)
buf_len += n
del data # explicit free
#assert buf_len == buf.tell()
return buf.getvalue()
def readline(self, size=-1):
buf = self._rbuf
buf.seek(0, 2) # seek end
if buf.tell() > 0:
# check if we already have it in our buffer
buf.seek(0)
bline = buf.readline(size)
if bline.endswith('\n') or len(bline) == size:
self._rbuf = StringIO()
self._rbuf.write(buf.read())
return bline
del bline
if size < 0:
# Read until \n or EOF, whichever comes first
if self._rbufsize <= 1:
# Speed up unbuffered case
buf.seek(0)
buffers = [buf.read()]
self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
data = None
recv = self._sock.recv
while True:
try:
while data != "\n":
data = recv(1)
if not data:
break
buffers.append(data)
except OpenSSL.SSL.WantReadError:
self._wait_for_sock()
continue
break
return "".join(buffers)
buf.seek(0, 2) # seek end
self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
while True:
try:
data = self._sock.recv(self._rbufsize)
except OpenSSL.SSL.WantReadError:
self._wait_for_sock()
continue
if not data:
break
nl = data.find('\n')
if nl >= 0:
nl += 1
buf.write(data[:nl])
self._rbuf.write(data[nl:])
del data
break
buf.write(data)
return buf.getvalue()
else:
# Read until size bytes or \n or EOF seen, whichever comes first
buf.seek(0, 2) # seek end
buf_len = buf.tell()
if buf_len >= size:
buf.seek(0)
rv = buf.read(size)
self._rbuf = StringIO()
self._rbuf.write(buf.read())
return rv
self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
while True:
try:
data = self._sock.recv(self._rbufsize)
except OpenSSL.SSL.WantReadError:
self._wait_for_sock()
continue
if not data:
break
left = size - buf_len
# did we just receive a newline?
nl = data.find('\n', 0, left)
if nl >= 0:
nl += 1
# save the excess data to _rbuf
self._rbuf.write(data[nl:])
if buf_len:
buf.write(data[:nl])
break
else:
# Shortcut. Avoid data copy through buf when returning
# a substring of our first recv().
return data[:nl]
n = len(data)
if n == size and not buf_len:
# Shortcut. Avoid data copy through buf when
# returning exactly all of our first recv().
return data
if n >= left:
buf.write(data[:left])
self._rbuf.write(data[left:])
break
buf.write(data)
buf_len += n
#assert buf_len == buf.tell()
return buf.getvalue()
class WrappedSocket(object):
'''API-compatibility wrapper for Python OpenSSL's Connection-class.'''
def __init__(self, connection, socket):
def __init__(self, connection, socket, suppress_ragged_eofs=True):
self.connection = connection
self.socket = socket
self.suppress_ragged_eofs = suppress_ragged_eofs
def fileno(self):
return self.socket.fileno()
def makefile(self, mode, bufsize=-1):
return fileobject(self.connection, mode, bufsize)
return _fileobject(self, mode, bufsize)
def recv(self, *args, **kwargs):
try:
data = self.connection.recv(*args, **kwargs)
except OpenSSL.SSL.SysCallError as e:
if self.suppress_ragged_eofs and e.args == (-1, 'Unexpected EOF'):
return b''
else:
raise
except OpenSSL.SSL.WantReadError:
rd, wd, ed = select.select(
[self.socket], [], [], self.socket.gettimeout())
if not rd:
raise timeout()
else:
return self.recv(*args, **kwargs)
else:
return data
def settimeout(self, timeout):
return self.socket.settimeout(timeout)
+26 -13
View File
@@ -1,9 +1,3 @@
# urllib3/exceptions.py
# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
## Base Exceptions
@@ -11,6 +5,11 @@ class HTTPError(Exception):
"Base exception used by this module."
pass
class HTTPWarning(Warning):
"Base warning used by this module."
pass
class PoolError(HTTPError):
"Base exception for errors caused within a pool."
@@ -44,16 +43,20 @@ class ProxyError(HTTPError):
pass
class ConnectionError(HTTPError):
"Raised when a normal connection fails."
pass
class DecodeError(HTTPError):
"Raised when automatic decoding based on Content-Type fails."
pass
class ProtocolError(HTTPError):
"Raised when something unexpected happens mid-request/response."
pass
#: Renamed to ProtocolError but aliased for backwards compatibility.
ConnectionError = ProtocolError
## Leaf Exceptions
class MaxRetryError(RequestError):
@@ -64,7 +67,7 @@ class MaxRetryError(RequestError):
message = "Max retries exceeded with url: %s" % url
if reason:
message += " (Caused by %s: %s)" % (type(reason), reason)
message += " (Caused by %r)" % reason
else:
message += " (Caused by redirect)"
@@ -116,7 +119,12 @@ class ClosedPoolError(PoolError):
pass
class LocationParseError(ValueError, HTTPError):
class LocationValueError(ValueError, HTTPError):
"Raised when there is something wrong with a given URL input."
pass
class LocationParseError(LocationValueError):
"Raised when get_host or similar fails to parse the URL input."
def __init__(self, location):
@@ -124,3 +132,8 @@ class LocationParseError(ValueError, HTTPError):
HTTPError.__init__(self, message)
self.location = location
class InsecureRequestWarning(HTTPWarning):
"Warned when making an unverified HTTPS request."
pass
+13 -13
View File
@@ -1,9 +1,3 @@
# urllib3/fields.py
# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
import email.utils
import mimetypes
@@ -78,9 +72,10 @@ class RequestField(object):
"""
A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters.
Supports constructing :class:`~urllib3.fields.RequestField` from parameter
of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type)
tuple where the MIME type is optional. For example: ::
Supports constructing :class:`~urllib3.fields.RequestField` from
parameter of key/value strings AND key/filetuple. A filetuple is a
(filename, data, MIME type) tuple where the MIME type is optional.
For example::
'foo': 'bar',
'fakefile': ('foofile.txt', 'contents of foofile'),
@@ -125,8 +120,8 @@ class RequestField(object):
'Content-Disposition' fields.
:param header_parts:
A sequence of (k, v) typles or a :class:`dict` of (k, v) to format as
`k1="v1"; k2="v2"; ...`.
A sequence of (k, v) typles or a :class:`dict` of (k, v) to format
as `k1="v1"; k2="v2"; ...`.
"""
parts = []
iterable = header_parts
@@ -158,7 +153,8 @@ class RequestField(object):
lines.append('\r\n')
return '\r\n'.join(lines)
def make_multipart(self, content_disposition=None, content_type=None, content_location=None):
def make_multipart(self, content_disposition=None, content_type=None,
content_location=None):
"""
Makes this request field into a multipart request field.
@@ -172,6 +168,10 @@ class RequestField(object):
"""
self.headers['Content-Disposition'] = content_disposition or 'form-data'
self.headers['Content-Disposition'] += '; '.join(['', self._render_parts((('name', self._name), ('filename', self._filename)))])
self.headers['Content-Disposition'] += '; '.join([
'', self._render_parts(
(('name', self._name), ('filename', self._filename))
)
])
self.headers['Content-Type'] = content_type
self.headers['Content-Location'] = content_location
+4 -11
View File
@@ -1,11 +1,4 @@
# urllib3/filepost.py
# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
import codecs
import mimetypes
from uuid import uuid4
from io import BytesIO
@@ -38,10 +31,10 @@ def iter_field_objects(fields):
i = iter(fields)
for field in i:
if isinstance(field, RequestField):
yield field
else:
yield RequestField.from_tuples(*field)
if isinstance(field, RequestField):
yield field
else:
yield RequestField.from_tuples(*field)
def iter_fields(fields):
@@ -2,7 +2,6 @@
# Passes Python2.7's test suite and incorporates all the latest updates.
# Copyright 2009 Raymond Hettinger, released under the MIT License.
# http://code.activestate.com/recipes/576693/
try:
from thread import get_ident as _get_ident
except ImportError:
+25 -18
View File
@@ -1,9 +1,3 @@
# urllib3/poolmanager.py
# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
import logging
try: # Python 3
@@ -14,8 +8,10 @@ except ImportError:
from ._collections import RecentlyUsedContainer
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
from .connectionpool import port_by_scheme
from .exceptions import LocationValueError
from .request import RequestMethods
from .util import parse_url
from .util.url import parse_url
from .util.retry import Retry
__all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url']
@@ -49,7 +45,7 @@ class PoolManager(RequestMethods):
Additional parameters are used to create fresh
:class:`urllib3.connectionpool.ConnectionPool` instances.
Example: ::
Example::
>>> manager = PoolManager(num_pools=2)
>>> r = manager.request('GET', 'http://google.com/')
@@ -102,10 +98,11 @@ class PoolManager(RequestMethods):
``urllib3.connectionpool.port_by_scheme``.
"""
if not host:
raise LocationValueError("No host specified.")
scheme = scheme or 'http'
port = port or port_by_scheme.get(scheme, 80)
pool_key = (scheme, host, port)
with self.pools.lock:
@@ -118,6 +115,7 @@ class PoolManager(RequestMethods):
# Make a fresh ConnectionPool of the desired type
pool = self._new_pool(scheme, host, port)
self.pools[pool_key] = pool
return pool
def connection_from_url(self, url):
@@ -161,13 +159,18 @@ class PoolManager(RequestMethods):
# Support relative URLs for redirecting.
redirect_location = urljoin(url, redirect_location)
# RFC 2616, Section 10.3.4
# RFC 7231, Section 6.4.4
if response.status == 303:
method = 'GET'
log.info("Redirecting %s -> %s" % (url, redirect_location))
kw['retries'] = kw.get('retries', 3) - 1 # Persist retries countdown
retries = kw.get('retries')
if not isinstance(retries, Retry):
retries = Retry.from_int(retries, redirect=redirect)
kw['retries'] = retries.increment(method, redirect_location)
kw['redirect'] = redirect
log.info("Redirecting %s -> %s" % (url, redirect_location))
return self.urlopen(method, redirect_location, **kw)
@@ -208,12 +211,16 @@ class ProxyManager(PoolManager):
if not proxy.port:
port = port_by_scheme.get(proxy.scheme, 80)
proxy = proxy._replace(port=port)
assert proxy.scheme in ("http", "https"), \
'Not supported proxy scheme %s' % proxy.scheme
self.proxy = proxy
self.proxy_headers = proxy_headers or {}
assert self.proxy.scheme in ("http", "https"), \
'Not supported proxy scheme %s' % self.proxy.scheme
connection_pool_kw['_proxy'] = self.proxy
connection_pool_kw['_proxy_headers'] = self.proxy_headers
super(ProxyManager, self).__init__(
num_pools, headers, **connection_pool_kw)
@@ -248,10 +255,10 @@ class ProxyManager(PoolManager):
# For proxied HTTPS requests, httplib sets the necessary headers
# on the CONNECT to the proxy. For HTTP, we'll definitely
# need to set 'Host' at the very least.
kw['headers'] = self._set_proxy_headers(url, kw.get('headers',
self.headers))
headers = kw.get('headers', self.headers)
kw['headers'] = self._set_proxy_headers(url, headers)
return super(ProxyManager, self).urlopen(method, url, redirect, **kw)
return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw)
def proxy_from_url(url, **kw):
+19 -25
View File
@@ -1,9 +1,3 @@
# urllib3/request.py
# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
try:
from urllib.parse import urlencode
except ImportError:
@@ -26,8 +20,8 @@ class RequestMethods(object):
Specifically,
:meth:`.request_encode_url` is for sending requests whose fields are encoded
in the URL (such as GET, HEAD, DELETE).
:meth:`.request_encode_url` is for sending requests whose fields are
encoded in the URL (such as GET, HEAD, DELETE).
:meth:`.request_encode_body` is for sending requests whose fields are
encoded in the *body* of the request using multipart or www-form-urlencoded
@@ -51,7 +45,7 @@ class RequestMethods(object):
def urlopen(self, method, url, body=None, headers=None,
encode_multipart=True, multipart_boundary=None,
**kw): # Abstract
**kw): # Abstract
raise NotImplemented("Classes extending RequestMethods must implement "
"their own ``urlopen`` method.")
@@ -61,8 +55,8 @@ class RequestMethods(object):
``fields`` based on the ``method`` used.
This is a convenience method that requires the least amount of manual
effort. It can be used in most situations, while still having the option
to drop down to more specific methods when necessary, such as
effort. It can be used in most situations, while still having the
option to drop down to more specific methods when necessary, such as
:meth:`request_encode_url`, :meth:`request_encode_body`,
or even the lowest level :meth:`urlopen`.
"""
@@ -70,12 +64,12 @@ class RequestMethods(object):
if method in self._encode_url_methods:
return self.request_encode_url(method, url, fields=fields,
headers=headers,
**urlopen_kw)
headers=headers,
**urlopen_kw)
else:
return self.request_encode_body(method, url, fields=fields,
headers=headers,
**urlopen_kw)
headers=headers,
**urlopen_kw)
def request_encode_url(self, method, url, fields=None, **urlopen_kw):
"""
@@ -94,18 +88,18 @@ class RequestMethods(object):
the body. This is useful for request methods like POST, PUT, PATCH, etc.
When ``encode_multipart=True`` (default), then
:meth:`urllib3.filepost.encode_multipart_formdata` is used to encode the
payload with the appropriate content type. Otherwise
:meth:`urllib3.filepost.encode_multipart_formdata` is used to encode
the payload with the appropriate content type. Otherwise
:meth:`urllib.urlencode` is used with the
'application/x-www-form-urlencoded' content type.
Multipart encoding must be used when posting files, and it's reasonably
safe to use it in other times too. However, it may break request signing,
such as with OAuth.
safe to use it in other times too. However, it may break request
signing, such as with OAuth.
Supports an optional ``fields`` parameter of key/value strings AND
key/filetuple. A filetuple is a (filename, data, MIME type) tuple where
the MIME type is optional. For example: ::
the MIME type is optional. For example::
fields = {
'foo': 'bar',
@@ -119,17 +113,17 @@ class RequestMethods(object):
When uploading a file, providing a filename (the first parameter of the
tuple) is optional but recommended to best mimick behavior of browsers.
Note that if ``headers`` are supplied, the 'Content-Type' header will be
overwritten because it depends on the dynamic random boundary string
Note that if ``headers`` are supplied, the 'Content-Type' header will
be overwritten because it depends on the dynamic random boundary string
which is used to compose the body of the request. The random boundary
string can be explicitly set with the ``multipart_boundary`` parameter.
"""
if encode_multipart:
body, content_type = encode_multipart_formdata(fields or {},
boundary=multipart_boundary)
body, content_type = encode_multipart_formdata(
fields or {}, boundary=multipart_boundary)
else:
body, content_type = (urlencode(fields or {}),
'application/x-www-form-urlencoded')
'application/x-www-form-urlencoded')
if headers is None:
headers = self.headers
+47 -35
View File
@@ -1,22 +1,14 @@
# urllib3/response.py
# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
import logging
import zlib
import io
from socket import timeout as SocketTimeout
from ._collections import HTTPHeaderDict
from .exceptions import DecodeError
from .exceptions import ProtocolError, DecodeError, ReadTimeoutError
from .packages.six import string_types as basestring, binary_type
from .util import is_fp_closed
from .connection import HTTPException, BaseSSLError
from .util.response import is_fp_closed
log = logging.getLogger(__name__)
class DeflateDecoder(object):
@@ -91,11 +83,14 @@ class HTTPResponse(io.IOBase):
self.decode_content = decode_content
self._decoder = None
self._body = body if body and isinstance(body, basestring) else None
self._body = None
self._fp = None
self._original_response = original_response
self._fp_bytes_read = 0
if body and isinstance(body, (basestring, binary_type)):
self._body = body
self._pool = pool
self._connection = connection
@@ -163,8 +158,8 @@ class HTTPResponse(io.IOBase):
after having ``.read()`` the file object. (Overridden if ``amt`` is
set.)
"""
# Note: content-encoding value should be case-insensitive, per RFC 2616
# Section 3.5
# Note: content-encoding value should be case-insensitive, per RFC 7230
# Section 3.2
content_encoding = self.headers.get('content-encoding', '').lower()
if self._decoder is None:
if content_encoding in self.CONTENT_DECODERS:
@@ -178,23 +173,42 @@ class HTTPResponse(io.IOBase):
flush_decoder = False
try:
if amt is None:
# cStringIO doesn't like amt=None
data = self._fp.read()
flush_decoder = True
else:
cache_content = False
data = self._fp.read(amt)
if amt != 0 and not data: # Platform-specific: Buggy versions of Python.
# Close the connection when no data is returned
#
# This is redundant to what httplib/http.client _should_
# already do. However, versions of python released before
# December 15, 2012 (http://bugs.python.org/issue16298) do not
# properly close the connection in all cases. There is no harm
# in redundantly calling close.
self._fp.close()
try:
if amt is None:
# cStringIO doesn't like amt=None
data = self._fp.read()
flush_decoder = True
else:
cache_content = False
data = self._fp.read(amt)
if amt != 0 and not data: # Platform-specific: Buggy versions of Python.
# Close the connection when no data is returned
#
# This is redundant to what httplib/http.client _should_
# already do. However, versions of python released before
# December 15, 2012 (http://bugs.python.org/issue16298) do
# not properly close the connection in all cases. There is
# no harm in redundantly calling close.
self._fp.close()
flush_decoder = True
except SocketTimeout:
# FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
# there is yet no clean way to get at it from this context.
raise ReadTimeoutError(self._pool, None, 'Read timed out.')
except BaseSSLError as e:
# FIXME: Is there a better way to differentiate between SSLErrors?
if not 'read operation timed out' in str(e): # Defensive:
# This shouldn't happen but just in case we're missing an edge
# case, let's avoid swallowing SSL errors.
raise
raise ReadTimeoutError(self._pool, None, 'Read timed out.')
except HTTPException as e:
# This includes IncompleteRead.
raise ProtocolError('Connection broken: %r' % e, e)
self._fp_bytes_read += len(data)
@@ -204,8 +218,7 @@ class HTTPResponse(io.IOBase):
except (IOError, zlib.error) as e:
raise DecodeError(
"Received response with content-encoding: %s, but "
"failed to decode it." % content_encoding,
e)
"failed to decode it." % content_encoding, e)
if flush_decoder and decode_content and self._decoder:
buf = self._decoder.decompress(binary_type())
@@ -242,7 +255,6 @@ class HTTPResponse(io.IOBase):
if data:
yield data
@classmethod
def from_httplib(ResponseCls, r, **response_kw):
"""
@@ -297,7 +309,7 @@ class HTTPResponse(io.IOBase):
elif hasattr(self._fp, "fileno"):
return self._fp.fileno()
else:
raise IOError("The file-like object this HTTPResponse is wrapped "
raise IOError("The file-like object this HTTPResponse is wrapped "
"around has no file descriptor")
def flush(self):
+3 -6
View File
@@ -1,9 +1,4 @@
# urllib3/util/__init__.py
# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
# For backwards compatibility, provide imports that used to be here.
from .connection import is_connection_dropped
from .request import make_headers
from .response import is_fp_closed
@@ -19,6 +14,8 @@ from .timeout import (
current_time,
Timeout,
)
from .retry import Retry
from .url import (
get_host,
parse_url,
+55 -3
View File
@@ -1,4 +1,4 @@
from socket import error as SocketError
import socket
try:
from select import poll, POLLIN
except ImportError: # `poll` doesn't exist on OSX and other platforms
@@ -8,6 +8,7 @@ except ImportError: # `poll` doesn't exist on OSX and other platforms
except ImportError: # `select` doesn't exist on AppEngine.
select = False
def is_connection_dropped(conn): # Platform-specific
"""
Returns True if the connection is dropped and should be closed.
@@ -22,7 +23,7 @@ def is_connection_dropped(conn): # Platform-specific
if sock is False: # Platform-specific: AppEngine
return False
if sock is None: # Connection already closed (such as by httplib).
return False
return True
if not poll:
if not select: # Platform-specific: AppEngine
@@ -30,7 +31,7 @@ def is_connection_dropped(conn): # Platform-specific
try:
return select([sock], [], [], 0.0)[0]
except SocketError:
except socket.error:
return True
# This version is better on platforms that support it.
@@ -42,4 +43,55 @@ def is_connection_dropped(conn): # Platform-specific
return True
# This function is copied from socket.py in the Python 2.7 standard
# library test suite. Added to its signature is only `socket_options`.
def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
source_address=None, socket_options=None):
"""Connect to *address* and return the socket object.
Convenience function. Connect to *address* (a 2-tuple ``(host,
port)``) and return the socket object. Passing the optional
*timeout* parameter will set the timeout on the socket instance
before attempting to connect. If no *timeout* is supplied, the
global default timeout setting returned by :func:`getdefaulttimeout`
is used. If *source_address* is set it must be a tuple of (host, port)
for the socket to bind as a source address before making the connection.
An host of '' or port 0 tells the OS to use the default.
"""
host, port = address
err = None
for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
af, socktype, proto, canonname, sa = res
sock = None
try:
sock = socket.socket(af, socktype, proto)
# If provided, set socket level options before connecting.
# This is the only addition urllib3 makes to this function.
_set_socket_options(sock, socket_options)
if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
sock.settimeout(timeout)
if source_address:
sock.bind(source_address)
sock.connect(sa)
return sock
except socket.error as _:
err = _
if sock is not None:
sock.close()
if err is not None:
raise err
else:
raise socket.error("getaddrinfo returns an empty list")
def _set_socket_options(sock, options):
if options is None:
return
for opt in options:
sock.setsockopt(*opt)
+11 -8
View File
@@ -1,13 +1,12 @@
from base64 import b64encode
from ..packages import six
from ..packages.six import b
ACCEPT_ENCODING = 'gzip,deflate'
def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
basic_auth=None, proxy_basic_auth=None):
basic_auth=None, proxy_basic_auth=None, disable_cache=None):
"""
Shortcuts for generating request headers.
@@ -32,7 +31,10 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
Colon-separated username:password string for 'proxy-authorization: basic ...'
auth header.
Example: ::
:param disable_cache:
If ``True``, adds 'cache-control: no-cache' header.
Example::
>>> make_headers(keep_alive=True, user_agent="Batman/1.0")
{'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
@@ -57,12 +59,13 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
if basic_auth:
headers['authorization'] = 'Basic ' + \
b64encode(six.b(basic_auth)).decode('utf-8')
b64encode(b(basic_auth)).decode('utf-8')
if proxy_basic_auth:
headers['proxy-authorization'] = 'Basic ' + \
b64encode(six.b(proxy_basic_auth)).decode('utf-8')
b64encode(b(proxy_basic_auth)).decode('utf-8')
if disable_cache:
headers['cache-control'] = 'no-cache'
return headers
+279
View File
@@ -0,0 +1,279 @@
import time
import logging
from ..exceptions import (
ProtocolError,
ConnectTimeoutError,
ReadTimeoutError,
MaxRetryError,
)
from ..packages import six
log = logging.getLogger(__name__)
class Retry(object):
""" Retry configuration.
Each retry attempt will create a new Retry object with updated values, so
they can be safely reused.
Retries can be defined as a default for a pool::
retries = Retry(connect=5, read=2, redirect=5)
http = PoolManager(retries=retries)
response = http.request('GET', 'http://example.com/')
Or per-request (which overrides the default for the pool)::
response = http.request('GET', 'http://example.com/', retries=Retry(10))
Retries can be disabled by passing ``False``::
response = http.request('GET', 'http://example.com/', retries=False)
Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless
retries are disabled, in which case the causing exception will be raised.
:param int total:
Total number of retries to allow. Takes precedence over other counts.
Set to ``None`` to remove this constraint and fall back on other
counts. It's a good idea to set this to some sensibly-high value to
account for unexpected edge cases and avoid infinite retry loops.
Set to ``0`` to fail on the first retry.
Set to ``False`` to disable and imply ``raise_on_redirect=False``.
:param int connect:
How many connection-related errors to retry on.
These are errors raised before the request is sent to the remote server,
which we assume has not triggered the server to process the request.
Set to ``0`` to fail on the first retry of this type.
:param int read:
How many times to retry on read errors.
These errors are raised after the request was sent to the server, so the
request may have side-effects.
Set to ``0`` to fail on the first retry of this type.
:param int redirect:
How many redirects to perform. Limit this to avoid infinite redirect
loops.
A redirect is a HTTP response with a status code 301, 302, 303, 307 or
308.
Set to ``0`` to fail on the first retry of this type.
Set to ``False`` to disable and imply ``raise_on_redirect=False``.
:param iterable method_whitelist:
Set of uppercased HTTP method verbs that we should retry on.
By default, we only retry on methods which are considered to be
indempotent (multiple requests with the same parameters end with the
same state). See :attr:`Retry.DEFAULT_METHOD_WHITELIST`.
:param iterable status_forcelist:
A set of HTTP status codes that we should force a retry on.
By default, this is disabled with ``None``.
:param float backoff_factor:
A backoff factor to apply between attempts. urllib3 will sleep for::
{backoff factor} * (2 ^ ({number of total retries} - 1))
seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep
for [0.1s, 0.2s, 0.4s, ...] between retries. It will never be longer
than :attr:`Retry.MAX_BACKOFF`.
By default, backoff is disabled (set to 0).
:param bool raise_on_redirect: Whether, if the number of redirects is
exhausted, to raise a MaxRetryError, or to return a response with a
response code in the 3xx range.
"""
DEFAULT_METHOD_WHITELIST = frozenset([
'HEAD', 'GET', 'PUT', 'DELETE', 'OPTIONS', 'TRACE'])
#: Maximum backoff time.
BACKOFF_MAX = 120
def __init__(self, total=10, connect=None, read=None, redirect=None,
method_whitelist=DEFAULT_METHOD_WHITELIST, status_forcelist=None,
backoff_factor=0, raise_on_redirect=True, _observed_errors=0):
self.total = total
self.connect = connect
self.read = read
if redirect is False or total is False:
redirect = 0
raise_on_redirect = False
self.redirect = redirect
self.status_forcelist = status_forcelist or set()
self.method_whitelist = method_whitelist
self.backoff_factor = backoff_factor
self.raise_on_redirect = raise_on_redirect
self._observed_errors = _observed_errors # TODO: use .history instead?
def new(self, **kw):
params = dict(
total=self.total,
connect=self.connect, read=self.read, redirect=self.redirect,
method_whitelist=self.method_whitelist,
status_forcelist=self.status_forcelist,
backoff_factor=self.backoff_factor,
raise_on_redirect=self.raise_on_redirect,
_observed_errors=self._observed_errors,
)
params.update(kw)
return type(self)(**params)
@classmethod
def from_int(cls, retries, redirect=True, default=None):
""" Backwards-compatibility for the old retries format."""
if retries is None:
retries = default if default is not None else cls.DEFAULT
if isinstance(retries, Retry):
return retries
redirect = bool(redirect) and None
new_retries = cls(retries, redirect=redirect)
log.debug("Converted retries value: %r -> %r" % (retries, new_retries))
return new_retries
def get_backoff_time(self):
""" Formula for computing the current backoff
:rtype: float
"""
if self._observed_errors <= 1:
return 0
backoff_value = self.backoff_factor * (2 ** (self._observed_errors - 1))
return min(self.BACKOFF_MAX, backoff_value)
def sleep(self):
""" Sleep between retry attempts using an exponential backoff.
By default, the backoff factor is 0 and this method will return
immediately.
"""
backoff = self.get_backoff_time()
if backoff <= 0:
return
time.sleep(backoff)
def _is_connection_error(self, err):
""" Errors when we're fairly sure that the server did not receive the
request, so it should be safe to retry.
"""
return isinstance(err, ConnectTimeoutError)
def _is_read_error(self, err):
""" Errors that occur after the request has been started, so we can't
assume that the server did not process any of it.
"""
return isinstance(err, (ReadTimeoutError, ProtocolError))
def is_forced_retry(self, method, status_code):
""" Is this method/response retryable? (Based on method/codes whitelists)
"""
if self.method_whitelist and method.upper() not in self.method_whitelist:
return False
return self.status_forcelist and status_code in self.status_forcelist
def is_exhausted(self):
""" Are we out of retries?
"""
retry_counts = (self.total, self.connect, self.read, self.redirect)
retry_counts = list(filter(None, retry_counts))
if not retry_counts:
return False
return min(retry_counts) < 0
def increment(self, method=None, url=None, response=None, error=None, _pool=None, _stacktrace=None):
""" Return a new Retry object with incremented retry counters.
:param response: A response object, or None, if the server did not
return a response.
:type response: :class:`~urllib3.response.HTTPResponse`
:param Exception error: An error encountered during the request, or
None if the response was received successfully.
:return: A new ``Retry`` object.
"""
if self.total is False and error:
# Disabled, indicate to re-raise the error.
raise six.reraise(type(error), error, _stacktrace)
total = self.total
if total is not None:
total -= 1
_observed_errors = self._observed_errors
connect = self.connect
read = self.read
redirect = self.redirect
if error and self._is_connection_error(error):
# Connect retry?
if connect is False:
raise six.reraise(type(error), error, _stacktrace)
elif connect is not None:
connect -= 1
_observed_errors += 1
elif error and self._is_read_error(error):
# Read retry?
if read is False:
raise six.reraise(type(error), error, _stacktrace)
elif read is not None:
read -= 1
_observed_errors += 1
elif response and response.get_redirect_location():
# Redirect retry?
if redirect is not None:
redirect -= 1
else:
# FIXME: Nothing changed, scenario doesn't make sense.
_observed_errors += 1
new_retry = self.new(
total=total,
connect=connect, read=read, redirect=redirect,
_observed_errors=_observed_errors)
if new_retry.is_exhausted():
raise MaxRetryError(_pool, url, error)
log.debug("Incremented Retry for (url='%s'): %r" % (url, new_retry))
return new_retry
def __repr__(self):
return ('{cls.__name__}(total={self.total}, connect={self.connect}, '
'read={self.read}, redirect={self.redirect})').format(
cls=type(self), self=self)
# For backwards compatibility (equivalent to pre-v1.9):
Retry.DEFAULT = Retry(3)
+2 -3
View File
@@ -34,10 +34,9 @@ def assert_fingerprint(cert, fingerprint):
}
fingerprint = fingerprint.replace(':', '').lower()
digest_length, odd = divmod(len(fingerprint), 2)
digest_length, rest = divmod(len(fingerprint), 2)
if rest or digest_length not in hashfunc_map:
if odd or digest_length not in hashfunc_map:
raise SSLError('Fingerprint is of invalid length.')
# We need encode() here for py32; works on py2 and p33.
+53 -47
View File
@@ -1,32 +1,49 @@
# The default socket timeout, used by httplib to indicate that no timeout was
# specified by the user
from socket import _GLOBAL_DEFAULT_TIMEOUT
import time
from ..exceptions import TimeoutStateError
# A sentinel value to indicate that no timeout was specified by the user in
# urllib3
_Default = object()
def current_time():
"""
Retrieve the current time, this function is mocked out in unit testing.
Retrieve the current time. This function is mocked out in unit testing.
"""
return time.time()
_Default = object()
# The default timeout to use for socket connections. This is the attribute used
# by httplib to define the default timeout
class Timeout(object):
"""
Utility object for storing timeout values.
""" Timeout configuration.
Example usage:
Timeouts can be defined as a default for a pool::
.. code-block:: python
timeout = Timeout(connect=2.0, read=7.0)
http = PoolManager(timeout=timeout)
response = http.request('GET', 'http://example.com/')
timeout = urllib3.util.Timeout(connect=2.0, read=7.0)
pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout)
pool.request(...) # Etc, etc
Or per-request (which overrides the default for the pool)::
response = http.request('GET', 'http://example.com/', timeout=Timeout(10))
Timeouts can be disabled by setting all the parameters to ``None``::
no_timeout = Timeout(connect=None, read=None)
response = http.request('GET', 'http://example.com/, timeout=no_timeout)
:param total:
This combines the connect and read timeouts into one; the read timeout
will be set to the time leftover from the connect attempt. In the
event that both a connect timeout and a total are specified, or a read
timeout and a total are specified, the shorter timeout will be applied.
Defaults to None.
:type total: integer, float, or None
:param connect:
The maximum amount of time to wait for a connection attempt to a server
@@ -47,25 +64,15 @@ class Timeout(object):
:type read: integer, float, or None
:param total:
This combines the connect and read timeouts into one; the read timeout
will be set to the time leftover from the connect attempt. In the
event that both a connect timeout and a total are specified, or a read
timeout and a total are specified, the shorter timeout will be applied.
Defaults to None.
:type total: integer, float, or None
.. note::
Many factors can affect the total amount of time for urllib3 to return
an HTTP response. Specifically, Python's DNS resolver does not obey the
timeout specified on the socket. Other factors that can affect total
request time include high CPU load, high swap, the program running at a
low priority level, or other behaviors. The observed running time for
urllib3 to return a response may be greater than the value passed to
`total`.
an HTTP response.
For example, Python's DNS resolver does not obey the timeout specified
on the socket. Other factors that can affect total request time include
high CPU load, high swap, the program running at a low priority level,
or other behaviors.
In addition, the read and total timeouts only measure the time between
read operations on the socket connecting the client and the server,
@@ -73,8 +80,8 @@ class Timeout(object):
response. For most requests, the timeout is raised because the server
has not sent the first byte in the specified time. This is not always
the case; if a server streams one byte every fifteen seconds, a timeout
of 20 seconds will not ever trigger, even though the request will
take several minutes to complete.
of 20 seconds will not trigger, even though the request will take
several minutes to complete.
If your goal is to cut off any request after a set amount of wall clock
time, consider having a second "watcher" thread to cut off a slow
@@ -94,17 +101,16 @@ class Timeout(object):
return '%s(connect=%r, read=%r, total=%r)' % (
type(self).__name__, self._connect, self._read, self.total)
@classmethod
def _validate_timeout(cls, value, name):
""" Check that a timeout attribute is valid
""" Check that a timeout attribute is valid.
:param value: The timeout value to validate
:param name: The name of the timeout attribute to validate. This is used
for clear error messages
:return: the value
:raises ValueError: if the type is not an integer or a float, or if it
is a numeric value less than zero
:param name: The name of the timeout attribute to validate. This is
used to specify in error messages.
:return: The validated and casted version of the given value.
:raises ValueError: If the type is not an integer or a float, or if it
is a numeric value less than zero.
"""
if value is _Default:
return cls.DEFAULT_TIMEOUT
@@ -123,7 +129,7 @@ class Timeout(object):
raise ValueError("Attempted to set %s timeout to %s, but the "
"timeout cannot be set to a value less "
"than 0." % (name, value))
except TypeError: # Python 3
except TypeError: # Python 3
raise ValueError("Timeout value %s was %s, but it must be an "
"int or float." % (name, value))
@@ -135,12 +141,12 @@ class Timeout(object):
The timeout value used by httplib.py sets the same timeout on the
connect(), and recv() socket requests. This creates a :class:`Timeout`
object that sets the individual timeouts to the ``timeout`` value passed
to this function.
object that sets the individual timeouts to the ``timeout`` value
passed to this function.
:param timeout: The legacy timeout value
:param timeout: The legacy timeout value.
:type timeout: integer, float, sentinel default object, or None
:return: a Timeout object
:return: Timeout object
:rtype: :class:`Timeout`
"""
return Timeout(read=timeout, connect=timeout)
@@ -174,7 +180,7 @@ class Timeout(object):
def get_connect_duration(self):
""" Gets the time elapsed since the call to :meth:`start_connect`.
:return: the elapsed time
:return: Elapsed time.
:rtype: float
:raises urllib3.exceptions.TimeoutStateError: if you attempt
to get duration for a timer that hasn't been started.
@@ -191,7 +197,7 @@ class Timeout(object):
This will be a positive float or integer, the value None
(never timeout), or the default system timeout.
:return: the connect timeout
:return: Connect timeout.
:rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
"""
if self.total is None:
@@ -214,7 +220,7 @@ class Timeout(object):
established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be
raised.
:return: the value to use for the read timeout
:return: Value to use for the read timeout.
:rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
:raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect`
has not yet been called on this object.
@@ -223,7 +229,7 @@ class Timeout(object):
self.total is not self.DEFAULT_TIMEOUT and
self._read is not None and
self._read is not self.DEFAULT_TIMEOUT):
# in case the connect timeout has not yet been established.
# In case the connect timeout has not yet been established.
if self._start_connect is None:
return self._read
return max(0, min(self.total - self.get_connect_duration(),
+14 -5
View File
@@ -3,15 +3,20 @@ from collections import namedtuple
from ..exceptions import LocationParseError
class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])):
url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']
class Url(namedtuple('Url', url_attrs)):
"""
Datastructure for representing an HTTP URL. Used as a return value for
:func:`parse_url`.
"""
slots = ()
def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None):
return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment)
def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None,
query=None, fragment=None):
return super(Url, cls).__new__(cls, scheme, auth, host, port, path,
query, fragment)
@property
def hostname(self):
@@ -43,7 +48,7 @@ def split_first(s, delims):
If not found, then the first part is the full input string.
Example: ::
Example::
>>> split_first('foo/bar?baz', '?/=')
('foo', 'bar?baz', '/')
@@ -76,7 +81,7 @@ def parse_url(url):
Partly backwards-compatible with :mod:`urlparse`.
Example: ::
Example::
>>> parse_url('http://google.com/mail/')
Url(scheme='http', host='google.com', port=None, path='/', ...)
@@ -91,6 +96,10 @@ def parse_url(url):
# Additionally, this implementations does silly things to be optimal
# on CPython.
if not url:
# Empty
return Url()
scheme = None
auth = None
host = None