merge 2.0 insto master

This commit is contained in:
Kenneth Reitz
2013-09-24 14:13:28 -04:00
22 changed files with 1186 additions and 205 deletions
+1
View File
@@ -135,3 +135,4 @@ Patches and Suggestions
- David Pursehouse <david.pursehouse@gmail.com> @dpursehouse
- Jon Parise
- Alexander Karpinsky @homm86
- Marc Schlaich @schlamar
+44
View File
@@ -3,6 +3,50 @@
History
-------
2.0.0 (2013-XX-XX)
++++++++++++++++++
**API Changes:**
- Keys in the Headers dictionary are now native strings on all Python versions,
i.e. bytestrings on Python 2, unicode on Python 3.
- Proxy URLs now *must* have an explicit scheme. A ``MissingSchema`` exception
will be raised if they don't.
- ``RequestException`` is now a subclass of ``IOError``, not ``RuntimeError``.
- Added new method to ``PreparedRequest`` objects: ``PreparedRequest.copy()``.
- Added new method to ``Session`` objects: ``Session.update_request()``. This
method updates a ``Request`` object with the data (e.g. cookies) stored on
the ``Session``.
- Added new method to ``Session`` objects: ``Session.prepare_request()``. This
method updates and prepares a ``Request`` object, and returns the
corresponding ``PreparedRequest`` object.
- Added new method to ``HTTPAdapter`` objects: ``HTTPAdapter.proxy_headers()``.
This should not be called directly, but improves the subclass interface.
- ``httplib.IncompleteRead`` exceptions caused by incorrect chunked encoding
will now raise a Requests ``ChunkedEncodingError`` instead.
- Invalid percent-escape sequences now cause a Requests ``InvalidURL``
exception to be raised.
- HTTP 208 no longer uses reason phrase ``"im_used"``. Correctly uses
``"already_reported"``.
- HTTP 226 reason added (``"im_used"``).
**Bugfixes:**
- Vastly improved proxy support, including the CONNECT verb. Special thanks to
the many contributors who worked towards this improvement.
- Chunked encoding fixes.
- Support for mixed case schemes.
- Retrieve environment proxies from more locations.
- Minor cookies fixes.
- Improved streaming behaviour, particularly for compressed data.
- Miscellaneous small Python 3 text encoding bugs.
- ``.netrc`` no longer overrides explicit auth.
- Cookies set by hooks are now correctly persisted on Sessions.
- Fix problem with cookies that specify port numbers in their host field.
- ``BytesIO`` can be used to perform streaming uploads.
- More generous parsing of the ``no_proxy`` environment variable.
- Non-string objects can be passed in data values alongside files.
1.2.3 (2013-05-25)
++++++++++++++++++
+1
View File
@@ -54,6 +54,7 @@ Features
- Multipart File Uploads
- Connection Timeouts
- Thread-safety
- HTTP(S) proxy support
Installation
+1 -1
View File
@@ -38,7 +38,7 @@ Requests takes all of the work out of Python HTTP/1.1 — making your integrati
Testimonials
------------
Her Majesty's Government, Amazon, Google, Runscope, Twilio, Mozilla, Heroku, PayPal, NPR, Obama for America, Transifex, Native Instruments, The Washington Post, Twitter, SoundCloud, Kippt, Readability, and Federal US Institutions use Requests internally. It has been downloaded over 3,000,000 times from PyPI.
Her Majesty's Government, Amazon, Google, Twilio, Runscope, Mozilla, Heroku, PayPal, NPR, Obama for America, Transifex, Native Instruments, The Washington Post, Twitter, SoundCloud, Kippt, Readability, and Federal US Institutions use Requests internally. It has been downloaded over 5,000,000 times from PyPI.
**Armin Ronacher**
Requests is the perfect example how beautiful an API can be with the
+1 -1
View File
@@ -50,7 +50,7 @@ __copyright__ = 'Copyright 2013 Kenneth Reitz'
# Attempt to enable urllib3's SNI support, if possible
try:
from requests.packages.urllib3.contrib import pyopenssl
from .packages.urllib3.contrib import pyopenssl
pyopenssl.inject_into_urllib3()
except ImportError:
pass
+40 -13
View File
@@ -11,11 +11,12 @@ and maintain connections.
import socket
from .models import Response
from .packages.urllib3.poolmanager import PoolManager, ProxyManager
from .packages.urllib3.poolmanager import PoolManager, proxy_from_url
from .packages.urllib3.response import HTTPResponse
from .packages.urllib3.util import Timeout
from .compat import urlparse, basestring, urldefrag, unquote
from .utils import (DEFAULT_CA_BUNDLE_PATH, get_encoding_from_headers,
prepend_scheme_if_needed, get_auth_from_url)
except_on_missing_scheme, get_auth_from_url)
from .structures import CaseInsensitiveDict
from .packages.urllib3.exceptions import MaxRetryError
from .packages.urllib3.exceptions import TimeoutError
@@ -71,6 +72,7 @@ class HTTPAdapter(BaseAdapter):
pool_block=DEFAULT_POOLBLOCK):
self.max_retries = max_retries
self.config = {}
self.proxy_manager = {}
super(HTTPAdapter, self).__init__()
@@ -193,8 +195,15 @@ class HTTPAdapter(BaseAdapter):
proxy = proxies.get(urlparse(url.lower()).scheme)
if proxy:
proxy = prepend_scheme_if_needed(proxy, urlparse(url.lower()).scheme)
conn = ProxyManager(self.poolmanager.connection_from_url(proxy))
except_on_missing_scheme(proxy)
proxy_headers = self.proxy_headers(proxy)
if not proxy in self.proxy_manager:
self.proxy_manager[proxy] = proxy_from_url(
proxy,
proxy_headers=proxy_headers)
conn = self.proxy_manager[proxy].connection_from_url(url)
else:
conn = self.poolmanager.connection_from_url(url.lower())
@@ -232,8 +241,9 @@ class HTTPAdapter(BaseAdapter):
return url
def add_headers(self, request, **kwargs):
"""Add any headers needed by the connection. Currently this adds a
Proxy-Authorization header.
"""Add any headers needed by the connection. As of v2.0 this does
nothing by default, but is left for overriding by users that subclass
the :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.
This should not be called from user code, and is only exposed for use
when subclassing the
@@ -242,12 +252,22 @@ class HTTPAdapter(BaseAdapter):
:param request: The :class:`PreparedRequest <PreparedRequest>` to add headers to.
:param kwargs: The keyword arguments from the call to send().
"""
proxies = kwargs.get('proxies', {})
pass
if proxies is None:
proxies = {}
def proxy_headers(self, proxy):
"""Returns a dictionary of the headers to add to any request sent
through a proxy. This works with urllib3 magic to ensure that they are
correctly sent to the proxy, rather than in a tunnelled request if
CONNECT is being used.
proxy = proxies.get(urlparse(request.url).scheme)
This should not be called from user code, and is only exposed for use
when subclassing the
:class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.
:param proxies: The url of the proxy being used for this request.
:param kwargs: Optional additional keyword arguments.
"""
headers = {}
username, password = get_auth_from_url(proxy)
if username and password:
@@ -255,8 +275,10 @@ class HTTPAdapter(BaseAdapter):
# to decode them.
username = unquote(username)
password = unquote(password)
request.headers['Proxy-Authorization'] = _basic_auth_str(username,
password)
headers['Proxy-Authorization'] = _basic_auth_str(username,
password)
return headers
def send(self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None):
"""Sends PreparedRequest object. Returns Response object.
@@ -273,10 +295,15 @@ class HTTPAdapter(BaseAdapter):
self.cert_verify(conn, request.url, verify, cert)
url = self.request_url(request, proxies)
self.add_headers(request, proxies=proxies)
self.add_headers(request)
chunked = not (request.body is None or 'Content-Length' in request.headers)
if stream:
timeout = Timeout(connect=timeout)
else:
timeout = Timeout(connect=timeout, read=timeout)
try:
if not chunked:
resp = conn.urlopen(
+9 -6
View File
@@ -18,7 +18,6 @@ from base64 import b64encode
from .compat import urlparse, str
from .utils import parse_dict_header
log = logging.getLogger(__name__)
CONTENT_TYPE_FORM_URLENCODED = 'application/x-www-form-urlencoded'
@@ -106,7 +105,9 @@ class HTTPDigestAuth(AuthBase):
A1 = '%s:%s:%s' % (self.username, realm, self.password)
A2 = '%s:%s' % (method, path)
if qop == 'auth':
if qop is None:
respdig = KD(hash_utf8(A1), "%s:%s" % (nonce, hash_utf8(A2)))
elif qop == 'auth' or 'auth' in qop.split(','):
if nonce == self.last_nonce:
self.nonce_count += 1
else:
@@ -121,8 +122,6 @@ class HTTPDigestAuth(AuthBase):
cnonce = (hashlib.sha1(s).hexdigest()[:16])
noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, hash_utf8(A2))
respdig = KD(hash_utf8(A1), noncebit)
elif qop is None:
respdig = KD(hash_utf8(A1), "%s:%s" % (nonce, hash_utf8(A2)))
else:
# XXX handle auth-int.
return None
@@ -159,10 +158,14 @@ class HTTPDigestAuth(AuthBase):
# to allow our new request to reuse the same one.
r.content
r.raw.release_conn()
prep = r.request.copy()
prep.prepare_cookies(r.cookies)
r.request.headers['Authorization'] = self.build_digest_header(r.request.method, r.request.url)
_r = r.connection.send(r.request, **kwargs)
prep.headers['Authorization'] = self.build_digest_header(
prep.method, prep.url)
_r = r.connection.send(prep, **kwargs)
_r.history.append(r)
_r.request = prep
return _r
+1 -1
View File
@@ -9,7 +9,7 @@ This module contains the set of Requests' exceptions.
"""
class RequestException(RuntimeError):
class RequestException(IOError):
"""There was an ambiguous exception that occurred while handling your
request."""
+13 -7
View File
@@ -11,7 +11,7 @@ import collections
import logging
import datetime
from io import BytesIO
from io import BytesIO, UnsupportedOperation
from .hooks import default_hooks
from .structures import CaseInsensitiveDict
@@ -25,7 +25,7 @@ from .exceptions import (
from .utils import (
guess_filename, get_auth_from_url, requote_uri,
stream_decode_response_unicode, to_key_val_list, parse_header_links,
iter_slices, guess_json_utf, super_len)
iter_slices, guess_json_utf, super_len, to_native_string)
from .compat import (
cookielib, urlunparse, urlsplit, urlencode, str, bytes, StringIO,
is_py2, chardet, json, builtin_str, basestring, IncompleteRead)
@@ -108,6 +108,10 @@ class RequestEncodingMixin(object):
val = [val]
for v in val:
if v is not None:
# Don't call str() on bytestrings: in Py3 it all goes wrong.
if not isinstance(v, bytes):
v = str(v)
new_fields.append(
(field.decode('utf-8') if isinstance(field, bytes) else field,
v.encode('utf-8') if isinstance(v, str) else v))
@@ -143,6 +147,9 @@ class RequestHooksMixin(object):
def register_hook(self, event, hook):
"""Properly register a hook."""
if event not in self.hooks:
raise ValueError('Unsupported event specified, with event name "%s"' % (event))
if isinstance(hook, collections.Callable):
self.hooks[event].append(hook)
elif hasattr(hook, '__iter__'):
@@ -188,8 +195,8 @@ class Request(RequestHooksMixin):
url=None,
headers=None,
files=None,
data=dict(),
params=dict(),
data=None,
params=None,
auth=None,
cookies=None,
hooks=None):
@@ -363,8 +370,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin):
"""Prepares the given HTTP headers."""
if headers:
headers = dict((name.encode('ascii'), value) for name, value in headers.items())
self.headers = CaseInsensitiveDict(headers)
self.headers = CaseInsensitiveDict((to_native_string(name), value) for name, value in headers.items())
else:
self.headers = CaseInsensitiveDict()
@@ -388,7 +394,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin):
try:
length = super_len(data)
except (TypeError, AttributeError):
except (TypeError, AttributeError, UnsupportedOperation):
length = None
if is_stream:
+1 -1
View File
@@ -23,7 +23,7 @@ from . import exceptions
from .filepost import encode_multipart_formdata
from .poolmanager import PoolManager, ProxyManager, proxy_from_url
from .response import HTTPResponse
from .util import make_headers, get_host
from .util import make_headers, get_host, Timeout
# Set default logging handler to avoid "No handler found" warnings.
+209 -62
View File
@@ -4,12 +4,11 @@
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
import logging
import socket
import errno
import logging
from socket import error as SocketError, timeout as SocketTimeout
from .util import resolve_cert_reqs, resolve_ssl_version, assert_fingerprint
import socket
try: # Python 3
from http.client import HTTPConnection, HTTPException
@@ -22,6 +21,7 @@ try: # Python 3
from queue import LifoQueue, Empty, Full
except ImportError:
from Queue import LifoQueue, Empty, Full
import Queue as _ # Platform-specific: Windows
try: # Compiled with SSL?
@@ -44,21 +44,29 @@ except (ImportError, AttributeError): # Platform-specific: No SSL.
pass
from .request import RequestMethods
from .response import HTTPResponse
from .util import get_host, is_connection_dropped, ssl_wrap_socket
from .exceptions import (
ClosedPoolError,
ConnectTimeoutError,
EmptyPoolError,
HostChangedError,
MaxRetryError,
SSLError,
TimeoutError,
ReadTimeoutError,
ProxyError,
)
from .packages.ssl_match_hostname import match_hostname, CertificateError
from .packages.ssl_match_hostname import CertificateError, match_hostname
from .packages import six
from .request import RequestMethods
from .response import HTTPResponse
from .util import (
assert_fingerprint,
get_host,
is_connection_dropped,
resolve_cert_reqs,
resolve_ssl_version,
ssl_wrap_socket,
Timeout,
)
xrange = six.moves.xrange
@@ -96,11 +104,24 @@ class VerifiedHTTPSConnection(HTTPSConnection):
def connect(self):
# Add certificate verification
sock = socket.create_connection((self.host, self.port), self.timeout)
try:
sock = socket.create_connection(
address=(self.host, self.port),
timeout=self.timeout)
except SocketTimeout:
raise ConnectTimeoutError(
self, "Connection to %s timed out. (connect timeout=%s)" %
(self.host, self.timeout))
resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs)
resolved_ssl_version = resolve_ssl_version(self.ssl_version)
if self._tunnel_host:
self.sock = sock
# Calls self._set_hostport(), so self.host is
# self._tunnel_host below.
self._tunnel()
# Wrap socket using verification with the root certs in
# trusted_root_certs
self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file,
@@ -117,6 +138,7 @@ class VerifiedHTTPSConnection(HTTPSConnection):
match_hostname(self.sock.getpeercert(),
self.assert_hostname or self.host)
## Pool objects
class ConnectionPool(object):
@@ -129,6 +151,9 @@ class ConnectionPool(object):
QueueCls = LifoQueue
def __init__(self, host, port=None):
# httplib doesn't like it when we include brackets in ipv6 addresses
host = host.strip('[]')
self.host = host
self.port = port
@@ -136,6 +161,8 @@ class ConnectionPool(object):
return '%s(host=%r, port=%r)' % (type(self).__name__,
self.host, self.port)
# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252
_blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK])
class HTTPConnectionPool(ConnectionPool, RequestMethods):
"""
@@ -154,9 +181,15 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
as a valid HTTP/1.0 or 1.1 status line, passed into
:class:`httplib.HTTPConnection`.
.. note::
Only works in Python 2. This parameter is ignored in Python 3.
:param timeout:
Socket timeout in seconds for each individual connection, can be
a float. None disables timeout.
Socket timeout in seconds for each individual connection. This can
be a float or integer, which sets the timeout for the HTTP request,
or an instance of :class:`urllib3.util.Timeout` which gives you more
fine-grained control over request timeouts. After the constructor has
been parsed, this is always a `urllib3.util.Timeout` object.
:param maxsize:
Number of connections to save that can be reused. More than 1 is useful
@@ -174,20 +207,39 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
:param headers:
Headers to include with all requests, unless other headers are given
explicitly.
:param _proxy:
Parsed proxy URL, should not be used directly, instead, see
:class:`urllib3.connectionpool.ProxyManager`"
:param _proxy_headers:
A dictionary with proxy headers, should not be used directly,
instead, see :class:`urllib3.connectionpool.ProxyManager`"
"""
scheme = 'http'
def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1,
block=False, headers=None):
def __init__(self, host, port=None, strict=False,
timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False,
headers=None, _proxy=None, _proxy_headers=None):
ConnectionPool.__init__(self, host, port)
RequestMethods.__init__(self, headers)
self.strict = strict
# This is for backwards compatibility and can be removed once a timeout
# can only be set to a Timeout object
if not isinstance(timeout, Timeout):
timeout = Timeout.from_float(timeout)
self.timeout = timeout
self.pool = self.QueueCls(maxsize)
self.block = block
self.proxy = _proxy
self.proxy_headers = _proxy_headers or {}
# Fill the queue up so that doing get() on it will block properly
for _ in xrange(maxsize):
self.pool.put(None)
@@ -203,9 +255,14 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
self.num_connections += 1
log.info("Starting new HTTP connection (%d): %s" %
(self.num_connections, self.host))
return HTTPConnection(host=self.host,
port=self.port,
strict=self.strict)
extra_params = {}
if not six.PY3: # Python 2
extra_params['strict'] = self.strict
return HTTPConnection(host=self.host, port=self.port,
timeout=self.timeout.connect_timeout,
**extra_params)
def _get_conn(self, timeout=None):
"""
@@ -266,31 +323,89 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
% self.host)
# Connection never got put back into the pool, close it.
conn.close()
if conn:
conn.close()
def _get_timeout(self, timeout):
""" Helper that always returns a :class:`urllib3.util.Timeout` """
if timeout is _Default:
return self.timeout.clone()
if isinstance(timeout, Timeout):
return timeout.clone()
else:
# User passed us an int/float. This is for backwards compatibility,
# can be removed later
return Timeout.from_float(timeout)
def _make_request(self, conn, method, url, timeout=_Default,
**httplib_request_kw):
"""
Perform a request on a given httplib connection object taken from our
pool.
:param conn:
a connection from one of our connection pools
:param timeout:
Socket timeout in seconds for the request. This can be a
float or integer, which will set the same timeout value for
the socket connect and the socket read, or an instance of
:class:`urllib3.util.Timeout`, which gives you more fine-grained
control over your timeouts.
"""
self.num_requests += 1
if timeout is _Default:
timeout = self.timeout
timeout_obj = self._get_timeout(timeout)
conn.timeout = timeout # This only does anything in Py26+
conn.request(method, url, **httplib_request_kw)
try:
timeout_obj.start_connect()
conn.timeout = timeout_obj.connect_timeout
# conn.request() calls httplib.*.request, not the method in
# request.py. It also calls makefile (recv) on the socket
conn.request(method, url, **httplib_request_kw)
except SocketTimeout:
raise ConnectTimeoutError(
self, "Connection to %s timed out. (connect timeout=%s)" %
(self.host, timeout_obj.connect_timeout))
# Set timeout
sock = getattr(conn, 'sock', False) # AppEngine doesn't have sock attr.
if sock:
sock.settimeout(timeout)
# Reset the timeout for the recv() on the socket
read_timeout = timeout_obj.read_timeout
log.debug("Setting read timeout to %s" % read_timeout)
# App Engine doesn't have a sock attr
if hasattr(conn, 'sock') and \
read_timeout is not None and \
read_timeout is not Timeout.DEFAULT_TIMEOUT:
# In Python 3 socket.py will catch EAGAIN and return None when you
# try and read into the file pointer created by http.client, which
# instead raises a BadStatusLine exception. Instead of catching
# the exception and assuming all BadStatusLine exceptions are read
# timeouts, check for a zero timeout before making the request.
if read_timeout == 0:
raise ReadTimeoutError(
self, url,
"Read timed out. (read timeout=%s)" % read_timeout)
conn.sock.settimeout(read_timeout)
# Receive the response from the server
try:
try: # Python 2.7+, use buffering of HTTP responses
httplib_response = conn.getresponse(buffering=True)
except TypeError: # Python 2.6 and older
httplib_response = conn.getresponse()
except SocketTimeout:
raise ReadTimeoutError(
self, url, "Read timed out. (read timeout=%s)" % read_timeout)
except SocketError as e: # Platform-specific: Python 2
# See the above comment about EAGAIN in Python 3. In Python 2 we
# have to specifically catch it and throw the timeout error
if e.errno in _blocking_errnos:
raise ReadTimeoutError(
self, url,
"Read timed out. (read timeout=%s)" % read_timeout)
raise
try: # Python 2.7+, use buffering of HTTP responses
httplib_response = conn.getresponse(buffering=True)
except TypeError: # Python 2.6 and older
httplib_response = conn.getresponse()
# AppEngine doesn't have a version attr.
http_version = getattr(conn, '_http_vsn_str', 'HTTP/?')
@@ -370,7 +485,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
:param redirect:
If True, automatically handle redirects (status codes 301, 302,
303, 307). Each redirect counts as a retry.
303, 307, 308). Each redirect counts as a retry.
:param assert_same_host:
If ``True``, will make sure that the host of the pool requests is
@@ -378,8 +493,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
use the pool on an HTTP proxy and request foreign hosts.
:param timeout:
If specified, overrides the default timeout for this one request.
It may be a float (in seconds).
If specified, overrides the default timeout for this one
request. It may be a float (in seconds) or an instance of
:class:`urllib3.util.Timeout`.
:param pool_timeout:
If set and the pool is set to block=True, then this method will
@@ -406,9 +522,6 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
if retries < 0:
raise MaxRetryError(self, url)
if timeout is _Default:
timeout = self.timeout
if release_conn is None:
release_conn = response_kw.get('preload_content', True)
@@ -444,20 +557,20 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
# ``response.release_conn()`` is called (implicitly by
# ``response.read()``)
except Empty as e:
except Empty:
# Timed out by queue
raise TimeoutError(self, url,
"Request timed out. (pool_timeout=%s)" %
pool_timeout)
raise ReadTimeoutError(
self, url, "Read timed out, no pool connections are available.")
except SocketTimeout as e:
except SocketTimeout:
# Timed out by socket
raise TimeoutError(self, url,
"Request timed out. (timeout=%s)" %
timeout)
raise ReadTimeoutError(self, url, "Read timed out.")
except BaseSSLError as e:
# SSL certificate error
if 'timed out' in str(e) or \
'did not complete (read)' in str(e): # Platform-specific: Python 2.6
raise ReadTimeoutError(self, url, "Read timed out.")
raise SSLError(e)
except CertificateError as e:
@@ -465,6 +578,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
raise SSLError(e)
except (HTTPException, SocketError) as e:
if isinstance(e, SocketError) and self.proxy is not None:
raise ProxyError('Cannot connect to proxy. '
'Socket error: %s.' % e)
# Connection broken, discard. It will be replaced next _get_conn().
conn = None
# This is necessary so we can access e below
@@ -526,13 +643,13 @@ class HTTPSConnectionPool(HTTPConnectionPool):
def __init__(self, host, port=None,
strict=False, timeout=None, maxsize=1,
block=False, headers=None,
_proxy=None, _proxy_headers=None,
key_file=None, cert_file=None, cert_reqs=None,
ca_certs=None, ssl_version=None,
assert_hostname=None, assert_fingerprint=None):
HTTPConnectionPool.__init__(self, host, port,
strict, timeout, maxsize,
block, headers)
HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize,
block, headers, _proxy, _proxy_headers)
self.key_file = key_file
self.cert_file = cert_file
self.cert_reqs = cert_reqs
@@ -541,6 +658,34 @@ class HTTPSConnectionPool(HTTPConnectionPool):
self.assert_hostname = assert_hostname
self.assert_fingerprint = assert_fingerprint
def _prepare_conn(self, connection):
"""
Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket`
and establish the tunnel if proxy is used.
"""
if isinstance(connection, VerifiedHTTPSConnection):
connection.set_cert(key_file=self.key_file,
cert_file=self.cert_file,
cert_reqs=self.cert_reqs,
ca_certs=self.ca_certs,
assert_hostname=self.assert_hostname,
assert_fingerprint=self.assert_fingerprint)
connection.ssl_version = self.ssl_version
if self.proxy is not None:
# Python 2.7+
try:
set_tunnel = connection.set_tunnel
except AttributeError: # Platform-specific: Python 2.6
set_tunnel = connection._set_tunnel
set_tunnel(self.host, self.port, self.proxy_headers)
# Establish tunnel connection early, because otherwise httplib
# would improperly set Host: header to proxy's IP:port.
connection.connect()
return connection
def _new_conn(self):
"""
Return a fresh :class:`httplib.HTTPSConnection`.
@@ -549,26 +694,28 @@ class HTTPSConnectionPool(HTTPConnectionPool):
log.info("Starting new HTTPS connection (%d): %s"
% (self.num_connections, self.host))
actual_host = self.host
actual_port = self.port
if self.proxy is not None:
actual_host = self.proxy.host
actual_port = self.proxy.port
if not ssl: # Platform-specific: Python compiled without +ssl
if not HTTPSConnection or HTTPSConnection is object:
raise SSLError("Can't connect to HTTPS URL because the SSL "
"module is not available.")
connection_class = HTTPSConnection
else:
connection_class = VerifiedHTTPSConnection
return HTTPSConnection(host=self.host,
port=self.port,
strict=self.strict)
extra_params = {}
if not six.PY3: # Python 2
extra_params['strict'] = self.strict
connection = connection_class(host=actual_host, port=actual_port,
timeout=self.timeout.connect_timeout,
**extra_params)
connection = VerifiedHTTPSConnection(host=self.host,
port=self.port,
strict=self.strict)
connection.set_cert(key_file=self.key_file, cert_file=self.cert_file,
cert_reqs=self.cert_reqs, ca_certs=self.ca_certs,
assert_hostname=self.assert_hostname,
assert_fingerprint=self.assert_fingerprint)
connection.ssl_version = self.ssl_version
return connection
return self._prepare_conn(connection)
def connection_from_url(url, **kw):
+179 -8
View File
@@ -20,13 +20,13 @@ Now you can use :mod:`urllib3` as you normally would, and it will support SNI
when the required modules are installed.
'''
from ndg.httpsclient.ssl_peer_verification import (ServerSSLCertVerification,
SUBJ_ALT_NAME_SUPPORT)
from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT
from ndg.httpsclient.subj_alt_name import SubjectAltName
import OpenSSL.SSL
from pyasn1.codec.der import decoder as der_decoder
from socket import _fileobject
import ssl
from cStringIO import StringIO
from .. import connectionpool
from .. import util
@@ -99,6 +99,172 @@ def get_subj_alt_name(peer_cert):
return dns_name
class fileobject(_fileobject):
def read(self, size=-1):
# Use max, disallow tiny reads in a loop as they are very inefficient.
# We never leave read() with any leftover data from a new recv() call
# in our internal buffer.
rbufsize = max(self._rbufsize, self.default_bufsize)
# Our use of StringIO rather than lists of string objects returned by
# recv() minimizes memory usage and fragmentation that occurs when
# rbufsize is large compared to the typical return value of recv().
buf = self._rbuf
buf.seek(0, 2) # seek end
if size < 0:
# Read until EOF
self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
while True:
try:
data = self._sock.recv(rbufsize)
except OpenSSL.SSL.WantReadError:
continue
if not data:
break
buf.write(data)
return buf.getvalue()
else:
# Read until size bytes or EOF seen, whichever comes first
buf_len = buf.tell()
if buf_len >= size:
# Already have size bytes in our buffer? Extract and return.
buf.seek(0)
rv = buf.read(size)
self._rbuf = StringIO()
self._rbuf.write(buf.read())
return rv
self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
while True:
left = size - buf_len
# recv() will malloc the amount of memory given as its
# parameter even though it often returns much less data
# than that. The returned data string is short lived
# as we copy it into a StringIO and free it. This avoids
# fragmentation issues on many platforms.
try:
data = self._sock.recv(left)
except OpenSSL.SSL.WantReadError:
continue
if not data:
break
n = len(data)
if n == size and not buf_len:
# Shortcut. Avoid buffer data copies when:
# - We have no data in our buffer.
# AND
# - Our call to recv returned exactly the
# number of bytes we were asked to read.
return data
if n == left:
buf.write(data)
del data # explicit free
break
assert n <= left, "recv(%d) returned %d bytes" % (left, n)
buf.write(data)
buf_len += n
del data # explicit free
#assert buf_len == buf.tell()
return buf.getvalue()
def readline(self, size=-1):
buf = self._rbuf
buf.seek(0, 2) # seek end
if buf.tell() > 0:
# check if we already have it in our buffer
buf.seek(0)
bline = buf.readline(size)
if bline.endswith('\n') or len(bline) == size:
self._rbuf = StringIO()
self._rbuf.write(buf.read())
return bline
del bline
if size < 0:
# Read until \n or EOF, whichever comes first
if self._rbufsize <= 1:
# Speed up unbuffered case
buf.seek(0)
buffers = [buf.read()]
self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
data = None
recv = self._sock.recv
while True:
try:
while data != "\n":
data = recv(1)
if not data:
break
buffers.append(data)
except OpenSSL.SSL.WantReadError:
continue
break
return "".join(buffers)
buf.seek(0, 2) # seek end
self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
while True:
try:
data = self._sock.recv(self._rbufsize)
except OpenSSL.SSL.WantReadError:
continue
if not data:
break
nl = data.find('\n')
if nl >= 0:
nl += 1
buf.write(data[:nl])
self._rbuf.write(data[nl:])
del data
break
buf.write(data)
return buf.getvalue()
else:
# Read until size bytes or \n or EOF seen, whichever comes first
buf.seek(0, 2) # seek end
buf_len = buf.tell()
if buf_len >= size:
buf.seek(0)
rv = buf.read(size)
self._rbuf = StringIO()
self._rbuf.write(buf.read())
return rv
self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
while True:
try:
data = self._sock.recv(self._rbufsize)
except OpenSSL.SSL.WantReadError:
continue
if not data:
break
left = size - buf_len
# did we just receive a newline?
nl = data.find('\n', 0, left)
if nl >= 0:
nl += 1
# save the excess data to _rbuf
self._rbuf.write(data[nl:])
if buf_len:
buf.write(data[:nl])
break
else:
# Shortcut. Avoid data copy through buf when returning
# a substring of our first recv().
return data[:nl]
n = len(data)
if n == size and not buf_len:
# Shortcut. Avoid data copy through buf when
# returning exactly all of our first recv().
return data
if n >= left:
buf.write(data[:left])
self._rbuf.write(data[left:])
break
buf.write(data)
buf_len += n
#assert buf_len == buf.tell()
return buf.getvalue()
class WrappedSocket(object):
'''API-compatibility wrapper for Python OpenSSL's Connection-class.'''
@@ -110,7 +276,7 @@ class WrappedSocket(object):
return self.socket.fileno()
def makefile(self, mode, bufsize=-1):
return _fileobject(self.connection, mode, bufsize)
return fileobject(self.connection, mode, bufsize)
def settimeout(self, timeout):
return self.socket.settimeout(timeout)
@@ -123,8 +289,9 @@ class WrappedSocket(object):
def getpeercert(self, binary_form=False):
x509 = self.connection.get_peer_certificate()
if not x509:
raise ssl.SSLError('')
return x509
if binary_form:
return OpenSSL.crypto.dump_certificate(
@@ -165,9 +332,13 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
cnx = OpenSSL.SSL.Connection(ctx, sock)
cnx.set_tlsext_host_name(server_hostname)
cnx.set_connect_state()
try:
cnx.do_handshake()
except OpenSSL.SSL.Error as e:
raise ssl.SSLError('bad handshake', e)
while True:
try:
cnx.do_handshake()
except OpenSSL.SSL.WantReadError:
continue
except OpenSSL.SSL.Error as e:
raise ssl.SSLError('bad handshake', e)
break
return WrappedSocket(cnx, sock)
+28 -2
View File
@@ -39,6 +39,11 @@ class SSLError(HTTPError):
pass
class ProxyError(HTTPError):
"Raised when the connection to a proxy fails."
pass
class DecodeError(HTTPError):
"Raised when automatic decoding based on Content-Type fails."
pass
@@ -70,8 +75,29 @@ class HostChangedError(RequestError):
self.retries = retries
class TimeoutError(RequestError):
"Raised when a socket timeout occurs."
class TimeoutStateError(HTTPError):
""" Raised when passing an invalid state to a timeout """
pass
class TimeoutError(HTTPError):
""" Raised when a socket timeout error occurs.
Catching this error will catch both :exc:`ReadTimeoutErrors
<ReadTimeoutError>` and :exc:`ConnectTimeoutErrors <ConnectTimeoutError>`.
"""
pass
class ReadTimeoutError(TimeoutError, RequestError):
"Raised when a socket timeout occurs while receiving data from a server"
pass
# This timeout error does not have a URL attached and needs to inherit from the
# base HTTPError
class ConnectTimeoutError(TimeoutError):
"Raised when a socket timeout occurs while connecting to a server"
pass
+177
View File
@@ -0,0 +1,177 @@
# urllib3/fields.py
# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
import email.utils
import mimetypes
from .packages import six
def guess_content_type(filename, default='application/octet-stream'):
"""
Guess the "Content-Type" of a file.
:param filename:
The filename to guess the "Content-Type" of using :mod:`mimetimes`.
:param default:
If no "Content-Type" can be guessed, default to `default`.
"""
if filename:
return mimetypes.guess_type(filename)[0] or default
return default
def format_header_param(name, value):
"""
Helper function to format and quote a single header parameter.
Particularly useful for header parameters which might contain
non-ASCII values, like file names. This follows RFC 2231, as
suggested by RFC 2388 Section 4.4.
:param name:
The name of the parameter, a string expected to be ASCII only.
:param value:
The value of the parameter, provided as a unicode string.
"""
if not any(ch in value for ch in '"\\\r\n'):
result = '%s="%s"' % (name, value)
try:
result.encode('ascii')
except UnicodeEncodeError:
pass
else:
return result
if not six.PY3: # Python 2:
value = value.encode('utf-8')
value = email.utils.encode_rfc2231(value, 'utf-8')
value = '%s*=%s' % (name, value)
return value
class RequestField(object):
"""
A data container for request body parameters.
:param name:
The name of this request field.
:param data:
The data/value body.
:param filename:
An optional filename of the request field.
:param headers:
An optional dict-like object of headers to initially use for the field.
"""
def __init__(self, name, data, filename=None, headers=None):
self._name = name
self._filename = filename
self.data = data
self.headers = {}
if headers:
self.headers = dict(headers)
@classmethod
def from_tuples(cls, fieldname, value):
"""
A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters.
Supports constructing :class:`~urllib3.fields.RequestField` from parameter
of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type)
tuple where the MIME type is optional. For example: ::
'foo': 'bar',
'fakefile': ('foofile.txt', 'contents of foofile'),
'realfile': ('barfile.txt', open('realfile').read()),
'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'),
'nonamefile': 'contents of nonamefile field',
Field names and filenames must be unicode.
"""
if isinstance(value, tuple):
if len(value) == 3:
filename, data, content_type = value
else:
filename, data = value
content_type = guess_content_type(filename)
else:
filename = None
content_type = None
data = value
request_param = cls(fieldname, data, filename=filename)
request_param.make_multipart(content_type=content_type)
return request_param
def _render_part(self, name, value):
"""
Overridable helper function to format a single header parameter.
:param name:
The name of the parameter, a string expected to be ASCII only.
:param value:
The value of the parameter, provided as a unicode string.
"""
return format_header_param(name, value)
def _render_parts(self, header_parts):
"""
Helper function to format and quote a single header.
Useful for single headers that are composed of multiple items. E.g.,
'Content-Disposition' fields.
:param header_parts:
A sequence of (k, v) typles or a :class:`dict` of (k, v) to format as
`k1="v1"; k2="v2"; ...`.
"""
parts = []
iterable = header_parts
if isinstance(header_parts, dict):
iterable = header_parts.items()
for name, value in iterable:
if value:
parts.append(self._render_part(name, value))
return '; '.join(parts)
def render_headers(self):
"""
Renders the headers for this request field.
"""
lines = []
sort_keys = ['Content-Disposition', 'Content-Type', 'Content-Location']
for sort_key in sort_keys:
if self.headers.get(sort_key, False):
lines.append('%s: %s' % (sort_key, self.headers[sort_key]))
for header_name, header_value in self.headers.items():
if header_name not in sort_keys:
if header_value:
lines.append('%s: %s' % (header_name, header_value))
lines.append('\r\n')
return '\r\n'.join(lines)
def make_multipart(self, content_disposition=None, content_type=None, content_location=None):
"""
Makes this request field into a multipart request field.
This method overrides "Content-Disposition", "Content-Type" and
"Content-Location" headers to the request parameter.
:param content_type:
The 'Content-Type' of the request body.
:param content_location:
The 'Content-Location' of the request body.
"""
self.headers['Content-Disposition'] = content_disposition or 'form-data'
self.headers['Content-Disposition'] += '; '.join(['', self._render_parts((('name', self._name), ('filename', self._filename)))])
self.headers['Content-Type'] = content_type
self.headers['Content-Location'] = content_location
+30 -27
View File
@@ -12,6 +12,7 @@ from io import BytesIO
from .packages import six
from .packages.six import b
from .fields import RequestField
writer = codecs.lookup('utf-8')[3]
@@ -23,15 +24,38 @@ def choose_boundary():
return uuid4().hex
def get_content_type(filename):
return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
def iter_field_objects(fields):
"""
Iterate over fields.
Supports list of (k, v) tuples and dicts, and lists of
:class:`~urllib3.fields.RequestField`.
"""
if isinstance(fields, dict):
i = six.iteritems(fields)
else:
i = iter(fields)
for field in i:
if isinstance(field, RequestField):
yield field
else:
yield RequestField.from_tuples(*field)
def iter_fields(fields):
"""
Iterate over fields.
.. deprecated ::
The addition of `~urllib3.fields.RequestField` makes this function
obsolete. Instead, use :func:`iter_field_objects`, which returns
`~urllib3.fields.RequestField` objects, instead.
Supports list of (k, v) tuples and dicts.
"""
if isinstance(fields, dict):
return ((k, v) for k, v in six.iteritems(fields))
@@ -44,15 +68,7 @@ def encode_multipart_formdata(fields, boundary=None):
Encode a dictionary of ``fields`` using the multipart/form-data MIME format.
:param fields:
Dictionary of fields or list of (key, value) or (key, value, MIME type)
field tuples. The key is treated as the field name, and the value as
the body of the form-data bytes. If the value is a tuple of two
elements, then the first element is treated as the filename of the
form-data section and a suitable MIME type is guessed based on the
filename. If the value is a tuple of three elements, then the third
element is treated as an explicit MIME type of the form-data section.
Field names and filenames must be unicode.
Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`).
:param boundary:
If not specified, then a random boundary will be generated using
@@ -62,24 +78,11 @@ def encode_multipart_formdata(fields, boundary=None):
if boundary is None:
boundary = choose_boundary()
for fieldname, value in iter_fields(fields):
for field in iter_field_objects(fields):
body.write(b('--%s\r\n' % (boundary)))
if isinstance(value, tuple):
if len(value) == 3:
filename, data, content_type = value
else:
filename, data = value
content_type = get_content_type(filename)
writer(body).write('Content-Disposition: form-data; name="%s"; '
'filename="%s"\r\n' % (fieldname, filename))
body.write(b('Content-Type: %s\r\n\r\n' %
(content_type,)))
else:
data = value
writer(body).write('Content-Disposition: form-data; name="%s"\r\n'
% (fieldname))
body.write(b'\r\n')
writer(body).write(field.render_headers())
data = field.data
if isinstance(data, int):
data = str(data) # Backwards compatibility
@@ -7,23 +7,60 @@ __version__ = '3.2.2'
class CertificateError(ValueError):
pass
def _dnsname_to_pat(dn):
def _dnsname_match(dn, hostname, max_wildcards=1):
"""Matching according to RFC 6125, section 6.4.3
http://tools.ietf.org/html/rfc6125#section-6.4.3
"""
pats = []
for frag in dn.split(r'.'):
if frag == '*':
# When '*' is a fragment by itself, it matches a non-empty dotless
# fragment.
pats.append('[^.]+')
else:
# Otherwise, '*' matches any dotless fragment.
frag = re.escape(frag)
pats.append(frag.replace(r'\*', '[^.]*'))
return re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
if not dn:
return False
parts = dn.split(r'.')
leftmost = parts[0]
wildcards = leftmost.count('*')
if wildcards > max_wildcards:
# Issue #17980: avoid denials of service by refusing more
# than one wildcard per fragment. A survery of established
# policy among SSL implementations showed it to be a
# reasonable choice.
raise CertificateError(
"too many wildcards in certificate DNS name: " + repr(dn))
# speed up common case w/o wildcards
if not wildcards:
return dn.lower() == hostname.lower()
# RFC 6125, section 6.4.3, subitem 1.
# The client SHOULD NOT attempt to match a presented identifier in which
# the wildcard character comprises a label other than the left-most label.
if leftmost == '*':
# When '*' is a fragment by itself, it matches a non-empty dotless
# fragment.
pats.append('[^.]+')
elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
# RFC 6125, section 6.4.3, subitem 3.
# The client SHOULD NOT attempt to match a presented identifier
# where the wildcard character is embedded within an A-label or
# U-label of an internationalized domain name.
pats.append(re.escape(leftmost))
else:
# Otherwise, '*' matches any dotless string, e.g. www*
pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
# add the remaining fragments, ignore any wildcards
for frag in parts[1:]:
pats.append(re.escape(frag))
pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
return pat.match(hostname)
def match_hostname(cert, hostname):
"""Verify that *cert* (in decoded format as returned by
SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 rules
are mostly followed, but IP addresses are not accepted for *hostname*.
SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125
rules are followed, but IP addresses are not accepted for *hostname*.
CertificateError is raised on failure. On success, the function
returns nothing.
@@ -34,7 +71,7 @@ def match_hostname(cert, hostname):
san = cert.get('subjectAltName', ())
for key, value in san:
if key == 'DNS':
if _dnsname_to_pat(value).match(hostname):
if _dnsname_match(value, hostname):
return
dnsnames.append(value)
if not dnsnames:
@@ -45,7 +82,7 @@ def match_hostname(cert, hostname):
# XXX according to RFC 2818, the most specific Common Name
# must be used.
if key == 'commonName':
if _dnsname_to_pat(value).match(hostname):
if _dnsname_match(value, hostname):
return
dnsnames.append(value)
if len(dnsnames) > 1:
+81 -24
View File
@@ -13,7 +13,7 @@ except ImportError:
from ._collections import RecentlyUsedContainer
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
from .connectionpool import connection_from_url, port_by_scheme
from .connectionpool import port_by_scheme
from .request import RequestMethods
from .util import parse_url
@@ -60,6 +60,8 @@ class PoolManager(RequestMethods):
"""
proxy = None
def __init__(self, num_pools=10, headers=None, **connection_pool_kw):
RequestMethods.__init__(self, headers)
self.connection_pool_kw = connection_pool_kw
@@ -99,21 +101,23 @@ class PoolManager(RequestMethods):
If ``port`` isn't given, it will be derived from the ``scheme`` using
``urllib3.connectionpool.port_by_scheme``.
"""
scheme = scheme or 'http'
port = port or port_by_scheme.get(scheme, 80)
pool_key = (scheme, host, port)
with self.pools.lock:
# If the scheme, host, or port doesn't match existing open connections,
# open a new ConnectionPool.
pool = self.pools.get(pool_key)
if pool:
return pool
# If the scheme, host, or port doesn't match existing open
# connections, open a new ConnectionPool.
pool = self.pools.get(pool_key)
if pool:
return pool
# Make a fresh ConnectionPool of the desired type
pool = self._new_pool(scheme, host, port)
self.pools[pool_key] = pool
# Make a fresh ConnectionPool of the desired type
pool = self._new_pool(scheme, host, port)
self.pools[pool_key] = pool
return pool
def connection_from_url(self, url):
@@ -145,7 +149,10 @@ class PoolManager(RequestMethods):
if 'headers' not in kw:
kw['headers'] = self.headers
response = conn.urlopen(method, u.request_uri, **kw)
if self.proxy is not None and u.scheme == "http":
response = conn.urlopen(method, url, **kw)
else:
response = conn.urlopen(method, u.request_uri, **kw)
redirect_location = redirect and response.get_redirect_location()
if not redirect_location:
@@ -164,15 +171,59 @@ class PoolManager(RequestMethods):
return self.urlopen(method, redirect_location, **kw)
class ProxyManager(RequestMethods):
class ProxyManager(PoolManager):
"""
Given a ConnectionPool to a proxy, the ProxyManager's ``urlopen`` method
will make requests to any url through the defined proxy. The ProxyManager
class will automatically set the 'Host' header if it is not provided.
Behaves just like :class:`PoolManager`, but sends all requests through
the defined proxy, using the CONNECT method for HTTPS URLs.
:param poxy_url:
The URL of the proxy to be used.
:param proxy_headers:
A dictionary contaning headers that will be sent to the proxy. In case
of HTTP they are being sent with each request, while in the
HTTPS/CONNECT case they are sent only once. Could be used for proxy
authentication.
Example:
>>> proxy = urllib3.ProxyManager('http://localhost:3128/')
>>> r1 = proxy.request('GET', 'http://google.com/')
>>> r2 = proxy.request('GET', 'http://httpbin.org/')
>>> len(proxy.pools)
1
>>> r3 = proxy.request('GET', 'https://httpbin.org/')
>>> r4 = proxy.request('GET', 'https://twitter.com/')
>>> len(proxy.pools)
3
"""
def __init__(self, proxy_pool):
self.proxy_pool = proxy_pool
def __init__(self, proxy_url, num_pools=10, headers=None,
proxy_headers=None, **connection_pool_kw):
if isinstance(proxy_url, HTTPConnectionPool):
proxy_url = '%s://%s:%i' % (proxy_url.scheme, proxy_url.host,
proxy_url.port)
proxy = parse_url(proxy_url)
if not proxy.port:
port = port_by_scheme.get(proxy.scheme, 80)
proxy = proxy._replace(port=port)
self.proxy = proxy
self.proxy_headers = proxy_headers or {}
assert self.proxy.scheme in ("http", "https"), \
'Not supported proxy scheme %s' % self.proxy.scheme
connection_pool_kw['_proxy'] = self.proxy
connection_pool_kw['_proxy_headers'] = self.proxy_headers
super(ProxyManager, self).__init__(
num_pools, headers, **connection_pool_kw)
def connection_from_host(self, host, port=None, scheme='http'):
if scheme == "https":
return super(ProxyManager, self).connection_from_host(
host, port, scheme)
return super(ProxyManager, self).connection_from_host(
self.proxy.host, self.proxy.port, self.proxy.scheme)
def _set_proxy_headers(self, url, headers=None):
"""
@@ -187,16 +238,22 @@ class ProxyManager(RequestMethods):
if headers:
headers_.update(headers)
return headers_
def urlopen(self, method, url, **kw):
def urlopen(self, method, url, redirect=True, **kw):
"Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
kw['assert_same_host'] = False
kw['headers'] = self._set_proxy_headers(url, headers=kw.get('headers'))
return self.proxy_pool.urlopen(method, url, **kw)
u = parse_url(url)
if u.scheme == "http":
# It's too late to set proxy headers on per-request basis for
# tunnelled HTTPS connections, should use
# constructor's proxy_headers instead.
kw['headers'] = self._set_proxy_headers(url, kw.get('headers',
self.headers))
kw['headers'].update(self.proxy_headers)
return super(ProxyManager, self).urlopen(method, url, redirect, **kw)
def proxy_from_url(url, **pool_kw):
proxy_pool = connection_from_url(url, **pool_kw)
return ProxyManager(proxy_pool)
def proxy_from_url(url, **kw):
return ProxyManager(proxy_url=url, **kw)
+3 -2
View File
@@ -74,6 +74,7 @@ class HTTPResponse(io.IOBase):
"""
CONTENT_DECODERS = ['gzip', 'deflate']
REDIRECT_STATUSES = [301, 302, 303, 307, 308]
def __init__(self, body='', headers=None, status=0, version=0, reason=None,
strict=0, preload_content=True, decode_content=True,
@@ -107,7 +108,7 @@ class HTTPResponse(io.IOBase):
code and valid location. ``None`` if redirect status and no
location. ``False`` if not a redirect status code.
"""
if self.status in [301, 302, 303, 307]:
if self.status in self.REDIRECT_STATUSES:
return self.headers.get('location')
return False
@@ -191,7 +192,7 @@ class HTTPResponse(io.IOBase):
"failed to decode it." % content_encoding,
e)
if flush_decoder and self._decoder:
if flush_decoder and decode_content and self._decoder:
buf = self._decoder.decompress(binary_type())
data += buf + self._decoder.flush()
+231 -4
View File
@@ -6,10 +6,11 @@
from base64 import b64encode
from collections import namedtuple
from socket import error as SocketError
from hashlib import md5, sha1
from binascii import hexlify, unhexlify
from collections import namedtuple
from hashlib import md5, sha1
from socket import error as SocketError, _GLOBAL_DEFAULT_TIMEOUT
import time
try:
from select import poll, POLLIN
@@ -32,7 +33,233 @@ except ImportError:
pass
from .packages import six
from .exceptions import LocationParseError, SSLError
from .exceptions import LocationParseError, SSLError, TimeoutStateError
_Default = object()
# The default timeout to use for socket connections. This is the attribute used
# by httplib to define the default timeout
def current_time():
"""
Retrieve the current time, this function is mocked out in unit testing.
"""
return time.time()
class Timeout(object):
"""
Utility object for storing timeout values.
Example usage:
.. code-block:: python
timeout = urllib3.util.Timeout(connect=2.0, read=7.0)
pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout)
pool.request(...) # Etc, etc
:param connect:
The maximum amount of time to wait for a connection attempt to a server
to succeed. Omitting the parameter will default the connect timeout to
the system default, probably `the global default timeout in socket.py
<http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
None will set an infinite timeout for connection attempts.
:type connect: integer, float, or None
:param read:
The maximum amount of time to wait between consecutive
read operations for a response from the server. Omitting
the parameter will default the read timeout to the system
default, probably `the global default timeout in socket.py
<http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
None will set an infinite timeout.
:type read: integer, float, or None
:param total:
The maximum amount of time to wait for an HTTP request to connect and
return. This combines the connect and read timeouts into one. In the
event that both a connect timeout and a total are specified, or a read
timeout and a total are specified, the shorter timeout will be applied.
Defaults to None.
:type total: integer, float, or None
.. note::
Many factors can affect the total amount of time for urllib3 to return
an HTTP response. Specifically, Python's DNS resolver does not obey the
timeout specified on the socket. Other factors that can affect total
request time include high CPU load, high swap, the program running at a
low priority level, or other behaviors. The observed running time for
urllib3 to return a response may be greater than the value passed to
`total`.
In addition, the read and total timeouts only measure the time between
read operations on the socket connecting the client and the server, not
the total amount of time for the request to return a complete response.
As an example, you may want a request to return within 7 seconds or
fail, so you set the ``total`` timeout to 7 seconds. If the server
sends one byte to you every 5 seconds, the request will **not** trigger
time out. This case is admittedly rare.
"""
#: A sentinel object representing the default timeout value
DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT
def __init__(self, connect=_Default, read=_Default, total=None):
self._connect = self._validate_timeout(connect, 'connect')
self._read = self._validate_timeout(read, 'read')
self.total = self._validate_timeout(total, 'total')
self._start_connect = None
def __str__(self):
return '%s(connect=%r, read=%r, total=%r)' % (
type(self).__name__, self._connect, self._read, self.total)
@classmethod
def _validate_timeout(cls, value, name):
""" Check that a timeout attribute is valid
:param value: The timeout value to validate
:param name: The name of the timeout attribute to validate. This is used
for clear error messages
:return: the value
:raises ValueError: if the type is not an integer or a float, or if it
is a numeric value less than zero
"""
if value is _Default:
return cls.DEFAULT_TIMEOUT
if value is None or value is cls.DEFAULT_TIMEOUT:
return value
try:
float(value)
except (TypeError, ValueError):
raise ValueError("Timeout value %s was %s, but it must be an "
"int or float." % (name, value))
try:
if value < 0:
raise ValueError("Attempted to set %s timeout to %s, but the "
"timeout cannot be set to a value less "
"than 0." % (name, value))
except TypeError: # Python 3
raise ValueError("Timeout value %s was %s, but it must be an "
"int or float." % (name, value))
return value
@classmethod
def from_float(cls, timeout):
""" Create a new Timeout from a legacy timeout value.
The timeout value used by httplib.py sets the same timeout on the
connect(), and recv() socket requests. This creates a :class:`Timeout`
object that sets the individual timeouts to the ``timeout`` value passed
to this function.
:param timeout: The legacy timeout value
:type timeout: integer, float, sentinel default object, or None
:return: a Timeout object
:rtype: :class:`Timeout`
"""
return Timeout(read=timeout, connect=timeout)
def clone(self):
""" Create a copy of the timeout object
Timeout properties are stored per-pool but each request needs a fresh
Timeout object to ensure each one has its own start/stop configured.
:return: a copy of the timeout object
:rtype: :class:`Timeout`
"""
# We can't use copy.deepcopy because that will also create a new object
# for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to
# detect the user default.
return Timeout(connect=self._connect, read=self._read,
total=self.total)
def start_connect(self):
""" Start the timeout clock, used during a connect() attempt
:raises urllib3.exceptions.TimeoutStateError: if you attempt
to start a timer that has been started already.
"""
if self._start_connect is not None:
raise TimeoutStateError("Timeout timer has already been started.")
self._start_connect = current_time()
return self._start_connect
def get_connect_duration(self):
""" Gets the time elapsed since the call to :meth:`start_connect`.
:return: the elapsed time
:rtype: float
:raises urllib3.exceptions.TimeoutStateError: if you attempt
to get duration for a timer that hasn't been started.
"""
if self._start_connect is None:
raise TimeoutStateError("Can't get connect duration for timer "
"that has not started.")
return current_time() - self._start_connect
@property
def connect_timeout(self):
""" Get the value to use when setting a connection timeout.
This will be a positive float or integer, the value None
(never timeout), or the default system timeout.
:return: the connect timeout
:rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
"""
if self.total is None:
return self._connect
if self._connect is None or self._connect is self.DEFAULT_TIMEOUT:
return self.total
return min(self._connect, self.total)
@property
def read_timeout(self):
""" Get the value for the read timeout.
This assumes some time has elapsed in the connection timeout and
computes the read timeout appropriately.
If self.total is set, the read timeout is dependent on the amount of
time taken by the connect timeout. If the connection time has not been
established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be
raised.
:return: the value to use for the read timeout
:rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
:raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect`
has not yet been called on this object.
"""
if (self.total is not None and
self.total is not self.DEFAULT_TIMEOUT and
self._read is not None and
self._read is not self.DEFAULT_TIMEOUT):
# in case the connect timeout has not yet been established.
if self._start_connect is None:
return self._read
return max(0, min(self.total - self.get_connect_duration(),
self._read))
elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT:
return max(0, self.total - self.get_connect_duration())
else:
return self._read
class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])):
+5 -4
View File
@@ -74,10 +74,7 @@ class SessionRedirectMixin(object):
# ((resp.status_code is codes.see_other))
while (('location' in resp.headers and resp.status_code in REDIRECT_STATI)):
prepared_request = PreparedRequest()
prepared_request.body = req.body
prepared_request.headers = req.headers.copy()
prepared_request.hooks = req.hooks
prepared_request = req.copy()
resp.content # Consume socket so it can be released
@@ -468,6 +465,10 @@ class Session(SessionRedirectMixin):
r = dispatch_hook('response', hooks, r, **kwargs)
# Persist cookies
if r.history:
# If the hooks create history then we want those cookies too
for resp in r.history:
extract_cookies_to_jar(self.cookies, resp.request, resp.raw)
extract_cookies_to_jar(self.cookies, request, r.raw)
# Redirect resolving generator.
+38 -19
View File
@@ -21,10 +21,11 @@ from netrc import netrc, NetrcParseError
from . import __version__
from . import certs
from .compat import parse_http_list as _parse_list_header
from .compat import quote, urlparse, bytes, str, OrderedDict, urlunparse
from .compat import getproxies, proxy_bypass
from .compat import (quote, urlparse, bytes, str, OrderedDict, urlunparse,
is_py2, is_py3, builtin_str, getproxies, proxy_bypass)
from .cookies import RequestsCookieJar, cookiejar_from_dict
from .structures import CaseInsensitiveDict
from .exceptions import MissingSchema, InvalidURL
_hush_pyflakes = (RequestsCookieJar,)
@@ -366,7 +367,11 @@ def unquote_unreserved(uri):
for i in range(1, len(parts)):
h = parts[i][0:2]
if len(h) == 2 and h.isalnum():
c = chr(int(h, 16))
try:
c = chr(int(h, 16))
except ValueError:
raise InvalidURL("Invalid percent-escape sequence: '%s'" % h)
if c in UNRESERVED_SET:
parts[i] = c + parts[i][2:]
else:
@@ -397,18 +402,18 @@ def get_environ_proxies(url):
# we're getting isn't in the no_proxy list.
no_proxy = get_proxy('no_proxy')
netloc = urlparse(url).netloc
if no_proxy:
# We need to check whether we match here. We need to see if we match
# the end of the netloc, both with and without the port.
no_proxy = no_proxy.split(',')
no_proxy = no_proxy.replace(' ', '').split(',')
for host in no_proxy:
if netloc.endswith(host) or netloc.split(':')[0].endswith(host):
# The URL does match something in no_proxy, so we don't want
# to apply the proxies on this URL.
return {}
# If the system proxy settings indicate that this URL should be bypassed,
# don't proxy.
if proxy_bypass(netloc):
@@ -418,7 +423,7 @@ def get_environ_proxies(url):
# anywhere that no_proxy applies to, and the system settings don't require
# bypassing the proxy for the current URL.
return getproxies()
def default_user_agent():
"""Return a string representing the default user agent."""
@@ -528,18 +533,13 @@ def guess_json_utf(data):
return None
def prepend_scheme_if_needed(url, new_scheme):
'''Given a URL that may or may not have a scheme, prepend the given scheme.
Does not replace a present scheme with the one provided as an argument.'''
scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme)
def except_on_missing_scheme(url):
"""Given a URL, raise a MissingSchema exception if the scheme is missing.
"""
scheme, netloc, path, params, query, fragment = urlparse(url)
# urlparse is a finicky beast, and sometimes decides that there isn't a
# netloc present. Assume that it's being over-cautious, and switch netloc
# and path if urlparse decided there was no netloc.
if not netloc:
netloc, path = path, netloc
return urlunparse((scheme, netloc, path, params, query, fragment))
if not scheme:
raise MissingSchema('Proxy URLs must have explicit schemes.')
def get_auth_from_url(url):
@@ -550,3 +550,22 @@ def get_auth_from_url(url):
return (parsed.username, parsed.password)
else:
return ('', '')
def to_native_string(string, encoding='ascii'):
"""
Given a string object, regardless of type, returns a representation of that
string in the native string type, encoding and decoding where necessary.
This assumes ASCII unless told otherwise.
"""
out = None
if isinstance(string, builtin_str):
out = string
else:
if is_py2:
out = string.encode(encoding)
else:
out = string.decode(encoding)
return out
+41 -8
View File
@@ -182,13 +182,13 @@ class RequestsTestCase(unittest.TestCase):
assert r.json()['cookies']['foo'] == 'bar'
# Make sure the session cj is still the custom one
assert s.cookies is cj
def test_requests_in_history_are_not_overridden(self):
resp = requests.get(httpbin('redirect/3'))
urls = [r.url for r in resp.history]
req_urls = [r.request.url for r in resp.history]
self.assertEquals(urls, req_urls)
def test_user_agent_transfers(self):
heads = {
@@ -246,7 +246,7 @@ class RequestsTestCase(unittest.TestCase):
self.assertEqual(r.status_code, 401)
s = requests.session()
# Should use netrc and work.
r = s.get(url)
self.assertEqual(r.status_code, 200)
@@ -268,10 +268,26 @@ class RequestsTestCase(unittest.TestCase):
self.assertEqual(r.status_code, 401)
s = requests.session()
s.auth = auth
s.auth = HTTPDigestAuth('user', 'pass')
r = s.get(url)
self.assertEqual(r.status_code, 200)
def test_DIGEST_AUTH_RETURNS_COOKIE(self):
url = httpbin('digest-auth', 'auth', 'user', 'pass')
auth = HTTPDigestAuth('user', 'pass')
r = requests.get(url)
assert r.cookies['fake'] == 'fake_value'
r = requests.get(url, auth=auth)
assert r.status_code == 200
def test_DIGEST_AUTH_SETS_SESSION_COOKIES(self):
url = httpbin('digest-auth', 'auth', 'user', 'pass')
auth = HTTPDigestAuth('user', 'pass')
s = requests.Session()
s.get(url, auth=auth)
assert s.cookies['fake'] == 'fake_value'
def test_DIGEST_STREAM(self):
auth = HTTPDigestAuth('user', 'pass')
@@ -563,17 +579,16 @@ class RequestsTestCase(unittest.TestCase):
s.headers.update({'accept': 'application/json'})
r = s.get(httpbin('get'))
headers = r.request.headers
# ASCII encode because of key comparison changes in py3
self.assertEqual(
headers['accept'.encode('ascii')],
headers['accept'],
'application/json'
)
self.assertEqual(
headers['Accept'.encode('ascii')],
headers['Accept'],
'application/json'
)
self.assertEqual(
headers['ACCEPT'.encode('ascii')],
headers['ACCEPT'],
'application/json'
)
@@ -645,6 +660,24 @@ class RequestsTestCase(unittest.TestCase):
r = requests.Request('GET', url).prepare()
self.assertEqual(r.url, url)
def test_header_keys_are_native(self):
headers = {u'unicode': 'blah', 'byte'.encode('ascii'): 'blah'}
r = requests.Request('GET', httpbin('get'), headers=headers)
p = r.prepare()
# This is testing that they are builtin strings. A bit weird, but there
# we go.
self.assertTrue('unicode' in p.headers.keys())
self.assertTrue('byte' in p.headers.keys())
def test_can_send_nonstring_objects_with_files(self):
data = {'a': 0.0}
files = {'b': 'foo'}
r = requests.Request('POST', httpbin('post'), data=data, files=files)
p = r.prepare()
self.assertTrue('multipart/form-data' in p.headers['Content-Type'])
class TestContentEncodingDetection(unittest.TestCase):