Files
requests3/requests/packages/urllib3/connectionpool.py
T
Kenneth Reitz 55cea74b56 urllib3 update
2011-09-25 17:42:27 -04:00

503 lines
17 KiB
Python

import logging
import socket
from urllib import urlencode
from httplib import HTTPConnection, HTTPSConnection, HTTPException
from Queue import Queue, Empty, Full
from select import select
from socket import error as SocketError, timeout as SocketTimeout
try:
import ssl
BaseSSLError = ssl.SSLError
except ImportError:
ssl = None
BaseSSLError = None
from .filepost import encode_multipart_formdata
from .response import HTTPResponse
from .exceptions import (
SSLError,
MaxRetryError,
TimeoutError,
HostChangedError,
EmptyPoolError)
log = logging.getLogger(__name__)
## Connection objects (extension of httplib)
class VerifiedHTTPSConnection(HTTPSConnection):
"""
Based on httplib.HTTPSConnection but wraps the socket with
SSL certification.
"""
def __init__(self):
HTTPSConnection.__init__()
self.cert_reqs = None
self.ca_certs = None
def set_cert(self, key_file=None, cert_file=None,
cert_reqs='CERT_NONE', ca_certs=None):
ssl_req_scheme = {
'CERT_NONE': ssl.CERT_NONE,
'CERT_OPTIONAL': ssl.CERT_OPTIONAL,
'CERT_REQUIRED': ssl.CERT_REQUIRED
}
self.key_file = key_file
self.cert_file = cert_file
self.cert_reqs = ssl_req_scheme.get(cert_reqs) or ssl.CERT_NONE
self.ca_certs = ca_certs
def connect(self):
# Add certificate verification
sock = socket.create_connection((self.host, self.port), self.timeout)
# Wrap socket using verification with the root certs in
# trusted_root_certs
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file,
cert_reqs=self.cert_reqs,
ca_certs=self.ca_certs)
## Pool objects
class ConnectionPool(object):
pass
class HTTPConnectionPool(ConnectionPool):
"""
Thread-safe connection pool for one host.
host
Host used for this HTTP Connection (e.g. "localhost"), passed into
httplib.HTTPConnection()
port
Port used for this HTTP Connection (None is equivalent to 80), passed
into httplib.HTTPConnection()
strict
Causes BadStatusLine to be raised if the status line can't be parsed
as a valid HTTP/1.0 or 1.1 status line, passed into
httplib.HTTPConnection()
timeout
Socket timeout for each individual connection, can be a float. None
disables timeout.
maxsize
Number of connections to save that can be reused. More than 1 is useful
in multithreaded situations. If ``block`` is set to false, more
connections will be created but they will not be saved once they've
been used.
block
If set to True, no more than ``maxsize`` connections will be used at
a time. When no free connections are available, the call will block
until a connection has been released. This is a useful side effect for
particular multithreaded situations where one does not want to use more
than maxsize connections per host to prevent flooding.
headers
Headers to include with all requests, unless other headers are given
explicitly.
"""
scheme = 'http'
def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1,
block=False, headers=None):
self.host = host
self.port = port
self.strict = strict
self.timeout = timeout
self.pool = Queue(maxsize)
self.block = block
self.headers = headers or {}
# Fill the queue up so that doing get() on it will block properly
for _ in xrange(maxsize):
self.pool.put(None)
# These are mostly for testing and debugging purposes.
self.num_connections = 0
self.num_requests = 0
def _new_conn(self):
"""
Return a fresh HTTPConnection.
"""
self.num_connections += 1
log.info("Starting new HTTP connection (%d): %s" %
(self.num_connections, self.host))
return HTTPConnection(host=self.host, port=self.port)
def _get_conn(self, timeout=None):
"""
Get a connection. Will return a pooled connection if one is available.
Otherwise, a fresh connection is returned.
"""
conn = None
try:
conn = self.pool.get(block=self.block, timeout=timeout)
# If this is a persistent connection, check if it got disconnected
if conn and conn.sock and select([conn.sock], [], [], 0.0)[0]:
# Either data is buffered (bad), or the connection is dropped.
log.info("Resetting dropped connection: %s" % self.host)
conn.close()
except Empty:
if self.block:
raise EmptyPoolError("Pool reached maximum size and no more "
"connections are allowed.")
pass # Oh well, we'll create a new connection then
return conn or self._new_conn()
def _put_conn(self, conn):
"""
Put a connection back into the pool.
If the pool is already full, the connection is discarded because we
exceeded maxsize. If connections are discarded frequently, then maxsize
should be increased.
"""
try:
self.pool.put(conn, block=False)
except Full:
# This should never happen if self.block == True
log.warning("HttpConnectionPool is full, discarding connection: %s"
% self.host)
def _make_request(self, conn, method, url, **httplib_request_kw):
"""
Perform a request on a given httplib connection object taken from our
pool.
"""
self.num_requests += 1
conn.request(method, url, **httplib_request_kw)
conn.sock.settimeout(self.timeout)
httplib_response = conn.getresponse()
log.debug("\"%s %s %s\" %s %s" %
(method, url,
conn._http_vsn_str, # pylint: disable-msg=W0212
httplib_response.status, httplib_response.length))
return httplib_response
def is_same_host(self, url):
return (url.startswith('/') or
get_host(url) == (self.scheme, self.host, self.port))
def urlopen(self, method, url, body=None, headers=None, retries=3,
redirect=True, assert_same_host=True, pool_timeout=None,
release_conn=None, **response_kw):
"""
Get a connection from the pool and perform an HTTP request.
method
HTTP request method (such as GET, POST, PUT, etc.)
body
Data to send in the request body (useful for creating
POST requests, see HTTPConnectionPool.post_url for
more convenience).
headers
Dictionary of custom headers to send, such as User-Agent,
If-None-Match, etc. If None, pool headers are used. If provided,
these headers completely replace any pool-specific headers.
retries
Number of retries to allow before raising
a MaxRetryError exception.
redirect
Automatically handle redirects (status codes 301, 302, 303, 307),
each redirect counts as a retry.
assert_same_host
If True, will make sure that the host of the pool requests is
consistent else will raise HostChangedError. When False, you can
use the pool on an HTTP proxy and request foreign hosts.
pool_timeout
If set and the pool is set to block=True, then this method will
block for ``pool_timeout`` seconds and raise EmptyPoolError if no
connection is available within the time period.
release_conn
If False, then the urlopen call will not release the connection
back into the pool once a response is received. This is useful if
you're not preloading the response's content immediately. You will
need to call ``r.release_conn()`` on the response ``r`` to return
the connection back into the pool. If None, it takes the value of
``response_kw.get('preload_content', True)``.
Additional parameters are passed to
``HTTPResponse.from_httplib(r, **response_kw)``
"""
if headers is None:
headers = self.headers
if retries < 0:
raise MaxRetryError("Max retries exceeded for url: %s" % url)
if release_conn is None:
release_conn = response_kw.get('preload_content', True)
# Check host
if assert_same_host and not self.is_same_host(url):
host = "%s://%s" % (self.scheme, self.host)
if self.port:
host = "%s:%d" % (host, self.port)
raise HostChangedError("Connection pool with host '%s' tried to "
"open a foreign host: %s" % (host, url))
# Request a connection from the queue
conn = self._get_conn(timeout=pool_timeout)
try:
# Make the request on the httplib connection object
httplib_response = self._make_request(conn, method, url,
body=body, headers=headers)
# Import httplib's response into our own wrapper object
response = HTTPResponse.from_httplib(httplib_response,
pool=self,
connection=conn,
**response_kw)
# The connection will be put back into the pool when
# response.release_conn() is called (implicitly by response.read())
except (SocketTimeout, Empty), e:
# Timed out either by socket or queue
raise TimeoutError("Request timed out after %f seconds" %
self.timeout)
except (BaseSSLError), e:
# SSL certificate error
raise SSLError(e)
except (HTTPException, SocketError), e:
# Connection broken, discard. It will be replaced next _get_conn().
conn = None
finally:
if release_conn:
# Put the connection back to be reused
self._put_conn(conn)
if not conn:
log.warn("Retrying (%d attempts remain) after connection "
"broken by '%r': %s" % (retries, e, url))
return self.urlopen(method, url, body, headers, retries - 1,
redirect, assert_same_host) # Try again
# Handle redirection
if (redirect and
response.status in [301, 302, 303, 307] and
'location' in response.headers): # Redirect, retry
log.info("Redirecting %s -> %s" %
(url, response.headers.get('location')))
return self.urlopen(method, response.headers.get('location'), body,
headers, retries - 1, redirect,
assert_same_host)
return response
def get_url(self, url, fields=None, headers=None, retries=3,
redirect=True, **response_kw):
"""
Wrapper for performing GET with urlopen (see urlopen for more details).
Supports an optional ``fields`` dictionary parameter key/value strings.
If provided, they will be added to the url.
"""
if fields:
url += '?' + urlencode(fields)
return self.urlopen('GET', url, headers=headers, retries=retries,
redirect=redirect, **response_kw)
def post_url(self, url, fields=None, headers=None, retries=3,
redirect=True, encode_multipart=True, multipart_boundary=None,
**response_kw):
"""
Wrapper for performing POST with urlopen (see urlopen
for more details).
Supports an optional ``fields`` parameter of key/value strings AND
key/filetuple. A filetuple is a (filename, data) tuple. For example:
fields = {
'foo': 'bar',
'foofile': ('foofile.txt', 'contents of foofile'),
}
If encode_multipart=True (default), then
``urllib3.filepost.encode_multipart_formdata`` is used to encode the
payload with the appropriate content type. Otherwise
``urllib.urlencode`` is used with 'application/x-www-form-urlencoded'
content type.
Multipart encoding must be used when posting files, and it's reasonably
safe to use it other times too. It may break request signing, such as
OAuth.
NOTE: If ``headers`` are supplied, the 'Content-Type' value will be
overwritten because it depends on the dynamic random boundary string
which is used to compose the body of the request.
"""
if encode_multipart:
body, content_type = encode_multipart_formdata(fields or {},
boundary=multipart_boundary)
else:
body, content_type = (
urlencode(fields or {}),
'application/x-www-form-urlencoded')
headers = headers or {}
headers.update({'Content-Type': content_type})
return self.urlopen('POST', url, body, headers=headers,
retries=retries, redirect=redirect, **response_kw)
class HTTPSConnectionPool(HTTPConnectionPool):
"""
Same as HTTPConnectionPool, but HTTPS.
"""
scheme = 'https'
def __init__(self, host, port=None,
strict=False, timeout=None, maxsize=1,
block=False, headers=None,
key_file=None, cert_file=None,
cert_reqs='CERT_NONE', ca_certs=None):
super(HTTPSConnectionPool, self).__init__(host, port,
strict, timeout, maxsize,
block, headers)
self.key_file = key_file
self.cert_file = cert_file
self.cert_reqs = cert_reqs
self.ca_certs = ca_certs
def _new_conn(self):
"""
Return a fresh HTTPSConnection.
"""
self.num_connections += 1
log.info("Starting new HTTPS connection (%d): %s"
% (self.num_connections, self.host))
if not ssl:
return HTTPSConnection(host=self.host, port=self.port)
connection = VerifiedHTTPSConnection(host=self.host, port=self.port)
connection.set_cert(key_file=self.key_file, cert_file=self.cert_file,
cert_reqs=self.cert_reqs, ca_certs=self.ca_certs)
return connection
## Helpers
def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
basic_auth=None):
"""
Shortcuts for generating request headers.
keep_alive
If true, adds 'connection: keep-alive' header.
accept_encoding
Can be a boolean, list, or string.
True translates to 'gzip,deflate'.
List will get joined by comma.
String will be used as provided.
user_agent
String representing the user-agent you want, such as
"python-urllib3/0.6"
basic_auth
Colon-separated username:password string for 'authorization: basic ...'
auth header.
"""
headers = {}
if accept_encoding:
if isinstance(accept_encoding, str):
pass
elif isinstance(accept_encoding, list):
accept_encoding = ','.join(accept_encoding)
else:
accept_encoding = 'gzip,deflate'
headers['accept-encoding'] = accept_encoding
if user_agent:
headers['user-agent'] = user_agent
if keep_alive:
headers['connection'] = 'keep-alive'
if basic_auth:
headers['authorization'] = 'Basic ' + \
basic_auth.encode('base64').strip()
return headers
def get_host(url):
"""
Given a url, return its scheme, host and port (None if it's not there).
For example:
>>> get_host('http://google.com/mail/')
http, google.com, None
>>> get_host('google.com:80')
http, google.com, 80
"""
# This code is actually similar to urlparse.urlsplit, but much
# simplified for our needs.
port = None
scheme = 'http'
if '//' in url:
scheme, url = url.split('://', 1)
if '/' in url:
url, _path = url.split('/', 1)
if ':' in url:
url, port = url.split(':', 1)
port = int(port)
return scheme, url, port
def connection_from_url(url, **kw):
"""
Given a url, return an HTTP(S)ConnectionPool instance of its host.
This is a shortcut for not having to determine the host of the url
before creating an HTTP(S)ConnectionPool instance.
Passes on whatever kw arguments to the constructor of
HTTP(S)ConnectionPool. (e.g. timeout, maxsize, block)
"""
scheme, host, port = get_host(url)
if scheme == 'https':
return HTTPSConnectionPool(host, port=port, **kw)
else:
return HTTPConnectionPool(host, port=port, **kw)