Import urllib3

This commit is contained in:
Kenneth Reitz
2011-10-23 16:18:52 -04:00
parent 95336a0542
commit 7c271a466f
10 changed files with 1370 additions and 0 deletions
+48
View File
@@ -0,0 +1,48 @@
# urllib3/__init__.py
# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
"""
urllib3 - Thread-safe connection pooling and re-using.
"""
__author__ = 'Andrey Petrov (andrey.petrov@shazow.net)'
__license__ = 'MIT'
__version__ = '1.0.1'
from .connectionpool import (
HTTPConnectionPool,
HTTPSConnectionPool,
connection_from_url,
get_host,
make_headers)
from .exceptions import (
HTTPError,
MaxRetryError,
SSLError,
TimeoutError)
from .poolmanager import PoolManager, ProxyManager, proxy_from_url
from .response import HTTPResponse
from .filepost import encode_multipart_formdata
# Set default logging handler to avoid "No handler found" warnings.
import logging
try:
from logging import NullHandler
except ImportError:
class NullHandler(logging.Handler):
def emit(self, record):
pass
logging.getLogger(__name__).addHandler(NullHandler())
# ... Clean up.
del logging
del NullHandler
+120
View File
@@ -0,0 +1,120 @@
# urllib3/_collections.py
# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
from collections import MutableMapping, deque
__all__ = ['RecentlyUsedContainer']
class AccessEntry(object):
__slots__ = ('key', 'is_valid')
def __init__(self, key, is_valid=True):
self.key = key
self.is_valid = is_valid
class RecentlyUsedContainer(MutableMapping):
"""
Provides a dict-like that maintains up to ``maxsize`` keys while throwing
away the least-recently-used keys beyond ``maxsize``.
"""
# TODO: Make this threadsafe. _prune_invalidated_entries should be the
# only real pain-point for this.
# If len(self.access_log) exceeds self._maxsize * CLEANUP_FACTOR, then we
# will attempt to cleanup the invalidated entries in the access_log
# datastructure during the next 'get' operation.
CLEANUP_FACTOR = 10
def __init__(self, maxsize=10):
self._maxsize = maxsize
self._container = {}
# We use a deque to to store our keys ordered by the last access.
self.access_log = deque()
# We look up the access log entry by the key to invalidate it so we can
# insert a new authorative entry at the head without having to dig and
# find the old entry for removal immediately.
self.access_lookup = {}
# Trigger a heap cleanup when we get past this size
self.access_log_limit = maxsize * self.CLEANUP_FACTOR
def _push_entry(self, key):
"Push entry onto our access log, invalidate the old entry if exists."
# Invalidate old entry if it exists
old_entry = self.access_lookup.get(key)
if old_entry:
old_entry.is_valid = False
new_entry = AccessEntry(key)
self.access_lookup[key] = new_entry
self.access_log.appendleft(new_entry)
def _prune_entries(self, num):
"Pop entries from our access log until we popped ``num`` valid ones."
while num > 0:
p = self.access_log.pop()
if not p.is_valid:
continue # Invalidated entry, skip
del self._container[p.key]
del self.access_lookup[p.key]
num -= 1
def _prune_invalidated_entries(self):
"Rebuild our access_log without the invalidated entries."
self.access_log = deque(e for e in self.access_log if e.is_valid)
def _get_ordered_access_keys(self):
# Used for testing
return [e.key for e in self.access_log if e.is_valid]
def __getitem__(self, key):
item = self._container.get(key)
if not item:
return
# Insert new entry with new high priority, also implicitly invalidates
# the old entry.
self._push_entry(key)
if len(self.access_log) > self.access_log_limit:
# Heap is getting too big, try to clean up any tailing invalidated
# entries.
self._prune_invalidated_entries()
return item
def __setitem__(self, key, item):
# Add item to our container and access log
self._container[key] = item
self._push_entry(key)
# Discard invalid and excess entries
self._prune_entries(len(self._container) - self._maxsize)
def __delitem__(self, key):
self._invalidate_entry(key)
del self._container[key]
del self._access_lookup[key]
def __len__(self):
return self._container.__len__()
def __iter__(self):
return self._container.__iter__()
def __contains__(self, key):
return self._container.__contains__(key)
+525
View File
@@ -0,0 +1,525 @@
# urllib3/connectionpool.py
# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
import logging
import socket
from httplib import HTTPConnection, HTTPSConnection, HTTPException
from Queue import Queue, Empty, Full
from select import select
from socket import error as SocketError, timeout as SocketTimeout
try:
import ssl
BaseSSLError = ssl.SSLError
except ImportError:
ssl = None
BaseSSLError = None
from .request import RequestMethods
from .response import HTTPResponse
from .exceptions import (
SSLError,
MaxRetryError,
TimeoutError,
HostChangedError,
EmptyPoolError,
)
log = logging.getLogger(__name__)
_Default = object()
## Connection objects (extension of httplib)
class VerifiedHTTPSConnection(HTTPSConnection):
"""
Based on httplib.HTTPSConnection but wraps the socket with
SSL certification.
"""
cert_reqs = None
ca_certs = None
def set_cert(self, key_file=None, cert_file=None,
cert_reqs='CERT_NONE', ca_certs=None):
ssl_req_scheme = {
'CERT_NONE': ssl.CERT_NONE,
'CERT_OPTIONAL': ssl.CERT_OPTIONAL,
'CERT_REQUIRED': ssl.CERT_REQUIRED
}
self.key_file = key_file
self.cert_file = cert_file
self.cert_reqs = ssl_req_scheme.get(cert_reqs) or ssl.CERT_NONE
self.ca_certs = ca_certs
def connect(self):
# Add certificate verification
sock = socket.create_connection((self.host, self.port), self.timeout)
# Wrap socket using verification with the root certs in
# trusted_root_certs
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file,
cert_reqs=self.cert_reqs,
ca_certs=self.ca_certs)
## Pool objects
class ConnectionPool(object):
"""
Base class for all connection pools, such as
:class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`.
"""
pass
class HTTPConnectionPool(ConnectionPool, RequestMethods):
"""
Thread-safe connection pool for one host.
:param host:
Host used for this HTTP Connection (e.g. "localhost"), passed into
:class:`httplib.HTTPConnection`.
:param port:
Port used for this HTTP Connection (None is equivalent to 80), passed
into :class:`httplib.HTTPConnection`.
:param strict:
Causes BadStatusLine to be raised if the status line can't be parsed
as a valid HTTP/1.0 or 1.1 status line, passed into
:class:`httplib.HTTPConnection`.
:param timeout:
Socket timeout for each individual connection, can be a float. None
disables timeout.
:param maxsize:
Number of connections to save that can be reused. More than 1 is useful
in multithreaded situations. If ``block`` is set to false, more
connections will be created but they will not be saved once they've
been used.
:param block:
If set to True, no more than ``maxsize`` connections will be used at
a time. When no free connections are available, the call will block
until a connection has been released. This is a useful side effect for
particular multithreaded situations where one does not want to use more
than maxsize connections per host to prevent flooding.
:param headers:
Headers to include with all requests, unless other headers are given
explicitly.
"""
scheme = 'http'
def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1,
block=False, headers=None):
self.host = host
self.port = port
self.strict = strict
self.timeout = timeout
self.pool = Queue(maxsize)
self.block = block
self.headers = headers or {}
# Fill the queue up so that doing get() on it will block properly
for _ in xrange(maxsize):
self.pool.put(None)
# These are mostly for testing and debugging purposes.
self.num_connections = 0
self.num_requests = 0
def _new_conn(self):
"""
Return a fresh :class:`httplib.HTTPConnection`.
"""
self.num_connections += 1
log.info("Starting new HTTP connection (%d): %s" %
(self.num_connections, self.host))
return HTTPConnection(host=self.host, port=self.port)
def _get_conn(self, timeout=None):
"""
Get a connection. Will return a pooled connection if one is available.
If no connections are available and :prop:`.block` is ``False``, then a
fresh connection is returned.
:param timeout:
Seconds to wait before giving up and raising
:class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and
:prop:`.block` is ``True``.
"""
conn = None
try:
conn = self.pool.get(block=self.block, timeout=timeout)
# If this is a persistent connection, check if it got disconnected
if conn and conn.sock and select([conn.sock], [], [], 0.0)[0]:
# Either data is buffered (bad), or the connection is dropped.
log.info("Resetting dropped connection: %s" % self.host)
conn.close()
except Empty:
if self.block:
raise EmptyPoolError("Pool reached maximum size and no more "
"connections are allowed.")
pass # Oh well, we'll create a new connection then
return conn or self._new_conn()
def _put_conn(self, conn):
"""
Put a connection back into the pool.
:param conn:
Connection object for the current host and port as returned by
:meth:`._new_conn` or :meth:`._get_conn`.
If the pool is already full, the connection is discarded because we
exceeded maxsize. If connections are discarded frequently, then maxsize
should be increased.
"""
try:
self.pool.put(conn, block=False)
except Full:
# This should never happen if self.block == True
log.warning("HttpConnectionPool is full, discarding connection: %s"
% self.host)
def _make_request(self, conn, method, url, timeout=_Default,
**httplib_request_kw):
"""
Perform a request on a given httplib connection object taken from our
pool.
"""
self.num_requests += 1
if timeout is _Default:
timeout = self.timeout
conn.request(method, url, **httplib_request_kw)
conn.sock.settimeout(timeout)
httplib_response = conn.getresponse()
log.debug("\"%s %s %s\" %s %s" %
(method, url,
conn._http_vsn_str, # pylint: disable-msg=W0212
httplib_response.status, httplib_response.length))
return httplib_response
def is_same_host(self, url):
"""
Check if the given ``url`` is a member of the same host as this
conncetion pool.
"""
# TODO: Add optional support for socket.gethostbyname checking.
return (url.startswith('/') or
get_host(url) == (self.scheme, self.host, self.port))
def urlopen(self, method, url, body=None, headers=None, retries=3,
redirect=True, assert_same_host=True, timeout=_Default,
pool_timeout=None, release_conn=None, **response_kw):
"""
Get a connection from the pool and perform an HTTP request. This is the
lowest level call for making a request, so you'll need to specify all
the raw details.
.. note::
More commonly, it's appropriate to use a convenience method provided
by :class:`.RequestMethods`, such as :meth:`.request`.
:param method:
HTTP request method (such as GET, POST, PUT, etc.)
:param body:
Data to send in the request body (useful for creating
POST requests, see HTTPConnectionPool.post_url for
more convenience).
:param headers:
Dictionary of custom headers to send, such as User-Agent,
If-None-Match, etc. If None, pool headers are used. If provided,
these headers completely replace any pool-specific headers.
:param retries:
Number of retries to allow before raising a MaxRetryError exception.
:param redirect:
Automatically handle redirects (status codes 301, 302, 303, 307),
each redirect counts as a retry.
:param assert_same_host:
If ``True``, will make sure that the host of the pool requests is
consistent else will raise HostChangedError. When False, you can
use the pool on an HTTP proxy and request foreign hosts.
:param timeout:
If specified, overrides the default timeout for this one request.
:param pool_timeout:
If set and the pool is set to block=True, then this method will
block for ``pool_timeout`` seconds and raise EmptyPoolError if no
connection is available within the time period.
:param release_conn:
If False, then the urlopen call will not release the connection
back into the pool once a response is received. This is useful if
you're not preloading the response's content immediately. You will
need to call ``r.release_conn()`` on the response ``r`` to return
the connection back into the pool. If None, it takes the value of
``response_kw.get('preload_content', True)``.
:param \**response_kw:
Additional parameters are passed to
:meth:`urllib3.response.HTTPResponse.from_httplib`
"""
if headers is None:
headers = self.headers
if retries < 0:
raise MaxRetryError("Max retries exceeded for url: %s" % url)
if release_conn is None:
release_conn = response_kw.get('preload_content', True)
# Check host
if assert_same_host and not self.is_same_host(url):
host = "%s://%s" % (self.scheme, self.host)
if self.port:
host = "%s:%d" % (host, self.port)
raise HostChangedError("Connection pool with host '%s' tried to "
"open a foreign host: %s" % (host, url))
conn = None
try:
# Request a connection from the queue
# (Could raise SocketError: Bad file descriptor)
conn = self._get_conn(timeout=pool_timeout)
# Make the request on the httplib connection object
httplib_response = self._make_request(conn, method, url,
timeout=timeout,
body=body, headers=headers)
# If we're going to release the connection in ``finally:``, then
# the request doesn't need to know about the connection. Otherwise
# it will also try to release it and we'll have a double-release
# mess.
response_conn = not release_conn and conn
# Import httplib's response into our own wrapper object
response = HTTPResponse.from_httplib(httplib_response,
pool=self,
connection=response_conn,
**response_kw)
# else:
# The connection will be put back into the pool when
# ``response.release_conn()`` is called (implicitly by
# ``response.read()``)
except (SocketTimeout, Empty), e:
# Timed out either by socket or queue
raise TimeoutError("Request timed out after %f seconds" %
self.timeout)
except (BaseSSLError), e:
# SSL certificate error
raise SSLError(e)
except (HTTPException, SocketError), e:
# Connection broken, discard. It will be replaced next _get_conn().
conn = None
finally:
if conn and release_conn:
# Put the connection back to be reused
self._put_conn(conn)
if not conn:
log.warn("Retrying (%d attempts remain) after connection "
"broken by '%r': %s" % (retries, e, url))
return self.urlopen(method, url, body, headers, retries - 1,
redirect, assert_same_host) # Try again
# Handle redirection
if (redirect and
response.status in [301, 302, 303, 307] and
'location' in response.headers): # Redirect, retry
log.info("Redirecting %s -> %s" %
(url, response.headers.get('location')))
return self.urlopen(method, response.headers.get('location'), body,
headers, retries - 1, redirect,
assert_same_host)
return response
class HTTPSConnectionPool(HTTPConnectionPool):
"""
Same as :class:`.HTTPConnectionPool`, but HTTPS.
When Python is compiled with the :mod:`ssl` module, then
:class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates,
instead of :class:httplib.HTTPSConnection`.
The ``key_file``, ``cert_file``, ``cert_reqs``, and ``ca_certs`` parameters
are only used if :mod:`ssl` is available and are fed into
:meth:`ssl.wrap_socket` to upgrade the connection socket into an SSL socket.
"""
scheme = 'https'
def __init__(self, host, port=None,
strict=False, timeout=None, maxsize=1,
block=False, headers=None,
key_file=None, cert_file=None,
cert_reqs='CERT_NONE', ca_certs=None):
super(HTTPSConnectionPool, self).__init__(host, port,
strict, timeout, maxsize,
block, headers)
self.key_file = key_file
self.cert_file = cert_file
self.cert_reqs = cert_reqs
self.ca_certs = ca_certs
def _new_conn(self):
"""
Return a fresh :class:`httplib.HTTPSConnection`.
"""
self.num_connections += 1
log.info("Starting new HTTPS connection (%d): %s"
% (self.num_connections, self.host))
if not ssl:
return HTTPSConnection(host=self.host, port=self.port)
connection = VerifiedHTTPSConnection(host=self.host, port=self.port)
connection.set_cert(key_file=self.key_file, cert_file=self.cert_file,
cert_reqs=self.cert_reqs, ca_certs=self.ca_certs)
return connection
## Helpers
def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
basic_auth=None):
"""
Shortcuts for generating request headers.
:param keep_alive:
If ``True``, adds 'connection: keep-alive' header.
:param accept_encoding:
Can be a boolean, list, or string.
``True`` translates to 'gzip,deflate'.
List will get joined by comma.
String will be used as provided.
:param user_agent:
String representing the user-agent you want, such as
"python-urllib3/0.6"
:param basic_auth:
Colon-separated username:password string for 'authorization: basic ...'
auth header.
Example: ::
>>> make_headers(keep_alive=True, user_agent="Batman/1.0")
{'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
>>> make_headers(accept_encoding=True)
{'accept-encoding': 'gzip,deflate'}
"""
headers = {}
if accept_encoding:
if isinstance(accept_encoding, str):
pass
elif isinstance(accept_encoding, list):
accept_encoding = ','.join(accept_encoding)
else:
accept_encoding = 'gzip,deflate'
headers['accept-encoding'] = accept_encoding
if user_agent:
headers['user-agent'] = user_agent
if keep_alive:
headers['connection'] = 'keep-alive'
if basic_auth:
headers['authorization'] = 'Basic ' + \
basic_auth.encode('base64').strip()
return headers
def get_host(url):
"""
Given a url, return its scheme, host and port (None if it's not there).
For example: ::
>>> get_host('http://google.com/mail/')
('http', 'google.com', None)
>>> get_host('google.com:80')
('http', 'google.com', 80)
"""
# This code is actually similar to urlparse.urlsplit, but much
# simplified for our needs.
port = None
scheme = 'http'
if '//' in url:
scheme, url = url.split('://', 1)
if '/' in url:
url, _path = url.split('/', 1)
if ':' in url:
url, port = url.split(':', 1)
port = int(port)
return scheme, url, port
def connection_from_url(url, **kw):
"""
Given a url, return an :class:`.ConnectionPool` instance of its host.
This is a shortcut for not having to parse out the scheme, host, and port
of the url before creating an :class:`.ConnectionPool` instance.
:param url:
Absolute URL string that must include the scheme. Port is optional.
:param \**kw:
Passes additional parameters to the constructor of the appropriate
:class:`.ConnectionPool`. Useful for specifying things like
timeout, maxsize, headers, etc.
Example: ::
>>> conn = connection_from_url('http://google.com/')
>>> r = conn.request('GET', '/')
"""
scheme, host, port = get_host(url)
if scheme == 'https':
return HTTPSConnectionPool(host, port=port, **kw)
else:
return HTTPConnectionPool(host, port=port, **kw)
@@ -0,0 +1,117 @@
# urllib3/contrib/ntlmpool.py
# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
"""
NTLM authenticating pool, contributed by erikcederstran
Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10
"""
import httplib
from logging import getLogger
from ntlm import ntlm
from urllib3 import HTTPSConnectionPool
log = getLogger(__name__)
class NTLMConnectionPool(HTTPSConnectionPool):
"""
Implements an NTLM authentication version of an urllib3 connection pool
"""
scheme = 'https'
def __init__(self, user, pw, authurl, *args, **kwargs):
"""
authurl is a random URL on the server that is protected by NTLM.
user is the Windows user, probably in the DOMAIN\username format.
pw is the password for the user.
"""
super(NTLMConnectionPool, self).__init__(*args, **kwargs)
self.authurl = authurl
self.rawuser = user
user_parts = user.split('\\', 1)
self.domain = user_parts[0].upper()
self.user = user_parts[1]
self.pw = pw
def _new_conn(self):
# Performs the NTLM handshake that secures the connection. The socket
# must be kept open while requests are performed.
self.num_connections += 1
log.debug('Starting NTLM HTTPS connection no. %d: https://%s%s' %
(self.num_connections, self.host, self.authurl))
headers = {}
headers['Connection'] = 'Keep-Alive'
req_header = 'Authorization'
resp_header = 'www-authenticate'
conn = httplib.HTTPSConnection(host=self.host, port=self.port)
# Send negotiation message
headers[req_header] = (
'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser))
log.debug('Request headers: %s' % headers)
conn.request('GET', self.authurl, None, headers)
res = conn.getresponse()
reshdr = dict(res.getheaders())
log.debug('Response status: %s %s' % (res.status, res.reason))
log.debug('Response headers: %s' % reshdr)
log.debug('Response data: %s [...]' % res.read(100))
# Remove the reference to the socket, so that it can not be closed by
# the response object (we want to keep the socket open)
res.fp = None
# Server should respond with a challenge message
auth_header_values = reshdr[resp_header].split(', ')
auth_header_value = None
for s in auth_header_values:
if s[:5] == 'NTLM ':
auth_header_value = s[5:]
if auth_header_value is None:
raise Exception('Unexpected %s response header: %s' %
(resp_header, reshdr[resp_header]))
# Send authentication message
ServerChallenge, NegotiateFlags = \
ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value)
auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge,
self.user,
self.domain,
self.pw,
NegotiateFlags)
headers[req_header] = 'NTLM %s' % auth_msg
log.debug('Request headers: %s' % headers)
conn.request('GET', self.authurl, None, headers)
res = conn.getresponse()
log.debug('Response status: %s %s' % (res.status, res.reason))
log.debug('Response headers: %s' % dict(res.getheaders()))
log.debug('Response data: %s [...]' % res.read()[:100])
if res.status != 200:
if res.status == 401:
raise Exception('Server rejected request: wrong '
'username or password')
raise Exception('Wrong server response: %s %s' %
(res.status, res.reason))
res.fp = None
log.debug('Connection established')
return conn
def urlopen(self, method, url, body=None, headers=None, retries=3,
redirect=True, assert_same_host=True):
if headers is None:
headers = {}
headers['Connection'] = 'Keep-Alive'
return super(NTLMConnectionPool, self).urlopen(method, url, body,
headers, retries,
redirect,
assert_same_host)
+35
View File
@@ -0,0 +1,35 @@
# urllib3/exceptions.py
# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
## Exceptions
class HTTPError(Exception):
"Base exception used by this module."
pass
class SSLError(Exception):
"Raised when SSL certificate fails in an HTTPS connection."
pass
class MaxRetryError(HTTPError):
"Raised when the maximum number of retries is exceeded."
pass
class TimeoutError(HTTPError):
"Raised when a socket timeout occurs."
pass
class HostChangedError(HTTPError):
"Raised when an existing pool gets a request for a foreign host."
pass
class EmptyPoolError(HTTPError):
"Raised when a pool runs out of connections and no more are allowed."
pass
+71
View File
@@ -0,0 +1,71 @@
# urllib3/filepost.py
# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
import codecs
import mimetools
import mimetypes
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO # pylint: disable-msg=W0404
writer = codecs.lookup('utf-8')[3]
def get_content_type(filename):
return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
def encode_multipart_formdata(fields, boundary=None):
"""
Encode a dictionary of ``fields`` using the multipart/form-data mime format.
:param fields:
Dictionary of fields. The key is treated as the field name, and the
value as the body of the form-data. If the value is a tuple of two
elements, then the first element is treated as the filename of the
form-data section.
:param boundary:
If not specified, then a random boundary will be generated using
:func:`mimetools.choose_boundary`.
"""
body = StringIO()
if boundary is None:
boundary = mimetools.choose_boundary()
for fieldname, value in fields.iteritems():
body.write('--%s\r\n' % (boundary))
if isinstance(value, tuple):
filename, data = value
writer(body).write('Content-Disposition: form-data; name="%s"; '
'filename="%s"\r\n' % (fieldname, filename))
body.write('Content-Type: %s\r\n\r\n' %
(get_content_type(filename)))
else:
data = value
writer(body).write('Content-Disposition: form-data; name="%s"\r\n'
% (fieldname))
body.write('Content-Type: text/plain\r\n\r\n')
if isinstance(data, int):
data = str(data) # Backwards compatibility
if isinstance(data, unicode):
writer(body).write(data)
else:
body.write(data)
body.write('\r\n')
body.write('--%s--\r\n' % (boundary))
content_type = 'multipart/form-data; boundary=%s' % boundary
return body.getvalue(), content_type
+128
View File
@@ -0,0 +1,128 @@
# urllib3/poolmanager.py
# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
from ._collections import RecentlyUsedContainer
from .connectionpool import (
HTTPConnectionPool, HTTPSConnectionPool,
get_host, connection_from_url,
)
__all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url']
from .request import RequestMethods
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
pool_classes_by_scheme = {
'http': HTTPConnectionPool,
'https': HTTPSConnectionPool,
}
port_by_scheme = {
'http': 80,
'https': 443,
}
class PoolManager(RequestMethods):
"""
Allows for arbitrary requests while transparently keeping track of
necessary connection pools for you.
:param num_pools:
Number of connection pools to cache before discarding the least recently
used pool.
:param \**connection_pool_kw:
Additional parameters are used to create fresh
:class:`urllib3.connectionpool.ConnectionPool` instances.
Example: ::
>>> manager = PoolManager()
>>> r = manager.urlopen("http://google.com/")
>>> r = manager.urlopen("http://google.com/mail")
>>> r = manager.urlopen("http://yahoo.com/")
>>> len(r.pools)
2
"""
# TODO: Make sure there are no memory leaks here.
def __init__(self, num_pools=10, **connection_pool_kw):
self.connection_pool_kw = connection_pool_kw
self.pools = RecentlyUsedContainer(num_pools)
def connection_from_host(self, host, port=80, scheme='http'):
"""
Get a :class:`ConnectionPool` based on the host, port, and scheme.
Note that an appropriate ``port`` value is required here to normalize
connection pools in our container most effectively.
"""
pool_key = (scheme, host, port)
# If the scheme, host, or port doesn't match existing open connections,
# open a new ConnectionPool.
pool = self.pools.get(pool_key)
if pool:
return pool
# Make a fresh ConnectionPool of the desired type
pool_cls = pool_classes_by_scheme[scheme]
pool = pool_cls(host, port, **self.connection_pool_kw)
self.pools[pool_key] = pool
return pool
def connection_from_url(self, url):
"""
Similar to :func:`urllib3.connectionpool.connection_from_url` but
doesn't pass any additional parameters to the
:class:`urllib3.connectionpool.ConnectionPool` constructor.
Additional parameters are taken from the :class:`.PoolManager`
constructor.
"""
scheme, host, port = get_host(url)
port = port or port_by_scheme.get(scheme, 80)
return self.connection_from_host(host, port=port, scheme=scheme)
def urlopen(self, method, url, **kw):
"""
Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`.
``url`` must be absolute, such that an appropriate
:class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.
"""
conn = self.connection_from_url(url)
return conn.urlopen(method, url, assert_same_host=False, **kw)
class ProxyManager(object):
"""
Given a ConnectionPool to a proxy, the ProxyManager's ``urlopen`` method
will make requests to any url through the defined proxy.
"""
def __init__(self, proxy_pool):
self.proxy_pool = proxy_pool
def urlopen(self, method, url, **kw):
"Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
kw['assert_same_host'] = False
return self.proxy_pool.urlopen(method, url, **kw)
def proxy_from_url(url, **pool_kw):
proxy_pool = connection_from_url(url, **pool_kw)
return ProxyManager(proxy_pool)
+145
View File
@@ -0,0 +1,145 @@
# urllib3/request.py
# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
from urllib import urlencode
from .filepost import encode_multipart_formdata
__all__ = ['RequestMethods']
class RequestMethods(object):
"""
Convenience mixin for classes who implement a :meth:`urlopen` method, such
as :class:`~urllib3.connectionpool.HTTPConnectionPool` and
:class:`~urllib3.poolmanager.PoolManager`.
Provides behavior for making common types of HTTP request methods and
decides which type of request field encoding to use.
Specifically,
:meth:`.request_encode_url` is for sending requests whose fields are encoded
in the URL (such as GET, HEAD, DELETE).
:meth:`.request_encode_body` is for sending requests whose fields are
encoded in the *body* of the request using multipart or www-orm-urlencoded
(such as for POST, PUT, PATCH).
:meth:`.request` is for making any kind of request, it will look up the
appropriate encoding format and use one of the above two methods to make
the request.
"""
_encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS'])
_encode_body_methods = set(['PATCH', 'POST', 'PUT', 'TRACE'])
def urlopen(self, method, url, body=None, headers=None,
encode_multipart=True, multipart_boundary=None,
**kw):
raise NotImplemented("Classes extending RequestMethods must implement "
"their own ``urlopen`` method.")
def request(self, method, url, fields=None, headers=None, **urlopen_kw):
"""
Make a request using :meth:`urlopen` with the appropriate encoding of
``fields`` based on the ``method`` used.
This is a convenience method that requires the least amount of manual
effort. It can be used in most situations, while still having the option
to drop down to more specific methods when necessary, such as
:meth:`request_encode_url`, :meth:`request_encode_body`,
or even the lowest level :meth:`urlopen`.
"""
method = method.upper()
if method in self._encode_url_methods:
return self.request_encode_url(method, url, fields=fields,
headers=headers,
**urlopen_kw)
else:
return self.request_encode_body(method, url, fields=fields,
headers=headers,
**urlopen_kw)
def request_encode_url(self, method, url, fields=None, **urlopen_kw):
"""
Make a request using :meth:`urlopen` with the ``fields`` encoded in
the url. This is useful for request methods like GET, HEAD, DELETE, etc.
"""
if fields:
url += '?' + urlencode(fields)
return self.urlopen(method, url, **urlopen_kw)
def request_encode_body(self, method, url, fields=None, headers=None,
encode_multipart=True, multipart_boundary=None,
**urlopen_kw):
"""
Make a request using :meth:`urlopen` with the ``fields`` encoded in
the body. This is useful for request methods like POST, PUT, PATCH, etc.
When ``encode_multipart=True`` (default), then
:meth:`urllib3.filepost.encode_multipart_formdata` is used to encode the
payload with the appropriate content type. Otherwise
:meth:`urllib.urlencode` is used with the
'application/x-www-form-urlencoded' content type.
Multipart encoding must be used when posting files, and it's reasonably
safe to use it in other times too. However, it may break request signing,
such as with OAuth.
Supports an optional ``fields`` parameter of key/value strings AND
key/filetuple. A filetuple is a (filename, data) tuple. For example: ::
fields = {
'foo': 'bar',
'fakefile': ('foofile.txt', 'contents of foofile'),
'realfile': ('barfile.txt', open('realfile').read()),
'nonamefile': ('contents of nonamefile field'),
}
When uploading a file, providing a filename (the first parameter of the
tuple) is optional but recommended to best mimick behavior of browsers.
Note that if ``headers`` are supplied, the 'Content-Type' header will be
overwritten because it depends on the dynamic random boundary string
which is used to compose the body of the request. The random boundary
string can be explicitly set with the ``multipart_boundary`` parameter.
"""
if encode_multipart:
body, content_type = encode_multipart_formdata(fields or {},
boundary=multipart_boundary)
else:
body, content_type = (urlencode(fields or {}),
'application/x-www-form-urlencoded')
headers = headers or {}
headers.update({'Content-Type': content_type})
return self.urlopen(method, url, body=body, headers=headers,
**urlopen_kw)
# Deprecated:
def get_url(self, url, fields=None, **urlopen_kw):
"""
.. deprecated:: 1.0
Use :meth:`request` instead.
"""
return self.request_encode_url('GET', url, fields=fields,
**urlopen_kw)
def post_url(self, url, fields=None, headers=None, **urlopen_kw):
"""
.. deprecated:: 1.0
Use :meth:`request` instead.
"""
return self.request_encode_body('POST', url, fields=fields,
headers=headers,
**urlopen_kw)
+181
View File
@@ -0,0 +1,181 @@
# urllib3/response.py
# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
import gzip
import logging
import zlib
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO # pylint: disable-msg=W0404
from .exceptions import HTTPError
log = logging.getLogger(__name__)
def decode_gzip(data):
gzipper = gzip.GzipFile(fileobj=StringIO(data))
return gzipper.read()
def decode_deflate(data):
try:
return zlib.decompress(data)
except zlib.error:
return zlib.decompress(data, -zlib.MAX_WBITS)
class HTTPResponse(object):
"""
HTTP Response container.
Backwards-compatible to httplib's HTTPResponse but the response ``body`` is
loaded and decoded on-demand when the ``data`` property is accessed.
Extra parameters for behaviour not present in httplib.HTTPResponse:
:param preload_content:
If True, the response's body will be preloaded during construction.
:param decode_content:
If True, attempts to decode specific content-encoding's based on headers
(like 'gzip' and 'deflate') will be skipped and raw data will be used
instead.
:param original_response:
When this HTTPResponse wrapper is generated from an httplib.HTTPResponse
object, it's convenient to include the original for debug purposes. It's
otherwise unused.
"""
CONTENT_DECODERS = {
'gzip': decode_gzip,
'deflate': decode_deflate,
}
def __init__(self, body='', headers=None, status=0, version=0, reason=None,
strict=0, preload_content=True, decode_content=True,
original_response=None, pool=None, connection=None):
self.headers = headers or {}
self.status = status
self.version = version
self.reason = reason
self.strict = strict
self._decode_content = decode_content
self._body = None
self._fp = None
self._original_response = original_response
self._pool = pool
self._connection = connection
if hasattr(body, 'read'):
self._fp = body
if preload_content:
self._body = self.read(decode_content=decode_content)
def release_conn(self):
if not self._pool or not self._connection:
return
self._pool._put_conn(self._connection)
self._connection = None
@property
def data(self):
# For backwords-compat with earlier urllib3 0.4 and earlier.
if self._body:
return self._body
if self._fp:
return self.read(decode_content=self._decode_content,
cache_content=True)
def read(self, amt=None, decode_content=True, cache_content=False):
"""
Similar to :meth:`httplib.HTTPResponse.read`, but with two additional
parameters: ``decode_content`` and ``cache_content``.
:param amt:
How much of the content to read. If specified, decoding and caching
is skipped because we can't decode partial content nor does it make
sense to cache partial content as the full response.
:param decode_content:
If True, will attempt to decode the body based on the
'content-encoding' header. (Overridden if ``amt`` is set.)
:param cache_content:
If True, will save the returned data such that the same result is
returned despite of the state of the underlying file object. This
is useful if you want the ``.data`` property to continue working
after having ``.read()`` the file object. (Overridden if ``amt`` is
set.)
"""
content_encoding = self.headers.get('content-encoding')
decoder = self.CONTENT_DECODERS.get(content_encoding)
data = self._fp and self._fp.read(amt)
try:
if amt:
return data
if not decode_content or not decoder:
if cache_content:
self._body = data
return data
try:
data = decoder(data)
except IOError:
raise HTTPError("Received response with content-encoding: %s, but "
"failed to decode it." % content_encoding)
if cache_content:
self._body = data
return data
finally:
if self._original_response and self._original_response.isclosed():
self.release_conn()
@staticmethod
def from_httplib(r, **response_kw):
"""
Given an :class:`httplib.HTTPResponse` instance ``r``, return a
corresponding :class:`urllib3.response.HTTPResponse` object.
Remaining parameters are passed to the HTTPResponse constructor, along
with ``original_response=r``.
"""
return HTTPResponse(body=r,
headers=dict(r.getheaders()),
status=r.status,
version=r.version,
reason=r.reason,
strict=r.strict,
original_response=r,
**response_kw)
# Backwards-compatibility methods for httplib.HTTPResponse
def getheaders(self):
return self.headers
def getheader(self, name, default=None):
return self.headers.get(name, default)