diff --git a/requests/packages/urllib3/__init__.py b/requests/packages/urllib3/__init__.py new file mode 100644 index 00000000..c7ade88e --- /dev/null +++ b/requests/packages/urllib3/__init__.py @@ -0,0 +1,22 @@ +""" +urllib3 - Thread-safe connection pooling and re-using. +""" + +from connectionpool import ( + connection_from_url, + get_host, + HTTPConnectionPool, + HTTPSConnectionPool, + make_headers) +# Possible exceptions +from connectionpool import ( + HTTPError, + MaxRetryError, + SSLError, + TimeoutError) +from filepost import encode_multipart_formdata + + +__author__ = "Andrey Petrov (andrey.petrov@shazow.net)" +__license__ = "MIT" +__version__ = "$Rev$" diff --git a/requests/packages/urllib3/connectionpool.py b/requests/packages/urllib3/connectionpool.py new file mode 100644 index 00000000..552ccd98 --- /dev/null +++ b/requests/packages/urllib3/connectionpool.py @@ -0,0 +1,544 @@ +import gzip +import zlib +import logging +import socket + + +from urllib import urlencode +from httplib import HTTPConnection, HTTPSConnection, HTTPException +from Queue import Queue, Empty, Full +from select import select +from socket import error as SocketError, timeout as SocketTimeout + + +try: + import ssl + BaseSSLError = ssl.SSLError +except ImportError, e: + ssl = None + BaseSSLError = None + +try: + from cStringIO import StringIO +except ImportError, e: + from StringIO import StringIO + + +from filepost import encode_multipart_formdata + + +log = logging.getLogger(__name__) + + +## Exceptions + +class HTTPError(Exception): + "Base exception used by this module." + pass + + +class SSLError(Exception): + "Raised when SSL certificate fails in an HTTPS connection." + pass + + +class MaxRetryError(HTTPError): + "Raised when the maximum number of retries is exceeded." + pass + + +class TimeoutError(HTTPError): + "Raised when a socket timeout occurs." + pass + + +class HostChangedError(HTTPError): + "Raised when an existing pool gets a request for a foreign host." + pass + + +## Response objects + +class HTTPResponse(object): + """ + HTTP Response container. + + Similar to httplib's HTTPResponse but the data is pre-loaded. + """ + + def __init__(self, data='', headers=None, status=0, version=0, reason=None, + strict=0): + self.data = data + self.headers = headers or {} + self.status = status + self.version = version + self.reason = reason + self.strict = strict + + @staticmethod + def from_httplib(r): + """ + Given an httplib.HTTPResponse instance, return a corresponding + urllib3.HTTPResponse object. + + NOTE: This method will perform r.read() which will have side effects + on the original http.HTTPResponse object. + """ + tmp_data = r.read() + try: + if r.getheader('content-encoding') == 'gzip': + log.debug("Received response with content-encoding: gzip, " + "decompressing with gzip.") + + gzipper = gzip.GzipFile(fileobj=StringIO(tmp_data)) + data = gzipper.read() + elif r.getheader('content-encoding') == 'deflate': + log.debug("Received response with content-encoding: deflate, " + "decompressing with zlib.") + try: + data = zlib.decompress(tmp_data) + except zlib.error, e: + data = zlib.decompress(tmp_data, -zlib.MAX_WBITS) + else: + data = tmp_data + + except IOError: + raise HTTPError("Received response with content-encoding: %s, " + "but failed to decompress it." % + (r.getheader('content-encoding'))) + + return HTTPResponse(data=data, + headers=dict(r.getheaders()), + status=r.status, + version=r.version, + reason=r.reason, + strict=r.strict) + + # Backwards-compatibility methods for httplib.HTTPResponse + def getheaders(self): + return self.headers + + def getheader(self, name, default=None): + return self.headers.get(name, default) + + +## Connection objects + +class VerifiedHTTPSConnection(HTTPSConnection): + """ + Based on httplib.HTTPSConnection but wraps the socket with + SSL certification. + """ + + def set_cert(self, key_file=None, cert_file=None, cert_reqs='CERT_NONE', + ca_certs=None): + ssl_req_scheme = { + 'CERT_NONE': ssl.CERT_NONE, + 'CERT_OPTIONAL': ssl.CERT_OPTIONAL, + 'CERT_REQUIRED': ssl.CERT_REQUIRED + } + + self.key_file = key_file + self.cert_file = cert_file + self.cert_reqs = ssl_req_scheme.get(cert_reqs) or ssl.CERT_NONE + self.ca_certs = ca_certs + + def connect(self): + # Add certificate verification + sock = socket.create_connection((self.host, self.port), self.timeout) + + # Wrap socket using verification with the root certs in + # trusted_root_certs + self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, + cert_reqs=self.cert_reqs, + ca_certs=self.ca_certs) + + +## Pool objects + +class HTTPConnectionPool(object): + """ + Thread-safe connection pool for one host. + + host + Host used for this HTTP Connection (e.g. "localhost"), passed into + httplib.HTTPConnection() + + port + Port used for this HTTP Connection (None is equivalent to 80), passed + into httplib.HTTPConnection() + + strict + Causes BadStatusLine to be raised if the status line can't be parsed + as a valid HTTP/1.0 or 1.1 status line, passed into + httplib.HTTPConnection() + + timeout + Socket timeout for each individual connection, can be a float. None + disables timeout. + + maxsize + Number of connections to save that can be reused. More than 1 is useful + in multithreaded situations. If ``block`` is set to false, more + connections will be created but they will not be saved once they've + been used. + + block + If set to True, no more than ``maxsize`` connections will be used at + a time. When no free connections are available, the call will block + until a connection has been released. This is a useful side effect for + particular multithreaded situations where one does not want to use more + than maxsize connections per host to prevent flooding. + + headers + Headers to include with all requests, unless other headers are given + explicitly. + """ + + scheme = 'http' + + def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, + block=False, headers=None): + self.host = host + self.port = port + self.strict = strict + self.timeout = timeout + self.pool = Queue(maxsize) + self.block = block + self.headers = headers or {} + + # Fill the queue up so that doing get() on it will block properly + [self.pool.put(None) for i in xrange(maxsize)] + + self.num_connections = 0 + self.num_requests = 0 + + def _new_conn(self): + """ + Return a fresh HTTPConnection. + """ + self.num_connections += 1 + log.info("Starting new HTTP connection (%d): %s" % + (self.num_connections, self.host)) + return HTTPConnection(host=self.host, port=self.port) + + def _get_conn(self, timeout=None): + """ + Get a connection. Will return a pooled connection if one is available. + Otherwise, a fresh connection is returned. + """ + conn = None + try: + conn = self.pool.get(block=self.block, timeout=timeout) + + # If this is a persistent connection, check if it got disconnected + if conn and conn.sock and select([conn.sock], [], [], 0.0)[0]: + # Either data is buffered (bad), or the connection is dropped. + log.warning("Connection pool detected dropped " + "connection, resetting: %s" % self.host) + conn.close() + + except Empty, e: + pass # Oh well, we'll create a new connection then + + return conn or self._new_conn() + + def _put_conn(self, conn): + """ + Put a connection back into the pool. + If the pool is already full, the connection is discarded because we + exceeded maxsize. If connections are discarded frequently, then maxsize + should be increased. + """ + try: + self.pool.put(conn, block=False) + except Full, e: + # This should never happen if self.block == True + log.warning("HttpConnectionPool is full, discarding connection: %s" + % self.host) + + def is_same_host(self, url): + return (url.startswith('/') or + get_host(url) == (self.scheme, self.host, self.port)) + + def urlopen(self, method, url, body=None, headers=None, retries=3, + redirect=True, assert_same_host=True): + """ + Get a connection from the pool and perform an HTTP request. + + method + HTTP request method (such as GET, POST, PUT, etc.) + + body + Data to send in the request body (useful for creating + POST requests, see HTTPConnectionPool.post_url for + more convenience). + + headers + Dictionary of custom headers to send, such as User-Agent, + If-None-Match, etc. If None, pool headers are used. If provided, + these headers completely replace any pool-specific headers. + + retries + Number of retries to allow before raising + a MaxRetryError exception. + + redirect + Automatically handle redirects (status codes 301, 302, 303, 307), + each redirect counts as a retry. + + assert_same_host + If True, will make sure that the host of the pool requests is + consistent else will raise HostChangedError. When False, you can + use the pool on an HTTP proxy and request foreign hosts. + """ + if headers == None: + headers = self.headers + + if retries < 0: + raise MaxRetryError("Max retries exceeded for url: %s" % url) + + # Check host + if assert_same_host and not self.is_same_host(url): + host = "%s://%s" % (self.scheme, self.host) + if self.port: + host = "%s:%d" % (host, self.port) + + raise HostChangedError("Connection pool with host '%s' tried to " + "open a foreign host: %s" % (host, url)) + + try: + # Request a connection from the queue + conn = self._get_conn() + + # Make the request + self.num_requests += 1 + conn.request(method, url, body=body, headers=headers) + conn.sock.settimeout(self.timeout) + httplib_response = conn.getresponse() + log.debug("\"%s %s %s\" %s %s" % + (method, url, conn._http_vsn_str, + httplib_response.status, httplib_response.length)) + + # from_httplib will perform httplib_response.read() which will have + # the side effect of letting us use this connection for another + # request. + response = HTTPResponse.from_httplib(httplib_response) + + # Put the connection back to be reused + self._put_conn(conn) + + except (SocketTimeout, Empty), e: + # Timed out either by socket or queue + raise TimeoutError("Request timed out after %f seconds" % + self.timeout) + + except (BaseSSLError), e: + # SSL certificate error + raise SSLError(e) + + except (HTTPException, SocketError), e: + log.warn("Retrying (%d attempts remain) after connection " + "broken by '%r': %s" % (retries, e, url)) + return self.urlopen(method, url, body, headers, retries - 1, + redirect, assert_same_host) # Try again + + # Handle redirection + if (redirect and + response.status in [301, 302, 303, 307] and + 'location' in response.headers): # Redirect, retry + log.info("Redirecting %s -> %s" % + (url, response.headers.get('location'))) + return self.urlopen(method, response.headers.get('location'), body, + headers, retries - 1, redirect, + assert_same_host) + + return response + + def get_url(self, url, fields=None, headers=None, retries=3, + redirect=True): + """ + Wrapper for performing GET with urlopen (see urlopen for more details). + + Supports an optional ``fields`` dictionary parameter key/value strings. + If provided, they will be added to the url. + """ + if fields: + url += '?' + urlencode(fields) + return self.urlopen('GET', url, headers=headers, retries=retries, + redirect=redirect) + + def post_url(self, url, fields=None, headers=None, retries=3, + redirect=True, encode_multipart=True): + """ + Wrapper for performing POST with urlopen (see urlopen + for more details). + + Supports an optional ``fields`` parameter of key/value strings AND + key/filetuple. A filetuple is a (filename, data) tuple. For example: + + fields = { + 'foo': 'bar', + 'foofile': ('foofile.txt', 'contents of foofile'), + } + + If encode_multipart=True (default), then + ``urllib3.filepost.encode_multipart_formdata`` is used to encode the + payload with the appropriate content type. Otherwise + ``urllib.urlencode`` is used with 'application/x-www-form-urlencoded' + content type. + + Multipart encoding must be used when posting files, and it's reasonably + safe to use it other times too. It may break request signing, such as + OAuth. + + NOTE: If ``headers`` are supplied, the 'Content-Type' value will be + overwritten because it depends on the dynamic random boundary string + which is used to compose the body of the request. + """ + if encode_multipart: + body, content_type = encode_multipart_formdata(fields or {}) + else: + body, content_type = ( + urlencode(fields or {}), + 'application/x-www-form-urlencoded') + + headers = headers or {} + headers.update({'Content-Type': content_type}) + + return self.urlopen('POST', url, body, headers=headers, + retries=retries, redirect=redirect) + + +class HTTPSConnectionPool(HTTPConnectionPool): + """ + Same as HTTPConnectionPool, but HTTPS. + """ + + scheme = 'https' + + def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, + block=False, headers=None, key_file=None, + cert_file=None, cert_reqs='CERT_NONE', ca_certs=None): + self.host = host + self.port = port + self.strict = strict + self.timeout = timeout + self.pool = Queue(maxsize) + self.block = block + self.headers = headers or {} + + self.key_file = key_file + self.cert_file = cert_file + self.cert_reqs = cert_reqs + self.ca_certs = ca_certs + + # Fill the queue up so that doing get() on it will block properly + [self.pool.put(None) for i in xrange(maxsize)] + + self.num_connections = 0 + self.num_requests = 0 + + def _new_conn(self): + """ + Return a fresh HTTPSConnection. + """ + self.num_connections += 1 + log.info("Starting new HTTPS connection (%d): %s" + % (self.num_connections, self.host)) + + if not ssl: + return HTTPSConnection(host=self.host, port=self.port) + + connection = VerifiedHTTPSConnection(host=self.host, port=self.port) + connection.set_cert(key_file=self.key_file, cert_file=self.cert_file, + cert_reqs=self.cert_reqs, ca_certs=self.ca_certs) + return connection + + +## Helpers + +def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, + basic_auth=None): + """ + Shortcuts for generating request headers. + + keep_alive + If true, adds 'connection: keep-alive' header. + + accept_encoding + Can be a boolean, list, or string. + True translates to 'gzip,deflate'. + List will get joined by comma. + String will be used as provided. + + user_agent + String representing the user-agent you want, such as + "python-urllib3/0.6" + + basic_auth + Colon-separated username:password string for 'authorization: basic ...' + auth header. + """ + headers = {} + if accept_encoding: + if isinstance(accept_encoding, str): + pass + elif isinstance(accept_encoding, list): + accept_encoding = ','.join(accept_encoding) + else: + accept_encoding = 'gzip,deflate' + headers['accept-encoding'] = accept_encoding + + if user_agent: + headers['user-agent'] = user_agent + + if keep_alive: + headers['connection'] = 'keep-alive' + + if basic_auth: + headers['authorization'] = 'Basic ' + \ + basic_auth.encode('base64').strip() + + return headers + + +def get_host(url): + """ + Given a url, return its scheme, host and port (None if it's not there). + + For example: + >>> get_host('http://google.com/mail/') + http, google.com, None + >>> get_host('google.com:80') + http, google.com, 80 + """ + # This code is actually similar to urlparse.urlsplit, but much + # simplified for our needs. + port = None + scheme = 'http' + if '//' in url: + scheme, url = url.split('://', 1) + if '/' in url: + url, path = url.split('/', 1) + if ':' in url: + url, port = url.split(':', 1) + port = int(port) + return scheme, url, port + + +def connection_from_url(url, **kw): + """ + Given a url, return an HTTP(S)ConnectionPool instance of its host. + + This is a shortcut for not having to determine the host of the url + before creating an HTTP(S)ConnectionPool instance. + + Passes on whatever kw arguments to the constructor of + HTTP(S)ConnectionPool. (e.g. timeout, maxsize, block) + """ + scheme, host, port = get_host(url) + if scheme == 'https': + return HTTPSConnectionPool(host, port=port, **kw) + else: + return HTTPConnectionPool(host, port=port, **kw) diff --git a/requests/packages/urllib3/contrib/__init__.py b/requests/packages/urllib3/contrib/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/requests/packages/urllib3/contrib/ntlmpool.py b/requests/packages/urllib3/contrib/ntlmpool.py new file mode 100644 index 00000000..a4642fac --- /dev/null +++ b/requests/packages/urllib3/contrib/ntlmpool.py @@ -0,0 +1,111 @@ +""" +NTLM authenticating pool, contributed by erikcederstran + +Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10 +""" + +import httplib +from logging import getLogger +from ntlm import ntlm + +from urllib3 import HTTPSConnectionPool + + +log = getLogger(__name__) + + +class NTLMConnectionPool(HTTPSConnectionPool): + """ + Implements an NTLM authentication version of an urllib3 connection pool + """ + + scheme = 'https' + + def __init__(self, user, pw, authurl, *args, **kwargs): + """ + authurl is a random URL on the server that is protected by NTLM. + user is the Windows user, probably in the DOMAIN\username format. + pw is the password for the user. + """ + super(NTLMConnectionPool, self).__init__(*args, **kwargs) + self.authurl = authurl + self.rawuser = user + user_parts = user.split('\\', 1) + self.domain = user_parts[0].upper() + self.user = user_parts[1] + self.pw = pw + + def _new_conn(self): + # Performs the NTLM handshake that secures the connection. The socket + # must be kept open while requests are performed. + self.num_connections += 1 + log.debug('Starting NTLM HTTPS connection no. %d: https://%s%s' % + (self.num_connections, self.host, self.authurl)) + + headers = {} + headers['Connection'] = 'Keep-Alive' + req_header = 'Authorization' + resp_header = 'www-authenticate' + + conn = httplib.HTTPSConnection(host=self.host, port=self.port) + + # Send negotiation message + headers[req_header] = ( + 'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser)) + log.debug('Request headers: %s' % headers) + conn.request('GET', self.authurl, None, headers) + res = conn.getresponse() + reshdr = dict(res.getheaders()) + log.debug('Response status: %s %s' % (res.status, res.reason)) + log.debug('Response headers: %s' % reshdr) + log.debug('Response data: %s [...]' % res.read(100)) + + # Remove the reference to the socket, so that it can not be closed by + # the response object (we want to keep the socket open) + res.fp = None + + # Server should respond with a challenge message + auth_header_values = reshdr[resp_header].split(', ') + auth_header_value = None + for s in auth_header_values: + if s[:5] == 'NTLM ': + auth_header_value = s[5:] + if auth_header_value is None: + raise Exception('Unexpected %s response header: %s' % + (resp_header, reshdr[resp_header])) + + # Send authentication message + ServerChallenge, NegotiateFlags = \ + ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value) + auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge, + self.user, + self.domain, + self.pw, + NegotiateFlags) + headers[req_header] = 'NTLM %s' % auth_msg + log.debug('Request headers: %s' % headers) + conn.request('GET', self.authurl, None, headers) + res = conn.getresponse() + log.debug('Response status: %s %s' % (res.status, res.reason)) + log.debug('Response headers: %s' % dict(res.getheaders())) + log.debug('Response data: %s [...]' % res.read()[:100]) + if res.status != 200: + if res.status == 401: + raise Exception('Server rejected request: wrong ' + 'username or password') + raise Exception('Wrong server response: %s %s' % + (res.status, res.reason)) + + res.fp = None + log.debug('Connection established') + return conn + + def urlopen(self, method, url, body=None, headers=None, retries=3, + redirect=True, assert_same_host=True): + if headers is None: + headers = {} + headers['Connection'] = 'Keep-Alive' + return super(NTLMConnectionPool, self).urlopen(method, url, body, + headers, retries, + redirect, + assert_same_host) diff --git a/requests/packages/urllib3/filepost.py b/requests/packages/urllib3/filepost.py new file mode 100644 index 00000000..181822ba --- /dev/null +++ b/requests/packages/urllib3/filepost.py @@ -0,0 +1,50 @@ +import codecs +import mimetools +import mimetypes +try: + from cStringIO import StringIO +except: + from StringIO import StringIO + + +writer = codecs.lookup('utf-8')[3] + + +def get_content_type(filename): + return mimetypes.guess_type(filename)[0] or 'application/octet-stream' + + +def encode_multipart_formdata(fields): + body = StringIO() + BOUNDARY = mimetools.choose_boundary() + + for fieldname, value in fields.iteritems(): + body.write('--%s\r\n' % (BOUNDARY)) + + if isinstance(value, tuple): + filename, data = value + writer(body).write('Content-Disposition: form-data; name="%s"; ' + 'filename="%s"\r\n' % (fieldname, filename)) + body.write('Content-Type: %s\r\n\r\n' % + (get_content_type(filename))) + else: + data = value + writer(body).write('Content-Disposition: form-data; name="%s"\r\n' + % (fieldname)) + body.write('Content-Type: text/plain\r\n\r\n') + + if isinstance(data, int): + data = str(data) # Backwards compatibility + + if isinstance(data, unicode): + writer(body).write(data) + else: + body.write(data) + + body.write('\r\n') + + body.write('--%s--\r\n' % (BOUNDARY)) + + content_type = 'multipart/form-data; boundary=%s' % BOUNDARY + + return body.getvalue(), content_type