From 73635df1b2680e8b4ac7babc0ab0bc004fbb21b7 Mon Sep 17 00:00:00 2001 From: Ian Ross and Ian Cordasco Date: Fri, 19 Jul 2013 15:13:28 -0500 Subject: [PATCH 01/28] Start work on sending cookies back On 401's the cookies received aren't sent back to the server. See: #1336 --- requests/auth.py | 11 +++++++++-- test_requests.py | 16 ++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/requests/auth.py b/requests/auth.py index fab05cf3..665b1db3 100644 --- a/requests/auth.py +++ b/requests/auth.py @@ -18,7 +18,6 @@ from base64 import b64encode from .compat import urlparse, str from .utils import parse_dict_header - log = logging.getLogger(__name__) CONTENT_TYPE_FORM_URLENCODED = 'application/x-www-form-urlencoded' @@ -146,6 +145,7 @@ class HTTPDigestAuth(AuthBase): def handle_401(self, r, **kwargs): """Takes the given response and tries digest-auth, if needed.""" + from .models import PreparedRequest num_401_calls = getattr(self, 'num_401_calls', 1) s_auth = r.headers.get('www-authenticate', '') @@ -159,10 +159,17 @@ class HTTPDigestAuth(AuthBase): # to allow our new request to reuse the same one. r.content r.raw.release_conn() + prepared_request = PreparedRequest() + prepared_request.url = r.request.url + prepared_request.body = r.request.body + prepared_request.headers = r.request.headers.copy() + prepared_request.hooks = r.request.hooks + prepared_request.prepare_cookies(r.cookies) r.request.headers['Authorization'] = self.build_digest_header(r.request.method, r.request.url) - _r = r.connection.send(r.request, **kwargs) + _r = r.connection.send(prepared_request, **kwargs) _r.history.append(r) + _r.request = prepared_request return _r diff --git a/test_requests.py b/test_requests.py index 27d5e676..756bc879 100755 --- a/test_requests.py +++ b/test_requests.py @@ -289,6 +289,22 @@ class RequestsTestCase(unittest.TestCase): r = s.get(url) self.assertEqual(r.status_code, 200) + def test_DIGEST_AUTH_RETURNS_COOKIE(self): + url = httpbin('digest-auth', 'auth', 'user', 'pass') + auth = HTTPDigestAuth('user', 'pass') + r = requests.get(url) + assert r.cookies['fake'] == 'fake_value' + + r = requests.get(url, auth=auth) + assert r.status_code == 200 + + def test_DIGEST_AUTH_SETS_SESSION_COOKIES(self): + url = httpbin('digest-auth', 'auth', 'user', 'pass') + auth = HTTPDigestAuth('user', 'pass') + s = requests.Session() + s.get(url, auth=auth) + assert s.cookies['fake'] == 'fake_value' + def test_DIGEST_STREAM(self): auth = HTTPDigestAuth('user', 'pass') From c25a72ea248358641dd9048f0062d83625cc703b Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Fri, 19 Jul 2013 15:55:02 -0500 Subject: [PATCH 02/28] Make the regular tests pass I broke Digest Auth completely --- requests/auth.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/requests/auth.py b/requests/auth.py index 665b1db3..43b3beca 100644 --- a/requests/auth.py +++ b/requests/auth.py @@ -159,17 +159,19 @@ class HTTPDigestAuth(AuthBase): # to allow our new request to reuse the same one. r.content r.raw.release_conn() - prepared_request = PreparedRequest() - prepared_request.url = r.request.url - prepared_request.body = r.request.body - prepared_request.headers = r.request.headers.copy() - prepared_request.hooks = r.request.hooks - prepared_request.prepare_cookies(r.cookies) + prep = PreparedRequest() + prep.method = r.request.method + prep.url = r.request.url + prep.body = r.request.body + prep.headers = r.request.headers.copy() + prep.hooks = r.request.hooks + prep.prepare_cookies(r.cookies) - r.request.headers['Authorization'] = self.build_digest_header(r.request.method, r.request.url) - _r = r.connection.send(prepared_request, **kwargs) + prep.headers['Authorization'] = self.build_digest_header( + prep.method, prep.url) + _r = r.connection.send(prep, **kwargs) _r.history.append(r) - _r.request = prepared_request + _r.request = prep return _r From 972089826e830670cdaec774bb4eb8a3f349b661 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Fri, 26 Jul 2013 10:50:19 -0500 Subject: [PATCH 03/28] Fixed finally. Also requires updated httpbin.org --- requests/sessions.py | 4 ++++ test_requests.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/requests/sessions.py b/requests/sessions.py index 664465d8..ca7e2170 100644 --- a/requests/sessions.py +++ b/requests/sessions.py @@ -449,6 +449,10 @@ class Session(SessionRedirectMixin): r = dispatch_hook('response', hooks, r, **kwargs) # Persist cookies + if r.history: + # If the hooks create history then we want those cookies too + for resp in r.history: + extract_cookies_to_jar(self.cookies, resp.request, resp.raw) extract_cookies_to_jar(self.cookies, request, r.raw) # Redirect resolving generator. diff --git a/test_requests.py b/test_requests.py index 756bc879..1c581066 100755 --- a/test_requests.py +++ b/test_requests.py @@ -285,7 +285,7 @@ class RequestsTestCase(unittest.TestCase): self.assertEqual(r.status_code, 401) s = requests.session() - s.auth = auth + s.auth = HTTPDigestAuth('user', 'pass') r = s.get(url) self.assertEqual(r.status_code, 200) From 286ddb672d3ff39b1807402a7d9ca569a86516d6 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Sat, 27 Jul 2013 23:08:46 -0400 Subject: [PATCH 04/28] Take advantage of the new copy method --- requests/auth.py | 8 +------- requests/sessions.py | 5 +---- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/requests/auth.py b/requests/auth.py index 43b3beca..81a3d937 100644 --- a/requests/auth.py +++ b/requests/auth.py @@ -145,7 +145,6 @@ class HTTPDigestAuth(AuthBase): def handle_401(self, r, **kwargs): """Takes the given response and tries digest-auth, if needed.""" - from .models import PreparedRequest num_401_calls = getattr(self, 'num_401_calls', 1) s_auth = r.headers.get('www-authenticate', '') @@ -159,12 +158,7 @@ class HTTPDigestAuth(AuthBase): # to allow our new request to reuse the same one. r.content r.raw.release_conn() - prep = PreparedRequest() - prep.method = r.request.method - prep.url = r.request.url - prep.body = r.request.body - prep.headers = r.request.headers.copy() - prep.hooks = r.request.hooks + prep = r.request.copy() prep.prepare_cookies(r.cookies) prep.headers['Authorization'] = self.build_digest_header( diff --git a/requests/sessions.py b/requests/sessions.py index ca7e2170..11a90f23 100644 --- a/requests/sessions.py +++ b/requests/sessions.py @@ -74,10 +74,7 @@ class SessionRedirectMixin(object): # ((resp.status_code is codes.see_other)) while (('location' in resp.headers and resp.status_code in REDIRECT_STATI)): - prepared_request = PreparedRequest() - prepared_request.body = req.body - prepared_request.headers = req.headers.copy() - prepared_request.hooks = req.hooks + prepared_request = req.copy() resp.content # Consume socket so it can be released From 840540b6b1f07ef87faab73392c03fbef0dcc9fe Mon Sep 17 00:00:00 2001 From: Cory Benfield Date: Sun, 28 Jul 2013 07:16:06 +0100 Subject: [PATCH 05/28] Proxy urls should have explicit schemes. --- requests/adapters.py | 4 ++-- requests/utils.py | 26 +++++++++++--------------- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/requests/adapters.py b/requests/adapters.py index 7e65c78e..e6cb50ed 100644 --- a/requests/adapters.py +++ b/requests/adapters.py @@ -15,7 +15,7 @@ from .packages.urllib3.poolmanager import PoolManager, ProxyManager from .packages.urllib3.response import HTTPResponse from .compat import urlparse, basestring, urldefrag, unquote from .utils import (DEFAULT_CA_BUNDLE_PATH, get_encoding_from_headers, - prepend_scheme_if_needed, get_auth_from_url) + except_on_missing_scheme, get_auth_from_url) from .structures import CaseInsensitiveDict from .packages.urllib3.exceptions import MaxRetryError from .packages.urllib3.exceptions import TimeoutError @@ -193,7 +193,7 @@ class HTTPAdapter(BaseAdapter): proxy = proxies.get(urlparse(url.lower()).scheme) if proxy: - proxy = prepend_scheme_if_needed(proxy, urlparse(url.lower()).scheme) + except_on_missing_scheme(proxy) conn = ProxyManager(self.poolmanager.connection_from_url(proxy)) else: conn = self.poolmanager.connection_from_url(url.lower()) diff --git a/requests/utils.py b/requests/utils.py index 37aa19e4..618a5a19 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -25,6 +25,7 @@ from .compat import quote, urlparse, bytes, str, OrderedDict, urlunparse from .compat import getproxies, proxy_bypass from .cookies import RequestsCookieJar, cookiejar_from_dict from .structures import CaseInsensitiveDict +from .exceptions import MissingSchema _hush_pyflakes = (RequestsCookieJar,) @@ -393,18 +394,18 @@ def get_environ_proxies(url): # we're getting isn't in the no_proxy list. no_proxy = get_proxy('no_proxy') netloc = urlparse(url).netloc - + if no_proxy: # We need to check whether we match here. We need to see if we match # the end of the netloc, both with and without the port. no_proxy = no_proxy.split(',') - + for host in no_proxy: if netloc.endswith(host) or netloc.split(':')[0].endswith(host): # The URL does match something in no_proxy, so we don't want # to apply the proxies on this URL. return {} - + # If the system proxy settings indicate that this URL should be bypassed, # don't proxy. if proxy_bypass(netloc): @@ -414,7 +415,7 @@ def get_environ_proxies(url): # anywhere that no_proxy applies to, and the system settings don't require # bypassing the proxy for the current URL. return getproxies() - + def default_user_agent(): """Return a string representing the default user agent.""" @@ -524,18 +525,13 @@ def guess_json_utf(data): return None -def prepend_scheme_if_needed(url, new_scheme): - '''Given a URL that may or may not have a scheme, prepend the given scheme. - Does not replace a present scheme with the one provided as an argument.''' - scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme) +def except_on_missing_scheme(url): + """Given a URL, raise a MissingSchema exception if the scheme is missing. + """ + scheme, netloc, path, params, query, fragment = urlparse(url) - # urlparse is a finicky beast, and sometimes decides that there isn't a - # netloc present. Assume that it's being over-cautious, and switch netloc - # and path if urlparse decided there was no netloc. - if not netloc: - netloc, path = path, netloc - - return urlunparse((scheme, netloc, path, params, query, fragment)) + if not scheme: + raise MissingSchema('Proxy URLs must have explicit schemes.') def get_auth_from_url(url): From f5775594ccc0e2947b965c1fcf40747301a93974 Mon Sep 17 00:00:00 2001 From: Cory Benfield Date: Tue, 30 Apr 2013 20:45:37 +0100 Subject: [PATCH 06/28] Header keys should be native strings. This commit follows a discussion on IRC. For more information, see the Pull Request associated with it. --- requests/models.py | 4 ++-- requests/utils.py | 31 +++++++++++++++++++++++++------ test_requests.py | 16 +++++++++++++--- 3 files changed, 40 insertions(+), 11 deletions(-) diff --git a/requests/models.py b/requests/models.py index 24391533..62cd0f56 100644 --- a/requests/models.py +++ b/requests/models.py @@ -23,7 +23,7 @@ from .exceptions import HTTPError, RequestException, MissingSchema, InvalidURL from .utils import ( guess_filename, get_auth_from_url, requote_uri, stream_decode_response_unicode, to_key_val_list, parse_header_links, - iter_slices, guess_json_utf, super_len) + iter_slices, guess_json_utf, super_len, to_native_string) from .compat import ( cookielib, urlunparse, urlsplit, urlencode, str, bytes, StringIO, is_py2, chardet, json, builtin_str, basestring) @@ -346,7 +346,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): """Prepares the given HTTP headers.""" if headers: - headers = dict((name.encode('ascii'), value) for name, value in headers.items()) + headers = dict((to_native_string(name), value) for name, value in headers.items()) self.headers = CaseInsensitiveDict(headers) else: self.headers = CaseInsensitiveDict() diff --git a/requests/utils.py b/requests/utils.py index 37aa19e4..e592c8bb 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -21,8 +21,8 @@ from netrc import netrc, NetrcParseError from . import __version__ from . import certs from .compat import parse_http_list as _parse_list_header -from .compat import quote, urlparse, bytes, str, OrderedDict, urlunparse -from .compat import getproxies, proxy_bypass +from .compat import (quote, urlparse, bytes, str, OrderedDict, urlunparse, + is_py2, is_py3, builtin_str, getproxies, proxy_bypass) from .cookies import RequestsCookieJar, cookiejar_from_dict from .structures import CaseInsensitiveDict @@ -393,18 +393,18 @@ def get_environ_proxies(url): # we're getting isn't in the no_proxy list. no_proxy = get_proxy('no_proxy') netloc = urlparse(url).netloc - + if no_proxy: # We need to check whether we match here. We need to see if we match # the end of the netloc, both with and without the port. no_proxy = no_proxy.split(',') - + for host in no_proxy: if netloc.endswith(host) or netloc.split(':')[0].endswith(host): # The URL does match something in no_proxy, so we don't want # to apply the proxies on this URL. return {} - + # If the system proxy settings indicate that this URL should be bypassed, # don't proxy. if proxy_bypass(netloc): @@ -414,7 +414,7 @@ def get_environ_proxies(url): # anywhere that no_proxy applies to, and the system settings don't require # bypassing the proxy for the current URL. return getproxies() - + def default_user_agent(): """Return a string representing the default user agent.""" @@ -546,3 +546,22 @@ def get_auth_from_url(url): return (parsed.username, parsed.password) else: return ('', '') + + +def to_native_string(string, encoding='ascii'): + """ + Given a string object, regardless of type, returns a representation of that + string in the native string type, encoding and decoding where necessary. + This assumes ASCII unless told otherwise. + """ + out = None + + if isinstance(string, builtin_str): + out = string + else: + if is_py2: + out = string.encode(encoding) + else: + out = string.decode(encoding) + + return out diff --git a/test_requests.py b/test_requests.py index 27d5e676..ca05f4af 100755 --- a/test_requests.py +++ b/test_requests.py @@ -199,13 +199,13 @@ class RequestsTestCase(unittest.TestCase): assert r.json()['cookies']['foo'] == 'bar' # Make sure the session cj is still the custom one assert s.cookies is cj - + def test_requests_in_history_are_not_overridden(self): resp = requests.get(httpbin('redirect/3')) urls = [r.url for r in resp.history] req_urls = [r.request.url for r in resp.history] self.assertEquals(urls, req_urls) - + def test_user_agent_transfers(self): heads = { @@ -263,7 +263,7 @@ class RequestsTestCase(unittest.TestCase): self.assertEqual(r.status_code, 401) s = requests.session() - + # Should use netrc and work. r = s.get(url) self.assertEqual(r.status_code, 200) @@ -640,6 +640,16 @@ class RequestsTestCase(unittest.TestCase): r = requests.Request('GET', url).prepare() self.assertEqual(r.url, url) + def test_header_keys_are_native(self): + headers = {u'unicode': 'blah', 'byte'.encode('ascii'): 'blah'} + r = requests.Request('GET', httpbin('get'), headers=headers) + p = r.prepare() + + # This is testing that they are builtin strings. A bit weird, but there + # we go. + self.assertTrue('unicode' in p.headers.keys()) + self.assertTrue('byte' in p.headers.keys()) + class TestCaseInsensitiveDict(unittest.TestCase): From ec2859c0ded048c8630f4561d9556117fa301794 Mon Sep 17 00:00:00 2001 From: Cory Benfield Date: Wed, 1 May 2013 20:15:37 +0100 Subject: [PATCH 07/28] Modify new CID tests to use native string keys. --- test_requests.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/test_requests.py b/test_requests.py index ca05f4af..85c22ee5 100755 --- a/test_requests.py +++ b/test_requests.py @@ -557,17 +557,16 @@ class RequestsTestCase(unittest.TestCase): s.headers.update({'accept': 'application/json'}) r = s.get(httpbin('get')) headers = r.request.headers - # ASCII encode because of key comparison changes in py3 self.assertEqual( - headers['accept'.encode('ascii')], + headers['accept'], 'application/json' ) self.assertEqual( - headers['Accept'.encode('ascii')], + headers['Accept'], 'application/json' ) self.assertEqual( - headers['ACCEPT'.encode('ascii')], + headers['ACCEPT'], 'application/json' ) From 7de6b1e1626b72ae473def49ccc1764d11323167 Mon Sep 17 00:00:00 2001 From: Cory Benfield Date: Wed, 1 May 2013 21:49:46 +0100 Subject: [PATCH 08/28] Instantiate the CID directly. No need to do this the slow way now. Thanks to Colin (@cdunklau) for the idea. --- requests/models.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requests/models.py b/requests/models.py index 62cd0f56..10fa1f0f 100644 --- a/requests/models.py +++ b/requests/models.py @@ -346,8 +346,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): """Prepares the given HTTP headers.""" if headers: - headers = dict((to_native_string(name), value) for name, value in headers.items()) - self.headers = CaseInsensitiveDict(headers) + self.headers = CaseInsensitiveDict((to_native_string(name), value) for name, value in headers.items()) else: self.headers = CaseInsensitiveDict() From 5a26241287162ff592c81dbfbbd013e309c85c05 Mon Sep 17 00:00:00 2001 From: Cory Benfield Date: Fri, 2 Aug 2013 12:29:23 +0100 Subject: [PATCH 09/28] Better percent-escape exception. --- requests/utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/requests/utils.py b/requests/utils.py index 347f2be1..00da2669 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -25,7 +25,7 @@ from .compat import (quote, urlparse, bytes, str, OrderedDict, urlunparse, is_py2, is_py3, builtin_str, getproxies, proxy_bypass) from .cookies import RequestsCookieJar, cookiejar_from_dict from .structures import CaseInsensitiveDict -from .exceptions import MissingSchema +from .exceptions import MissingSchema, InvalidURL _hush_pyflakes = (RequestsCookieJar,) @@ -363,7 +363,11 @@ def unquote_unreserved(uri): for i in range(1, len(parts)): h = parts[i][0:2] if len(h) == 2 and h.isalnum(): - c = chr(int(h, 16)) + try: + c = chr(int(h, 16)) + except ValueError: + raise InvalidURL("Invalid percent-escape sequence: '%s'" % h) + if c in UNRESERVED_SET: parts[i] = c + parts[i][2:] else: From d06dabab59b1b9f07289d8a8cd533a3adedbfaba Mon Sep 17 00:00:00 2001 From: ms4py Date: Mon, 4 Mar 2013 09:20:24 +0100 Subject: [PATCH 10/28] Update urllib3 to 14b8945 --- requests/packages/urllib3/connectionpool.py | 76 +++++++++++--- requests/packages/urllib3/poolmanager.py | 105 +++++++++++++++----- requests/packages/urllib3/response.py | 2 +- 3 files changed, 142 insertions(+), 41 deletions(-) diff --git a/requests/packages/urllib3/connectionpool.py b/requests/packages/urllib3/connectionpool.py index 030eae89..93c0b4b1 100644 --- a/requests/packages/urllib3/connectionpool.py +++ b/requests/packages/urllib3/connectionpool.py @@ -101,6 +101,12 @@ class VerifiedHTTPSConnection(HTTPSConnection): resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) resolved_ssl_version = resolve_ssl_version(self.ssl_version) + if self._tunnel_host: + self.sock = sock + # Calls self._set_hostport(), so self.host is + # self._tunnel_host below. + self._tunnel() + # Wrap socket using verification with the root certs in # trusted_root_certs self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, @@ -174,12 +180,20 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param headers: Headers to include with all requests, unless other headers are given explicitly. + + :param _proxy: + Parsed proxy URL, should not be used directly, instead, see + :class:`urllib3.connectionpool.ProxyManager`" + + :param _proxy_headers: + A dictionary with proxy headers, should not be used directly, + instead, see :class:`urllib3.connectionpool.ProxyManager`" """ scheme = 'http' def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, - block=False, headers=None): + block=False, headers=None, _proxy=None, _proxy_headers=None): ConnectionPool.__init__(self, host, port) RequestMethods.__init__(self, headers) @@ -188,6 +202,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): self.pool = self.QueueCls(maxsize) self.block = block + self.proxy = _proxy + self.proxy_headers = _proxy_headers or {} + # Fill the queue up so that doing get() on it will block properly for _ in xrange(maxsize): self.pool.put(None) @@ -526,13 +543,14 @@ class HTTPSConnectionPool(HTTPConnectionPool): def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, block=False, headers=None, + _proxy=None, _proxy_headers=None, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, ssl_version=None, assert_hostname=None, assert_fingerprint=None): HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, - block, headers) + block, headers, _proxy, _proxy_headers) self.key_file = key_file self.cert_file = cert_file self.cert_reqs = cert_reqs @@ -541,6 +559,34 @@ class HTTPSConnectionPool(HTTPConnectionPool): self.assert_hostname = assert_hostname self.assert_fingerprint = assert_fingerprint + def _prepare_conn(self, connection): + """ + Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket` + and establish the tunnel if proxy is used. + """ + + if isinstance(connection, VerifiedHTTPSConnection): + connection.set_cert(key_file=self.key_file, + cert_file=self.cert_file, + cert_reqs=self.cert_reqs, + ca_certs=self.ca_certs, + assert_hostname=self.assert_hostname, + assert_fingerprint=self.assert_fingerprint) + connection.ssl_version = self.ssl_version + + if self.proxy is not None: + # Python 2.7+ + try: + set_tunnel = connection.set_tunnel + except AttributeError: # Platform-specific: Python 2.6 + set_tunnel = connection._set_tunnel + set_tunnel(self.host, self.port, self.proxy_headers) + # Establish tunnel connection early, because otherwise httplib + # would improperly set Host: header to proxy's IP:port. + connection.connect() + + return connection + def _new_conn(self): """ Return a fresh :class:`httplib.HTTPSConnection`. @@ -549,26 +595,24 @@ class HTTPSConnectionPool(HTTPConnectionPool): log.info("Starting new HTTPS connection (%d): %s" % (self.num_connections, self.host)) + actual_host = self.host + actual_port = self.port + if self.proxy is not None: + actual_host = self.proxy.host + actual_port = self.proxy.port + if not ssl: # Platform-specific: Python compiled without +ssl if not HTTPSConnection or HTTPSConnection is object: raise SSLError("Can't connect to HTTPS URL because the SSL " "module is not available.") + connection_class = HTTPSConnection + else: + connection_class = VerifiedHTTPSConnection - return HTTPSConnection(host=self.host, - port=self.port, - strict=self.strict) + connection = connection_class(host=actual_host, port=actual_port, + strict=self.strict) - connection = VerifiedHTTPSConnection(host=self.host, - port=self.port, - strict=self.strict) - connection.set_cert(key_file=self.key_file, cert_file=self.cert_file, - cert_reqs=self.cert_reqs, ca_certs=self.ca_certs, - assert_hostname=self.assert_hostname, - assert_fingerprint=self.assert_fingerprint) - - connection.ssl_version = self.ssl_version - - return connection + return self._prepare_conn(connection) def connection_from_url(url, **kw): diff --git a/requests/packages/urllib3/poolmanager.py b/requests/packages/urllib3/poolmanager.py index 66ceb1b4..e7f8667e 100644 --- a/requests/packages/urllib3/poolmanager.py +++ b/requests/packages/urllib3/poolmanager.py @@ -13,7 +13,7 @@ except ImportError: from ._collections import RecentlyUsedContainer from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool -from .connectionpool import connection_from_url, port_by_scheme +from .connectionpool import port_by_scheme from .request import RequestMethods from .util import parse_url @@ -60,6 +60,8 @@ class PoolManager(RequestMethods): """ + proxy = None + def __init__(self, num_pools=10, headers=None, **connection_pool_kw): RequestMethods.__init__(self, headers) self.connection_pool_kw = connection_pool_kw @@ -99,21 +101,23 @@ class PoolManager(RequestMethods): If ``port`` isn't given, it will be derived from the ``scheme`` using ``urllib3.connectionpool.port_by_scheme``. """ + scheme = scheme or 'http' + port = port or port_by_scheme.get(scheme, 80) pool_key = (scheme, host, port) with self.pools.lock: - # If the scheme, host, or port doesn't match existing open connections, - # open a new ConnectionPool. - pool = self.pools.get(pool_key) - if pool: - return pool + # If the scheme, host, or port doesn't match existing open + # connections, open a new ConnectionPool. + pool = self.pools.get(pool_key) + if pool: + return pool - # Make a fresh ConnectionPool of the desired type - pool = self._new_pool(scheme, host, port) - self.pools[pool_key] = pool + # Make a fresh ConnectionPool of the desired type + pool = self._new_pool(scheme, host, port) + self.pools[pool_key] = pool return pool def connection_from_url(self, url): @@ -145,7 +149,10 @@ class PoolManager(RequestMethods): if 'headers' not in kw: kw['headers'] = self.headers - response = conn.urlopen(method, u.request_uri, **kw) + if self.proxy is not None and u.scheme == "http": + response = conn.urlopen(method, url, **kw) + else: + response = conn.urlopen(method, u.request_uri, **kw) redirect_location = redirect and response.get_redirect_location() if not redirect_location: @@ -164,15 +171,59 @@ class PoolManager(RequestMethods): return self.urlopen(method, redirect_location, **kw) -class ProxyManager(RequestMethods): +class ProxyManager(PoolManager): """ - Given a ConnectionPool to a proxy, the ProxyManager's ``urlopen`` method - will make requests to any url through the defined proxy. The ProxyManager - class will automatically set the 'Host' header if it is not provided. + Behaves just like :class:`PoolManager`, but sends all requests through + the defined proxy, using the CONNECT method for HTTPS URLs. + + :param poxy_url: + The URL of the proxy to be used. + + :param proxy_headers: + A dictionary contaning headers that will be sent to the proxy. In case + of HTTP they are being sent with each request, while in the + HTTPS/CONNECT case they are sent only once. Could be used for proxy + authentication. + + Example: + >>> proxy = urllib3.ProxyManager('http://localhost:3128/') + >>> r1 = proxy.request('GET', 'http://google.com/') + >>> r2 = proxy.request('GET', 'http://httpbin.org/') + >>> len(proxy.pools) + 1 + >>> r3 = proxy.request('GET', 'https://httpbin.org/') + >>> r4 = proxy.request('GET', 'https://twitter.com/') + >>> len(proxy.pools) + 3 + """ - def __init__(self, proxy_pool): - self.proxy_pool = proxy_pool + def __init__(self, proxy_url, num_pools=10, headers=None, + proxy_headers=None, **connection_pool_kw): + + if isinstance(proxy_url, HTTPConnectionPool): + proxy_url = '%s://%s:%i' % (proxy_url.scheme, proxy_url.host, + proxy_url.port) + proxy = parse_url(proxy_url) + if not proxy.port: + port = port_by_scheme.get(proxy.scheme, 80) + proxy = proxy._replace(port=port) + self.proxy = proxy + self.proxy_headers = proxy_headers or {} + assert self.proxy.scheme in ("http", "https"), \ + 'Not supported proxy scheme %s' % self.proxy.scheme + connection_pool_kw['_proxy'] = self.proxy + connection_pool_kw['_proxy_headers'] = self.proxy_headers + super(ProxyManager, self).__init__( + num_pools, headers, **connection_pool_kw) + + def connection_from_host(self, host, port=None, scheme='http'): + if scheme == "https": + return super(ProxyManager, self).connection_from_host( + host, port, scheme) + + return super(ProxyManager, self).connection_from_host( + self.proxy.host, self.proxy.port, self.proxy.scheme) def _set_proxy_headers(self, url, headers=None): """ @@ -187,16 +238,22 @@ class ProxyManager(RequestMethods): if headers: headers_.update(headers) - return headers_ - def urlopen(self, method, url, **kw): + def urlopen(self, method, url, redirect=True, **kw): "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute." - kw['assert_same_host'] = False - kw['headers'] = self._set_proxy_headers(url, headers=kw.get('headers')) - return self.proxy_pool.urlopen(method, url, **kw) + u = parse_url(url) + + if u.scheme == "http": + # It's too late to set proxy headers on per-request basis for + # tunnelled HTTPS connections, should use + # constructor's proxy_headers instead. + kw['headers'] = self._set_proxy_headers(url, kw.get('headers', + self.headers)) + kw['headers'].update(self.proxy_headers) + + return super(ProxyManager, self).urlopen(method, url, redirect, **kw) -def proxy_from_url(url, **pool_kw): - proxy_pool = connection_from_url(url, **pool_kw) - return ProxyManager(proxy_pool) +def proxy_from_url(url, **kw): + return ProxyManager(proxy_url=url, **kw) diff --git a/requests/packages/urllib3/response.py b/requests/packages/urllib3/response.py index 007a5ab9..c7f93b82 100644 --- a/requests/packages/urllib3/response.py +++ b/requests/packages/urllib3/response.py @@ -191,7 +191,7 @@ class HTTPResponse(io.IOBase): "failed to decode it." % content_encoding, e) - if flush_decoder and self._decoder: + if flush_decoder and decode_content and self._decoder: buf = self._decoder.decompress(binary_type()) data += buf + self._decoder.flush() From 134699faf9c7cb2d8fcbe505d0385b68b7d7a819 Mon Sep 17 00:00:00 2001 From: ms4py Date: Mon, 4 Mar 2013 09:20:47 +0100 Subject: [PATCH 11/28] Fix for new proxy API. --- requests/adapters.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requests/adapters.py b/requests/adapters.py index e6cb50ed..01cbaf47 100644 --- a/requests/adapters.py +++ b/requests/adapters.py @@ -11,7 +11,7 @@ and maintain connections. import socket from .models import Response -from .packages.urllib3.poolmanager import PoolManager, ProxyManager +from .packages.urllib3.poolmanager import PoolManager, proxy_from_url from .packages.urllib3.response import HTTPResponse from .compat import urlparse, basestring, urldefrag, unquote from .utils import (DEFAULT_CA_BUNDLE_PATH, get_encoding_from_headers, @@ -194,7 +194,7 @@ class HTTPAdapter(BaseAdapter): if proxy: except_on_missing_scheme(proxy) - conn = ProxyManager(self.poolmanager.connection_from_url(proxy)) + conn = proxy_from_url(proxy).connection_from_url(url) else: conn = self.poolmanager.connection_from_url(url.lower()) From c951f0543c86ad7a3f90ead98f581c0af1865abd Mon Sep 17 00:00:00 2001 From: ms4py Date: Mon, 4 Mar 2013 09:39:16 +0100 Subject: [PATCH 12/28] Cache manager for proxy connection. --- requests/adapters.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/requests/adapters.py b/requests/adapters.py index 01cbaf47..5dd00d4a 100644 --- a/requests/adapters.py +++ b/requests/adapters.py @@ -71,6 +71,7 @@ class HTTPAdapter(BaseAdapter): pool_block=DEFAULT_POOLBLOCK): self.max_retries = max_retries self.config = {} + self.proxy_manager = {} super(HTTPAdapter, self).__init__() @@ -194,7 +195,10 @@ class HTTPAdapter(BaseAdapter): if proxy: except_on_missing_scheme(proxy) - conn = proxy_from_url(proxy).connection_from_url(url) + if not proxy in self.proxy_manager: + self.proxy_manager[proxy] = proxy_from_url(proxy) + + conn = self.proxy_manager[proxy].connection_from_url(url) else: conn = self.poolmanager.connection_from_url(url.lower()) From 6a3ca7372f33a5d685ab9fb80a38c1316e15696e Mon Sep 17 00:00:00 2001 From: schlamar Date: Thu, 1 Aug 2013 22:08:22 +0200 Subject: [PATCH 13/28] Added proxy support to feature list. --- README.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/README.rst b/README.rst index 50701321..7c1ec5af 100644 --- a/README.rst +++ b/README.rst @@ -54,6 +54,7 @@ Features - Multipart File Uploads - Connection Timeouts - Thread-safety +- HTTP(S) proxy support Installation From 09d97c112642674c4310ec41fdc3e9d8b6978824 Mon Sep 17 00:00:00 2001 From: schlamar Date: Thu, 1 Aug 2013 22:09:06 +0200 Subject: [PATCH 14/28] Added entry to AUTHORS. --- AUTHORS.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.rst b/AUTHORS.rst index fcab91a3..4f77687d 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -133,3 +133,4 @@ Patches and Suggestions - Kevin Burke - Flavio Curella - David Pursehouse @dpursehouse +- Marc Schlaich @schlamar From a5e5a51fb4e7202910172624bcc732700422a738 Mon Sep 17 00:00:00 2001 From: Cory Benfield Date: Sat, 3 Aug 2013 19:24:23 +0100 Subject: [PATCH 15/28] Catch UnsupportedOperation. --- requests/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requests/models.py b/requests/models.py index ee9c329b..6b4a2994 100644 --- a/requests/models.py +++ b/requests/models.py @@ -11,7 +11,7 @@ import collections import logging import datetime -from io import BytesIO +from io import BytesIO, UnsupportedOperation from .hooks import default_hooks from .structures import CaseInsensitiveDict @@ -385,7 +385,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): try: length = super_len(data) - except (TypeError, AttributeError): + except (TypeError, AttributeError, UnsupportedOperation): length = None if is_stream: From 61617e74ed23d82393509c61aac07bfc5a5ea2e4 Mon Sep 17 00:00:00 2001 From: Paul Matthews Date: Mon, 12 Aug 2013 14:20:25 +0100 Subject: [PATCH 16/28] Raise an error for unsupported hook event * Raises a ValueError for an unsupported hook event --- requests/models.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/requests/models.py b/requests/models.py index 6b4a2994..007a9f1c 100644 --- a/requests/models.py +++ b/requests/models.py @@ -141,6 +141,9 @@ class RequestHooksMixin(object): def register_hook(self, event, hook): """Properly register a hook.""" + if event not in self.hooks: + raise ValueError('Unsupported event specified, with event name "%s"' % (event)) + if isinstance(hook, collections.Callable): self.hooks[event].append(hook) elif hasattr(hook, '__iter__'): From 2e5a6746019140bf2ef3a497c30c17992de36426 Mon Sep 17 00:00:00 2001 From: Cory Benfield Date: Wed, 14 Aug 2013 09:47:44 +0100 Subject: [PATCH 17/28] Add proxy_headers functionality. This brings us in line with urllib3's fancy new magic for sending headers to HTTP(S) proxies. --- requests/adapters.py | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/requests/adapters.py b/requests/adapters.py index 5dd00d4a..750afece 100644 --- a/requests/adapters.py +++ b/requests/adapters.py @@ -195,8 +195,12 @@ class HTTPAdapter(BaseAdapter): if proxy: except_on_missing_scheme(proxy) + proxy_headers = self.proxy_headers(proxy) + if not proxy in self.proxy_manager: - self.proxy_manager[proxy] = proxy_from_url(proxy) + self.proxy_manager[proxy] = proxy_from_url( + proxy, + proxy_headers=proxy_headers) conn = self.proxy_manager[proxy].connection_from_url(url) else: @@ -236,8 +240,9 @@ class HTTPAdapter(BaseAdapter): return url def add_headers(self, request, **kwargs): - """Add any headers needed by the connection. Currently this adds a - Proxy-Authorization header. + """Add any headers needed by the connection. As of v2.0 this does + nothing by default, but is left for overriding by users that subclass + the :class:`HTTPAdapter `. This should not be called from user code, and is only exposed for use when subclassing the @@ -246,12 +251,22 @@ class HTTPAdapter(BaseAdapter): :param request: The :class:`PreparedRequest ` to add headers to. :param kwargs: The keyword arguments from the call to send(). """ - proxies = kwargs.get('proxies', {}) + pass - if proxies is None: - proxies = {} + def proxy_headers(self, proxy): + """Returns a dictionary of the headers to add to any request sent + through a proxy. This works with urllib3 magic to ensure that they are + correctly sent to the proxy, rather than in a tunnelled request if + CONNECT is being used. - proxy = proxies.get(urlparse(request.url).scheme) + This should not be called from user code, and is only exposed for use + when subclassing the + :class:`HTTPAdapter `. + + :param proxies: The url of the proxy being used for this request. + :param kwargs: Optional additional keyword arguments. + """ + headers = {} username, password = get_auth_from_url(proxy) if username and password: @@ -259,8 +274,10 @@ class HTTPAdapter(BaseAdapter): # to decode them. username = unquote(username) password = unquote(password) - request.headers['Proxy-Authorization'] = _basic_auth_str(username, - password) + headers['Proxy-Authorization'] = _basic_auth_str(username, + password) + + return headers def send(self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None): """Sends PreparedRequest object. Returns Response object. @@ -277,7 +294,7 @@ class HTTPAdapter(BaseAdapter): self.cert_verify(conn, request.url, verify, cert) url = self.request_url(request, proxies) - self.add_headers(request, proxies=proxies) + self.add_headers(request) chunked = not (request.body is None or 'Content-Length' in request.headers) From 3bc01eed6f8a112e40ba6fdb79fafef5376c1d4c Mon Sep 17 00:00:00 2001 From: Cory Benfield Date: Sat, 17 Aug 2013 07:17:03 +0100 Subject: [PATCH 18/28] RequestException subclasses IOError. --- requests/exceptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requests/exceptions.py b/requests/exceptions.py index 63f2c9ce..22207e35 100644 --- a/requests/exceptions.py +++ b/requests/exceptions.py @@ -9,7 +9,7 @@ This module contains the set of Requests' exceptions. """ -class RequestException(RuntimeError): +class RequestException(IOError): """There was an ambiguous exception that occurred while handling your request.""" From f44b86e03962b08ebf3b77d980ebeb8f11f6f704 Mon Sep 17 00:00:00 2001 From: Cory Benfield Date: Sat, 17 Aug 2013 07:27:58 +0100 Subject: [PATCH 19/28] Allow non-string objects to be data with files --- requests/models.py | 4 ++++ test_requests.py | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/requests/models.py b/requests/models.py index 6b4a2994..2428a5e4 100644 --- a/requests/models.py +++ b/requests/models.py @@ -106,6 +106,10 @@ class RequestEncodingMixin(object): val = [val] for v in val: if v is not None: + # Don't call str() on bytestrings: in Py3 it all goes wrong. + if not isinstance(v, bytes): + v = str(v) + new_fields.append( (field.decode('utf-8') if isinstance(field, bytes) else field, v.encode('utf-8') if isinstance(v, str) else v)) diff --git a/test_requests.py b/test_requests.py index c9572a67..e20d45a1 100755 --- a/test_requests.py +++ b/test_requests.py @@ -663,6 +663,14 @@ class RequestsTestCase(unittest.TestCase): self.assertTrue('unicode' in p.headers.keys()) self.assertTrue('byte' in p.headers.keys()) + def test_can_send_nonstring_objects_with_files(self): + data = {'a': 0.0} + files = {'b': 'foo'} + r = requests.Request('POST', httpbin('post'), data=data, files=files) + p = r.prepare() + + self.assertTrue('multipart/form-data' in p.headers['Content-Type']) + class TestCaseInsensitiveDict(unittest.TestCase): From 148c1b5c13ddabd77b4f25981aaecdbe5c05bf9d Mon Sep 17 00:00:00 2001 From: Cory Benfield Date: Mon, 19 Aug 2013 13:28:05 +0100 Subject: [PATCH 20/28] Begin work on the 2.0 changelog. --- HISTORY.rst | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/HISTORY.rst b/HISTORY.rst index 693f0177..41f4b9a6 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,47 @@ History ------- +2.0.0 (2013-XX-XX) +++++++++++++++++++ + +**API Changes:** + +- Keys in the Headers dictionary are now native strings on all Python versions, + i.e. bytestrings on Python 2, unicode on Python 3. +- Proxy URLs now *must* have an explicit scheme. A ``MissingSchema`` exception + will be raised if they don't. +- Added new method to ``PreparedRequest`` objects: ``PreparedRequest.copy()``. +- Added new method to ``Session`` objects: ``Session.update_request()``. This + method updates a ``Request`` object with the data (e.g. cookies) stored on + the ``Session``. +- Added new method to ``Session`` objects: ``Session.prepare_request()``. This + method updates and prepares a ``Request`` object, and returns the + corresponding ``PreparedRequest`` object. +- Added new method to ``HTTPAdapter`` objects: ``HTTPAdapter.proxy_headers()``. + This should not be called directly, but improves the subclass interface. +- ``httplib.IncompleteRead`` exceptions caused by incorrect chunked encoding + will now raise a Requests ``ChunkedEncodingError`` instead. +- Invalid percent-escape sequences now cause a Requests ``InvalidURL`` + exception to be raised. +- HTTP 208 no longer uses reason phrase ``"im_used"``. Correctly uses + ``"already_reported"``. +- HTTP 226 reason added (``"im_used"``). + +**Bugfixes:** + +- Vastly improved proxy support, including the CONNECT verb. Special thanks to + the many contributors who worked towards this improvement. +- Chunked encoding fixes. +- Support for mixed case schemes. +- Retrieve environment proxies from more locations. +- Minor cookies fixes. +- Improved streaming behaviour, particularly for compressed data. +- Miscellaneous small Python 3 text encoding bugs. +- ``.netrc`` no longer overrides explicit auth. +- Cookies set by hooks are now correctly persisted on Sessions. +- Fix problem with cookies that specify port numbers in their host field. +- ``BytesIO`` can be used to perform streaming uploads. + 1.2.3 (2013-05-25) ++++++++++++++++++ From f040b2aa5d5e278ce8a474a7dac3cf5533b36071 Mon Sep 17 00:00:00 2001 From: Cory Benfield Date: Fri, 23 Aug 2013 12:37:03 +0100 Subject: [PATCH 21/28] Allow spaces in the no_proxy environ variable. --- requests/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requests/utils.py b/requests/utils.py index 00da2669..450b2bd3 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -402,7 +402,7 @@ def get_environ_proxies(url): if no_proxy: # We need to check whether we match here. We need to see if we match # the end of the netloc, both with and without the port. - no_proxy = no_proxy.split(',') + no_proxy = no_proxy.replace(' ', '').split(',') for host in no_proxy: if netloc.endswith(host) or netloc.split(':')[0].endswith(host): From 1aa765825619be338678ec746abd5d312e6eb377 Mon Sep 17 00:00:00 2001 From: Cory Benfield Date: Sun, 1 Sep 2013 18:10:09 +0100 Subject: [PATCH 22/28] Don't absolute import vendored things. --- requests/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requests/__init__.py b/requests/__init__.py index 1af8d8ed..6a5f283f 100644 --- a/requests/__init__.py +++ b/requests/__init__.py @@ -50,7 +50,7 @@ __copyright__ = 'Copyright 2013 Kenneth Reitz' # Attempt to enable urllib3's SNI support, if possible try: - from requests.packages.urllib3.contrib import pyopenssl + from .packages.urllib3.contrib import pyopenssl pyopenssl.inject_into_urllib3() except ImportError: pass From 88011f4032c3c53c2c989c528c1025339f43daa5 Mon Sep 17 00:00:00 2001 From: Cory Benfield Date: Sun, 1 Sep 2013 18:46:46 +0100 Subject: [PATCH 23/28] Bring release note current to 1st Sept. --- HISTORY.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/HISTORY.rst b/HISTORY.rst index 41f4b9a6..e698965a 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -12,6 +12,7 @@ History i.e. bytestrings on Python 2, unicode on Python 3. - Proxy URLs now *must* have an explicit scheme. A ``MissingSchema`` exception will be raised if they don't. +- ``RequestException`` is now a subclass of ``IOError``, not ``RuntimeError``. - Added new method to ``PreparedRequest`` objects: ``PreparedRequest.copy()``. - Added new method to ``Session`` objects: ``Session.update_request()``. This method updates a ``Request`` object with the data (e.g. cookies) stored on @@ -43,6 +44,8 @@ History - Cookies set by hooks are now correctly persisted on Sessions. - Fix problem with cookies that specify port numbers in their host field. - ``BytesIO`` can be used to perform streaming uploads. +- More generous parsing of the ``no_proxy`` environment variable. +- Non-string objects can be passed in data values alongside files. 1.2.3 (2013-05-25) ++++++++++++++++++ From 61421843bc794ce9f12e6c72d0f0d160df7bdbe7 Mon Sep 17 00:00:00 2001 From: Cory Benfield Date: Fri, 13 Sep 2013 14:10:09 +0100 Subject: [PATCH 24/28] Python defaults arguments are hard. --- requests/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requests/models.py b/requests/models.py index aecc307b..db923004 100644 --- a/requests/models.py +++ b/requests/models.py @@ -193,8 +193,8 @@ class Request(RequestHooksMixin): url=None, headers=None, files=None, - data=dict(), - params=dict(), + data=None, + params=None, auth=None, cookies=None, hooks=None): From 22e31b4b737c2a3b61b3ab4fccd534b2eee65a87 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Fri, 13 Sep 2013 22:29:39 -0500 Subject: [PATCH 25/28] Handle case when WWW-Authenticate returns multiple qops In Digest Access Authentication there are two possible values (four if you count the not-present and both cases) for authentication. We were narrowly handling one of the four cases. Now we handle two. --- requests/auth.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requests/auth.py b/requests/auth.py index 81a3d937..30529e29 100644 --- a/requests/auth.py +++ b/requests/auth.py @@ -105,7 +105,9 @@ class HTTPDigestAuth(AuthBase): A1 = '%s:%s:%s' % (self.username, realm, self.password) A2 = '%s:%s' % (method, path) - if qop == 'auth': + if qop is None: + respdig = KD(hash_utf8(A1), "%s:%s" % (nonce, hash_utf8(A2))) + elif qop == 'auth' or 'auth' in qop.split(','): if nonce == self.last_nonce: self.nonce_count += 1 else: @@ -120,8 +122,6 @@ class HTTPDigestAuth(AuthBase): cnonce = (hashlib.sha1(s).hexdigest()[:16]) noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, hash_utf8(A2)) respdig = KD(hash_utf8(A1), noncebit) - elif qop is None: - respdig = KD(hash_utf8(A1), "%s:%s" % (nonce, hash_utf8(A2))) else: # XXX handle auth-int. return None From 2f39e0e2aa791da13a8358f7a48c0365461029d8 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Tue, 24 Sep 2013 13:59:38 -0400 Subject: [PATCH 26/28] new urllib3 --- requests/packages/urllib3/__init__.py | 2 +- requests/packages/urllib3/connectionpool.py | 199 +++++++++++---- .../packages/urllib3/contrib/pyopenssl.py | 187 +++++++++++++- requests/packages/urllib3/exceptions.py | 30 ++- requests/packages/urllib3/fields.py | 177 +++++++++++++ requests/packages/urllib3/filepost.py | 57 +++-- .../packages/ssl_match_hostname/__init__.py | 67 +++-- requests/packages/urllib3/response.py | 3 +- requests/packages/urllib3/util.py | 235 +++++++++++++++++- 9 files changed, 851 insertions(+), 106 deletions(-) create mode 100644 requests/packages/urllib3/fields.py diff --git a/requests/packages/urllib3/__init__.py b/requests/packages/urllib3/__init__.py index bff80b8e..73071f70 100644 --- a/requests/packages/urllib3/__init__.py +++ b/requests/packages/urllib3/__init__.py @@ -23,7 +23,7 @@ from . import exceptions from .filepost import encode_multipart_formdata from .poolmanager import PoolManager, ProxyManager, proxy_from_url from .response import HTTPResponse -from .util import make_headers, get_host +from .util import make_headers, get_host, Timeout # Set default logging handler to avoid "No handler found" warnings. diff --git a/requests/packages/urllib3/connectionpool.py b/requests/packages/urllib3/connectionpool.py index 93c0b4b1..691d4e23 100644 --- a/requests/packages/urllib3/connectionpool.py +++ b/requests/packages/urllib3/connectionpool.py @@ -4,12 +4,11 @@ # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php -import logging -import socket import errno +import logging from socket import error as SocketError, timeout as SocketTimeout -from .util import resolve_cert_reqs, resolve_ssl_version, assert_fingerprint +import socket try: # Python 3 from http.client import HTTPConnection, HTTPException @@ -22,6 +21,7 @@ try: # Python 3 from queue import LifoQueue, Empty, Full except ImportError: from Queue import LifoQueue, Empty, Full + import Queue as _ # Platform-specific: Windows try: # Compiled with SSL? @@ -44,21 +44,29 @@ except (ImportError, AttributeError): # Platform-specific: No SSL. pass -from .request import RequestMethods -from .response import HTTPResponse -from .util import get_host, is_connection_dropped, ssl_wrap_socket from .exceptions import ( ClosedPoolError, + ConnectTimeoutError, EmptyPoolError, HostChangedError, MaxRetryError, SSLError, - TimeoutError, + ReadTimeoutError, + ProxyError, ) - -from .packages.ssl_match_hostname import match_hostname, CertificateError +from .packages.ssl_match_hostname import CertificateError, match_hostname from .packages import six - +from .request import RequestMethods +from .response import HTTPResponse +from .util import ( + assert_fingerprint, + get_host, + is_connection_dropped, + resolve_cert_reqs, + resolve_ssl_version, + ssl_wrap_socket, + Timeout, +) xrange = six.moves.xrange @@ -96,7 +104,14 @@ class VerifiedHTTPSConnection(HTTPSConnection): def connect(self): # Add certificate verification - sock = socket.create_connection((self.host, self.port), self.timeout) + try: + sock = socket.create_connection( + address=(self.host, self.port), + timeout=self.timeout) + except SocketTimeout: + raise ConnectTimeoutError( + self, "Connection to %s timed out. (connect timeout=%s)" % + (self.host, self.timeout)) resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) resolved_ssl_version = resolve_ssl_version(self.ssl_version) @@ -123,6 +138,7 @@ class VerifiedHTTPSConnection(HTTPSConnection): match_hostname(self.sock.getpeercert(), self.assert_hostname or self.host) + ## Pool objects class ConnectionPool(object): @@ -135,6 +151,9 @@ class ConnectionPool(object): QueueCls = LifoQueue def __init__(self, host, port=None): + # httplib doesn't like it when we include brackets in ipv6 addresses + host = host.strip('[]') + self.host = host self.port = port @@ -142,6 +161,8 @@ class ConnectionPool(object): return '%s(host=%r, port=%r)' % (type(self).__name__, self.host, self.port) +# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252 +_blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK]) class HTTPConnectionPool(ConnectionPool, RequestMethods): """ @@ -160,9 +181,15 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): as a valid HTTP/1.0 or 1.1 status line, passed into :class:`httplib.HTTPConnection`. + .. note:: + Only works in Python 2. This parameter is ignored in Python 3. + :param timeout: - Socket timeout in seconds for each individual connection, can be - a float. None disables timeout. + Socket timeout in seconds for each individual connection. This can + be a float or integer, which sets the timeout for the HTTP request, + or an instance of :class:`urllib3.util.Timeout` which gives you more + fine-grained control over request timeouts. After the constructor has + been parsed, this is always a `urllib3.util.Timeout` object. :param maxsize: Number of connections to save that can be reused. More than 1 is useful @@ -192,13 +219,21 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): scheme = 'http' - def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, - block=False, headers=None, _proxy=None, _proxy_headers=None): + def __init__(self, host, port=None, strict=False, + timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False, + headers=None, _proxy=None, _proxy_headers=None): ConnectionPool.__init__(self, host, port) RequestMethods.__init__(self, headers) self.strict = strict + + # This is for backwards compatibility and can be removed once a timeout + # can only be set to a Timeout object + if not isinstance(timeout, Timeout): + timeout = Timeout.from_float(timeout) + self.timeout = timeout + self.pool = self.QueueCls(maxsize) self.block = block @@ -220,9 +255,14 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): self.num_connections += 1 log.info("Starting new HTTP connection (%d): %s" % (self.num_connections, self.host)) - return HTTPConnection(host=self.host, - port=self.port, - strict=self.strict) + extra_params = {} + if not six.PY3: # Python 2 + extra_params['strict'] = self.strict + + return HTTPConnection(host=self.host, port=self.port, + timeout=self.timeout.connect_timeout, + **extra_params) + def _get_conn(self, timeout=None): """ @@ -283,31 +323,89 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): % self.host) # Connection never got put back into the pool, close it. - conn.close() + if conn: + conn.close() + + def _get_timeout(self, timeout): + """ Helper that always returns a :class:`urllib3.util.Timeout` """ + if timeout is _Default: + return self.timeout.clone() + + if isinstance(timeout, Timeout): + return timeout.clone() + else: + # User passed us an int/float. This is for backwards compatibility, + # can be removed later + return Timeout.from_float(timeout) def _make_request(self, conn, method, url, timeout=_Default, **httplib_request_kw): """ Perform a request on a given httplib connection object taken from our pool. + + :param conn: + a connection from one of our connection pools + + :param timeout: + Socket timeout in seconds for the request. This can be a + float or integer, which will set the same timeout value for + the socket connect and the socket read, or an instance of + :class:`urllib3.util.Timeout`, which gives you more fine-grained + control over your timeouts. """ self.num_requests += 1 - if timeout is _Default: - timeout = self.timeout + timeout_obj = self._get_timeout(timeout) - conn.timeout = timeout # This only does anything in Py26+ - conn.request(method, url, **httplib_request_kw) + try: + timeout_obj.start_connect() + conn.timeout = timeout_obj.connect_timeout + # conn.request() calls httplib.*.request, not the method in + # request.py. It also calls makefile (recv) on the socket + conn.request(method, url, **httplib_request_kw) + except SocketTimeout: + raise ConnectTimeoutError( + self, "Connection to %s timed out. (connect timeout=%s)" % + (self.host, timeout_obj.connect_timeout)) - # Set timeout - sock = getattr(conn, 'sock', False) # AppEngine doesn't have sock attr. - if sock: - sock.settimeout(timeout) + # Reset the timeout for the recv() on the socket + read_timeout = timeout_obj.read_timeout + log.debug("Setting read timeout to %s" % read_timeout) + # App Engine doesn't have a sock attr + if hasattr(conn, 'sock') and \ + read_timeout is not None and \ + read_timeout is not Timeout.DEFAULT_TIMEOUT: + # In Python 3 socket.py will catch EAGAIN and return None when you + # try and read into the file pointer created by http.client, which + # instead raises a BadStatusLine exception. Instead of catching + # the exception and assuming all BadStatusLine exceptions are read + # timeouts, check for a zero timeout before making the request. + if read_timeout == 0: + raise ReadTimeoutError( + self, url, + "Read timed out. (read timeout=%s)" % read_timeout) + conn.sock.settimeout(read_timeout) + + # Receive the response from the server + try: + try: # Python 2.7+, use buffering of HTTP responses + httplib_response = conn.getresponse(buffering=True) + except TypeError: # Python 2.6 and older + httplib_response = conn.getresponse() + except SocketTimeout: + raise ReadTimeoutError( + self, url, "Read timed out. (read timeout=%s)" % read_timeout) + + except SocketError as e: # Platform-specific: Python 2 + # See the above comment about EAGAIN in Python 3. In Python 2 we + # have to specifically catch it and throw the timeout error + if e.errno in _blocking_errnos: + raise ReadTimeoutError( + self, url, + "Read timed out. (read timeout=%s)" % read_timeout) + raise - try: # Python 2.7+, use buffering of HTTP responses - httplib_response = conn.getresponse(buffering=True) - except TypeError: # Python 2.6 and older - httplib_response = conn.getresponse() # AppEngine doesn't have a version attr. http_version = getattr(conn, '_http_vsn_str', 'HTTP/?') @@ -387,7 +485,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param redirect: If True, automatically handle redirects (status codes 301, 302, - 303, 307). Each redirect counts as a retry. + 303, 307, 308). Each redirect counts as a retry. :param assert_same_host: If ``True``, will make sure that the host of the pool requests is @@ -395,8 +493,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): use the pool on an HTTP proxy and request foreign hosts. :param timeout: - If specified, overrides the default timeout for this one request. - It may be a float (in seconds). + If specified, overrides the default timeout for this one + request. It may be a float (in seconds) or an instance of + :class:`urllib3.util.Timeout`. :param pool_timeout: If set and the pool is set to block=True, then this method will @@ -423,9 +522,6 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if retries < 0: raise MaxRetryError(self, url) - if timeout is _Default: - timeout = self.timeout - if release_conn is None: release_conn = response_kw.get('preload_content', True) @@ -461,20 +557,20 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # ``response.release_conn()`` is called (implicitly by # ``response.read()``) - except Empty as e: + except Empty: # Timed out by queue - raise TimeoutError(self, url, - "Request timed out. (pool_timeout=%s)" % - pool_timeout) + raise ReadTimeoutError( + self, url, "Read timed out, no pool connections are available.") - except SocketTimeout as e: + except SocketTimeout: # Timed out by socket - raise TimeoutError(self, url, - "Request timed out. (timeout=%s)" % - timeout) + raise ReadTimeoutError(self, url, "Read timed out.") except BaseSSLError as e: # SSL certificate error + if 'timed out' in str(e) or \ + 'did not complete (read)' in str(e): # Platform-specific: Python 2.6 + raise ReadTimeoutError(self, url, "Read timed out.") raise SSLError(e) except CertificateError as e: @@ -482,6 +578,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise SSLError(e) except (HTTPException, SocketError) as e: + if isinstance(e, SocketError) and self.proxy is not None: + raise ProxyError('Cannot connect to proxy. ' + 'Socket error: %s.' % e) + # Connection broken, discard. It will be replaced next _get_conn(). conn = None # This is necessary so we can access e below @@ -548,8 +648,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): ca_certs=None, ssl_version=None, assert_hostname=None, assert_fingerprint=None): - HTTPConnectionPool.__init__(self, host, port, - strict, timeout, maxsize, + HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, block, headers, _proxy, _proxy_headers) self.key_file = key_file self.cert_file = cert_file @@ -609,8 +708,12 @@ class HTTPSConnectionPool(HTTPConnectionPool): else: connection_class = VerifiedHTTPSConnection + extra_params = {} + if not six.PY3: # Python 2 + extra_params['strict'] = self.strict connection = connection_class(host=actual_host, port=actual_port, - strict=self.strict) + timeout=self.timeout.connect_timeout, + **extra_params) return self._prepare_conn(connection) diff --git a/requests/packages/urllib3/contrib/pyopenssl.py b/requests/packages/urllib3/contrib/pyopenssl.py index 6d0255f6..d43bcd60 100644 --- a/requests/packages/urllib3/contrib/pyopenssl.py +++ b/requests/packages/urllib3/contrib/pyopenssl.py @@ -20,13 +20,13 @@ Now you can use :mod:`urllib3` as you normally would, and it will support SNI when the required modules are installed. ''' -from ndg.httpsclient.ssl_peer_verification import (ServerSSLCertVerification, - SUBJ_ALT_NAME_SUPPORT) +from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT from ndg.httpsclient.subj_alt_name import SubjectAltName import OpenSSL.SSL from pyasn1.codec.der import decoder as der_decoder from socket import _fileobject import ssl +from cStringIO import StringIO from .. import connectionpool from .. import util @@ -99,6 +99,172 @@ def get_subj_alt_name(peer_cert): return dns_name +class fileobject(_fileobject): + + def read(self, size=-1): + # Use max, disallow tiny reads in a loop as they are very inefficient. + # We never leave read() with any leftover data from a new recv() call + # in our internal buffer. + rbufsize = max(self._rbufsize, self.default_bufsize) + # Our use of StringIO rather than lists of string objects returned by + # recv() minimizes memory usage and fragmentation that occurs when + # rbufsize is large compared to the typical return value of recv(). + buf = self._rbuf + buf.seek(0, 2) # seek end + if size < 0: + # Read until EOF + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + while True: + try: + data = self._sock.recv(rbufsize) + except OpenSSL.SSL.WantReadError: + continue + if not data: + break + buf.write(data) + return buf.getvalue() + else: + # Read until size bytes or EOF seen, whichever comes first + buf_len = buf.tell() + if buf_len >= size: + # Already have size bytes in our buffer? Extract and return. + buf.seek(0) + rv = buf.read(size) + self._rbuf = StringIO() + self._rbuf.write(buf.read()) + return rv + + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + while True: + left = size - buf_len + # recv() will malloc the amount of memory given as its + # parameter even though it often returns much less data + # than that. The returned data string is short lived + # as we copy it into a StringIO and free it. This avoids + # fragmentation issues on many platforms. + try: + data = self._sock.recv(left) + except OpenSSL.SSL.WantReadError: + continue + if not data: + break + n = len(data) + if n == size and not buf_len: + # Shortcut. Avoid buffer data copies when: + # - We have no data in our buffer. + # AND + # - Our call to recv returned exactly the + # number of bytes we were asked to read. + return data + if n == left: + buf.write(data) + del data # explicit free + break + assert n <= left, "recv(%d) returned %d bytes" % (left, n) + buf.write(data) + buf_len += n + del data # explicit free + #assert buf_len == buf.tell() + return buf.getvalue() + + def readline(self, size=-1): + buf = self._rbuf + buf.seek(0, 2) # seek end + if buf.tell() > 0: + # check if we already have it in our buffer + buf.seek(0) + bline = buf.readline(size) + if bline.endswith('\n') or len(bline) == size: + self._rbuf = StringIO() + self._rbuf.write(buf.read()) + return bline + del bline + if size < 0: + # Read until \n or EOF, whichever comes first + if self._rbufsize <= 1: + # Speed up unbuffered case + buf.seek(0) + buffers = [buf.read()] + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + data = None + recv = self._sock.recv + while True: + try: + while data != "\n": + data = recv(1) + if not data: + break + buffers.append(data) + except OpenSSL.SSL.WantReadError: + continue + break + return "".join(buffers) + + buf.seek(0, 2) # seek end + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + while True: + try: + data = self._sock.recv(self._rbufsize) + except OpenSSL.SSL.WantReadError: + continue + if not data: + break + nl = data.find('\n') + if nl >= 0: + nl += 1 + buf.write(data[:nl]) + self._rbuf.write(data[nl:]) + del data + break + buf.write(data) + return buf.getvalue() + else: + # Read until size bytes or \n or EOF seen, whichever comes first + buf.seek(0, 2) # seek end + buf_len = buf.tell() + if buf_len >= size: + buf.seek(0) + rv = buf.read(size) + self._rbuf = StringIO() + self._rbuf.write(buf.read()) + return rv + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + while True: + try: + data = self._sock.recv(self._rbufsize) + except OpenSSL.SSL.WantReadError: + continue + if not data: + break + left = size - buf_len + # did we just receive a newline? + nl = data.find('\n', 0, left) + if nl >= 0: + nl += 1 + # save the excess data to _rbuf + self._rbuf.write(data[nl:]) + if buf_len: + buf.write(data[:nl]) + break + else: + # Shortcut. Avoid data copy through buf when returning + # a substring of our first recv(). + return data[:nl] + n = len(data) + if n == size and not buf_len: + # Shortcut. Avoid data copy through buf when + # returning exactly all of our first recv(). + return data + if n >= left: + buf.write(data[:left]) + self._rbuf.write(data[left:]) + break + buf.write(data) + buf_len += n + #assert buf_len == buf.tell() + return buf.getvalue() + + class WrappedSocket(object): '''API-compatibility wrapper for Python OpenSSL's Connection-class.''' @@ -110,7 +276,7 @@ class WrappedSocket(object): return self.socket.fileno() def makefile(self, mode, bufsize=-1): - return _fileobject(self.connection, mode, bufsize) + return fileobject(self.connection, mode, bufsize) def settimeout(self, timeout): return self.socket.settimeout(timeout) @@ -123,8 +289,9 @@ class WrappedSocket(object): def getpeercert(self, binary_form=False): x509 = self.connection.get_peer_certificate() + if not x509: - raise ssl.SSLError('') + return x509 if binary_form: return OpenSSL.crypto.dump_certificate( @@ -165,9 +332,13 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, cnx = OpenSSL.SSL.Connection(ctx, sock) cnx.set_tlsext_host_name(server_hostname) cnx.set_connect_state() - try: - cnx.do_handshake() - except OpenSSL.SSL.Error as e: - raise ssl.SSLError('bad handshake', e) + while True: + try: + cnx.do_handshake() + except OpenSSL.SSL.WantReadError: + continue + except OpenSSL.SSL.Error as e: + raise ssl.SSLError('bad handshake', e) + break return WrappedSocket(cnx, sock) diff --git a/requests/packages/urllib3/exceptions.py b/requests/packages/urllib3/exceptions.py index 2e2a259c..98ef9abc 100644 --- a/requests/packages/urllib3/exceptions.py +++ b/requests/packages/urllib3/exceptions.py @@ -39,6 +39,11 @@ class SSLError(HTTPError): pass +class ProxyError(HTTPError): + "Raised when the connection to a proxy fails." + pass + + class DecodeError(HTTPError): "Raised when automatic decoding based on Content-Type fails." pass @@ -70,8 +75,29 @@ class HostChangedError(RequestError): self.retries = retries -class TimeoutError(RequestError): - "Raised when a socket timeout occurs." +class TimeoutStateError(HTTPError): + """ Raised when passing an invalid state to a timeout """ + pass + + +class TimeoutError(HTTPError): + """ Raised when a socket timeout error occurs. + + Catching this error will catch both :exc:`ReadTimeoutErrors + ` and :exc:`ConnectTimeoutErrors `. + """ + pass + + +class ReadTimeoutError(TimeoutError, RequestError): + "Raised when a socket timeout occurs while receiving data from a server" + pass + + +# This timeout error does not have a URL attached and needs to inherit from the +# base HTTPError +class ConnectTimeoutError(TimeoutError): + "Raised when a socket timeout occurs while connecting to a server" pass diff --git a/requests/packages/urllib3/fields.py b/requests/packages/urllib3/fields.py new file mode 100644 index 00000000..ed017657 --- /dev/null +++ b/requests/packages/urllib3/fields.py @@ -0,0 +1,177 @@ +# urllib3/fields.py +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +import email.utils +import mimetypes + +from .packages import six + + +def guess_content_type(filename, default='application/octet-stream'): + """ + Guess the "Content-Type" of a file. + + :param filename: + The filename to guess the "Content-Type" of using :mod:`mimetimes`. + :param default: + If no "Content-Type" can be guessed, default to `default`. + """ + if filename: + return mimetypes.guess_type(filename)[0] or default + return default + + +def format_header_param(name, value): + """ + Helper function to format and quote a single header parameter. + + Particularly useful for header parameters which might contain + non-ASCII values, like file names. This follows RFC 2231, as + suggested by RFC 2388 Section 4.4. + + :param name: + The name of the parameter, a string expected to be ASCII only. + :param value: + The value of the parameter, provided as a unicode string. + """ + if not any(ch in value for ch in '"\\\r\n'): + result = '%s="%s"' % (name, value) + try: + result.encode('ascii') + except UnicodeEncodeError: + pass + else: + return result + if not six.PY3: # Python 2: + value = value.encode('utf-8') + value = email.utils.encode_rfc2231(value, 'utf-8') + value = '%s*=%s' % (name, value) + return value + + +class RequestField(object): + """ + A data container for request body parameters. + + :param name: + The name of this request field. + :param data: + The data/value body. + :param filename: + An optional filename of the request field. + :param headers: + An optional dict-like object of headers to initially use for the field. + """ + def __init__(self, name, data, filename=None, headers=None): + self._name = name + self._filename = filename + self.data = data + self.headers = {} + if headers: + self.headers = dict(headers) + + @classmethod + def from_tuples(cls, fieldname, value): + """ + A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters. + + Supports constructing :class:`~urllib3.fields.RequestField` from parameter + of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type) + tuple where the MIME type is optional. For example: :: + + 'foo': 'bar', + 'fakefile': ('foofile.txt', 'contents of foofile'), + 'realfile': ('barfile.txt', open('realfile').read()), + 'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'), + 'nonamefile': 'contents of nonamefile field', + + Field names and filenames must be unicode. + """ + if isinstance(value, tuple): + if len(value) == 3: + filename, data, content_type = value + else: + filename, data = value + content_type = guess_content_type(filename) + else: + filename = None + content_type = None + data = value + + request_param = cls(fieldname, data, filename=filename) + request_param.make_multipart(content_type=content_type) + + return request_param + + def _render_part(self, name, value): + """ + Overridable helper function to format a single header parameter. + + :param name: + The name of the parameter, a string expected to be ASCII only. + :param value: + The value of the parameter, provided as a unicode string. + """ + return format_header_param(name, value) + + def _render_parts(self, header_parts): + """ + Helper function to format and quote a single header. + + Useful for single headers that are composed of multiple items. E.g., + 'Content-Disposition' fields. + + :param header_parts: + A sequence of (k, v) typles or a :class:`dict` of (k, v) to format as + `k1="v1"; k2="v2"; ...`. + """ + parts = [] + iterable = header_parts + if isinstance(header_parts, dict): + iterable = header_parts.items() + + for name, value in iterable: + if value: + parts.append(self._render_part(name, value)) + + return '; '.join(parts) + + def render_headers(self): + """ + Renders the headers for this request field. + """ + lines = [] + + sort_keys = ['Content-Disposition', 'Content-Type', 'Content-Location'] + for sort_key in sort_keys: + if self.headers.get(sort_key, False): + lines.append('%s: %s' % (sort_key, self.headers[sort_key])) + + for header_name, header_value in self.headers.items(): + if header_name not in sort_keys: + if header_value: + lines.append('%s: %s' % (header_name, header_value)) + + lines.append('\r\n') + return '\r\n'.join(lines) + + def make_multipart(self, content_disposition=None, content_type=None, content_location=None): + """ + Makes this request field into a multipart request field. + + This method overrides "Content-Disposition", "Content-Type" and + "Content-Location" headers to the request parameter. + + :param content_type: + The 'Content-Type' of the request body. + :param content_location: + The 'Content-Location' of the request body. + + """ + self.headers['Content-Disposition'] = content_disposition or 'form-data' + self.headers['Content-Disposition'] += '; '.join(['', self._render_parts((('name', self._name), ('filename', self._filename)))]) + self.headers['Content-Type'] = content_type + self.headers['Content-Location'] = content_location diff --git a/requests/packages/urllib3/filepost.py b/requests/packages/urllib3/filepost.py index 526a7409..4575582e 100644 --- a/requests/packages/urllib3/filepost.py +++ b/requests/packages/urllib3/filepost.py @@ -12,6 +12,7 @@ from io import BytesIO from .packages import six from .packages.six import b +from .fields import RequestField writer = codecs.lookup('utf-8')[3] @@ -23,15 +24,38 @@ def choose_boundary(): return uuid4().hex -def get_content_type(filename): - return mimetypes.guess_type(filename)[0] or 'application/octet-stream' +def iter_field_objects(fields): + """ + Iterate over fields. + + Supports list of (k, v) tuples and dicts, and lists of + :class:`~urllib3.fields.RequestField`. + + """ + if isinstance(fields, dict): + i = six.iteritems(fields) + else: + i = iter(fields) + + for field in i: + if isinstance(field, RequestField): + yield field + else: + yield RequestField.from_tuples(*field) def iter_fields(fields): """ Iterate over fields. + .. deprecated :: + + The addition of `~urllib3.fields.RequestField` makes this function + obsolete. Instead, use :func:`iter_field_objects`, which returns + `~urllib3.fields.RequestField` objects, instead. + Supports list of (k, v) tuples and dicts. + """ if isinstance(fields, dict): return ((k, v) for k, v in six.iteritems(fields)) @@ -44,15 +68,7 @@ def encode_multipart_formdata(fields, boundary=None): Encode a dictionary of ``fields`` using the multipart/form-data MIME format. :param fields: - Dictionary of fields or list of (key, value) or (key, value, MIME type) - field tuples. The key is treated as the field name, and the value as - the body of the form-data bytes. If the value is a tuple of two - elements, then the first element is treated as the filename of the - form-data section and a suitable MIME type is guessed based on the - filename. If the value is a tuple of three elements, then the third - element is treated as an explicit MIME type of the form-data section. - - Field names and filenames must be unicode. + Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`). :param boundary: If not specified, then a random boundary will be generated using @@ -62,24 +78,11 @@ def encode_multipart_formdata(fields, boundary=None): if boundary is None: boundary = choose_boundary() - for fieldname, value in iter_fields(fields): + for field in iter_field_objects(fields): body.write(b('--%s\r\n' % (boundary))) - if isinstance(value, tuple): - if len(value) == 3: - filename, data, content_type = value - else: - filename, data = value - content_type = get_content_type(filename) - writer(body).write('Content-Disposition: form-data; name="%s"; ' - 'filename="%s"\r\n' % (fieldname, filename)) - body.write(b('Content-Type: %s\r\n\r\n' % - (content_type,))) - else: - data = value - writer(body).write('Content-Disposition: form-data; name="%s"\r\n' - % (fieldname)) - body.write(b'\r\n') + writer(body).write(field.render_headers()) + data = field.data if isinstance(data, int): data = str(data) # Backwards compatibility diff --git a/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py b/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py index 9560b045..2d61ac21 100644 --- a/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py +++ b/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py @@ -7,23 +7,60 @@ __version__ = '3.2.2' class CertificateError(ValueError): pass -def _dnsname_to_pat(dn): +def _dnsname_match(dn, hostname, max_wildcards=1): + """Matching according to RFC 6125, section 6.4.3 + + http://tools.ietf.org/html/rfc6125#section-6.4.3 + """ pats = [] - for frag in dn.split(r'.'): - if frag == '*': - # When '*' is a fragment by itself, it matches a non-empty dotless - # fragment. - pats.append('[^.]+') - else: - # Otherwise, '*' matches any dotless fragment. - frag = re.escape(frag) - pats.append(frag.replace(r'\*', '[^.]*')) - return re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) + if not dn: + return False + + parts = dn.split(r'.') + leftmost = parts[0] + + wildcards = leftmost.count('*') + if wildcards > max_wildcards: + # Issue #17980: avoid denials of service by refusing more + # than one wildcard per fragment. A survery of established + # policy among SSL implementations showed it to be a + # reasonable choice. + raise CertificateError( + "too many wildcards in certificate DNS name: " + repr(dn)) + + # speed up common case w/o wildcards + if not wildcards: + return dn.lower() == hostname.lower() + + # RFC 6125, section 6.4.3, subitem 1. + # The client SHOULD NOT attempt to match a presented identifier in which + # the wildcard character comprises a label other than the left-most label. + if leftmost == '*': + # When '*' is a fragment by itself, it matches a non-empty dotless + # fragment. + pats.append('[^.]+') + elif leftmost.startswith('xn--') or hostname.startswith('xn--'): + # RFC 6125, section 6.4.3, subitem 3. + # The client SHOULD NOT attempt to match a presented identifier + # where the wildcard character is embedded within an A-label or + # U-label of an internationalized domain name. + pats.append(re.escape(leftmost)) + else: + # Otherwise, '*' matches any dotless string, e.g. www* + pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) + + # add the remaining fragments, ignore any wildcards + for frag in parts[1:]: + pats.append(re.escape(frag)) + + pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) + return pat.match(hostname) + def match_hostname(cert, hostname): """Verify that *cert* (in decoded format as returned by - SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 rules - are mostly followed, but IP addresses are not accepted for *hostname*. + SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 + rules are followed, but IP addresses are not accepted for *hostname*. CertificateError is raised on failure. On success, the function returns nothing. @@ -34,7 +71,7 @@ def match_hostname(cert, hostname): san = cert.get('subjectAltName', ()) for key, value in san: if key == 'DNS': - if _dnsname_to_pat(value).match(hostname): + if _dnsname_match(value, hostname): return dnsnames.append(value) if not dnsnames: @@ -45,7 +82,7 @@ def match_hostname(cert, hostname): # XXX according to RFC 2818, the most specific Common Name # must be used. if key == 'commonName': - if _dnsname_to_pat(value).match(hostname): + if _dnsname_match(value, hostname): return dnsnames.append(value) if len(dnsnames) > 1: diff --git a/requests/packages/urllib3/response.py b/requests/packages/urllib3/response.py index c7f93b82..4efff5a1 100644 --- a/requests/packages/urllib3/response.py +++ b/requests/packages/urllib3/response.py @@ -74,6 +74,7 @@ class HTTPResponse(io.IOBase): """ CONTENT_DECODERS = ['gzip', 'deflate'] + REDIRECT_STATUSES = [301, 302, 303, 307, 308] def __init__(self, body='', headers=None, status=0, version=0, reason=None, strict=0, preload_content=True, decode_content=True, @@ -107,7 +108,7 @@ class HTTPResponse(io.IOBase): code and valid location. ``None`` if redirect status and no location. ``False`` if not a redirect status code. """ - if self.status in [301, 302, 303, 307]: + if self.status in self.REDIRECT_STATUSES: return self.headers.get('location') return False diff --git a/requests/packages/urllib3/util.py b/requests/packages/urllib3/util.py index 39bceaba..266c9ed3 100644 --- a/requests/packages/urllib3/util.py +++ b/requests/packages/urllib3/util.py @@ -6,10 +6,11 @@ from base64 import b64encode -from collections import namedtuple -from socket import error as SocketError -from hashlib import md5, sha1 from binascii import hexlify, unhexlify +from collections import namedtuple +from hashlib import md5, sha1 +from socket import error as SocketError, _GLOBAL_DEFAULT_TIMEOUT +import time try: from select import poll, POLLIN @@ -32,7 +33,233 @@ except ImportError: pass from .packages import six -from .exceptions import LocationParseError, SSLError +from .exceptions import LocationParseError, SSLError, TimeoutStateError + + +_Default = object() +# The default timeout to use for socket connections. This is the attribute used +# by httplib to define the default timeout + + +def current_time(): + """ + Retrieve the current time, this function is mocked out in unit testing. + """ + return time.time() + + +class Timeout(object): + """ + Utility object for storing timeout values. + + Example usage: + + .. code-block:: python + + timeout = urllib3.util.Timeout(connect=2.0, read=7.0) + pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout) + pool.request(...) # Etc, etc + + :param connect: + The maximum amount of time to wait for a connection attempt to a server + to succeed. Omitting the parameter will default the connect timeout to + the system default, probably `the global default timeout in socket.py + `_. + None will set an infinite timeout for connection attempts. + + :type connect: integer, float, or None + + :param read: + The maximum amount of time to wait between consecutive + read operations for a response from the server. Omitting + the parameter will default the read timeout to the system + default, probably `the global default timeout in socket.py + `_. + None will set an infinite timeout. + + :type read: integer, float, or None + + :param total: + The maximum amount of time to wait for an HTTP request to connect and + return. This combines the connect and read timeouts into one. In the + event that both a connect timeout and a total are specified, or a read + timeout and a total are specified, the shorter timeout will be applied. + + Defaults to None. + + + :type total: integer, float, or None + + .. note:: + + Many factors can affect the total amount of time for urllib3 to return + an HTTP response. Specifically, Python's DNS resolver does not obey the + timeout specified on the socket. Other factors that can affect total + request time include high CPU load, high swap, the program running at a + low priority level, or other behaviors. The observed running time for + urllib3 to return a response may be greater than the value passed to + `total`. + + In addition, the read and total timeouts only measure the time between + read operations on the socket connecting the client and the server, not + the total amount of time for the request to return a complete response. + As an example, you may want a request to return within 7 seconds or + fail, so you set the ``total`` timeout to 7 seconds. If the server + sends one byte to you every 5 seconds, the request will **not** trigger + time out. This case is admittedly rare. + """ + + #: A sentinel object representing the default timeout value + DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT + + def __init__(self, connect=_Default, read=_Default, total=None): + self._connect = self._validate_timeout(connect, 'connect') + self._read = self._validate_timeout(read, 'read') + self.total = self._validate_timeout(total, 'total') + self._start_connect = None + + def __str__(self): + return '%s(connect=%r, read=%r, total=%r)' % ( + type(self).__name__, self._connect, self._read, self.total) + + + @classmethod + def _validate_timeout(cls, value, name): + """ Check that a timeout attribute is valid + + :param value: The timeout value to validate + :param name: The name of the timeout attribute to validate. This is used + for clear error messages + :return: the value + :raises ValueError: if the type is not an integer or a float, or if it + is a numeric value less than zero + """ + if value is _Default: + return cls.DEFAULT_TIMEOUT + + if value is None or value is cls.DEFAULT_TIMEOUT: + return value + + try: + float(value) + except (TypeError, ValueError): + raise ValueError("Timeout value %s was %s, but it must be an " + "int or float." % (name, value)) + + try: + if value < 0: + raise ValueError("Attempted to set %s timeout to %s, but the " + "timeout cannot be set to a value less " + "than 0." % (name, value)) + except TypeError: # Python 3 + raise ValueError("Timeout value %s was %s, but it must be an " + "int or float." % (name, value)) + + return value + + @classmethod + def from_float(cls, timeout): + """ Create a new Timeout from a legacy timeout value. + + The timeout value used by httplib.py sets the same timeout on the + connect(), and recv() socket requests. This creates a :class:`Timeout` + object that sets the individual timeouts to the ``timeout`` value passed + to this function. + + :param timeout: The legacy timeout value + :type timeout: integer, float, sentinel default object, or None + :return: a Timeout object + :rtype: :class:`Timeout` + """ + return Timeout(read=timeout, connect=timeout) + + def clone(self): + """ Create a copy of the timeout object + + Timeout properties are stored per-pool but each request needs a fresh + Timeout object to ensure each one has its own start/stop configured. + + :return: a copy of the timeout object + :rtype: :class:`Timeout` + """ + # We can't use copy.deepcopy because that will also create a new object + # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to + # detect the user default. + return Timeout(connect=self._connect, read=self._read, + total=self.total) + + def start_connect(self): + """ Start the timeout clock, used during a connect() attempt + + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to start a timer that has been started already. + """ + if self._start_connect is not None: + raise TimeoutStateError("Timeout timer has already been started.") + self._start_connect = current_time() + return self._start_connect + + def get_connect_duration(self): + """ Gets the time elapsed since the call to :meth:`start_connect`. + + :return: the elapsed time + :rtype: float + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to get duration for a timer that hasn't been started. + """ + if self._start_connect is None: + raise TimeoutStateError("Can't get connect duration for timer " + "that has not started.") + return current_time() - self._start_connect + + @property + def connect_timeout(self): + """ Get the value to use when setting a connection timeout. + + This will be a positive float or integer, the value None + (never timeout), or the default system timeout. + + :return: the connect timeout + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + """ + if self.total is None: + return self._connect + + if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: + return self.total + + return min(self._connect, self.total) + + @property + def read_timeout(self): + """ Get the value for the read timeout. + + This assumes some time has elapsed in the connection timeout and + computes the read timeout appropriately. + + If self.total is set, the read timeout is dependent on the amount of + time taken by the connect timeout. If the connection time has not been + established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be + raised. + + :return: the value to use for the read timeout + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` + has not yet been called on this object. + """ + if (self.total is not None and + self.total is not self.DEFAULT_TIMEOUT and + self._read is not None and + self._read is not self.DEFAULT_TIMEOUT): + # in case the connect timeout has not yet been established. + if self._start_connect is None: + return self._read + return max(0, min(self.total - self.get_connect_duration(), + self._read)) + elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: + return max(0, self.total - self.get_connect_duration()) + else: + return self._read class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): From c64c0ab1215168adf2384888f3d52bd99217d723 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Tue, 24 Sep 2013 14:06:24 -0400 Subject: [PATCH 27/28] prototype for new urllib3 timeouts --- requests/adapters.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/requests/adapters.py b/requests/adapters.py index 750afece..b9a12b0c 100644 --- a/requests/adapters.py +++ b/requests/adapters.py @@ -13,6 +13,7 @@ import socket from .models import Response from .packages.urllib3.poolmanager import PoolManager, proxy_from_url from .packages.urllib3.response import HTTPResponse +from .packages.urllib3.util import Timeout from .compat import urlparse, basestring, urldefrag, unquote from .utils import (DEFAULT_CA_BUNDLE_PATH, get_encoding_from_headers, except_on_missing_scheme, get_auth_from_url) @@ -298,6 +299,11 @@ class HTTPAdapter(BaseAdapter): chunked = not (request.body is None or 'Content-Length' in request.headers) + if stream: + timeout = Timeout(connect=timeout) + else: + timeout = Timeout(connect=timeout, read=timeout) + try: if not chunked: resp = conn.urlopen( From 12760f2295e332111abcd2a96e17ef41ad4640d6 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Tue, 24 Sep 2013 14:08:28 -0400 Subject: [PATCH 28/28] runscope! --- docs/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.rst b/docs/index.rst index 243f9923..0716c65c 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -38,7 +38,7 @@ Requests takes all of the work out of Python HTTP/1.1 — making your integrati Testimonials ------------ -Her Majesty's Government, Amazon, Google, Twilio, Mozilla, Heroku, PayPal, NPR, Obama for America, Transifex, Native Instruments, The Washington Post, Twitter, SoundCloud, Kippt, Readability, and Federal US Institutions use Requests internally. It has been downloaded over 3,000,000 times from PyPI. +Her Majesty's Government, Amazon, Google, Twilio, Runscope, Mozilla, Heroku, PayPal, NPR, Obama for America, Transifex, Native Instruments, The Washington Post, Twitter, SoundCloud, Kippt, Readability, and Federal US Institutions use Requests internally. It has been downloaded over 5,000,000 times from PyPI. **Armin Ronacher** Requests is the perfect example how beautiful an API can be with the