From 2bc7762cb6c91f49e5b116d3eb9f93af01e331f2 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Wed, 10 Sep 2014 11:57:20 -0500 Subject: [PATCH 1/4] Update how we handle retries to be consistent with documentation --- requests/adapters.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/requests/adapters.py b/requests/adapters.py index 40088900..df3345fd 100644 --- a/requests/adapters.py +++ b/requests/adapters.py @@ -33,7 +33,7 @@ from .auth import _basic_auth_str DEFAULT_POOLBLOCK = False DEFAULT_POOLSIZE = 10 -DEFAULT_RETRIES = 0 +DEFAULT_RETRIES = object() class BaseAdapter(object): @@ -79,7 +79,10 @@ class HTTPAdapter(BaseAdapter): def __init__(self, pool_connections=DEFAULT_POOLSIZE, pool_maxsize=DEFAULT_POOLSIZE, max_retries=DEFAULT_RETRIES, pool_block=DEFAULT_POOLBLOCK): - self.max_retries = max_retries + if max_retries is DEFAULT_RETRIES: + self.max_retries = Retry(0, read=False) + else: + self.max_retries = Retry.from_int(max_retries) self.config = {} self.proxy_manager = {} @@ -360,7 +363,7 @@ class HTTPAdapter(BaseAdapter): assert_same_host=False, preload_content=False, decode_content=False, - retries=Retry(self.max_retries, read=False), + retries=self.max_retries, timeout=timeout ) From f54a4e3de186495a5254d21ae698c677c71b90c3 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Fri, 24 Oct 2014 21:58:11 -0500 Subject: [PATCH 2/4] Update urllib3 to df4ec5cce1 --- requests/packages/urllib3/_collections.py | 7 ++- requests/packages/urllib3/connection.py | 12 ++++- requests/packages/urllib3/connectionpool.py | 4 +- .../packages/urllib3/contrib/pyopenssl.py | 18 +++++++- requests/packages/urllib3/exceptions.py | 13 +++--- requests/packages/urllib3/util/retry.py | 24 ++++++---- requests/packages/urllib3/util/url.py | 45 ++++++++++++++++++- 7 files changed, 97 insertions(+), 26 deletions(-) diff --git a/requests/packages/urllib3/_collections.py b/requests/packages/urllib3/_collections.py index d77ebb8d..784342a4 100644 --- a/requests/packages/urllib3/_collections.py +++ b/requests/packages/urllib3/_collections.py @@ -14,7 +14,7 @@ try: # Python 2.7+ from collections import OrderedDict except ImportError: from .packages.ordered_dict import OrderedDict -from .packages.six import itervalues +from .packages.six import iterkeys, itervalues __all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict'] @@ -85,8 +85,7 @@ class RecentlyUsedContainer(MutableMapping): def clear(self): with self.lock: # Copy pointers to all values, then wipe the mapping - # under Python 2, this copies the list of values twice :-| - values = list(self._container.values()) + values = list(itervalues(self._container)) self._container.clear() if self.dispose_func: @@ -95,7 +94,7 @@ class RecentlyUsedContainer(MutableMapping): def keys(self): with self.lock: - return self._container.keys() + return list(iterkeys(self._container)) class HTTPHeaderDict(MutableMapping): diff --git a/requests/packages/urllib3/connection.py b/requests/packages/urllib3/connection.py index c6e1959a..cebdd867 100644 --- a/requests/packages/urllib3/connection.py +++ b/requests/packages/urllib3/connection.py @@ -3,6 +3,7 @@ import sys import socket from socket import timeout as SocketTimeout import warnings +from .packages import six try: # Python 3 from http.client import HTTPConnection as _HTTPConnection, HTTPException @@ -26,12 +27,19 @@ except (ImportError, AttributeError): # Platform-specific: No SSL. pass +try: # Python 3: + # Not a no-op, we're adding this to the namespace so it can be imported. + ConnectionError = ConnectionError +except NameError: # Python 2: + class ConnectionError(Exception): + pass + + from .exceptions import ( ConnectTimeoutError, SystemTimeWarning, ) from .packages.ssl_match_hostname import match_hostname -from .packages import six from .util.ssl_ import ( resolve_cert_reqs, @@ -40,8 +48,8 @@ from .util.ssl_ import ( assert_fingerprint, ) -from .util import connection +from .util import connection port_by_scheme = { 'http': 80, diff --git a/requests/packages/urllib3/connectionpool.py b/requests/packages/urllib3/connectionpool.py index 9cc2a955..ac6e0ca6 100644 --- a/requests/packages/urllib3/connectionpool.py +++ b/requests/packages/urllib3/connectionpool.py @@ -32,7 +32,7 @@ from .connection import ( port_by_scheme, DummyConnection, HTTPConnection, HTTPSConnection, VerifiedHTTPSConnection, - HTTPException, BaseSSLError, + HTTPException, BaseSSLError, ConnectionError ) from .request import RequestMethods from .response import HTTPResponse @@ -542,7 +542,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): release_conn = True raise SSLError(e) - except (TimeoutError, HTTPException, SocketError) as e: + except (TimeoutError, HTTPException, SocketError, ConnectionError) as e: if conn: # Discard the connection for these exceptions. It will be # be replaced during the next _get_conn() call. diff --git a/requests/packages/urllib3/contrib/pyopenssl.py b/requests/packages/urllib3/contrib/pyopenssl.py index 24de9e40..3c6b26c1 100644 --- a/requests/packages/urllib3/contrib/pyopenssl.py +++ b/requests/packages/urllib3/contrib/pyopenssl.py @@ -29,7 +29,7 @@ Now you can use :mod:`urllib3` as you normally would, and it will support SNI when the required modules are installed. Activating this module also has the positive side effect of disabling SSL/TLS -encryption in Python 2 (see `CRIME attack`_). +compression in Python 2 (see `CRIME attack`_). If you want to configure the default list of supported cipher suites, you can set the ``urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST`` variable. @@ -199,8 +199,21 @@ class WrappedSocket(object): def settimeout(self, timeout): return self.socket.settimeout(timeout) + def _send_until_done(self, data): + while True: + try: + return self.connection.send(data) + except OpenSSL.SSL.WantWriteError: + _, wlist, _ = select.select([], [self.socket], [], + self.socket.gettimeout()) + if not wlist: + raise timeout() + continue + def sendall(self, data): - return self.connection.sendall(data) + while len(data): + sent = self._send_until_done(data) + data = data[sent:] def close(self): if self._makefile_refs < 1: @@ -248,6 +261,7 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, ssl_version=None): ctx = OpenSSL.SSL.Context(_openssl_versions[ssl_version]) if certfile: + keyfile = keyfile or certfile # Match behaviour of the normal python ssl library ctx.use_certificate_file(certfile) if keyfile: ctx.use_privatekey_file(keyfile) diff --git a/requests/packages/urllib3/exceptions.py b/requests/packages/urllib3/exceptions.py index 7519ba98..0c6fd3c5 100644 --- a/requests/packages/urllib3/exceptions.py +++ b/requests/packages/urllib3/exceptions.py @@ -72,11 +72,8 @@ class MaxRetryError(RequestError): def __init__(self, pool, url, reason=None): self.reason = reason - message = "Max retries exceeded with url: %s" % url - if reason: - message += " (Caused by %r)" % reason - else: - message += " (Caused by redirect)" + message = "Max retries exceeded with url: %s (Caused by %r)" % ( + url, reason) RequestError.__init__(self, pool, url, message) @@ -141,6 +138,12 @@ class LocationParseError(LocationValueError): self.location = location +class ResponseError(HTTPError): + "Used as a container for an error reason supplied in a MaxRetryError." + GENERIC_ERROR = 'too many error responses' + SPECIFIC_ERROR = 'too many {status_code} error responses' + + class SecurityWarning(HTTPWarning): "Warned when perfoming security reducing actions" pass diff --git a/requests/packages/urllib3/util/retry.py b/requests/packages/urllib3/util/retry.py index eb560dfc..aeaf8a02 100644 --- a/requests/packages/urllib3/util/retry.py +++ b/requests/packages/urllib3/util/retry.py @@ -2,10 +2,11 @@ import time import logging from ..exceptions import ( - ProtocolError, ConnectTimeoutError, - ReadTimeoutError, MaxRetryError, + ProtocolError, + ReadTimeoutError, + ResponseError, ) from ..packages import six @@ -36,7 +37,6 @@ class Retry(object): Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless retries are disabled, in which case the causing exception will be raised. - :param int total: Total number of retries to allow. Takes precedence over other counts. @@ -184,8 +184,8 @@ class Retry(object): return isinstance(err, ConnectTimeoutError) def _is_read_error(self, err): - """ Errors that occur after the request has been started, so we can't - assume that the server did not process any of it. + """ Errors that occur after the request has been started, so we should + assume that the server began processing it. """ return isinstance(err, (ReadTimeoutError, ProtocolError)) @@ -198,8 +198,7 @@ class Retry(object): return self.status_forcelist and status_code in self.status_forcelist def is_exhausted(self): - """ Are we out of retries? - """ + """ Are we out of retries? """ retry_counts = (self.total, self.connect, self.read, self.redirect) retry_counts = list(filter(None, retry_counts)) if not retry_counts: @@ -230,6 +229,7 @@ class Retry(object): connect = self.connect read = self.read redirect = self.redirect + cause = 'unknown' if error and self._is_connection_error(error): # Connect retry? @@ -251,10 +251,16 @@ class Retry(object): # Redirect retry? if redirect is not None: redirect -= 1 + cause = 'too many redirects' else: - # FIXME: Nothing changed, scenario doesn't make sense. + # Incrementing because of a server error like a 500 in + # status_forcelist and a the given method is in the whitelist _observed_errors += 1 + cause = ResponseError.GENERIC_ERROR + if response and response.status: + cause = ResponseError.SPECIFIC_ERROR.format( + status_code=response.status) new_retry = self.new( total=total, @@ -262,7 +268,7 @@ class Retry(object): _observed_errors=_observed_errors) if new_retry.is_exhausted(): - raise MaxRetryError(_pool, url, error) + raise MaxRetryError(_pool, url, error or ResponseError(cause)) log.debug("Incremented Retry for (url='%s'): %r" % (url, new_retry)) diff --git a/requests/packages/urllib3/util/url.py b/requests/packages/urllib3/util/url.py index 487d456c..b2ec834f 100644 --- a/requests/packages/urllib3/util/url.py +++ b/requests/packages/urllib3/util/url.py @@ -40,6 +40,48 @@ class Url(namedtuple('Url', url_attrs)): return '%s:%d' % (self.host, self.port) return self.host + @property + def url(self): + """ + Convert self into a url + + This function should more or less round-trip with :func:`.parse_url`. The + returned url may not be exactly the same as the url inputted to + :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls + with a blank port will have : removed). + + Example: :: + + >>> U = parse_url('http://google.com/mail/') + >>> U.url + 'http://google.com/mail/' + >>> Url('http', 'username:password', 'host.com', 80, + ... '/path', 'query', 'fragment').url + 'http://username:password@host.com:80/path?query#fragment' + """ + scheme, auth, host, port, path, query, fragment = self + url = '' + + # We use "is not None" we want things to happen with empty strings (or 0 port) + if scheme is not None: + url += scheme + '://' + if auth is not None: + url += auth + '@' + if host is not None: + url += host + if port is not None: + url += ':' + str(port) + if path is not None: + url += path + if query is not None: + url += '?' + query + if fragment is not None: + url += '#' + fragment + + return url + + def __str__(self): + return self.url def split_first(s, delims): """ @@ -84,7 +126,7 @@ def parse_url(url): Example:: >>> parse_url('http://google.com/mail/') - Url(scheme='http', host='google.com', port=None, path='/', ...) + Url(scheme='http', host='google.com', port=None, path='/mail/', ...) >>> parse_url('google.com:80') Url(scheme=None, host='google.com', port=80, path=None, ...) >>> parse_url('/foo?bar') @@ -162,7 +204,6 @@ def parse_url(url): return Url(scheme, auth, host, port, path, query, fragment) - def get_host(url): """ Deprecated. Use :func:`.parse_url` instead. From 2eb7e3c80b345f6c6e13fadf6f1ba98efccdd2e2 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Fri, 24 Oct 2014 22:23:15 -0500 Subject: [PATCH 3/4] Add last few changes and add a quick test --- requests/adapters.py | 10 +++++++--- requests/exceptions.py | 5 +++++ test_requests.py | 14 +++++++++++++- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/requests/adapters.py b/requests/adapters.py index df3345fd..723b8179 100644 --- a/requests/adapters.py +++ b/requests/adapters.py @@ -26,14 +26,15 @@ from .packages.urllib3.exceptions import ProxyError as _ProxyError from .packages.urllib3.exceptions import ProtocolError from .packages.urllib3.exceptions import ReadTimeoutError from .packages.urllib3.exceptions import SSLError as _SSLError +from .packages.urllib3.exceptions import ResponseError from .cookies import extract_cookies_to_jar from .exceptions import (ConnectionError, ConnectTimeout, ReadTimeout, SSLError, - ProxyError) + ProxyError, RetryError) from .auth import _basic_auth_str DEFAULT_POOLBLOCK = False DEFAULT_POOLSIZE = 10 -DEFAULT_RETRIES = object() +DEFAULT_RETRIES = 0 class BaseAdapter(object): @@ -79,7 +80,7 @@ class HTTPAdapter(BaseAdapter): def __init__(self, pool_connections=DEFAULT_POOLSIZE, pool_maxsize=DEFAULT_POOLSIZE, max_retries=DEFAULT_RETRIES, pool_block=DEFAULT_POOLBLOCK): - if max_retries is DEFAULT_RETRIES: + if max_retries == DEFAULT_RETRIES: self.max_retries = Retry(0, read=False) else: self.max_retries = Retry.from_int(max_retries) @@ -415,6 +416,9 @@ class HTTPAdapter(BaseAdapter): if isinstance(e.reason, ConnectTimeoutError): raise ConnectTimeout(e, request=request) + if isinstance(e.reason, ResponseError): + raise RetryError(e, request=request) + raise ConnectionError(e, request=request) except _ProxyError as e: diff --git a/requests/exceptions.py b/requests/exceptions.py index 34c7a0db..89135a80 100644 --- a/requests/exceptions.py +++ b/requests/exceptions.py @@ -90,5 +90,10 @@ class ChunkedEncodingError(RequestException): class ContentDecodingError(RequestException, BaseHTTPError): """Failed to decode response content""" + class StreamConsumedError(RequestException, TypeError): """The content for this response was already consumed""" + + +class RetryError(RequestException): + """Custom retries logic failed""" diff --git a/test_requests.py b/test_requests.py index 4a05cb2e..4624f095 100755 --- a/test_requests.py +++ b/test_requests.py @@ -20,7 +20,7 @@ from requests.compat import ( from requests.cookies import cookiejar_from_dict, morsel_to_cookie from requests.exceptions import (ConnectionError, ConnectTimeout, InvalidSchema, InvalidURL, MissingSchema, - ReadTimeout, Timeout) + ReadTimeout, Timeout, RetryError) from requests.models import PreparedRequest from requests.structures import CaseInsensitiveDict from requests.sessions import SessionRedirectMixin @@ -1520,6 +1520,7 @@ def test_prepared_request_complete_copy(): ) assert_copy(p, p.copy()) + def test_prepare_unicode_url(): p = PreparedRequest() p.prepare( @@ -1529,5 +1530,16 @@ def test_prepare_unicode_url(): ) assert_copy(p, p.copy()) + +def test_urllib3_retries(): + from requests.packages.urllib3.util import Retry + s = requests.Session() + s.mount('https://', HTTPAdapter(max_retries=Retry( + total=2, status_forcelist=[500] + ))) + + with pytest.raises(RetryError): + s.get('https://httpbin.org/status/500') + if __name__ == '__main__': unittest.main() From adf475ef82cbd29f63814c0626f64926deb2355b Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Sat, 25 Oct 2014 08:31:03 -0500 Subject: [PATCH 4/4] Update HTTPAdapter docstring --- requests/adapters.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/requests/adapters.py b/requests/adapters.py index 723b8179..c892853b 100644 --- a/requests/adapters.py +++ b/requests/adapters.py @@ -64,7 +64,9 @@ class HTTPAdapter(BaseAdapter): should attempt. Note, this applies only to failed DNS lookups, socket connections and connection timeouts, never to requests where data has made it to the server. By default, Requests does not retry failed - connections. + connections. If you need granular control over the conditions under + which we retry a request, import urllib3's ``Retry`` class and pass + that instead. :param pool_block: Whether the connection pool should block for connections. Usage::