diff --git a/requests/__init__.py b/requests/__init__.py index d26abb2a..5a4dccd4 100644 --- a/requests/__init__.py +++ b/requests/__init__.py @@ -55,6 +55,12 @@ try: except ImportError: pass +import warnings + +# urllib3's DependencyWarnings should be silenced. +from .packages.urllib3.exceptions import DependencyWarning +warnings.simplefilter('ignore', DependencyWarning) + from . import utils from .models import Request, Response, PreparedRequest from .api import request, get, head, post, patch, put, delete, options diff --git a/requests/adapters.py b/requests/adapters.py index db62c09c..23e448f4 100644 --- a/requests/adapters.py +++ b/requests/adapters.py @@ -19,7 +19,7 @@ from .packages.urllib3.util.retry import Retry from .compat import urlparse, basestring from .utils import (DEFAULT_CA_BUNDLE_PATH, get_encoding_from_headers, prepend_scheme_if_needed, get_auth_from_url, urldefragauth, - select_proxy) + select_proxy, to_native_string) from .structures import CaseInsensitiveDict from .packages.urllib3.exceptions import ClosedPoolError from .packages.urllib3.exceptions import ConnectTimeoutError @@ -33,9 +33,15 @@ from .packages.urllib3.exceptions import SSLError as _SSLError from .packages.urllib3.exceptions import ResponseError from .cookies import extract_cookies_to_jar from .exceptions import (ConnectionError, ConnectTimeout, ReadTimeout, SSLError, - ProxyError, RetryError) + ProxyError, RetryError, InvalidSchema) from .auth import _basic_auth_str +try: + from .packages.urllib3.contrib.socks import SOCKSProxyManager +except ImportError: + def SOCKSProxyManager(*args, **kwargs): + raise InvalidSchema("Missing dependencies for SOCKS support.") + DEFAULT_POOLBLOCK = False DEFAULT_POOLSIZE = 10 DEFAULT_RETRIES = 0 @@ -149,9 +155,22 @@ class HTTPAdapter(BaseAdapter): :param proxy_kwargs: Extra keyword arguments used to configure the Proxy Manager. :returns: ProxyManager """ - if not proxy in self.proxy_manager: + if proxy in self.proxy_manager: + manager = self.proxy_manager[proxy] + elif proxy.lower().startswith('socks'): + username, password = get_auth_from_url(proxy) + manager = self.proxy_manager[proxy] = SOCKSProxyManager( + proxy, + username=username, + password=password, + num_pools=self._pool_connections, + maxsize=self._pool_maxsize, + block=self._pool_block, + **proxy_kwargs + ) + else: proxy_headers = self.proxy_headers(proxy) - self.proxy_manager[proxy] = proxy_from_url( + manager = self.proxy_manager[proxy] = proxy_from_url( proxy, proxy_headers=proxy_headers, num_pools=self._pool_connections, @@ -159,7 +178,7 @@ class HTTPAdapter(BaseAdapter): block=self._pool_block, **proxy_kwargs) - return self.proxy_manager[proxy] + return manager def cert_verify(self, conn, url, verify, cert): """Verify a SSL certificate. This method should not be called from user @@ -286,10 +305,16 @@ class HTTPAdapter(BaseAdapter): """ proxy = select_proxy(request.url, proxies) scheme = urlparse(request.url).scheme - if proxy and scheme != 'https': + + is_proxied_http_request = (proxy and scheme != 'https') + using_socks_proxy = False + if proxy: + proxy_scheme = urlparse(proxy).scheme.lower() + using_socks_proxy = proxy_scheme.startswith('socks') + + url = request.path_url + if is_proxied_http_request and not using_socks_proxy: url = urldefragauth(request.url) - else: - url = request.path_url return url diff --git a/requests/packages/urllib3/__init__.py b/requests/packages/urllib3/__init__.py index e43991a9..a5ad1332 100644 --- a/requests/packages/urllib3/__init__.py +++ b/requests/packages/urllib3/__init__.py @@ -1,7 +1,6 @@ """ urllib3 - Thread-safe connection pooling and re-using. """ - from __future__ import absolute_import import warnings @@ -32,7 +31,7 @@ except ImportError: __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = '1.13.1' +__version__ = 'dev' __all__ = ( 'HTTPConnectionPool', @@ -68,7 +67,7 @@ def add_stderr_logger(level=logging.DEBUG): handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s')) logger.addHandler(handler) logger.setLevel(level) - logger.debug('Added a stderr logging handler to logger: %s' % __name__) + logger.debug('Added a stderr logging handler to logger: %s', __name__) return handler # ... Clean up. diff --git a/requests/packages/urllib3/connectionpool.py b/requests/packages/urllib3/connectionpool.py index 7d220b13..9f415052 100644 --- a/requests/packages/urllib3/connectionpool.py +++ b/requests/packages/urllib3/connectionpool.py @@ -203,8 +203,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): Return a fresh :class:`HTTPConnection`. """ self.num_connections += 1 - log.info("Starting new HTTP connection (%d): %s" % - (self.num_connections, self.host)) + log.info("Starting new HTTP connection (%d): %s", + self.num_connections, self.host) conn = self.ConnectionCls(host=self.host, port=self.port, timeout=self.timeout.connect_timeout, @@ -239,7 +239,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # If this is a persistent connection, check if it got disconnected if conn and is_connection_dropped(conn): - log.info("Resetting dropped connection: %s" % self.host) + log.info("Resetting dropped connection: %s", self.host) conn.close() if getattr(conn, 'auto_open', 1) == 0: # This is a proxied connection that has been mutated by @@ -272,7 +272,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): except Full: # This should never happen if self.block == True log.warning( - "Connection pool is full, discarding connection: %s" % + "Connection pool is full, discarding connection: %s", self.host) # Connection never got put back into the pool, close it. @@ -382,9 +382,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # AppEngine doesn't have a version attr. http_version = getattr(conn, '_http_vsn_str', 'HTTP/?') - log.debug("\"%s %s %s\" %s %s" % (method, url, http_version, - httplib_response.status, - httplib_response.length)) + log.debug("\"%s %s %s\" %s %s", method, url, http_version, + httplib_response.status, httplib_response.length) try: assert_header_parsing(httplib_response.msg) @@ -622,7 +621,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if not conn: # Try again log.warning("Retrying (%r) after connection " - "broken by '%r': %s" % (retries, err, url)) + "broken by '%r': %s", retries, err, url) return self.urlopen(method, url, body, headers, retries, redirect, assert_same_host, timeout=timeout, pool_timeout=pool_timeout, @@ -644,7 +643,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise return response - log.info("Redirecting %s -> %s" % (url, redirect_location)) + log.info("Redirecting %s -> %s", url, redirect_location) return self.urlopen( method, redirect_location, body, headers, retries=retries, redirect=redirect, @@ -656,7 +655,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if retries.is_forced_retry(method, status_code=response.status): retries = retries.increment(method, url, response=response, _pool=self) retries.sleep() - log.info("Forced retry: %s" % url) + log.info("Forced retry: %s", url) return self.urlopen( method, url, body, headers, retries=retries, redirect=redirect, @@ -754,8 +753,8 @@ class HTTPSConnectionPool(HTTPConnectionPool): Return a fresh :class:`httplib.HTTPSConnection`. """ self.num_connections += 1 - log.info("Starting new HTTPS connection (%d): %s" - % (self.num_connections, self.host)) + log.info("Starting new HTTPS connection (%d): %s", + self.num_connections, self.host) if not self.ConnectionCls or self.ConnectionCls is DummyConnection: raise SSLError("Can't connect to HTTPS URL because the SSL " diff --git a/requests/packages/urllib3/contrib/appengine.py b/requests/packages/urllib3/contrib/appengine.py index 4f8f1312..1579476c 100644 --- a/requests/packages/urllib3/contrib/appengine.py +++ b/requests/packages/urllib3/contrib/appengine.py @@ -144,7 +144,7 @@ class AppEngineManager(RequestMethods): if retries.is_forced_retry(method, status_code=http_response.status): retries = retries.increment( method, url, response=http_response, _pool=self) - log.info("Forced retry: %s" % url) + log.info("Forced retry: %s", url) retries.sleep() return self.urlopen( method, url, @@ -164,6 +164,14 @@ class AppEngineManager(RequestMethods): if content_encoding == 'deflate': del urlfetch_resp.headers['content-encoding'] + transfer_encoding = urlfetch_resp.headers.get('transfer-encoding') + # We have a full response's content, + # so let's make sure we don't report ourselves as chunked data. + if transfer_encoding == 'chunked': + encodings = transfer_encoding.split(",") + encodings.remove('chunked') + urlfetch_resp.headers['transfer-encoding'] = ','.join(encodings) + return HTTPResponse( # In order for decoding to work, we must present the content as # a file-like object. @@ -177,7 +185,7 @@ class AppEngineManager(RequestMethods): if timeout is Timeout.DEFAULT_TIMEOUT: return 5 # 5s is the default timeout for URLFetch. if isinstance(timeout, Timeout): - if timeout.read is not timeout.connect: + if timeout._read is not timeout._connect: warnings.warn( "URLFetch does not support granular timeout settings, " "reverting to total timeout.", AppEnginePlatformWarning) diff --git a/requests/packages/urllib3/contrib/ntlmpool.py b/requests/packages/urllib3/contrib/ntlmpool.py index c136a238..11d0b5c3 100644 --- a/requests/packages/urllib3/contrib/ntlmpool.py +++ b/requests/packages/urllib3/contrib/ntlmpool.py @@ -43,8 +43,8 @@ class NTLMConnectionPool(HTTPSConnectionPool): # Performs the NTLM handshake that secures the connection. The socket # must be kept open while requests are performed. self.num_connections += 1 - log.debug('Starting NTLM HTTPS connection no. %d: https://%s%s' % - (self.num_connections, self.host, self.authurl)) + log.debug('Starting NTLM HTTPS connection no. %d: https://%s%s', + self.num_connections, self.host, self.authurl) headers = {} headers['Connection'] = 'Keep-Alive' @@ -56,13 +56,13 @@ class NTLMConnectionPool(HTTPSConnectionPool): # Send negotiation message headers[req_header] = ( 'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser)) - log.debug('Request headers: %s' % headers) + log.debug('Request headers: %s', headers) conn.request('GET', self.authurl, None, headers) res = conn.getresponse() reshdr = dict(res.getheaders()) - log.debug('Response status: %s %s' % (res.status, res.reason)) - log.debug('Response headers: %s' % reshdr) - log.debug('Response data: %s [...]' % res.read(100)) + log.debug('Response status: %s %s', res.status, res.reason) + log.debug('Response headers: %s', reshdr) + log.debug('Response data: %s [...]', res.read(100)) # Remove the reference to the socket, so that it can not be closed by # the response object (we want to keep the socket open) @@ -87,12 +87,12 @@ class NTLMConnectionPool(HTTPSConnectionPool): self.pw, NegotiateFlags) headers[req_header] = 'NTLM %s' % auth_msg - log.debug('Request headers: %s' % headers) + log.debug('Request headers: %s', headers) conn.request('GET', self.authurl, None, headers) res = conn.getresponse() - log.debug('Response status: %s %s' % (res.status, res.reason)) - log.debug('Response headers: %s' % dict(res.getheaders())) - log.debug('Response data: %s [...]' % res.read()[:100]) + log.debug('Response status: %s %s', res.status, res.reason) + log.debug('Response headers: %s', dict(res.getheaders())) + log.debug('Response data: %s [...]', res.read()[:100]) if res.status != 200: if res.status == 401: raise Exception('Server rejected request: wrong ' diff --git a/requests/packages/urllib3/contrib/socks.py b/requests/packages/urllib3/contrib/socks.py new file mode 100644 index 00000000..3748fee5 --- /dev/null +++ b/requests/packages/urllib3/contrib/socks.py @@ -0,0 +1,172 @@ +# -*- coding: utf-8 -*- +""" +SOCKS support for urllib3 +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This contrib module contains provisional support for SOCKS proxies from within +urllib3. This module supports SOCKS4 (specifically the SOCKS4A variant) and +SOCKS5. To enable its functionality, either install PySocks or install this +module with the ``socks`` extra. + +Known Limitations: + +- Currently PySocks does not support contacting remote websites via literal + IPv6 addresses. Any such connection attempt will fail. +- Currently PySocks does not support IPv6 connections to the SOCKS proxy. Any + such connection attempt will fail. +""" +from __future__ import absolute_import + +try: + import socks +except ImportError: + import warnings + from ..exceptions import DependencyWarning + + warnings.warn(( + 'SOCKS support in urllib3 requires the installation of optional ' + 'dependencies: specifically, PySocks. For more information, see ' + 'https://urllib3.readthedocs.org/en/latest/contrib.html#socks-proxies' + ), + DependencyWarning + ) + raise + +from socket import error as SocketError, timeout as SocketTimeout + +from ..connection import ( + HTTPConnection, HTTPSConnection +) +from ..connectionpool import ( + HTTPConnectionPool, HTTPSConnectionPool +) +from ..exceptions import ConnectTimeoutError, NewConnectionError +from ..poolmanager import PoolManager +from ..util.url import parse_url + +try: + import ssl +except ImportError: + ssl = None + + +class SOCKSConnection(HTTPConnection): + """ + A plain-text HTTP connection that connects via a SOCKS proxy. + """ + def __init__(self, *args, **kwargs): + self._socks_options = kwargs.pop('_socks_options') + super(SOCKSConnection, self).__init__(*args, **kwargs) + + def _new_conn(self): + """ + Establish a new connection via the SOCKS proxy. + """ + extra_kw = {} + if self.source_address: + extra_kw['source_address'] = self.source_address + + if self.socket_options: + extra_kw['socket_options'] = self.socket_options + + try: + conn = socks.create_connection( + (self.host, self.port), + proxy_type=self._socks_options['socks_version'], + proxy_addr=self._socks_options['proxy_host'], + proxy_port=self._socks_options['proxy_port'], + proxy_username=self._socks_options['username'], + proxy_password=self._socks_options['password'], + timeout=self.timeout, + **extra_kw + ) + + except SocketTimeout as e: + raise ConnectTimeoutError( + self, "Connection to %s timed out. (connect timeout=%s)" % + (self.host, self.timeout)) + + except socks.ProxyError as e: + # This is fragile as hell, but it seems to be the only way to raise + # useful errors here. + if e.socket_err: + error = e.socket_err + if isinstance(error, SocketTimeout): + raise ConnectTimeoutError( + self, + "Connection to %s timed out. (connect timeout=%s)" % + (self.host, self.timeout) + ) + else: + raise NewConnectionError( + self, + "Failed to establish a new connection: %s" % error + ) + else: + raise NewConnectionError( + self, + "Failed to establish a new connection: %s" % e + ) + + except SocketError as e: # Defensive: PySocks should catch all these. + raise NewConnectionError( + self, "Failed to establish a new connection: %s" % e) + + return conn + + +# We don't need to duplicate the Verified/Unverified distinction from +# urllib3/connection.py here because the HTTPSConnection will already have been +# correctly set to either the Verified or Unverified form by that module. This +# means the SOCKSHTTPSConnection will automatically be the correct type. +class SOCKSHTTPSConnection(SOCKSConnection, HTTPSConnection): + pass + + +class SOCKSHTTPConnectionPool(HTTPConnectionPool): + ConnectionCls = SOCKSConnection + + +class SOCKSHTTPSConnectionPool(HTTPSConnectionPool): + ConnectionCls = SOCKSHTTPSConnection + + +class SOCKSProxyManager(PoolManager): + """ + A version of the urllib3 ProxyManager that routes connections via the + defined SOCKS proxy. + """ + pool_classes_by_scheme = { + 'http': SOCKSHTTPConnectionPool, + 'https': SOCKSHTTPSConnectionPool, + } + + def __init__(self, proxy_url, username=None, password=None, + num_pools=10, headers=None, **connection_pool_kw): + parsed = parse_url(proxy_url) + + if parsed.scheme == 'socks5': + socks_version = socks.PROXY_TYPE_SOCKS5 + elif parsed.scheme == 'socks4': + socks_version = socks.PROXY_TYPE_SOCKS4 + else: + raise ValueError( + "Unable to determine SOCKS version from %s" % proxy_url + ) + + self.proxy_url = proxy_url + + socks_options = { + 'socks_version': socks_version, + 'proxy_host': parsed.host, + 'proxy_port': parsed.port, + 'username': username, + 'password': password, + } + connection_pool_kw['_socks_options'] = socks_options + + super(SOCKSProxyManager, self).__init__( + num_pools, headers, **connection_pool_kw + ) + + self.pool_classes_by_scheme = SOCKSProxyManager.pool_classes_by_scheme diff --git a/requests/packages/urllib3/exceptions.py b/requests/packages/urllib3/exceptions.py index 8e07eb61..f2e65917 100644 --- a/requests/packages/urllib3/exceptions.py +++ b/requests/packages/urllib3/exceptions.py @@ -180,6 +180,14 @@ class SNIMissingWarning(HTTPWarning): pass +class DependencyWarning(HTTPWarning): + """ + Warned when an attempt is made to import a module with missing optional + dependencies. + """ + pass + + class ResponseNotChunked(ProtocolError, ValueError): "Response needs to be chunked in order to read it as chunks." pass diff --git a/requests/packages/urllib3/poolmanager.py b/requests/packages/urllib3/poolmanager.py index f13e673d..1023dcba 100644 --- a/requests/packages/urllib3/poolmanager.py +++ b/requests/packages/urllib3/poolmanager.py @@ -18,16 +18,16 @@ from .util.retry import Retry __all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url'] -pool_classes_by_scheme = { - 'http': HTTPConnectionPool, - 'https': HTTPSConnectionPool, -} - log = logging.getLogger(__name__) SSL_KEYWORDS = ('key_file', 'cert_file', 'cert_reqs', 'ca_certs', 'ssl_version', 'ca_cert_dir') +pool_classes_by_scheme = { + 'http': HTTPConnectionPool, + 'https': HTTPSConnectionPool, +} + class PoolManager(RequestMethods): """ @@ -65,6 +65,9 @@ class PoolManager(RequestMethods): self.pools = RecentlyUsedContainer(num_pools, dispose_func=lambda p: p.close()) + # Locally set the pool classes so other PoolManagers can override them. + self.pool_classes_by_scheme = pool_classes_by_scheme + def __enter__(self): return self @@ -81,7 +84,7 @@ class PoolManager(RequestMethods): by :meth:`connection_from_url` and companion methods. It is intended to be overridden for customization. """ - pool_cls = pool_classes_by_scheme[scheme] + pool_cls = self.pool_classes_by_scheme[scheme] kwargs = self.connection_pool_kw if scheme == 'http': kwargs = self.connection_pool_kw.copy() @@ -186,7 +189,7 @@ class PoolManager(RequestMethods): kw['retries'] = retries kw['redirect'] = redirect - log.info("Redirecting %s -> %s" % (url, redirect_location)) + log.info("Redirecting %s -> %s", url, redirect_location) return self.urlopen(method, redirect_location, **kw) diff --git a/requests/packages/urllib3/util/response.py b/requests/packages/urllib3/util/response.py index bc723272..0b5c75c1 100644 --- a/requests/packages/urllib3/util/response.py +++ b/requests/packages/urllib3/util/response.py @@ -61,7 +61,7 @@ def assert_header_parsing(headers): def is_response_to_head(response): """ - Checks, wether a the request of a response has been a HEAD-request. + Checks whether the request of a response has been a HEAD-request. Handles the quirks of AppEngine. :param conn: diff --git a/requests/packages/urllib3/util/retry.py b/requests/packages/urllib3/util/retry.py index 03a01249..862174ab 100644 --- a/requests/packages/urllib3/util/retry.py +++ b/requests/packages/urllib3/util/retry.py @@ -153,7 +153,7 @@ class Retry(object): redirect = bool(redirect) and None new_retries = cls(retries, redirect=redirect) - log.debug("Converted retries value: %r -> %r" % (retries, new_retries)) + log.debug("Converted retries value: %r -> %r", retries, new_retries) return new_retries def get_backoff_time(self): @@ -272,7 +272,7 @@ class Retry(object): if new_retry.is_exhausted(): raise MaxRetryError(_pool, url, error or ResponseError(cause)) - log.debug("Incremented Retry for (url='%s'): %r" % (url, new_retry)) + log.debug("Incremented Retry for (url='%s'): %r", url, new_retry) return new_retry diff --git a/requests/packages/urllib3/util/ssl_.py b/requests/packages/urllib3/util/ssl_.py index dc01645c..780dbc8c 100644 --- a/requests/packages/urllib3/util/ssl_.py +++ b/requests/packages/urllib3/util/ssl_.py @@ -110,7 +110,7 @@ except ImportError: ) self.ciphers = cipher_suite - def wrap_socket(self, socket, server_hostname=None): + def wrap_socket(self, socket, server_hostname=None, server_side=False): warnings.warn( 'A true SSLContext object is not available. This prevents ' 'urllib3 from configuring SSL appropriately and may cause ' @@ -125,6 +125,7 @@ except ImportError: 'ca_certs': self.ca_certs, 'cert_reqs': self.verify_mode, 'ssl_version': self.protocol, + 'server_side': server_side, } if self.supports_set_ciphers: # Platform-specific: Python 2.7+ return wrap_socket(socket, ciphers=self.ciphers, **kwargs) diff --git a/setup.py b/setup.py index 5991f93e..3a390522 100755 --- a/setup.py +++ b/setup.py @@ -93,5 +93,6 @@ setup( tests_require=test_requirements, extras_require={ 'security': ['pyOpenSSL>=0.13', 'ndg-httpsclient', 'pyasn1'], + 'socks': ['PySocks>=1.5.6'], }, )