diff --git a/HISTORY.rst b/HISTORY.rst index 8bdb6ab1..19c2ff1b 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,17 @@ Release History --------------- +2.3.0 (YYYY-MM-DD) +++++++++++++++++++ + +**API Changes** + +- New ``Response`` property ``is_redirect``, which is true when the + library could have processed this response as a redirection (whether + or not it actually did). +- The ``timeout`` parameter now affects requests with both ``stream=True`` and + ``stream=False`` equally. + 2.2.1 (2014-01-23) ++++++++++++++++++ @@ -128,6 +139,8 @@ Release History 1.2.1 (2013-05-20) ++++++++++++++++++ +- 301 and 302 redirects now change the verb to GET for all verbs, not just + POST, improving browser compatibility. - Python 3.3.2 compatibility - Always percent-encode location headers - Fix connection adapter matching to be most-specific first diff --git a/NOTICE b/NOTICE index 4d69475c..223101a0 100644 --- a/NOTICE +++ b/NOTICE @@ -24,8 +24,8 @@ FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TOR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -Charade License -================ +Chardet License +=============== This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -46,18 +46,6 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA CA Bundle License ================= -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. 
- -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -02110-1301 - +This Source Code Form is subject to the terms of the Mozilla Public +License, v. 2.0. If a copy of the MPL was not distributed with this +file, You can obtain one at http://mozilla.org/MPL/2.0/. diff --git a/docs/api.rst b/docs/api.rst index 77f1f02d..86061be9 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -249,7 +249,9 @@ Behavioral Changes * Timeouts behave slightly differently. On streaming requests, the timeout only applies to the connection attempt. On regular requests, the timeout - is applied to the connection process and downloading the full body. + is applied to the connection process and to all attempts to read data from + the underlying socket. It does *not* apply to the total download time for the + request. :: diff --git a/docs/community/faq.rst b/docs/community/faq.rst index edbf9b70..4e792eca 100644 --- a/docs/community/faq.rst +++ b/docs/community/faq.rst @@ -60,3 +60,26 @@ supported: * Python 3.2 * Python 3.3 * PyPy 1.9 + +What are "hostname doesn't match" errors? +----------------------------------------- + +These errors occur when :ref:`SSL certificate verification <verification>` +fails to match the certificate the server responds with to the hostname +Requests thinks it's contacting. If you're certain the server's SSL setup is +correct (for example, because you can visit the site with your browser) and +you're using Python 2.6 or 2.7, a possible explanation is that you need +Server-Name-Indication. + +`Server-Name-Indication`_, or SNI, is an official extension to SSL where the +client tells the server what hostname it is contacting. This enables `virtual +hosting`_ on SSL protected sites, the server being able to respond with a +certificate appropriate for the hostname the client is contacting. 
+ +Python 3's SSL module includes native support for SNI. This support has not been +backported to Python 2. For information on using SNI with Requests on Python 2, +refer to this `Stack Overflow answer`_. + +.. _`Server-Name-Indication`: https://en.wikipedia.org/wiki/Server_Name_Indication +.. _`virtual hosting`: https://en.wikipedia.org/wiki/Virtual_hosting +.. _`Stack Overflow answer`: https://stackoverflow.com/questions/18578439/using-requests-with-tls-doesnt-give-sni-support/18579484#18579484 diff --git a/docs/user/advanced.rst b/docs/user/advanced.rst index f5e8e59f..80c1e6ae 100644 --- a/docs/user/advanced.rst +++ b/docs/user/advanced.rst @@ -145,6 +145,8 @@ applied, replace the call to :meth:`Request.prepare() print(resp.status_code) +.. _verification: + SSL Cert Verification --------------------- diff --git a/requests/__init__.py b/requests/__init__.py index 2e9f3a0b..bba19002 100644 --- a/requests/__init__.py +++ b/requests/__init__.py @@ -42,8 +42,8 @@ is at . """ __title__ = 'requests' -__version__ = '2.2.1' -__build__ = 0x020201 +__version__ = '2.3.0' +__build__ = 0x020300 __author__ = 'Kenneth Reitz' __license__ = 'Apache 2.0' __copyright__ = 'Copyright 2014 Kenneth Reitz' diff --git a/requests/adapters.py b/requests/adapters.py index dd10e959..28bea07c 100644 --- a/requests/adapters.py +++ b/requests/adapters.py @@ -310,10 +310,7 @@ class HTTPAdapter(BaseAdapter): chunked = not (request.body is None or 'Content-Length' in request.headers) - if stream: - timeout = TimeoutSauce(connect=timeout) - else: - timeout = TimeoutSauce(connect=timeout, read=timeout) + timeout = TimeoutSauce(connect=timeout, read=timeout) try: if not chunked: @@ -372,19 +369,19 @@ class HTTPAdapter(BaseAdapter): conn._put_conn(low_conn) except socket.error as sockerr: - raise ConnectionError(sockerr) + raise ConnectionError(sockerr, request=request) except MaxRetryError as e: - raise ConnectionError(e) + raise ConnectionError(e, request=request) except _ProxyError as e: raise 
ProxyError(e) except (_SSLError, _HTTPError) as e: if isinstance(e, _SSLError): - raise SSLError(e) + raise SSLError(e, request=request) elif isinstance(e, TimeoutError): - raise Timeout(e) + raise Timeout(e, request=request) else: raise diff --git a/requests/api.py b/requests/api.py index baf43dd6..01d853d5 100644 --- a/requests/api.py +++ b/requests/api.py @@ -26,7 +26,7 @@ def request(method, url, **kwargs): :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`. :param files: (optional) Dictionary of 'name': file-like-objects (or {'name': ('filename', fileobj)}) for multipart encoding upload. :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth. - :param timeout: (optional) Float describing the timeout of the request. + :param timeout: (optional) Float describing the timeout of the request in seconds. :param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed. :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. :param verify: (optional) if ``True``, the SSL cert will be verified. A CA_BUNDLE path can also be provided. 
diff --git a/requests/auth.py b/requests/auth.py index 6664cd80..9f831b7a 100644 --- a/requests/auth.py +++ b/requests/auth.py @@ -11,7 +11,6 @@ import os import re import time import hashlib -import logging from base64 import b64encode @@ -19,8 +18,6 @@ from .compat import urlparse, str from .cookies import extract_cookies_to_jar from .utils import parse_dict_header -log = logging.getLogger(__name__) - CONTENT_TYPE_FORM_URLENCODED = 'application/x-www-form-urlencoded' CONTENT_TYPE_MULTI_PART = 'multipart/form-data' diff --git a/requests/exceptions.py b/requests/exceptions.py index cd3c7600..a4ee9d63 100644 --- a/requests/exceptions.py +++ b/requests/exceptions.py @@ -14,15 +14,22 @@ class RequestException(IOError): """There was an ambiguous exception that occurred while handling your request.""" + def __init__(self, *args, **kwargs): + """ + Initialize RequestException with `request` and `response` objects. + """ + response = kwargs.pop('response', None) + self.response = response + self.request = kwargs.pop('request', None) + if (response is not None and not self.request and + hasattr(response, 'request')): + self.request = self.response.request + super(RequestException, self).__init__(*args, **kwargs) + class HTTPError(RequestException): """An HTTP error occurred.""" - def __init__(self, *args, **kwargs): - """ Initializes HTTPError with optional `response` object. """ - self.response = kwargs.pop('response', None) - super(HTTPError, self).__init__(*args, **kwargs) - class ConnectionError(RequestException): """A Connection error occurred.""" diff --git a/requests/models.py b/requests/models.py index 014ca789..e2fa09f8 100644 --- a/requests/models.py +++ b/requests/models.py @@ -8,7 +8,6 @@ This module contains the primary objects that power Requests. 
""" import collections -import logging import datetime from io import BytesIO, UnsupportedOperation @@ -31,12 +30,20 @@ from .utils import ( from .compat import ( cookielib, urlunparse, urlsplit, urlencode, str, bytes, StringIO, is_py2, chardet, json, builtin_str, basestring, IncompleteRead) +from .status_codes import codes +#: The set of HTTP status codes that indicate an automatically +#: processable redirect. +REDIRECT_STATI = ( + codes.moved, # 301 + codes.found, # 302 + codes.other, # 303 + codes.temporary_moved, # 307 +) +DEFAULT_REDIRECT_LIMIT = 30 CONTENT_CHUNK_SIZE = 10 * 1024 ITER_CHUNK_SIZE = 512 -log = logging.getLogger(__name__) - class RequestEncodingMixin(object): @property @@ -517,7 +524,7 @@ class Response(object): self._content = False self._content_consumed = False - #: Integer Code of responded HTTP Status. + #: Integer Code of responded HTTP Status, e.g. 404 or 200. self.status_code = None #: Case-insensitive Dictionary of Response Headers. @@ -541,6 +548,7 @@ class Response(object): #: up here. The list is sorted from the oldest to the most recent request. self.history = [] + #: Textual reason of responded HTTP Status, e.g. "Not Found" or "OK". self.reason = None #: A CookieJar of Cookies the server sent back. @@ -567,6 +575,7 @@ class Response(object): # pickled objects do not have .raw setattr(self, '_content_consumed', True) + setattr(self, 'raw', None) def __repr__(self): return '' % (self.status_code) @@ -591,10 +600,16 @@ class Response(object): return False return True + @property + def is_redirect(self): + """True if this Response is a well-formed HTTP redirect that could have + been processed automatically (by :meth:`Session.resolve_redirects`). 
+ """ + return ('location' in self.headers and self.status_code in REDIRECT_STATI) + @property def apparent_encoding(self): - """The apparent encoding, provided by the lovely Charade library - (Thanks, Ian!).""" + """The apparent encoding, provided by the chardet library""" return chardet.detect(self.content)['encoding'] def iter_content(self, chunk_size=1, decode_unicode=False): @@ -735,7 +750,14 @@ class Response(object): # a best guess). encoding = guess_json_utf(self.content) if encoding is not None: - return json.loads(self.content.decode(encoding), **kwargs) + try: + return json.loads(self.content.decode(encoding), **kwargs) + except UnicodeDecodeError: + # Wrong UTF codec detected; usually because it's not UTF-8 + # but some other 8-bit codec. This is an RFC violation, + # and the server didn't bother to tell us what codec *was* + # used. + pass return json.loads(self.text, **kwargs) @property diff --git a/requests/sessions.py b/requests/sessions.py index 2236e83f..79ea7773 100644 --- a/requests/sessions.py +++ b/requests/sessions.py @@ -12,10 +12,11 @@ import os from collections import Mapping from datetime import datetime +from .auth import _basic_auth_str from .compat import cookielib, OrderedDict, urljoin, urlparse, builtin_str from .cookies import ( cookiejar_from_dict, extract_cookies_to_jar, RequestsCookieJar, merge_cookies) -from .models import Request, PreparedRequest +from .models import Request, PreparedRequest, DEFAULT_REDIRECT_LIMIT from .hooks import default_hooks, dispatch_hook from .utils import to_key_val_list, default_headers, to_native_string from .exceptions import TooManyRedirects, InvalidSchema @@ -23,16 +24,15 @@ from .structures import CaseInsensitiveDict from .adapters import HTTPAdapter -from .utils import requote_uri, get_environ_proxies, get_netrc_auth +from .utils import ( + requote_uri, get_environ_proxies, get_netrc_auth, should_bypass_proxies, + get_auth_from_url +) from .status_codes import codes -REDIRECT_STATI = ( - 
codes.moved, # 301 - codes.found, # 302 - codes.other, # 303 - codes.temporary_moved, # 307 -) -DEFAULT_REDIRECT_LIMIT = 30 + +# formerly defined here, reexposed here for backward compatibility +from .models import REDIRECT_STATI def merge_setting(request_setting, session_setting, dict_class=OrderedDict): @@ -63,6 +63,8 @@ def merge_setting(request_setting, session_setting, dict_class=OrderedDict): if v is None: del merged_setting[k] + merged_setting = dict((k, v) for (k, v) in merged_setting.items() if v is not None) + return merged_setting @@ -89,8 +91,7 @@ class SessionRedirectMixin(object): i = 0 - # ((resp.status_code is codes.see_other)) - while ('location' in resp.headers and resp.status_code in REDIRECT_STATI): + while resp.is_redirect: prepared_request = req.copy() resp.content # Consume socket so it can be released @@ -157,19 +158,9 @@ class SessionRedirectMixin(object): prepared_request._cookies.update(self.cookies) prepared_request.prepare_cookies(prepared_request._cookies) - if 'Authorization' in headers: - # If we get redirected to a new host, we should strip out any - # authentication headers. - original_parsed = urlparse(resp.request.url) - redirect_parsed = urlparse(url) - - if (original_parsed.hostname != redirect_parsed.hostname): - del headers['Authorization'] - - # .netrc might have more auth for us. - new_auth = get_netrc_auth(url) if self.trust_env else None - if new_auth is not None: - prepared_request.prepare_auth(new_auth) + # Rebuild auth and proxy information. + proxies = self.rebuild_proxies(prepared_request, proxies) + self.rebuild_auth(prepared_request, resp) resp = self.send( prepared_request, @@ -186,6 +177,68 @@ class SessionRedirectMixin(object): i += 1 yield resp + def rebuild_auth(self, prepared_request, response): + """ + When being redirected we may want to strip authentication from the + request to avoid leaking credentials. 
This method intelligently removes + and reapplies authentication where possible to avoid credential loss. + """ + headers = prepared_request.headers + url = prepared_request.url + + if 'Authorization' in headers: + # If we get redirected to a new host, we should strip out any + # authentication headers. + original_parsed = urlparse(response.request.url) + redirect_parsed = urlparse(url) + + if (original_parsed.hostname != redirect_parsed.hostname): + del headers['Authorization'] + + # .netrc might have more auth for us on our new host. + new_auth = get_netrc_auth(url) if self.trust_env else None + if new_auth is not None: + prepared_request.prepare_auth(new_auth) + + return + + def rebuild_proxies(self, prepared_request, proxies): + """ + This method re-evaluates the proxy configuration by considering the + environment variables. If we are redirected to a URL covered by + NO_PROXY, we strip the proxy configuration. Otherwise, we set missing + proxy keys for this URL (in case they were stripped by a previous + redirect). + + This method also replaces the Proxy-Authorization header where + necessary. + """ + headers = prepared_request.headers + url = prepared_request.url + new_proxies = {} + + if not should_bypass_proxies(url): + environ_proxies = get_environ_proxies(url) + scheme = urlparse(url).scheme + + proxy = environ_proxies.get(scheme) + + if proxy: + new_proxies.setdefault(scheme, environ_proxies[scheme]) + + if 'Proxy-Authorization' in headers: + del headers['Proxy-Authorization'] + + try: + username, password = get_auth_from_url(new_proxies[scheme]) + except KeyError: + username, password = None, None + + if username and password: + headers['Proxy-Authorization'] = _basic_auth_str(username, password) + + return new_proxies + class Session(SessionRedirectMixin): """A Requests session. @@ -333,7 +386,7 @@ class Session(SessionRedirectMixin): :param auth: (optional) Auth tuple or callable to enable Basic/Digest/Custom HTTP Auth. 
:param timeout: (optional) Float describing the timeout of the - request. + request in seconds. :param allow_redirects: (optional) Boolean. Set to True by default. :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. diff --git a/requests/utils.py b/requests/utils.py index 7b7ff0a7..6f4eb500 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -466,9 +466,10 @@ def is_valid_cidr(string_network): return True -def get_environ_proxies(url): - """Return a dict of environment proxies.""" - +def should_bypass_proxies(url): + """ + Returns whether we should bypass proxies or not. + """ get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper()) # First check whether no_proxy is defined. If it is, check that the URL @@ -486,13 +487,13 @@ def get_environ_proxies(url): for proxy_ip in no_proxy: if is_valid_cidr(proxy_ip): if address_in_network(ip, proxy_ip): - return {} + return True else: for host in no_proxy: if netloc.endswith(host) or netloc.split(':')[0].endswith(host): # The URL does match something in no_proxy, so we don't want # to apply the proxies on this URL. - return {} + return True # If the system proxy settings indicate that this URL should be bypassed, # don't proxy. @@ -506,12 +507,16 @@ def get_environ_proxies(url): bypass = False if bypass: - return {} + return True - # If we get here, we either didn't have no_proxy set or we're not going - # anywhere that no_proxy applies to, and the system settings don't require - # bypassing the proxy for the current URL. 
- return getproxies() + return False + +def get_environ_proxies(url): + """Return a dict of environment proxies.""" + if should_bypass_proxies(url): + return {} + else: + return getproxies() def default_user_agent(name="python-requests"): @@ -548,7 +553,7 @@ def default_user_agent(name="python-requests"): def default_headers(): return CaseInsensitiveDict({ 'User-Agent': default_user_agent(), - 'Accept-Encoding': ', '.join(('gzip', 'deflate', 'compress')), + 'Accept-Encoding': ', '.join(('gzip', 'deflate')), 'Accept': '*/*' }) diff --git a/test_requests.py b/test_requests.py index ee9c7b78..1bebb1ad 100755 --- a/test_requests.py +++ b/test_requests.py @@ -17,6 +17,7 @@ from requests.compat import ( Morsel, cookielib, getproxies, str, urljoin, urlparse) from requests.cookies import cookiejar_from_dict, morsel_to_cookie from requests.exceptions import InvalidURL, MissingSchema +from requests.models import PreparedRequest, Response from requests.structures import CaseInsensitiveDict try: @@ -115,6 +116,8 @@ class RequestsTestCase(unittest.TestCase): def test_HTTP_302_ALLOW_REDIRECT_GET(self): r = requests.get(httpbin('redirect', '1')) assert r.status_code == 200 + assert r.history[0].status_code == 302 + assert r.history[0].is_redirect # def test_HTTP_302_ALLOW_REDIRECT_POST(self): # r = requests.post(httpbin('status', '302'), data={'some': 'data'}) @@ -209,6 +212,14 @@ class RequestsTestCase(unittest.TestCase): req_urls = [r.request.url for r in resp.history] assert urls == req_urls + def test_headers_on_session_with_None_are_not_sent(self): + """Do not send headers in Session.headers with None values.""" + ses = requests.Session() + ses.headers['Accept-Encoding'] = None + req = requests.Request('GET', 'http://httpbin.org/get') + prep = ses.prepare_request(req) + assert 'Accept-Encoding' not in prep.headers + def test_user_agent_transfers(self): heads = { @@ -855,6 +866,22 @@ class RequestsTestCase(unittest.TestCase): preq = req.prepare() assert test_url == preq.url + 
def test_auth_is_stripped_on_redirect_off_host(self): + r = requests.get( + httpbin('redirect-to'), + params={'url': 'http://www.google.co.uk'}, + auth=('user', 'pass'), + ) + assert r.history[0].request.headers['Authorization'] + assert not r.request.headers.get('Authorization', '') + + def test_auth_is_retained_for_redirect_on_host(self): + r = requests.get(httpbin('redirect/1'), auth=('user', 'pass')) + h1 = r.history[0].request.headers['Authorization'] + h2 = r.request.headers['Authorization'] + + assert h1 == h2 + class TestContentEncodingDetection(unittest.TestCase): @@ -1169,5 +1196,13 @@ class TestMorselToCookieMaxAge(unittest.TestCase): morsel_to_cookie(morsel) +class TestTimeout: + def test_stream_timeout(self): + try: + r = requests.get('https://httpbin.org/delay/10', timeout=5.0) + except requests.exceptions.Timeout as e: + assert 'Read timed out' in e.args[0].args[0] + + if __name__ == '__main__': unittest.main()