From 2669ab797ce769ecedf5493b04cb976f33e37210 Mon Sep 17 00:00:00 2001 From: Nate Prewitt Date: Wed, 29 Jun 2016 13:46:40 -0400 Subject: [PATCH] check and test for headers containing return characters or leading whitespace --- AUTHORS.rst | 1 + requests/exceptions.py | 6 +++++- requests/models.py | 12 ++++++++---- requests/utils.py | 20 +++++++++++++++++++- tests/test_requests.py | 43 +++++++++++++++++++++++++++++++++++++++++- 5 files changed, 75 insertions(+), 7 deletions(-) diff --git a/AUTHORS.rst b/AUTHORS.rst index 37b66698..b0ddcabb 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -166,3 +166,4 @@ Patches and Suggestions - Dmitry Dygalo (`@Stranger6667 `_) - piotrjurkiewicz - Jesse Shapiro (`@haikuginger `_) +- Nate Prewitt (`@nateprewitt `_) diff --git a/requests/exceptions.py b/requests/exceptions.py index ba0b910e..3f056492 100644 --- a/requests/exceptions.py +++ b/requests/exceptions.py @@ -80,7 +80,11 @@ class InvalidSchema(RequestException, ValueError): class InvalidURL(RequestException, ValueError): - """ The URL provided was somehow invalid. """ + """The URL provided was somehow invalid.""" + + +class InvalidHeader(RequestException, ValueError): + """The header value provided was somehow invalid.""" class ChunkedEncodingError(RequestException): diff --git a/requests/models.py b/requests/models.py index fbb3c7e6..369f790f 100644 --- a/requests/models.py +++ b/requests/models.py @@ -27,7 +27,8 @@ from .exceptions import ( from .utils import ( guess_filename, get_auth_from_url, requote_uri, stream_decode_response_unicode, to_key_val_list, parse_header_links, - iter_slices, guess_json_utf, super_len, to_native_string) + iter_slices, guess_json_utf, super_len, to_native_string, + check_header_validity) from .compat import ( cookielib, urlunparse, urlsplit, urlencode, str, bytes, StringIO, is_py2, chardet, builtin_str, basestring) @@ -403,10 +404,13 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): def prepare_headers(self, headers): """Prepares the given HTTP headers.""" + self.headers = CaseInsensitiveDict() if headers: - self.headers = CaseInsensitiveDict((to_native_string(name), value) for name, value in headers.items()) - else: - self.headers = CaseInsensitiveDict() + for header in headers.items(): + # Raise exception on invalid header value. + check_header_validity(header) + name, value = header + self.headers[to_native_string(name)] = value def prepare_body(self, data, files, json=None): """Prepares the given HTTP body data.""" diff --git a/requests/utils.py b/requests/utils.py index 62d023fa..769b4012 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -27,7 +27,7 @@ from .compat import (quote, urlparse, bytes, str, OrderedDict, unquote, is_py2, basestring) from .cookies import RequestsCookieJar, cookiejar_from_dict from .structures import CaseInsensitiveDict -from .exceptions import InvalidURL, FileModeWarning +from .exceptions import InvalidURL, InvalidHeader, FileModeWarning _hush_pyflakes = (RequestsCookieJar,) @@ -732,6 +732,24 @@ def to_native_string(string, encoding='ascii'): return out +# Moved outside of function to avoid recompile every call +_CLEAN_HEADER_REGEX_BYTE = re.compile(b'^\\S[^\\r\\n]*$|^$') +_CLEAN_HEADER_REGEX_STR = re.compile(r'^\S[^\r\n]*$|^$') + +def check_header_validity(header): + """Verifies that header value doesn't contain leading whitespace or + return characters. This prevents unintended header injection. + + :param header: tuple, in the format (name, value). + """ + name, value = header + + if isinstance(value, bytes): + pat = _CLEAN_HEADER_REGEX_BYTE + else: + pat = _CLEAN_HEADER_REGEX_STR + if not pat.match(value): + raise InvalidHeader("Invalid return character or leading space in header: %s" % name) def urldefragauth(url): """ diff --git a/tests/test_requests.py b/tests/test_requests.py index 4393814f..5abe00aa 100755 --- a/tests/test_requests.py +++ b/tests/test_requests.py @@ -23,7 +23,7 @@ from requests.cookies import cookiejar_from_dict, morsel_to_cookie from requests.exceptions import ( ConnectionError, ConnectTimeout, InvalidSchema, InvalidURL, MissingSchema, ReadTimeout, Timeout, RetryError, TooManyRedirects, - ProxyError) + ProxyError, InvalidHeader) from requests.models import PreparedRequest from requests.structures import CaseInsensitiveDict from requests.sessions import SessionRedirectMixin @@ -1128,6 +1128,47 @@ class TestRequests: assert 'unicode' in p.headers.keys() assert 'byte' in p.headers.keys() + def test_header_validation(self,httpbin): + """Ensure prepare_headers regex isn't flagging valid header contents.""" + headers_ok = {'foo': 'bar baz qux', + 'bar': '1', + 'baz': '', + 'qux': str.encode(u'fbbq')} + r = requests.get(httpbin('get'), headers=headers_ok) + assert r.request.headers['foo'] == headers_ok['foo'] + + def test_header_no_return_chars(self, httpbin): + """Ensure that a header containing return character sequences raise an + exception. Otherwise, multiple headers are created from single string. + """ + headers_ret = {'foo': 'bar\r\nbaz: qux'} + headers_lf = {'foo': 'bar\nbaz: qux'} + headers_cr = {'foo': 'bar\rbaz: qux'} + + # Test for newline + with pytest.raises(InvalidHeader): + r = requests.get(httpbin('get'), headers=headers_ret) + # Test for line feed + with pytest.raises(InvalidHeader): + r = requests.get(httpbin('get'), headers=headers_lf) + # Test for carriage return + with pytest.raises(InvalidHeader): + r = requests.get(httpbin('get'), headers=headers_cr) + + def test_header_no_leading_space(self, httpbin): + """Ensure headers containing leading whitespace raise + InvalidHeader Error before sending. + """ + headers_space = {'foo': ' bar'} + headers_tab = {'foo': ' bar'} + + # Test for whitespace + with pytest.raises(InvalidHeader): + r = requests.get(httpbin('get'), headers=headers_space) + # Test for tab + with pytest.raises(InvalidHeader): + r = requests.get(httpbin('get'), headers=headers_tab) + @pytest.mark.parametrize('files', ('foo', b'foo', bytearray(b'foo'))) def test_can_send_objects_with_files(self, httpbin, files): data = {'a': 'this is a string'}