diff --git a/3.0-HISTORY.rst b/3.0-HISTORY.rst index a65aad1f..f9b1ec07 100644 --- a/3.0-HISTORY.rst +++ b/3.0-HISTORY.rst @@ -23,9 +23,9 @@ - Streaming responses with ``Response.iter_lines`` or ``Response.iter_content`` now requires an encoding to be set if one isn't provided by the server. - -- Exception raised during read timeout for ``Response.iter_content`` and - ``Response.iter_lines`` changed from ``ConnectionError`` to more + +- Exception raised during read timeout for ``Response.iter_content`` and + ``Response.iter_lines`` changed from ``ConnectionError`` to more specific ``ReadTimeout``. - Raise exception if multiple locations are returned during a redirect. @@ -73,5 +73,9 @@ - ``Response.raise_for_status()`` now returns the response object for good responses +- Use ``HTTPHeaderDict`` for response headers, allowing easier access to + individual values when multiple response headers are sent using the same + header name. + .. _#2002: https://github.com/kennethreitz/requests/issues/2002 .. _#2631: https://github.com/kennethreitz/requests/issues/2631 diff --git a/AUTHORS.rst b/AUTHORS.rst index b35b2595..ef7ffccd 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -179,7 +179,8 @@ Patches and Suggestions - Shmuel Amar (`@shmuelamar `_) - Gary Wu (`@garywu `_) - Ryan Pineo (`@ryanpineo `_) - Ed Morley (`@edmorley `_) - Matt Liu (`@mlcrazy `_) - Taylor Hoff (`@PrimordialHelios `_) - Hugo van Kemenade (`@hugovk `_) +- Allan Crooks (`@the-allanc `_) diff --git a/docs/user/quickstart.rst b/docs/user/quickstart.rst index 109c3415..574ffcfc 100644 --- a/docs/user/quickstart.rst +++ b/docs/user/quickstart.rst @@ -420,6 +420,10 @@ represented in the dictionary within a single mapping, as per of the message, by appending each subsequent field value to the combined field value in order, separated by a comma. 
+If you do need to access each individual value sent with the same header, then +you can use the ``getlist`` method to get a sequence of all the values returned +for a particular header. + Cookies ------- diff --git a/requests/adapters.py b/requests/adapters.py index fd6c9e3e..2df54955 100644 --- a/requests/adapters.py +++ b/requests/adapters.py @@ -35,7 +35,7 @@ from .utils import ( urldefragauth, select_proxy, ) -from .structures import CaseInsensitiveDict +from .structures import HTTPHeaderDict from .cookies import extract_cookies_to_jar from .exceptions import ( ConnectionError, @@ -304,7 +304,8 @@ class HTTPAdapter(BaseAdapter): # Fallback to None if there's no status_code, for whatever reason. response.status_code = getattr(resp, 'status', None) # Make headers case-insensitive. - response.headers = CaseInsensitiveDict(getattr(resp, 'headers', {})) + response.headers = HTTPHeaderDict(getattr(resp, 'headers', {})) + # Set encoding. response.encoding = get_encoding_from_headers(response.headers) response.raw = resp diff --git a/requests/structures.py b/requests/structures.py index fb56a100..0011baed 100644 --- a/requests/structures.py +++ b/requests/structures.py @@ -8,6 +8,8 @@ Data structures that power Requests. import collections +from .compat import basestring, OrderedDict + class CaseInsensitiveDict(collections.MutableMapping): """A case-insensitive ``dict``-like object. @@ -35,7 +37,6 @@ class CaseInsensitiveDict(collections.MutableMapping): operations are given keys that have equal ``.lower()``s, the behavior is undefined. 
""" - def __init__(self, data=None, **kwargs): self._store = collections.OrderedDict() if data is None: @@ -83,6 +84,115 @@ class CaseInsensitiveDict(collections.MutableMapping): return str(dict(self.items())) +class HTTPHeaderDict(CaseInsensitiveDict): + """A case-insensitive ``dict``-like object suitable for HTTP headers that + supports multiple values with the same key, via the ``add``, ``extend``, + ``getlist`` and ``setlist`` methods. + """ + + def __init__(self, data=None, **kwargs): + super(HTTPHeaderDict, self).__init__() + self.extend({} if data is None else data, **kwargs) + + # + # We'll store tuples in the internal dictionary, but present them as a + # concatenated string when we use item access methods. + # + + def __setitem__(self, key, val): + if not isinstance(val, basestring): + raise ValueError('only string-type values are allowed') + super(HTTPHeaderDict, self).__setitem__(key, (val,)) + + def __getitem__(self, key): + return ', '.join(super(HTTPHeaderDict, self).__getitem__(key)) + + def lower_items(self): + return ( + (lk, ', '.join(vals)) + for (lk, (k, vals)) + in self._store.items() + ) + + def copy(self): + return type(self)(self) + + def getlist(self, key): + """Returns a list of all the values for the named field. 
Returns an + empty list if the key isn't present in the dictionary.""" + return list(self._store.get(key.lower(), (None, []))[1]) + + def setlist(self, key, values): + """Set a sequence of strings to the associated key - this will overwrite + any previously stored value.""" + if not isinstance(values, (list, tuple)): + raise ValueError('argument is not sequence') + if any(not isinstance(v, basestring) for v in values): + raise ValueError('non-string items in sequence') + if not values: + self.pop(key, None) + return + super(HTTPHeaderDict, self).__setitem__(key, tuple(values)) + + def _extend(self, key, values): + new_value_tpl = key, values + + # Inspired by urllib3's implementation - use one call which should be + # suitable for the common case. + old_value_tpl = self._store.setdefault(key.lower(), new_value_tpl) + if old_value_tpl is not new_value_tpl: + old_key, old_values = old_value_tpl + self._store[key.lower()] = (old_key, old_values + values) + + def add(self, key, val): + """Adds a key, value pair to this dictionary - if there is already a + value for this key, then the value will be appended to those values. + """ + if not isinstance(val, basestring): + raise ValueError('value must be a string-type object') + self._extend(key, (val,)) + + def extend(self, *args, **kwargs): + """Like update, but will add values to existing sequences rather than + replacing them. You can pass a mapping object or a sequence of 2-tuples + - values in these objects can be strings or sequences of strings. + """ + if len(args) > 1: + raise TypeError("extend() takes at most 1 positional " + "arguments ({0} given)".format(len(args))) + + for other in args + (kwargs,): + if isinstance(other, collections.Mapping): + + # See if it looks like an HTTPHeaderDict (either urllib3's + # implementation or ours). If so, then we have to add values + # in one go for each key. 
+ multiget = getattr(other, 'getlist', None) + if multiget: + for key in other: + self._extend(key, tuple(multiget(key))) + continue + + # Otherwise, just walk over items to get them. + item_seq = other.items() + else: + item_seq = other + + for ik, iv in item_seq: + if isinstance(iv, basestring): + self._extend(ik, (iv,)) + elif any(not isinstance(v, basestring) for v in iv): + raise ValueError('non-string items in sequence') + else: + self._extend(ik, tuple(iv)) + + def __repr__(self): + d = {} + for k, vals in self._store.values(): + d[k] = vals[0] if len(vals) == 1 else vals + return repr(d) + + class LookupDict(dict): """Dictionary lookup object.""" diff --git a/tests/test_requests.py b/tests/test_requests.py index 44f8e358..3e1b1db6 100644 --- a/tests/test_requests.py +++ b/tests/test_requests.py @@ -2,6 +2,7 @@ """Tests for Requests.""" from __future__ import division +import itertools import json import os import pickle @@ -2221,6 +2222,36 @@ class TestRequests: assert not r.history[1].is_redirect assert r.url == urls_test[2] + def test_multiple_response_headers_with_same_name_same_case(self, httpbin): + qs = 'Fruit=Apple&Fruit=Blood+Orange&Fruit=Banana&Fruit=Berry,+Blue' + resp = requests.get(httpbin('response-headers?' + qs)) + fruits = resp.headers['fruit'] + assert fruits == 'Apple, Blood Orange, Banana, Berry, Blue' + + # As we are using HTTPHeaderDict, we should be able to extract the + # individual header values too. + assert resp.headers.getlist('fruit') == [ + 'Apple', 'Blood Orange', 'Banana', 'Berry, Blue' + ] + + def test_multiple_response_headers_with_same_name_diff_case(self, httpbin): + # urllib3 seems to have trouble guaranteeing the order of the items when + # the case is different, so we just need to make sure all of the items + # are there, rather than asserting a particular order. + qs = 'Fruit=Apple&Fruit=Blood+Orange&Fruit=Banana&Fruit=Berry,+Blue' + resp = requests.get(httpbin('response-headers?' 
+ qs)) + + # These are all possible acceptable combinations for the header. + fruit_choices = ['Apple', 'Blood Orange', 'Banana', 'Berry, Blue'] + fruit_permutations = itertools.permutations(fruit_choices) + fruit_multiheaders = [list(fp) for fp in fruit_permutations] + fruit_headers = set(', '.join(fp) for fp in fruit_multiheaders) + assert resp.headers['fruit'] in fruit_headers + + # As we are using HTTPHeaderDict, we should be able to extract the + # individual header values too. + assert resp.headers.getlist('fruit') in fruit_multiheaders + class TestCaseInsensitiveDict: diff --git a/tests/test_structures.py b/tests/test_structures.py index 7d92516b..5803079f 100644 --- a/tests/test_structures.py +++ b/tests/test_structures.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- import pytest -from requests.structures import CaseInsensitiveDict, LookupDict +from requests.structures import CaseInsensitiveDict, LookupDict, HTTPHeaderDict +from urllib3._collections import HTTPHeaderDict as U3HeaderDict class TestCaseInsensitiveDict: @@ -51,6 +52,208 @@ class TestCaseInsensitiveDict: assert (self.case_insensitive_dict == other) is result +class TestHTTPHeaderDictCompatibility(TestCaseInsensitiveDict): + + """HTTPHeaderDict should be completely compatible with CaseInsensitiveDict + when used for headers, so ensure that all the tests for the base class + also pass for this one.""" + + @pytest.fixture(autouse=True) + def setup(self): + self.case_insensitive_dict = HTTPHeaderDict() + self.case_insensitive_dict['Accept'] = 'application/json' + + +class TestHTTPHeaderDict: + + @pytest.fixture(autouse=True) + def setup(self): + self.kvs = [ + ('animal', 'chicken'), + ('AnimaL', 'Cow'), + ('CAKE', 'Cheese!'), + ('Sauce', 'Bread'), + ('Sauce', 'Cherry, or Plum Tomato'), + ] + + # HTTPHeaderDict from urllib3. + self.u3dict = ud = U3HeaderDict() + [ud.add(*tpl) for tpl in self.kvs] + + # Regular dictionary. 
+ self.ddict = dict(self.kvs) + self.ddict['Sauce'] = ['Bread!', 'Cherry, or Plum Tomato'] + + # Used by test_extend. All of these "extra" values are mostly + # equivalent to each other. + self.extra_hd = hd2 = HTTPHeaderDict(ANIMAL=['Dog', 'elephant']) + hd2['cake'] = 'Babka' + hd2.setlist('sound', ['quiet', 'LOUD']) + hd2['CUTLERY'] = 'fork' + + self.extra_tuple_pairs = tuple_pairs = [ + ('ANIMAL', 'Dog'), + ('Animal', 'elephant'), + ('cake', ['Babka']), + ('sound', 'quiet'), + ('sound', 'LOUD'), + ('CUTLERY', 'fork'), + ] + + self.extra_simple_dict = dict(tuple_pairs) + self.extra_simple_dict['sound'] = ('quiet', 'LOUD') + + self.extra_u3 = U3HeaderDict() + for k, v in tuple_pairs: + if isinstance(v, (tuple, list)): + for vi in v: + self.extra_u3.add(k, vi) + else: + self.extra_u3.add(k, v) + + def test_item_access(self): + hd = HTTPHeaderDict(self.kvs) + + # Test that values are combined. + assert hd['Sauce'] == 'Bread, Cherry, or Plum Tomato' + assert hd['ANIMAL'] == 'chicken, Cow' + + # Test we can overwrite values. + hd['animal'] = 'Goat!' + assert hd['anIMal'] == 'Goat!' + + # Test deletion works. + del hd['sauce'] + pytest.raises(KeyError, hd.__getitem__, 'sauce') + + # Only string types allowed. + pytest.raises(ValueError, hd.__setitem__, 'cake', ['Cheese', 'sponge']) + + def test_equality(self): + hd = HTTPHeaderDict(self.u3dict) + assert hd == self.u3dict + assert hd == HTTPHeaderDict(hd) + + # Test that we still work even if we are comparing to a + # CaseInsensitiveDict instance. 
+ cid = CaseInsensitiveDict(hd) + assert hd == cid + assert cid == hd + + def test_lower_items(self): + hd = HTTPHeaderDict(self.kvs, cutlery='fork') + assert list(hd.lower_items()) == [ + ('animal', 'chicken, Cow'), + ('cake', 'Cheese!'), + ('sauce', 'Bread, Cherry, or Plum Tomato'), + ('cutlery', 'fork'), + ] + + def test_copy(self): + hd = HTTPHeaderDict(self.u3dict) + hd2 = hd.copy() + assert hd is not hd2 + assert hd == hd2 + + def test_get_and_set_list(self): + hd = HTTPHeaderDict(self.kvs) + assert hd.getlist('SAUCE') == ['Bread', 'Cherry, or Plum Tomato'] + assert hd.getlist('CAKE') == ['Cheese!'] + assert hd.getlist('DRINK') == [] + + # Needs to be a regular sequence type containing just strings. + pytest.raises(ValueError, hd.setlist, 'Drink', 'Water') + pytest.raises(ValueError, hd.setlist, 'Drink', ['H', 2, 'O']) + + # Test multi-setting. + hd.setlist('Drink', ['Water', 'Juice']) + assert hd.getlist('DRINK') == ['Water', 'Juice'] + + # Setting to an empty sequence should remove the entry. + hd.setlist('DRInk', []) + pytest.raises(KeyError, hd.__getitem__, 'DrinK') + assert hd.getlist('DRiNK') == [] + + def test_add(self): + hd = HTTPHeaderDict() + hd.add('sound', 'quiet') + hd.add('SOUND', 'LOUD') + assert hd.getlist('Sound') == ['quiet', 'LOUD'] + + # Enforce type-checking in the add method. + pytest.raises(ValueError, hd.add, 'Sound', 5) + + @pytest.mark.parametrize('attr,as_arg,animal_arg_is_ordered', [ + # These types will have the "animal" arguments in our preferred order. + ('extra_hd', True, True), + ('extra_tuple_pairs', True, True), + + # And these types will lose the ordering, so we can't make assertions + # about the final order of those values. 
+ ('extra_simple_dict', True, False), + ('extra_u3', True, False), + ('extra_simple_dict', False, False), + ]) + def test_extend(self, attr, as_arg, animal_arg_is_ordered): + item = getattr(self, attr) + + # Call extend with the associated values - we should see all of the + # merged data in the HTTPHeaderDict instance. + extras = {'cutlery': 'knife'} + hd = HTTPHeaderDict(self.kvs) + + if as_arg: + hd.extend(item, **extras) + else: + hd.extend(extras, **item) + + # Test all the stored values are what we expect. + mget = hd.getlist + + # Depending on the item we merged in, we might be able to make + # assumptions about what the overall order of the structure is. + animal_seq = mget('animal') + if animal_arg_is_ordered: + assert animal_seq == ['chicken', 'Cow', 'Dog', 'elephant'] + else: + # The existing order in HTTPHeaderDict of the first two values + # should be preserved - no guarantees in which order the other + # two values are added. + assert animal_seq in [ + ['chicken', 'Cow', 'Dog', 'elephant'], + ['chicken', 'Cow', 'elephant', 'Dog'] + ] + + assert mget('cake') == ['Cheese!', 'Babka'] + assert mget('sound') == ['quiet', 'LOUD'] + + # We don't mandate the order in which these dictionaries are + # processed, so it's fine whichever order it is. + assert mget('cutlery') in [ + ['fork', 'knife'], ['knife', 'fork'] + ] + + def test_extend_type_checking(self): + hd = HTTPHeaderDict() + pytest.raises(ValueError, hd.extend, dict(type=['xml', None, 'html'])) + + def test_repr(self): + hd = HTTPHeaderDict() + assert repr(hd) == '{}' + hd.add('type', 'xml') + assert repr(hd) == "{'type': 'xml'}" + hd.add('type', 'html') + assert repr(hd) == "{'type': ('xml', 'html')}" + + # We can't guarantee order once we have more than one key. 
+ hd.add('Accept', 'text/html') + assert repr(hd) in [ + "{'type': ('xml', 'html'), 'Accept': 'text/html'}", + "{'Accept': 'text/html', 'type': ('xml', 'html')}", + ] + assert str(hd) == repr(hd) + + class TestLookupDict: @pytest.fixture(autouse=True)