From c121b98c4eb1e4ee8927995d9b9f1d6bdccd9349 Mon Sep 17 00:00:00 2001
From: David Fontenot
Date: Tue, 21 Feb 2017 17:57:21 -0800
Subject: [PATCH] wrapped proxy_bypass() with cache lookup

Used to alleviate long gethostbyaddr calls

Made new TimedCache and decorator to wrap a function with a cache
* Entries looked up older than a minute (default amount) are evicted.
* When full, evicts the oldest entry
---
 AUTHORS.rst              |   1 +
 requests/structures.py   | 106 ++++++++++++++++++++++++++++++++++++++++
 requests/utils.py        |  18 ++++++-
 tests/test_structures.py |  84 +++++++++++++++++++++++++++++++-
 4 files changed, 206 insertions(+), 3 deletions(-)

diff --git a/AUTHORS.rst b/AUTHORS.rst
index 48cd155b..e4a325bf 100644
--- a/AUTHORS.rst
+++ b/AUTHORS.rst
@@ -178,4 +178,5 @@ Patches and Suggestions
 - Moinuddin Quadri (`@moin18 <https://github.com/moin18>`_)
 - Matt Kohl (`@mattkohl <https://github.com/mattkohl>`_)
 - Jonathan Vanasco (`@jvanasco <https://github.com/jvanasco>`_)
+- David Fontenot (`@davidfontenot <https://github.com/davidfontenot>`_)
 
diff --git a/requests/structures.py b/requests/structures.py
index 05d2b3f5..beb268e3 100644
--- a/requests/structures.py
+++ b/requests/structures.py
@@ -8,9 +8,12 @@ Data structures that power Requests.
 """
 
 import collections
+import time
 
 from .compat import OrderedDict
 
+current_time = getattr(time, 'monotonic', time.time)
+
 
 class CaseInsensitiveDict(collections.MutableMapping):
     """A case-insensitive ``dict``-like object.
@@ -103,3 +106,106 @@ class LookupDict(dict):
 
     def get(self, key, default=None):
         return self.__dict__.get(key, default)
+
+
+class TimedCacheManaged(object):
+    """
+    Wrap a function call in a timed cache.
+
+    Results are keyed on the first positional argument only, so the
+    wrapped function should depend on nothing but that argument.
+    """
+    def __init__(self, fnc):
+        self.fnc = fnc
+        self.cache = TimedCache()
+
+    def __call__(self, *args, **kwargs):
+        key = args[0]
+        try:
+            return self.cache[key]
+        except KeyError:
+            pass
+
+        # Call outside the except block so that an error raised by the
+        # wrapped function is not chained to the cache-miss KeyError, and
+        # forward every argument instead of dropping extra positionals.
+        found = self.fnc(*args, **kwargs)
+        self.cache[key] = found
+        return found
+
+
+class TimedCache(collections.MutableMapping):
+    """
+    Evicts entries after expiration_secs. If none are expired and maxlen is hit,
+    will evict the oldest cached entry.
+
+    Iterating yields ``(key, value)`` pairs rather than bare keys.
+    """
+    def __init__(self, maxlen=32, expiration_secs=60):
+        """
+        :param maxlen: most number of entries to hold on to
+        :param expiration_secs: the number of seconds to hold on
+            to entries
+        """
+        self.maxlen = maxlen
+        self.expiration_secs = expiration_secs
+        self._dict = OrderedDict()
+
+    def __repr__(self):
+        return '<TimedCache maxlen:%d len:%d expiration_secs:%d>' % \
+            (self.maxlen, len(self._dict), self.expiration_secs)
+
+    def __iter__(self):
+        # Entries are stored as (timestamp, value); hide the timestamp.
+        return iter([(key, entry[1]) for key, entry in self._dict.items()])
+
+    def __delitem__(self, item):
+        del self._dict[item]
+
+    def __getitem__(self, key):
+        """
+        Look up an item in the cache. If the item
+        has already expired, it will be invalidated and not returned
+
+        :param key: which entry to look up
+        :return: the value in the cache
+        :raises KeyError: if the key is absent or its entry has expired
+        """
+        occurred, value = self._dict[key]
+        now = int(current_time())
+
+        if now - occurred > self.expiration_secs:
+            del self._dict[key]
+            raise KeyError(key)
+
+        return value
+
+    def __setitem__(self, key, value):
+        """
+        Stores the value under key; if the cache is full, evicts the
+        oldest entry first
+
+        :param key: the key to store the value under
+        :param value: the value to be added to the cache
+        """
+        now = int(current_time())
+
+        # Drop any previous entry for this key: refreshing it must not
+        # force out an unrelated entry, and the key has to move to the
+        # newest end so the front of the dict stays the oldest entry.
+        self._dict.pop(key, None)
+
+        # Testing the dict itself keeps maxlen <= 0 from popping an
+        # empty dict.
+        while self._dict and len(self._dict) >= self.maxlen:
+            self._dict.popitem(last=False)
+
+        self._dict[key] = (now, value)
+
+    def __len__(self):
+        """:return: the length of the cache"""
+        return len(self._dict)
+
+    def clear(self):
+        """Clears the cache"""
+        self._dict.clear()
diff --git a/requests/utils.py b/requests/utils.py
index 6365034c..b6fcc5f5 100644
--- a/requests/utils.py
+++ b/requests/utils.py
@@ -28,7 +28,7 @@ from .compat import (
     quote, urlparse, bytes, str, OrderedDict, unquote, getproxies,
     proxy_bypass, urlunparse, basestring, integer_types)
 from .cookies import RequestsCookieJar, cookiejar_from_dict
-from .structures import CaseInsensitiveDict
+from .structures import CaseInsensitiveDict, TimedCache, TimedCacheManaged
 from .exceptions import (
     InvalidURL, InvalidHeader, FileModeWarning, UnrewindableBodyError)
 
@@ -579,6 +579,20 @@ def set_environ(env_name, value):
             os.environ[env_name] = old_value
 
 
+@TimedCacheManaged
+def _proxy_bypass_cached(netloc):
+    """
+    Looks for netloc in the cache, if not found, will call proxy_bypass
+    for the netloc and store its result in the cache
+
+    Results are keyed on netloc alone, so a change to the proxy
+    environment is only seen once the cached entry has expired.
+
+    :rtype: bool
+    """
+    return proxy_bypass(netloc)
+
+
 def should_bypass_proxies(url, no_proxy):
     """
     Returns whether we should bypass proxies or not.
@@ -626,7 +640,7 @@ def should_bypass_proxies(url, no_proxy):
     # legitimate problems.
     with set_environ('no_proxy', no_proxy_arg):
         try:
-            bypass = proxy_bypass(netloc)
+            bypass = _proxy_bypass_cached(netloc)
         except (TypeError, socket.gaierror):
             bypass = False
 
diff --git a/tests/test_structures.py b/tests/test_structures.py
index e4d2459f..a28e041e 100644
--- a/tests/test_structures.py
+++ b/tests/test_structures.py
@@ -2,7 +2,7 @@
 
 import pytest
 
-from requests.structures import CaseInsensitiveDict, LookupDict
+from requests.structures import CaseInsensitiveDict, LookupDict, TimedCache, TimedCacheManaged
 
 
 class TestCaseInsensitiveDict:
@@ -74,3 +74,85 @@ class TestLookupDict:
     @get_item_parameters
     def test_get(self, key, value):
         assert self.lookup_dict.get(key) == value
+
+
+class TestTimedCache(object):
+    @pytest.fixture(autouse=True)
+    def setup(self):
+        self.any_value = 'some value'
+        self.expiration_secs = 60
+        self.cache = TimedCache(expiration_secs=self.expiration_secs)
+        yield
+        self.cache.clear()
+
+    def test_get(self):
+        self.cache['a'] = self.any_value
+        assert self.cache['a'] is self.any_value
+
+    def test_repr(self):
+        text = str(self.cache)
+        assert text == '<TimedCache maxlen:32 len:0 expiration_secs:60>'
+
+    def test_get_expired_item(self, mocker):
+        self.cache = TimedCache(maxlen=1, expiration_secs=self.expiration_secs)
+
+        mocker.patch('requests.structures.current_time', lambda: 0)
+        self.cache['a'] = self.any_value
+        mocker.patch('requests.structures.current_time', lambda: self.expiration_secs + 1)
+        assert self.cache.get('a') is None
+
+    def test_evict_first_entry_when_full(self, mocker):
+        self.cache = TimedCache(maxlen=2, expiration_secs=2)
+        mocker.patch('requests.structures.current_time', lambda: 0)
+        self.cache['a'] = self.any_value
+        mocker.patch('requests.structures.current_time', lambda: 1)
+        self.cache['b'] = self.any_value
+        mocker.patch('requests.structures.current_time', lambda: 3)
+        self.cache['c'] = self.any_value
+        assert len(self.cache) == 2
+        with pytest.raises(KeyError):
+            self.cache['a']
+        assert self.cache['b'] is self.any_value
+        assert self.cache['c'] is self.any_value
+
+    def test_overwriting_key_does_not_evict_others(self, mocker):
+        self.cache = TimedCache(maxlen=2, expiration_secs=self.expiration_secs)
+        mocker.patch('requests.structures.current_time', lambda: 0)
+        self.cache['a'] = 1
+        self.cache['b'] = 2
+        self.cache['a'] = 3
+        assert len(self.cache) == 2
+        assert self.cache['a'] == 3
+        assert self.cache['b'] == 2
+
+    def test_delete_item_removes_item(self):
+        self.cache['a'] = self.any_value
+        del self.cache['a']
+        with pytest.raises(KeyError):
+            self.cache['a']
+
+    def test_iterating_hides_timestamps(self):
+        self.cache['a'] = 1
+        self.cache['b'] = 2
+        expected = [('a', 1), ('b', 2)]
+        actual = [(key, val) for key, val in self.cache]
+        assert expected == actual
+
+
+class TestTimedCacheManagedDecorator(object):
+    def test_caches_repeated_calls(self, mocker):
+        mocker.patch('requests.structures.current_time', lambda: 0)
+
+        nonlocals = {'value': 0}
+
+        @TimedCacheManaged
+        def some_method(x):
+            nonlocals['value'] = nonlocals['value'] + x
+            return nonlocals['value']
+
+        first_result = some_method(1)
+        assert first_result == 1
+        second_result = some_method(1)
+        assert second_result == 1
+        third_result = some_method(2)
+        assert third_result == 3