wrapped proxy_bypass() with cache lookup

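Caching the result of proxy_bypass() avoids repeated, slow gethostbyaddr calls.
This adds a new TimedCache class and a TimedCacheManaged decorator that wraps
a function with such a cache:
* An entry that is older than a minute (the default) when it is looked up
is evicted.
* When the cache is full, the oldest entry is evicted.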
This commit is contained in:
David Fontenot
2017-02-21 17:57:21 -08:00
parent d6f4818c0b
commit c121b98c4e
4 changed files with 175 additions and 3 deletions
+1
View File
@@ -178,4 +178,5 @@ Patches and Suggestions
- Moinuddin Quadri <moin18@gmail.com> (`@moin18 <https://github.com/moin18>`_)
- Matt Kohl (`@mattkohl <https://github.com/mattkohl>`_)
- Jonathan Vanasco (`@jvanasco <https://github.com/jvanasco>`_)
- David Fontenot (`@davidfontenot <https://github.com/davidfontenot>`_)
+89
View File
@@ -8,9 +8,12 @@ Data structures that power Requests.
"""
import collections
import time
from .compat import OrderedDict
# Clock used to timestamp cache entries: ``time.monotonic`` when the running
# interpreter provides it, otherwise ``time.time`` as a fallback.
current_time = getattr(time, 'monotonic', time.time)
class CaseInsensitiveDict(collections.MutableMapping):
"""A case-insensitive ``dict``-like object.
@@ -103,3 +106,89 @@ class LookupDict(dict):
def get(self, key, default=None):
    """Look up ``key`` among the instance attributes.

    :param key: attribute name to look up
    :param default: value returned when no such attribute exists
    :return: the attribute value, or ``default`` when it is missing
    """
    attributes = self.__dict__
    return attributes.get(key, default)
class TimedCacheManaged(object):
    """
    Decorator that memoizes a function's results in a ``TimedCache``.

    The decorated function is only invoked when the cache has no usable
    entry for the call's arguments; otherwise the cached result is returned.
    All arguments must be hashable, because they are used as the cache key.
    """

    def __init__(self, fnc):
        """
        :param fnc: the function whose results should be cached
        """
        self.fnc = fnc
        self.cache = TimedCache()
        # Mirror the wrapped function's identity so the decorated object
        # stays introspectable (name, docstring, defining module).
        for attr in ('__module__', '__name__', '__doc__'):
            try:
                setattr(self, attr, getattr(fnc, attr))
            except AttributeError:
                pass

    def __call__(self, *args, **kwargs):
        """
        Return the cached result for these arguments, calling the wrapped
        function (and caching its result) when there is none.

        :return: whatever the wrapped function returns for these arguments
        """
        if len(args) == 1 and not kwargs:
            # Common single-argument case: the argument itself is the key.
            key = args[0]
        else:
            # Every argument takes part in the key so that calls with
            # different arguments never share a cached result.
            key = (args, tuple(sorted(kwargs.items())))

        try:
            return self.cache[key]
        except KeyError:
            pass

        found = self.fnc(*args, **kwargs)
        self.cache[key] = found
        return found
try:
    # The ABC aliases in ``collections`` were removed in Python 3.10.
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:  # Python 2
    from collections import MutableMapping as _MutableMapping


class TimedCache(_MutableMapping):
    """
    Bounded cache whose entries expire ``expiration_secs`` after being stored.

    An expired entry is dropped when it is looked up. When a new key is
    stored while the cache already holds ``maxlen`` entries, the entry that
    was stored longest ago is evicted.

    NOTE: iterating the cache yields ``(key, value)`` pairs rather than bare
    keys, so the inherited mapping helpers that rely on key iteration
    (``keys()``, ``items()``, ``values()``, ...) do not behave like those of
    a regular mapping.
    """

    def __init__(self, maxlen=32, expiration_secs=60):
        """
        :param maxlen: most number of entries to hold on to
        :param expiration_secs: the number of seconds to hold on
            to entries
        """
        self.maxlen = maxlen
        self.expiration_secs = expiration_secs
        # key -> (timestamp when stored, value), least recently stored first.
        self._dict = OrderedDict()

    def __repr__(self):
        return '<TimedCache maxlen:%d len:%d expiration_secs:%d>' % \
            (self.maxlen, len(self._dict), self.expiration_secs)

    def __iter__(self):
        """:return: an iterator of ``(key, value)`` pairs, timestamps hidden"""
        return ((key, entry[1]) for key, entry in self._dict.items())

    def __delitem__(self, item):
        """
        Remove an entry from the cache.

        :param item: the key of the entry to remove
        :raises KeyError: if the key is not in the cache
        """
        del self._dict[item]

    def __getitem__(self, key):
        """
        Look up an item in the cache. If the item
        has already expired, it will be invalidated and not returned

        :param key: which entry to look up
        :return: the value in the cache
        :raises KeyError: if the key is missing or its entry has expired
        """
        occurred, value = self._dict[key]
        now = int(current_time())

        if now - occurred > self.expiration_secs:
            del self._dict[key]
            raise KeyError(key)
        return value

    def __setitem__(self, key, value):
        """
        Store ``value`` under ``key``, stamped with the current time. If the
        cache is full, the oldest entry is evicted to make room.

        :param key: the key to store the value under
        :param value: the value to be added to the cache
        """
        now = int(current_time())

        # Drop any previous entry first: a refreshed key must move to the
        # newest position and must not push out an unrelated entry.
        self._dict.pop(key, None)

        while self._dict and len(self._dict) >= self.maxlen:
            self._dict.popitem(last=False)

        # A non-positive maxlen means there is no room to hold anything.
        if self.maxlen > 0:
            self._dict[key] = (now, value)

    def __len__(self):
        """:return: the length of the cache"""
        return len(self._dict)

    def clear(self):
        """Clears the cache"""
        self._dict.clear()
+12 -2
View File
@@ -28,7 +28,7 @@ from .compat import (
quote, urlparse, bytes, str, OrderedDict, unquote, getproxies,
proxy_bypass, urlunparse, basestring, integer_types)
from .cookies import RequestsCookieJar, cookiejar_from_dict
from .structures import CaseInsensitiveDict
from .structures import CaseInsensitiveDict, TimedCache, TimedCacheManaged
from .exceptions import (
InvalidURL, InvalidHeader, FileModeWarning, UnrewindableBodyError)
@@ -579,6 +579,16 @@ def set_environ(env_name, value):
os.environ[env_name] = old_value
@TimedCacheManaged
def _proxy_bypass_cached(netloc):
    """
    Cached front-end for ``proxy_bypass``.

    The ``TimedCacheManaged`` decorator answers from its cache when it holds
    an entry for ``netloc``; otherwise this body runs and its result is
    stored for later calls.

    :param netloc: the network location to check
    :rtype: bool
    """
    bypass = proxy_bypass(netloc)
    return bypass
def should_bypass_proxies(url, no_proxy):
"""
Returns whether we should bypass proxies or not.
@@ -626,7 +636,7 @@ def should_bypass_proxies(url, no_proxy):
# legitimate problems.
with set_environ('no_proxy', no_proxy_arg):
try:
bypass = proxy_bypass(netloc)
bypass = _proxy_bypass_cached(netloc)
except (TypeError, socket.gaierror):
bypass = False
+73 -1
View File
@@ -2,7 +2,7 @@
import pytest
from requests.structures import CaseInsensitiveDict, LookupDict
from requests.structures import CaseInsensitiveDict, LookupDict, TimedCache, TimedCacheManaged
class TestCaseInsensitiveDict:
@@ -74,3 +74,75 @@ class TestLookupDict:
@get_item_parameters
def test_get(self, key, value):
assert self.lookup_dict.get(key) == value
class TestTimedCache(object):
    """Unit tests for the ``TimedCache`` mapping."""

    @pytest.fixture(autouse=True)
    def setup(self):
        """Give every test a fresh cache and empty it afterwards."""
        self.any_value = 'some value'
        self.expiration_secs = 60
        self.cache = TimedCache(expiration_secs=self.expiration_secs)
        yield
        self.cache.clear()

    def test_get(self):
        self.cache['a'] = self.any_value
        assert self.cache['a'] is self.any_value

    def test_repr(self):
        rendered = repr(self.cache)
        assert rendered == '<TimedCache maxlen:32 len:0 expiration_secs:60>'

    def test_get_expired_item(self, mocker):
        self.cache = TimedCache(maxlen=1, expiration_secs=self.expiration_secs)

        # Store at t=0, then look up one second past the expiration window.
        mocker.patch('requests.structures.current_time', lambda: 0)
        self.cache['a'] = self.any_value
        mocker.patch('requests.structures.current_time', lambda: self.expiration_secs + 1)
        assert self.cache.get('a') is None

    def test_evict_first_entry_when_full(self, mocker):
        self.cache = TimedCache(maxlen=2, expiration_secs=2)
        mocker.patch('requests.structures.current_time', lambda: 0)
        self.cache['a'] = self.any_value
        mocker.patch('requests.structures.current_time', lambda: 1)
        self.cache['b'] = self.any_value
        mocker.patch('requests.structures.current_time', lambda: 3)
        self.cache['c'] = self.any_value

        # Compare by value: identity of small ints is an implementation detail.
        assert len(self.cache) == 2
        with pytest.raises(KeyError):
            self.cache['a']
        assert self.cache['b'] is self.any_value
        assert self.cache['c'] is self.any_value

    def test_delete_item_removes_item(self):
        self.cache['a'] = self.any_value
        del self.cache['a']
        with pytest.raises(KeyError):
            self.cache['a']

    def test_iterating_hides_timestamps(self):
        self.cache['a'] = 1
        self.cache['b'] = 2
        expected = [('a', 1), ('b', 2)]
        actual = [(key, val) for key, val in self.cache]
        assert expected == actual
class TestTimedCacheManagedDecorator(object):
    """Unit tests for the ``TimedCacheManaged`` decorator."""

    def test_caches_repeated_calls(self, mocker):
        mocker.patch('requests.structures.current_time', lambda: 0)

        # Mutable holder so the nested function can update the running total.
        nonlocals = {'value': 0}

        @TimedCacheManaged
        def some_method(x):
            nonlocals['value'] = nonlocals['value'] + x
            return nonlocals['value']

        # Compare by value: identity of small ints is an implementation detail.
        first_result = some_method(1)
        assert first_result == 1
        # Same argument again: served from the cache, the total is untouched.
        second_result = some_method(1)
        assert second_result == 1
        # New argument: the function runs and adds to the total.
        third_result = some_method(2)
        assert third_result == 3