diff options
author | Paul Ganssle <pganssle@users.noreply.github.com> | 2018-10-17 05:02:28 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-10-17 05:02:28 -0400 |
commit | 83df62a15a8cfe3a35f59482fce70b98bc94b262 (patch) | |
tree | 6d8868d937e0775b738d92ac3068793c8da7ab2d | |
parent | 770b8f05e0f4de22a2cae4c6ac81c8df725369e7 (diff) | |
parent | 02ace35402e1bc352ce08a6eee9bffae7b1593f3 (diff) | |
download | dateutil-83df62a15a8cfe3a35f59482fce70b98bc94b262.tar.gz |
Merge pull request #761 from gokcennurlu/lru_cache_tz
Add LRU caching to tzoffset, tzstr and gettz
-rw-r--r-- | AUTHORS.md | 1 | ||||
-rw-r--r-- | changelog.d/761.feature.rst | 1 | ||||
-rw-r--r-- | dateutil/test/test_tz.py | 50 | ||||
-rw-r--r-- | dateutil/tz/_factories.py | 24 | ||||
-rw-r--r-- | dateutil/tz/tz.py | 18 |
5 files changed, 91 insertions, 3 deletions
@@ -44,6 +44,7 @@ switch, and thus all their contributions are dual-licensed. - Florian Rathgeber (gh: @kynan) **D** - Gabriel Bianconi <gabriel@MASKED> (gh: @GabrielBianconi) **D** - Gabriel Poesia <gabriel.poesia@MASKED> +- Gökçen Nurlu <gnurlu1@bloomberg.net> (gh: @gokcennurlu) **D** - Gustavo Niemeyer <gustavo@niemeyer.net> (gh: @niemeyer) - Holger Joukl <holger.joukl@MASKED> (gh: @hjoukl) - Igor <mrigor83@MASKED> diff --git a/changelog.d/761.feature.rst b/changelog.d/761.feature.rst new file mode 100644 index 0000000..37c9b4d --- /dev/null +++ b/changelog.d/761.feature.rst @@ -0,0 +1 @@ +Added a small "strong value" cache into ``tz.gettz``, ``tz.tzoffset`` and ``tz.tzstr`` to improve performance in the situation where transient references are repeatedly created to the same time zones, but no strong reference is continuously held. Patch by Gökçen Nurlu (gh issue #691, gh pr #761) diff --git a/dateutil/test/test_tz.py b/dateutil/test/test_tz.py index 16d2bda..97a0c68 100644 --- a/dateutil/test/test_tz.py +++ b/dateutil/test/test_tz.py @@ -746,7 +746,15 @@ def test_tzoffset_weakref(): del UTC1 gc.collect() - assert UTC_ref() is None + assert UTC_ref() is not None # Should be in the strong cache + assert UTC_ref() is tz.tzoffset('UTC', 0) + + # Fill the strong cache with other items + for offset in range(5,15): + tz.tzoffset('RandomZone', offset) + + gc.collect() + assert UTC_ref() is None assert UTC_ref() is not tz.tzoffset('UTC', 0) @@ -1106,12 +1114,33 @@ def test_gettz_cache_clear(): assert NYC1 is not NYC2 +@pytest.mark.gettz +@pytest.mark.xfail(IS_WIN, reason='zoneinfo separately cached') +def test_gettz_set_cache_size(): + tz.gettz.cache_clear() + tz.gettz.set_cache_size(3) + + MONACO_ref = weakref.ref(tz.gettz('Europe/Monaco')) + EASTER_ref = weakref.ref(tz.gettz('Pacific/Easter')) + CURRIE_ref = weakref.ref(tz.gettz('Australia/Currie')) + + gc.collect() + + assert MONACO_ref() is not None + assert EASTER_ref() is not None + assert CURRIE_ref() is not None + + tz.gettz.set_cache_size(2) + gc.collect() + + assert MONACO_ref() is None @pytest.mark.xfail(IS_WIN, reason="Windows does not use system zoneinfo") @pytest.mark.smoke @pytest.mark.gettz def test_gettz_weakref(): tz.gettz.cache_clear() + tz.gettz.set_cache_size(2) NYC1 = tz.gettz('America/New_York') NYC_ref = weakref.ref(tz.gettz('America/New_York')) @@ -1120,9 +1149,17 @@ def test_gettz_weakref(): del NYC1 gc.collect() - assert NYC_ref() is None - assert tz.gettz('America/New_York') is not NYC_ref() + assert NYC_ref() is not None # Should still be in the strong cache + assert tz.gettz('America/New_York') is NYC_ref() + + # Populate strong cache with other timezones + tz.gettz('Europe/Monaco') + tz.gettz('Pacific/Easter') + tz.gettz('Australia/Currie') + gc.collect() + assert NYC_ref() is None # Should have been pushed out + assert tz.gettz('America/New_York') is not NYC_ref() class ZoneInfoGettzTest(GettzTest, WarningTestMixin): def gettz(self, name): @@ -1462,6 +1499,13 @@ def test_tzstr_weakref(): del tz_t1 gc.collect() + assert tz_t2_ref() is not None + assert tz.tzstr('EST5EDT') is tz_t2_ref() + + for offset in range(5,15): + tz.tzstr('GMT+{}'.format(offset)) + gc.collect() + assert tz_t2_ref() is None assert tz.tzstr('EST5EDT') is not tz_t2_ref() diff --git a/dateutil/tz/_factories.py b/dateutil/tz/_factories.py index 152aa85..d2560eb 100644 --- a/dateutil/tz/_factories.py +++ b/dateutil/tz/_factories.py @@ -1,5 +1,7 @@ from datetime import timedelta import weakref +from collections import OrderedDict + class _TzSingleton(type): def __init__(cls, *args, **kwargs): @@ -11,6 +13,7 @@ class _TzSingleton(type): cls.__instance = super(_TzSingleton, cls).__call__() return cls.__instance + class _TzFactory(type): def instance(cls, *args, **kwargs): """Alternate constructor that returns a fresh instance""" @@ -20,6 +23,8 @@ class _TzFactory(type): class _TzOffsetFactory(_TzFactory): def __init__(cls, *args, **kwargs): cls.__instances = weakref.WeakValueDictionary() + cls.__strong_cache = OrderedDict() + cls.__strong_cache_size = 8 def __call__(cls, name, offset): if isinstance(offset, timedelta): @@ -31,12 +36,22 @@ class _TzOffsetFactory(_TzFactory): if instance is None: instance = cls.__instances.setdefault(key, cls.instance(name, offset)) + + cls.__strong_cache[key] = cls.__strong_cache.pop(key, instance) + + # Remove an item if the strong cache is overpopulated + # TODO: Maybe this should be under a lock? + if len(cls.__strong_cache) > cls.__strong_cache_size: + cls.__strong_cache.popitem(last=False) + return instance class _TzStrFactory(_TzFactory): def __init__(cls, *args, **kwargs): cls.__instances = weakref.WeakValueDictionary() + cls.__strong_cache = OrderedDict() + cls.__strong_cache_size = 8 def __call__(cls, s, posix_offset=False): key = (s, posix_offset) @@ -45,5 +60,14 @@ class _TzStrFactory(_TzFactory): if instance is None: instance = cls.__instances.setdefault(key, cls.instance(s, posix_offset)) + + cls.__strong_cache[key] = cls.__strong_cache.pop(key, instance) + + + # Remove an item if the strong cache is overpopulated + # TODO: Maybe this should be under a lock? + if len(cls.__strong_cache) > cls.__strong_cache_size: + cls.__strong_cache.popitem(last=False) + return instance diff --git a/dateutil/tz/tz.py b/dateutil/tz/tz.py index 101f446..161f4d8 100644 --- a/dateutil/tz/tz.py +++ b/dateutil/tz/tz.py @@ -14,6 +14,7 @@ import sys import os import bisect import weakref +from collections import OrderedDict import six from six import string_types @@ -1538,6 +1539,8 @@ def __get_gettz(): def __init__(self): self.__instances = weakref.WeakValueDictionary() + self.__strong_cache_size = 8 + self.__strong_cache = OrderedDict() self._cache_lock = _thread.allocate_lock() def __call__(self, name=None): @@ -1556,12 +1559,27 @@ def __get_gettz(): # We also cannot store weak references to None, so we # will also not store that. self.__instances[name] = rv + else: + # No need for strong caching, return immediately + return rv + + self.__strong_cache[name] = self.__strong_cache.pop(name, rv) + + if len(self.__strong_cache) > self.__strong_cache_size: + self.__strong_cache.popitem(last=False) return rv + def set_cache_size(self, size): + with self._cache_lock: + self.__strong_cache_size = size + while len(self.__strong_cache) > size: + self.__strong_cache.popitem(last=False) + def cache_clear(self): with self._cache_lock: self.__instances = weakref.WeakValueDictionary() + self.__strong_cache.clear() @staticmethod def nocache(name=None): |