about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Paul Ganssle <pganssle@users.noreply.github.com> 2018-10-17 05:02:28 -0400
committer: GitHub <noreply@github.com> 2018-10-17 05:02:28 -0400
commit: 83df62a15a8cfe3a35f59482fce70b98bc94b262 (patch)
tree: 6d8868d937e0775b738d92ac3068793c8da7ab2d
parent: 770b8f05e0f4de22a2cae4c6ac81c8df725369e7 (diff)
parent: 02ace35402e1bc352ce08a6eee9bffae7b1593f3 (diff)
download: dateutil-83df62a15a8cfe3a35f59482fce70b98bc94b262.tar.gz
Merge pull request #761 from gokcennurlu/lru_cache_tz
Add LRU caching to tzoffset, tzstr and gettz
-rw-r--r-- AUTHORS.md | 1
-rw-r--r-- changelog.d/761.feature.rst | 1
-rw-r--r-- dateutil/test/test_tz.py | 50
-rw-r--r-- dateutil/tz/_factories.py | 24
-rw-r--r-- dateutil/tz/tz.py | 18
5 files changed, 91 insertions, 3 deletions
diff --git a/AUTHORS.md b/AUTHORS.md
index 7142548..fcbb7ce 100644
--- a/AUTHORS.md
+++ b/AUTHORS.md
@@ -44,6 +44,7 @@ switch, and thus all their contributions are dual-licensed.
- Florian Rathgeber (gh: @kynan) **D**
- Gabriel Bianconi <gabriel@MASKED> (gh: @GabrielBianconi) **D**
- Gabriel Poesia <gabriel.poesia@MASKED>
+- Gökçen Nurlu <gnurlu1@bloomberg.net> (gh: @gokcennurlu) **D**
- Gustavo Niemeyer <gustavo@niemeyer.net> (gh: @niemeyer)
- Holger Joukl <holger.joukl@MASKED> (gh: @hjoukl)
- Igor <mrigor83@MASKED>
diff --git a/changelog.d/761.feature.rst b/changelog.d/761.feature.rst
new file mode 100644
index 0000000..37c9b4d
--- /dev/null
+++ b/changelog.d/761.feature.rst
@@ -0,0 +1 @@
+Added a small "strong value" cache into ``tz.gettz``, ``tz.tzoffset`` and ``tz.tzstr`` to improve performance in the situation where transient references are repeatedly created to the same time zones, but no strong reference is continuously held. Patch by Gökçen Nurlu (gh issue #691, gh pr #761)
diff --git a/dateutil/test/test_tz.py b/dateutil/test/test_tz.py
index 16d2bda..97a0c68 100644
--- a/dateutil/test/test_tz.py
+++ b/dateutil/test/test_tz.py
@@ -746,7 +746,15 @@ def test_tzoffset_weakref():
del UTC1
gc.collect()
- assert UTC_ref() is None
+ assert UTC_ref() is not None # Should be in the strong cache
+ assert UTC_ref() is tz.tzoffset('UTC', 0)
+
+ # Fill the strong cache with other items
+ for offset in range(5,15):
+ tz.tzoffset('RandomZone', offset)
+
+ gc.collect()
+ assert UTC_ref() is None
assert UTC_ref() is not tz.tzoffset('UTC', 0)
@@ -1106,12 +1114,33 @@ def test_gettz_cache_clear():
assert NYC1 is not NYC2
+@pytest.mark.gettz
+@pytest.mark.xfail(IS_WIN, reason='zoneinfo separately cached')
+def test_gettz_set_cache_size():
+ tz.gettz.cache_clear()
+ tz.gettz.set_cache_size(3)
+
+ MONACO_ref = weakref.ref(tz.gettz('Europe/Monaco'))
+ EASTER_ref = weakref.ref(tz.gettz('Pacific/Easter'))
+ CURRIE_ref = weakref.ref(tz.gettz('Australia/Currie'))
+
+ gc.collect()
+
+ assert MONACO_ref() is not None
+ assert EASTER_ref() is not None
+ assert CURRIE_ref() is not None
+
+ tz.gettz.set_cache_size(2)
+ gc.collect()
+
+ assert MONACO_ref() is None
@pytest.mark.xfail(IS_WIN, reason="Windows does not use system zoneinfo")
@pytest.mark.smoke
@pytest.mark.gettz
def test_gettz_weakref():
tz.gettz.cache_clear()
+ tz.gettz.set_cache_size(2)
NYC1 = tz.gettz('America/New_York')
NYC_ref = weakref.ref(tz.gettz('America/New_York'))
@@ -1120,9 +1149,17 @@ def test_gettz_weakref():
del NYC1
gc.collect()
- assert NYC_ref() is None
- assert tz.gettz('America/New_York') is not NYC_ref()
+ assert NYC_ref() is not None # Should still be in the strong cache
+ assert tz.gettz('America/New_York') is NYC_ref()
+
+ # Populate strong cache with other timezones
+ tz.gettz('Europe/Monaco')
+ tz.gettz('Pacific/Easter')
+ tz.gettz('Australia/Currie')
+ gc.collect()
+ assert NYC_ref() is None # Should have been pushed out
+ assert tz.gettz('America/New_York') is not NYC_ref()
class ZoneInfoGettzTest(GettzTest, WarningTestMixin):
def gettz(self, name):
@@ -1462,6 +1499,13 @@ def test_tzstr_weakref():
del tz_t1
gc.collect()
+ assert tz_t2_ref() is not None
+ assert tz.tzstr('EST5EDT') is tz_t2_ref()
+
+ for offset in range(5,15):
+ tz.tzstr('GMT+{}'.format(offset))
+ gc.collect()
+
assert tz_t2_ref() is None
assert tz.tzstr('EST5EDT') is not tz_t2_ref()
diff --git a/dateutil/tz/_factories.py b/dateutil/tz/_factories.py
index 152aa85..d2560eb 100644
--- a/dateutil/tz/_factories.py
+++ b/dateutil/tz/_factories.py
@@ -1,5 +1,7 @@
from datetime import timedelta
import weakref
+from collections import OrderedDict
+
class _TzSingleton(type):
def __init__(cls, *args, **kwargs):
@@ -11,6 +13,7 @@ class _TzSingleton(type):
cls.__instance = super(_TzSingleton, cls).__call__()
return cls.__instance
+
class _TzFactory(type):
def instance(cls, *args, **kwargs):
"""Alternate constructor that returns a fresh instance"""
@@ -20,6 +23,8 @@ class _TzFactory(type):
class _TzOffsetFactory(_TzFactory):
def __init__(cls, *args, **kwargs):
cls.__instances = weakref.WeakValueDictionary()
+ cls.__strong_cache = OrderedDict()
+ cls.__strong_cache_size = 8
def __call__(cls, name, offset):
if isinstance(offset, timedelta):
@@ -31,12 +36,22 @@ class _TzOffsetFactory(_TzFactory):
if instance is None:
instance = cls.__instances.setdefault(key,
cls.instance(name, offset))
+
+ cls.__strong_cache[key] = cls.__strong_cache.pop(key, instance)
+
+ # Remove an item if the strong cache is overpopulated
+ # TODO: Maybe this should be under a lock?
+ if len(cls.__strong_cache) > cls.__strong_cache_size:
+ cls.__strong_cache.popitem(last=False)
+
return instance
class _TzStrFactory(_TzFactory):
def __init__(cls, *args, **kwargs):
cls.__instances = weakref.WeakValueDictionary()
+ cls.__strong_cache = OrderedDict()
+ cls.__strong_cache_size = 8
def __call__(cls, s, posix_offset=False):
key = (s, posix_offset)
@@ -45,5 +60,14 @@ class _TzStrFactory(_TzFactory):
if instance is None:
instance = cls.__instances.setdefault(key,
cls.instance(s, posix_offset))
+
+ cls.__strong_cache[key] = cls.__strong_cache.pop(key, instance)
+
+
+ # Remove an item if the strong cache is overpopulated
+ # TODO: Maybe this should be under a lock?
+ if len(cls.__strong_cache) > cls.__strong_cache_size:
+ cls.__strong_cache.popitem(last=False)
+
return instance
diff --git a/dateutil/tz/tz.py b/dateutil/tz/tz.py
index 101f446..161f4d8 100644
--- a/dateutil/tz/tz.py
+++ b/dateutil/tz/tz.py
@@ -14,6 +14,7 @@ import sys
import os
import bisect
import weakref
+from collections import OrderedDict
import six
from six import string_types
@@ -1538,6 +1539,8 @@ def __get_gettz():
def __init__(self):
self.__instances = weakref.WeakValueDictionary()
+ self.__strong_cache_size = 8
+ self.__strong_cache = OrderedDict()
self._cache_lock = _thread.allocate_lock()
def __call__(self, name=None):
@@ -1556,12 +1559,27 @@ def __get_gettz():
# We also cannot store weak references to None, so we
# will also not store that.
self.__instances[name] = rv
+ else:
+ # No need for strong caching, return immediately
+ return rv
+
+ self.__strong_cache[name] = self.__strong_cache.pop(name, rv)
+
+ if len(self.__strong_cache) > self.__strong_cache_size:
+ self.__strong_cache.popitem(last=False)
return rv
+ def set_cache_size(self, size):
+ with self._cache_lock:
+ self.__strong_cache_size = size
+ while len(self.__strong_cache) > size:
+ self.__strong_cache.popitem(last=False)
+
def cache_clear(self):
with self._cache_lock:
self.__instances = weakref.WeakValueDictionary()
+ self.__strong_cache.clear()
@staticmethod
def nocache(name=None):