diff options
author | Kelvin Zhang <zhangkelvin@google.com> | 2024-03-14 18:29:46 -0700 |
---|---|---|
committer | Kelvin Zhang <zhangkelvin@google.com> | 2024-03-19 12:22:24 -0700 |
commit | 8b116a3f5dea5f64f272806ad87826d100d50f94 (patch) | |
tree | 1868cecc6b9795ae356357b138eaef521934883a | |
parent | f8716627219e8a94347e6def321495541adcc69e (diff) | |
download | puffin-8b116a3f5dea5f64f272806ad87826d100d50f94.tar.gz |
Add O(1) LRU cache to puffin
puffin uses an O(n) LRU cache, with a small cache size it's fine. Since
we are planning on increasing the cache size for better performance,
improve LRU efficiency so that it's not a bottleneck.
When using a 50MB cache size, this reduces runtime on reference hardware
from 238s to 180s .
Test: Run puffin with --cache_size=50MB
Bug: 320443894
Change-Id: I3dde082702fcedb63cdd04b238c1382b75fb0109
-rw-r--r-- | Android.bp | 1 | ||||
-rw-r--r-- | src/include/puffin/common.h | 3 | ||||
-rw-r--r-- | src/puffer.cc | 3 | ||||
-rw-r--r-- | src/puffin_stream.cc | 99 | ||||
-rw-r--r-- | src/puffin_stream.h | 43 |
5 files changed, 91 insertions, 58 deletions
@@ -110,6 +110,7 @@ cc_binary { cc_test { name: "puffin_unittest", + host_supported: true, defaults: ["puffin_defaults"], test_suites: ["device-tests"], cflags: ["-Wno-sign-compare"], diff --git a/src/include/puffin/common.h b/src/include/puffin/common.h index 97e0c07..64eb6f2 100644 --- a/src/include/puffin/common.h +++ b/src/include/puffin/common.h @@ -5,8 +5,7 @@ #ifndef SRC_INCLUDE_PUFFIN_COMMON_H_ #define SRC_INCLUDE_PUFFIN_COMMON_H_ -#include <functional> -#include <memory> +#include <ostream> #include <vector> #ifndef DISALLOW_COPY_AND_ASSIGN diff --git a/src/puffer.cc b/src/puffer.cc index 6d7b287..32bf6d3 100644 --- a/src/puffer.cc +++ b/src/puffer.cc @@ -4,16 +4,13 @@ #include "puffin/src/include/puffin/puffer.h" -#include <algorithm> #include <memory> #include <string> -#include <utility> #include <vector> #include "puffin/src/bit_reader.h" #include "puffin/src/huffman_table.h" #include "puffin/src/include/puffin/common.h" -#include "puffin/src/include/puffin/stream.h" #include "puffin/src/logging.h" #include "puffin/src/puff_data.h" #include "puffin/src/puff_writer.h" diff --git a/src/puffin_stream.cc b/src/puffin_stream.cc index b48783e..c50ad93 100644 --- a/src/puffin_stream.cc +++ b/src/puffin_stream.cc @@ -6,7 +6,6 @@ #include <algorithm> #include <memory> -#include <string> #include <utility> #include <vector> @@ -106,8 +105,7 @@ PuffinStream::PuffinStream(UniqueStreamPtr stream, extra_byte_(0), is_for_puff_(puffer_ ? true : false), closed_(false), - max_cache_size_(max_cache_size), - cur_cache_size_(0) { + lru_cache_(max_cache_size) { // Building upper bounds for faster seek. upper_bounds_.reserve(puffs.size()); for (const auto& puff : puffs) { @@ -134,9 +132,6 @@ PuffinStream::PuffinStream(UniqueStreamPtr stream, max_puff_length = std::max(max_puff_length, puff.length); } puff_buffer_.reset(new Buffer(max_puff_length + 1)); - if (max_cache_size_ < max_puff_length) { - max_cache_size_ = 0; // It means we are not caching puffs. - } uint64_t max_deflate_length = 0; for (const auto& deflate : deflates) { @@ -262,12 +257,12 @@ bool PuffinStream::Read(void* buffer, size_t count) { auto end_byte = (cur_deflate_->offset + cur_deflate_->length + 7) / 8; auto bytes_to_read = end_byte - start_byte; // Puff directly to buffer if it has space. - bool puff_directly_into_buffer = - max_cache_size_ == 0 && (skip_bytes_ == 0) && + const bool puff_directly_into_buffer = + lru_cache_.capacity() == 0 && (skip_bytes_ == 0) && (length - bytes_read >= cur_puff_->length); auto cur_puff_idx = std::distance(puffs_.begin(), cur_puff_); - if (max_cache_size_ == 0 || + if (lru_cache_.capacity() == 0 || !GetPuffCache(cur_puff_idx, cur_puff_->length, &puff_buffer_)) { // Did not find the puff buffer in cache. We have to build it. deflate_buffer_->resize(bytes_to_read); @@ -440,51 +435,65 @@ bool PuffinStream::SetExtraByte() { bool PuffinStream::GetPuffCache(int puff_id, uint64_t puff_size, shared_ptr<Buffer>* buffer) { - bool found = false; // Search for it. - std::pair<int, shared_ptr<Buffer>> cache; - // TODO(*): Find a faster way of doing this? Maybe change the data structure - // that supports faster search. - for (auto iter = caches_.begin(); iter != caches_.end(); ++iter) { - if (iter->first == puff_id) { - cache = std::move(*iter); - found = true; - // Remove it so later we can add it to the begining of the list. - caches_.erase(iter); - break; - } - } + shared_ptr<Buffer> cache = lru_cache_.get(puff_id); + const bool found = cache != nullptr; // If not found, either create one or get one from the list. if (!found) { - // If |caches_| were full, remove last ones in the list (least used), until - // we have enough space for the new cache. - while (!caches_.empty() && cur_cache_size_ + puff_size > max_cache_size_) { - cache = std::move(caches_.back()); - caches_.pop_back(); // Remove it from the list. - cur_cache_size_ -= cache.second->capacity(); - } // If we have not populated the cache yet, create one. - if (!cache.second) { - cache.second.reset(new Buffer(puff_size)); - } - cache.second->resize(puff_size); + lru_cache_.EnsureSpace(puff_size); + cache = std::make_shared<Buffer>(puff_size); - constexpr uint64_t kMaxSizeDifference = 20 * 1024; - if (puff_size + kMaxSizeDifference < cache.second->capacity()) { - cache.second->shrink_to_fit(); - } - - cur_cache_size_ += cache.second->capacity(); - cache.first = puff_id; + lru_cache_.put(puff_id, cache); } - *buffer = cache.second; - // By now we have either removed a cache or created new one. Now we have to - // insert it in the front of the list so it becomes the most recently used - // one. - caches_.push_front(std::move(cache)); + *buffer = cache; return found; } +const LRUCache::Value LRUCache::get(const LRUCache::Key& key) { + auto it = items_map_.find(key); + if (it == items_map_.end()) { + return nullptr; + } + items_list_.splice(items_list_.begin(), items_list_, it->second); + return it->second->second; +} + +void LRUCache::put(const LRUCache::Key& key, const LRUCache::Value& value) { + auto it = items_map_.find(key); + if (it != items_map_.end()) { + cache_size_ -= it->second->second->capacity(); + items_list_.erase(it->second); + items_map_.erase(it); + } + EnsureSpace(value->capacity()); + cache_size_ += value->capacity(); + items_list_.push_front(key_value_pair_t(key, value)); + items_map_[key] = items_list_.begin(); +} + +void LRUCache::EvictLRUItem() { + if (items_list_.empty()) { + return; + } + auto last = items_list_.back(); + cache_size_ -= last.second->capacity(); + items_map_.erase(last.first); + items_list_.pop_back(); +} + +void LRUCache::EnsureSpace(size_t size) { + if (size > max_size_) { + items_list_.clear(); + items_map_.clear(); + cache_size_ = 0; + return; + } + while (cache_size_ + size > max_size_) { + EvictLRUItem(); + } +} + } // namespace puffin diff --git a/src/puffin_stream.h b/src/puffin_stream.h index bb35314..1f8ec4f 100644 --- a/src/puffin_stream.h +++ b/src/puffin_stream.h @@ -7,7 +7,7 @@ #include <list> #include <memory> -#include <string> +#include <unordered_map> #include <utility> #include <vector> @@ -18,6 +18,38 @@ namespace puffin { +class LRUCache { + public: + using Key = int; + using Value = std::shared_ptr<Buffer>; + typedef typename std::pair<Key, Value> key_value_pair_t; + typedef typename std::list<key_value_pair_t>::iterator iterator; + + LRUCache(size_t max_size) : max_size_(max_size) {} + + void put(const Key& key, const Value& value); + + void EvictLRUItem(); + + // Ensure that cache has sufficient space for a new item of |size| bytes + void EnsureSpace(size_t size); + + const Value get(const Key& key); + + bool exists(const Key& key) const { + return items_map_.find(key) != items_map_.end(); + } + + size_t size() const { return cache_size_; } + size_t capacity() const { return max_size_; } + + private: + std::list<key_value_pair_t> items_list_; + std::unordered_map<Key, iterator> items_map_; + size_t cache_size_ = 0; + size_t max_size_; +}; + // A class for puffing a deflate stream and huffing into a deflate stream. The // puff stream is "imaginary", which means it doesn't really exists; It is build // and used on demand. This class uses a given deflate stream, and puffs the @@ -154,13 +186,8 @@ class PuffinStream : public StreamInterface { std::unique_ptr<Buffer> deflate_buffer_; std::shared_ptr<Buffer> puff_buffer_; - // The list of puff buffer caches. - std::list<std::pair<int, std::shared_ptr<Buffer>>> caches_; - // The maximum memory (in bytes) kept for caching puff buffers by an object of - // this class. - size_t max_cache_size_; - // The current amount of memory (in bytes) used for caching puff buffers. - uint64_t cur_cache_size_; + // The LRU cache for holding puff data + LRUCache lru_cache_; DISALLOW_COPY_AND_ASSIGN(PuffinStream); }; |