aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKelvin Zhang <zhangkelvin@google.com>2024-03-14 18:29:46 -0700
committerKelvin Zhang <zhangkelvin@google.com>2024-03-19 12:22:24 -0700
commit8b116a3f5dea5f64f272806ad87826d100d50f94 (patch)
tree1868cecc6b9795ae356357b138eaef521934883a
parentf8716627219e8a94347e6def321495541adcc69e (diff)
downloadpuffin-8b116a3f5dea5f64f272806ad87826d100d50f94.tar.gz
Add O(1) LRU cache to puffin
puffin uses an O(n) LRU cache, with a small cache size it's fine. Since we are planning on increasing the cache size for better performance, improve LRU efficiency so that it's not a bottleneck. When using a 50MB cache size, this reduces runtime on reference hardware from 238s to 180s . Test: Run puffin with --cache_size=50MB Bug: 320443894 Change-Id: I3dde082702fcedb63cdd04b238c1382b75fb0109
-rw-r--r--Android.bp1
-rw-r--r--src/include/puffin/common.h3
-rw-r--r--src/puffer.cc3
-rw-r--r--src/puffin_stream.cc99
-rw-r--r--src/puffin_stream.h43
5 files changed, 91 insertions, 58 deletions
diff --git a/Android.bp b/Android.bp
index 022be2e..fdaab47 100644
--- a/Android.bp
+++ b/Android.bp
@@ -110,6 +110,7 @@ cc_binary {
cc_test {
name: "puffin_unittest",
+ host_supported: true,
defaults: ["puffin_defaults"],
test_suites: ["device-tests"],
cflags: ["-Wno-sign-compare"],
diff --git a/src/include/puffin/common.h b/src/include/puffin/common.h
index 97e0c07..64eb6f2 100644
--- a/src/include/puffin/common.h
+++ b/src/include/puffin/common.h
@@ -5,8 +5,7 @@
#ifndef SRC_INCLUDE_PUFFIN_COMMON_H_
#define SRC_INCLUDE_PUFFIN_COMMON_H_
-#include <functional>
-#include <memory>
+#include <ostream>
#include <vector>
#ifndef DISALLOW_COPY_AND_ASSIGN
diff --git a/src/puffer.cc b/src/puffer.cc
index 6d7b287..32bf6d3 100644
--- a/src/puffer.cc
+++ b/src/puffer.cc
@@ -4,16 +4,13 @@
#include "puffin/src/include/puffin/puffer.h"
-#include <algorithm>
#include <memory>
#include <string>
-#include <utility>
#include <vector>
#include "puffin/src/bit_reader.h"
#include "puffin/src/huffman_table.h"
#include "puffin/src/include/puffin/common.h"
-#include "puffin/src/include/puffin/stream.h"
#include "puffin/src/logging.h"
#include "puffin/src/puff_data.h"
#include "puffin/src/puff_writer.h"
diff --git a/src/puffin_stream.cc b/src/puffin_stream.cc
index b48783e..c50ad93 100644
--- a/src/puffin_stream.cc
+++ b/src/puffin_stream.cc
@@ -6,7 +6,6 @@
#include <algorithm>
#include <memory>
-#include <string>
#include <utility>
#include <vector>
@@ -106,8 +105,7 @@ PuffinStream::PuffinStream(UniqueStreamPtr stream,
extra_byte_(0),
is_for_puff_(puffer_ ? true : false),
closed_(false),
- max_cache_size_(max_cache_size),
- cur_cache_size_(0) {
+ lru_cache_(max_cache_size) {
// Building upper bounds for faster seek.
upper_bounds_.reserve(puffs.size());
for (const auto& puff : puffs) {
@@ -134,9 +132,6 @@ PuffinStream::PuffinStream(UniqueStreamPtr stream,
max_puff_length = std::max(max_puff_length, puff.length);
}
puff_buffer_.reset(new Buffer(max_puff_length + 1));
- if (max_cache_size_ < max_puff_length) {
- max_cache_size_ = 0; // It means we are not caching puffs.
- }
uint64_t max_deflate_length = 0;
for (const auto& deflate : deflates) {
@@ -262,12 +257,12 @@ bool PuffinStream::Read(void* buffer, size_t count) {
auto end_byte = (cur_deflate_->offset + cur_deflate_->length + 7) / 8;
auto bytes_to_read = end_byte - start_byte;
// Puff directly to buffer if it has space.
- bool puff_directly_into_buffer =
- max_cache_size_ == 0 && (skip_bytes_ == 0) &&
+ const bool puff_directly_into_buffer =
+ lru_cache_.capacity() == 0 && (skip_bytes_ == 0) &&
(length - bytes_read >= cur_puff_->length);
auto cur_puff_idx = std::distance(puffs_.begin(), cur_puff_);
- if (max_cache_size_ == 0 ||
+ if (lru_cache_.capacity() == 0 ||
!GetPuffCache(cur_puff_idx, cur_puff_->length, &puff_buffer_)) {
// Did not find the puff buffer in cache. We have to build it.
deflate_buffer_->resize(bytes_to_read);
@@ -440,51 +435,65 @@ bool PuffinStream::SetExtraByte() {
bool PuffinStream::GetPuffCache(int puff_id,
uint64_t puff_size,
shared_ptr<Buffer>* buffer) {
- bool found = false;
// Search for it.
- std::pair<int, shared_ptr<Buffer>> cache;
- // TODO(*): Find a faster way of doing this? Maybe change the data structure
- // that supports faster search.
- for (auto iter = caches_.begin(); iter != caches_.end(); ++iter) {
- if (iter->first == puff_id) {
- cache = std::move(*iter);
- found = true;
- // Remove it so later we can add it to the begining of the list.
- caches_.erase(iter);
- break;
- }
- }
+ shared_ptr<Buffer> cache = lru_cache_.get(puff_id);
+ const bool found = cache != nullptr;
// If not found, either create one or get one from the list.
if (!found) {
- // If |caches_| were full, remove last ones in the list (least used), until
- // we have enough space for the new cache.
- while (!caches_.empty() && cur_cache_size_ + puff_size > max_cache_size_) {
- cache = std::move(caches_.back());
- caches_.pop_back(); // Remove it from the list.
- cur_cache_size_ -= cache.second->capacity();
- }
// If we have not populated the cache yet, create one.
- if (!cache.second) {
- cache.second.reset(new Buffer(puff_size));
- }
- cache.second->resize(puff_size);
+ lru_cache_.EnsureSpace(puff_size);
+ cache = std::make_shared<Buffer>(puff_size);
- constexpr uint64_t kMaxSizeDifference = 20 * 1024;
- if (puff_size + kMaxSizeDifference < cache.second->capacity()) {
- cache.second->shrink_to_fit();
- }
-
- cur_cache_size_ += cache.second->capacity();
- cache.first = puff_id;
+ lru_cache_.put(puff_id, cache);
}
- *buffer = cache.second;
- // By now we have either removed a cache or created new one. Now we have to
- // insert it in the front of the list so it becomes the most recently used
- // one.
- caches_.push_front(std::move(cache));
+ *buffer = cache;
return found;
}
+const LRUCache::Value LRUCache::get(const LRUCache::Key& key) {
+ auto it = items_map_.find(key);
+ if (it == items_map_.end()) {
+ return nullptr;
+ }
+ items_list_.splice(items_list_.begin(), items_list_, it->second);
+ return it->second->second;
+}
+
+void LRUCache::put(const LRUCache::Key& key, const LRUCache::Value& value) {
+ auto it = items_map_.find(key);
+ if (it != items_map_.end()) {
+ cache_size_ -= it->second->second->capacity();
+ items_list_.erase(it->second);
+ items_map_.erase(it);
+ }
+ EnsureSpace(value->capacity());
+ cache_size_ += value->capacity();
+ items_list_.push_front(key_value_pair_t(key, value));
+ items_map_[key] = items_list_.begin();
+}
+
+void LRUCache::EvictLRUItem() {
+ if (items_list_.empty()) {
+ return;
+ }
+ auto last = items_list_.back();
+ cache_size_ -= last.second->capacity();
+ items_map_.erase(last.first);
+ items_list_.pop_back();
+}
+
+void LRUCache::EnsureSpace(size_t size) {
+ if (size > max_size_) {
+ items_list_.clear();
+ items_map_.clear();
+ cache_size_ = 0;
+ return;
+ }
+ while (cache_size_ + size > max_size_) {
+ EvictLRUItem();
+ }
+}
+
} // namespace puffin
diff --git a/src/puffin_stream.h b/src/puffin_stream.h
index bb35314..1f8ec4f 100644
--- a/src/puffin_stream.h
+++ b/src/puffin_stream.h
@@ -7,7 +7,7 @@
#include <list>
#include <memory>
-#include <string>
+#include <unordered_map>
#include <utility>
#include <vector>
@@ -18,6 +18,38 @@
namespace puffin {
+class LRUCache {
+ public:
+ using Key = int;
+ using Value = std::shared_ptr<Buffer>;
+ typedef typename std::pair<Key, Value> key_value_pair_t;
+ typedef typename std::list<key_value_pair_t>::iterator iterator;
+
+ LRUCache(size_t max_size) : max_size_(max_size) {}
+
+ void put(const Key& key, const Value& value);
+
+ void EvictLRUItem();
+
+ // Ensure that cache has sufficient space for a new item of |size| bytes
+ void EnsureSpace(size_t size);
+
+ const Value get(const Key& key);
+
+ bool exists(const Key& key) const {
+ return items_map_.find(key) != items_map_.end();
+ }
+
+ size_t size() const { return cache_size_; }
+ size_t capacity() const { return max_size_; }
+
+ private:
+ std::list<key_value_pair_t> items_list_;
+ std::unordered_map<Key, iterator> items_map_;
+ size_t cache_size_ = 0;
+ size_t max_size_;
+};
+
// A class for puffing a deflate stream and huffing into a deflate stream. The
// puff stream is "imaginary", which means it doesn't really exists; It is build
// and used on demand. This class uses a given deflate stream, and puffs the
@@ -154,13 +186,8 @@ class PuffinStream : public StreamInterface {
std::unique_ptr<Buffer> deflate_buffer_;
std::shared_ptr<Buffer> puff_buffer_;
- // The list of puff buffer caches.
- std::list<std::pair<int, std::shared_ptr<Buffer>>> caches_;
- // The maximum memory (in bytes) kept for caching puff buffers by an object of
- // this class.
- size_t max_cache_size_;
- // The current amount of memory (in bytes) used for caching puff buffers.
- uint64_t cur_cache_size_;
+ // The LRU cache for holding puff data
+ LRUCache lru_cache_;
DISALLOW_COPY_AND_ASSIGN(PuffinStream);
};