aboutsummaryrefslogtreecommitdiff
path: root/equivalence_map.cc
diff options
context:
space:
mode:
authorCalder Kitagawa <ckitagawa@chromium.org>2018-06-28 15:32:16 +0000
committerCopybara-Service <copybara-worker@google.com>2021-07-25 20:01:42 -0700
commit2ed3877df49fb5271a03f999192d8640b41f5b5e (patch)
tree61fbdc145f07604707b87ab8f7d02a0d12b1e8af /equivalence_map.cc
parent8fdb8ba40fec579b42a7dc8bbd1475ee91e1aa42 (diff)
downloadzucchini-2ed3877df49fb5271a03f999192d8640b41f5b5e.tar.gz
[Zucchini] Fix O(n^2) in pathological case
In pathological cases, such as those provided in the relevant bug, Zucchini could exhibit O(n^2) behavior during seed selection. To remedy this, this CL adds a quota to limit the total number of bytes covered in seed selection search. However, as demonstrated in CL: https://chromium-review.googlesource.com/c/chromium/src/+/1115710 this is insufficient. haungs@ determined that if this is the only limitation then the ExtendEquivalenceBackward method will perform a linear time extension resulting in O(n) behvior which when applied over O(n) offsets results in O(n^2) behavior again. To limit this he proposed limiting the distance ExtendEquivalenceBackward can search which is also implemented in this CL. These parameters are tuned such that they have no noticeable impact when patching chrome.7z for Windows. For the pathological testcases in the bug the results are significantly better (refer to the bug for exact improvements). This solution results in linear time performance on the pathological cases. We could get these numbers down even smaller by reducing the limit on backward extension. However, this had a small, but noticeable ~5 kiB increase in patches for chrome.7z. Bug: 849471 Change-Id: If7e857884b00daeeae7a4a29b1236c749f0b84e4 Reviewed-on: https://chromium-review.googlesource.com/1117273 Commit-Queue: Calder Kitagawa <ckitagawa@chromium.org> Reviewed-by: Samuel Huang <huangs@chromium.org> Cr-Commit-Position: refs/heads/master@{#571134} NOKEYCHECK=True GitOrigin-RevId: 94722d4e926dee2b8557c8b4abb881a4bf1e77f0
Diffstat (limited to 'equivalence_map.cc')
-rw-r--r--equivalence_map.cc34
1 files changed, 32 insertions, 2 deletions
diff --git a/equivalence_map.cc b/equivalence_map.cc
index 40071c0..77551ec 100644
--- a/equivalence_map.cc
+++ b/equivalence_map.cc
@@ -15,6 +15,25 @@
namespace zucchini {
+namespace {
+
+// TODO(haungs): Tune these numbers to improve pathological case results.
+
+// In pathological cases Zucchini can exhibit O(n^2) behavior if the seed
+// selection process runs to completion. To prevent this we impose a quota for
+// the total length of equivalences the seed selection process can perform
+// trials on. For regular use cases it is unlikely this quota will be exceeded,
+// and if it is the effects on patch size are expected to be small.
+constexpr uint64_t kSeedSelectionTotalVisitLengthQuota = 1 << 18; // 256 KiB
+
+// The aforementioned quota alone is insufficient, as exploring backwards will
+// still be very successful resulting in O(n) behavior in the case of a limited
+// seed selection trials. This results in O(n^2) behavior returning. To mitigate
+// this we also impose a cap on the ExtendEquivalenceBackward() exploration.
+constexpr offset_t kBackwardsExtendLimit = 1 << 16; // 64 KiB
+
+} // namespace
+
/******** Utility Functions ********/
double GetTokenSimilarity(
@@ -133,8 +152,9 @@ EquivalenceCandidate ExtendEquivalenceBackward(
double current_similarity = candidate.similarity;
double best_similarity = current_similarity;
double current_penalty = 0.0;
- for (offset_t k = 1;
- k <= equivalence.dst_offset && k <= equivalence.src_offset; ++k) {
+ offset_t k_min = std::min(
+ {equivalence.dst_offset, equivalence.src_offset, kBackwardsExtendLimit});
+ for (offset_t k = 1; k <= k_min; ++k) {
// Mismatch in type, |candidate| cannot be extended further.
if (old_image_index.LookupType(equivalence.src_offset - k) !=
new_image_index.LookupType(equivalence.dst_offset - k)) {
@@ -413,6 +433,7 @@ void EquivalenceMap::CreateCandidates(
offset_t next_dst_offset = dst_offset + 1;
// TODO(huangs): Clean up.
double best_similarity = min_similarity;
+ uint64_t total_visit_length = 0;
EquivalenceCandidate best_candidate = {{0, 0, 0}, 0.0};
for (auto it = match; it != old_sa.end(); ++it) {
EquivalenceCandidate candidate = VisitEquivalenceSeed(
@@ -422,10 +443,15 @@ void EquivalenceMap::CreateCandidates(
best_candidate = candidate;
best_similarity = candidate.similarity;
next_dst_offset = candidate.eq.dst_end();
+ total_visit_length += candidate.eq.length;
+ if (total_visit_length > kSeedSelectionTotalVisitLengthQuota) {
+ break;
+ }
} else {
break;
}
}
+ total_visit_length = 0;
for (auto it = match; it != old_sa.begin(); --it) {
EquivalenceCandidate candidate = VisitEquivalenceSeed(
old_view.image_index(), new_view.image_index(), targets_affinities,
@@ -434,6 +460,10 @@ void EquivalenceMap::CreateCandidates(
best_candidate = candidate;
best_similarity = candidate.similarity;
next_dst_offset = candidate.eq.dst_end();
+ total_visit_length += candidate.eq.length;
+ if (total_visit_length > kSeedSelectionTotalVisitLengthQuota) {
+ break;
+ }
} else {
break;
}