summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorAlex Deymo <deymo@google.com>2018-02-08 15:50:11 +0100
committerAlex Deymo <deymo@google.com>2018-02-12 17:20:51 +0100
commit383f677d7e0d9e8e5a967c5451f5f6a663f222c6 (patch)
treef8eb510cb524d41c4f53e420ed6c0bdcfcbc46d5 /include
parent6c53ba8a18007181f167acfe12d456cc87081fad (diff)
downloadbsdiff-383f677d7e0d9e8e5a967c5451f5f6a663f222c6.tar.gz
Allow setting the minimum required match length.
The minimum required match length plays an important role in the current algorithm. This algorithm requires that the current match has at least 8 mismatches over a region where we find a new match, therefore imposing a minimum match length of at least 8 bytes. This patch allows increasing this minimum value to avoid using small matches that cover only a very small region. A larger value can improve the patch size, depending on the data, but too large a value will cause the algorithm to miss matches that would otherwise be beneficial. Bug: 73107113 Test: Added unit tests. Ran it with different minlen values. Change-Id: Iac594fc1cd7ecd5cfdc676bdb6ebe7c626de6dcd
Diffstat (limited to 'include')
-rw-r--r--include/bsdiff/bsdiff.h17
1 files changed, 17 insertions, 0 deletions
diff --git a/include/bsdiff/bsdiff.h b/include/bsdiff/bsdiff.h
index 08cb688..b6475f4 100644
--- a/include/bsdiff/bsdiff.h
+++ b/include/bsdiff/bsdiff.h
@@ -34,6 +34,23 @@ int bsdiff(const uint8_t* old_buf,
PatchWriterInterface* patch,
SuffixArrayIndexInterface** sai_cache);
+// The |min_length| parameter determines the required minimum length of a match
+// to be considered instead of emitting mismatches. The minimum value is 9,
+// since smaller matches are always ignored. If a smaller value is passed, the
+// minimum value of 9 will be used instead. A very large value (past 30) will
+// give increasingly bad results as you increase the minimum length since legit
+// matches between the old and new data will be ignored. The exact best value
+// depends on the data, but the sweet spot should be between 9 and 20 for the
+// examples tested.
+BSDIFF_EXPORT
+int bsdiff(const uint8_t* old_buf,
+ size_t oldsize,
+ const uint8_t* new_buf,
+ size_t newsize,
+ size_t min_length,
+ PatchWriterInterface* patch,
+ SuffixArrayIndexInterface** sai_cache);
+
} // namespace bsdiff
#endif // _BSDIFF_BSDIFF_H_