diff options
author | Alex Deymo <deymo@google.com> | 2018-02-13 10:30:08 +0000 |
---|---|---|
committer | android-build-merger <android-build-merger@google.com> | 2018-02-13 10:30:08 +0000 |
commit | f71e7b4bedaee9c72923978c7c305be9d1fc6e4b (patch) | |
tree | f8eb510cb524d41c4f53e420ed6c0bdcfcbc46d5 | |
parent | 7efa7ac2af90fc00608ae764bed2a92b8c19d51a (diff) | |
parent | 8593ed6bd83944f8ce1f3752262bce8f17224962 (diff) | |
download | bsdiff-f71e7b4bedaee9c72923978c7c305be9d1fc6e4b.tar.gz |
Merge "Allow to set the minimum required match length." am: 46e8a3d9bd
am: 8593ed6bd8
Change-Id: I01e8d40066551750fc02cce9fc32a339780afd0f
-rw-r--r-- | bsdiff.cc | 12 | ||||
-rw-r--r-- | bsdiff_arguments.cc | 25 | ||||
-rw-r--r-- | bsdiff_arguments.h | 9 | ||||
-rw-r--r-- | bsdiff_arguments_unittest.cc | 18 | ||||
-rw-r--r-- | bsdiff_main.cc | 6 | ||||
-rw-r--r-- | bsdiff_unittest.cc | 24 | ||||
-rw-r--r-- | include/bsdiff/bsdiff.h | 17 |
7 files changed, 101 insertions, 10 deletions
@@ -50,12 +50,20 @@ int bsdiff(const uint8_t* old_buf, size_t oldsize, const uint8_t* new_buf, size_t newsize, const char* patch_filename, SuffixArrayIndexInterface** sai_cache) { BsdiffPatchWriter patch(patch_filename); - return bsdiff(old_buf, oldsize, new_buf, newsize, &patch, sai_cache); + return bsdiff(old_buf, oldsize, new_buf, newsize, 0, &patch, + sai_cache); } int bsdiff(const uint8_t* old_buf, size_t oldsize, const uint8_t* new_buf, size_t newsize, PatchWriterInterface* patch, SuffixArrayIndexInterface** sai_cache) { + return bsdiff(old_buf, oldsize, new_buf, newsize, 0, patch, + sai_cache); +} + +int bsdiff(const uint8_t* old_buf, size_t oldsize, const uint8_t* new_buf, + size_t newsize, size_t min_length, PatchWriterInterface* patch, + SuffixArrayIndexInterface** sai_cache) { size_t scsc, scan; uint64_t pos=0; size_t len; @@ -112,7 +120,7 @@ int bsdiff(const uint8_t* old_buf, size_t oldsize, const uint8_t* new_buf, oldscore++; if(((len==oldscore) && (len!=0)) || - (len>oldscore+8)) break; + (len>oldscore+8 && len>=min_length)) break; if((scan+lastoffset<oldsize) && (old_buf[scan+lastoffset] == new_buf[scan])) diff --git a/bsdiff_arguments.cc b/bsdiff_arguments.cc index fabb7ef..f56c186 100644 --- a/bsdiff_arguments.cc +++ b/bsdiff_arguments.cc @@ -24,6 +24,7 @@ constexpr char kBsdiff40String[] = "bsdiff40"; const struct option OPTIONS[] = { {"format", required_argument, nullptr, 0}, + {"minlen", required_argument, nullptr, 0}, {"type", required_argument, nullptr, 0}, {"quality", required_argument, nullptr, 0}, {nullptr, 0, nullptr, 0}, @@ -58,11 +59,15 @@ bool BsdiffArguments::ParseCommandLine(int argc, char** argv) { return false; } - std::string name = OPTIONS[option_index].name; + string name = OPTIONS[option_index].name; if (name == "format") { if (!ParseBsdiffFormat(optarg, &format_)) { return false; } + } else if (name == "minlen") { + if (!ParseMinLength(optarg, &min_length_)) { + return false; + } } else if (name == "type") { if (!ParseCompressorType(optarg, &compressor_type_)) { return false; @@ -110,6 +115,24 @@ bool BsdiffArguments::ParseCompressorType(const string& str, return false; } +bool BsdiffArguments::ParseMinLength(const string& str, size_t* len) { + errno = 0; + char* end; + const char* s = str.c_str(); + long result = strtol(s, &end, 10); + if (errno != 0 || s == end || *end != '\0') { + return false; + } + + if (result < 0) { + std::cerr << "Minimum length must be non-negative: " << str << endl; + return false; + } + + *len = result; + return true; +} + bool BsdiffArguments::ParseBsdiffFormat(const string& str, BsdiffFormat* format) { string format_string = str; diff --git a/bsdiff_arguments.h b/bsdiff_arguments.h index 4d0e1e0..fdce065 100644 --- a/bsdiff_arguments.h +++ b/bsdiff_arguments.h @@ -35,6 +35,8 @@ class BsdiffArguments { // Getter functions. BsdiffFormat format() const { return format_; } + int min_length() const { return min_length_; } + CompressorType compressor_type() const { return compressor_type_; } int compression_quality() const { return compression_quality_; } @@ -46,6 +48,9 @@ class BsdiffArguments { // Parse the compression type from string. static bool ParseCompressorType(const string& str, CompressorType* type); + // Parse the minimum length parameter from string. + static bool ParseMinLength(const string& str, size_t* len); + // Parse the bsdiff format from string. static bool ParseBsdiffFormat(const string& str, BsdiffFormat* format); @@ -62,8 +67,10 @@ class BsdiffArguments { // The quality of compression, only valid when using brotli as the // compression algorithm. int compression_quality_; + + size_t min_length_{0}; }; } // namespace bsdiff -#endif //_BSDIFF_BSDIFF_ARGUMENTS_H_ +#endif // _BSDIFF_BSDIFF_ARGUMENTS_H_ diff --git a/bsdiff_arguments_unittest.cc b/bsdiff_arguments_unittest.cc index 15bfbf5..3f061c9 100644 --- a/bsdiff_arguments_unittest.cc +++ b/bsdiff_arguments_unittest.cc @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "bsdiff_arguments.h" +#include "bsdiff/bsdiff_arguments.h" #include <vector> @@ -46,6 +46,16 @@ TEST(BsdiffArgumentsTest, ParseQualityTest) { EXPECT_FALSE(BsdiffArguments::ParseQuality("aabb", &quality)); } +TEST(BsdiffArgumentsTest, ParseMinLengthTest) { + size_t len; + EXPECT_TRUE(BsdiffArguments::ParseMinLength("11", &len)); + EXPECT_EQ(11U, len); + + // Check the out of range quality values. + EXPECT_FALSE(BsdiffArguments::ParseMinLength("-1", &len)); + EXPECT_FALSE(BsdiffArguments::ParseMinLength("aabb", &len)); +} + TEST(BsdiffArgumentsTest, ArgumentsValidTest) { // Default arguments using BsdiffFormat::kLegacy and CompressorType::kBZ2 // should be valid. @@ -67,14 +77,16 @@ TEST(BsdiffArgumentsTest, ArgumentsValidTest) { TEST(BsdiffArgumentsTest, ParseArgumentsSmokeTest) { std::vector<const char*> args = {"bsdiff", "--format=bsdf2", "--type=brotli", - "--quality=9"}; + "--quality=9", "--minlen=12"}; BsdiffArguments arguments; - EXPECT_TRUE(arguments.ParseCommandLine(4, const_cast<char**>(args.data()))); + EXPECT_TRUE( + arguments.ParseCommandLine(args.size(), const_cast<char**>(args.data()))); EXPECT_EQ(BsdiffFormat::kBsdf2, arguments.format()); EXPECT_EQ(CompressorType::kBrotli, arguments.compressor_type()); EXPECT_EQ(9, arguments.compression_quality()); + EXPECT_EQ(12, arguments.min_length()); } } // namespace bsdiff diff --git a/bsdiff_main.cc b/bsdiff_main.cc index bce9453..1465441 100644 --- a/bsdiff_main.cc +++ b/bsdiff_main.cc @@ -85,8 +85,8 @@ int GenerateBsdiffFromFiles(const char* old_filename, return 1; } - return bsdiff::bsdiff(old_buf, oldsize, new_buf, newsize, patch_writer.get(), - nullptr); + return bsdiff::bsdiff(old_buf, oldsize, new_buf, newsize, + arguments.min_length(), patch_writer.get(), nullptr); } void PrintUsage(const std::string& proc_name) { @@ -94,6 +94,8 @@ void PrintUsage(const std::string& proc_name) { << " [options] oldfile newfile patchfile\n"; std::cerr << " --format <legacy|bsdiff40|bsdf2> The format of the bsdiff" " patch.\n" + << " --minlen LEN The minimum match length required " + "to consider a match in the algorithm.\n" << " --type <bz2|brotli> The algorithm to compress the " "patch, bsdf2 format only.\n" << " --quality Quality of the patch compression," diff --git a/bsdiff_unittest.cc b/bsdiff_unittest.cc index 6bd128d..ef4d3ef 100644 --- a/bsdiff_unittest.cc +++ b/bsdiff_unittest.cc @@ -33,11 +33,12 @@ class BsdiffTest : public testing::Test { void RunBsdiff() { EXPECT_EQ(0, bsdiff(old_file_.data(), old_file_.size(), new_file_.data(), - new_file_.size(), &patch_writer_, nullptr)); + new_file_.size(), min_len_, &patch_writer_, nullptr)); } std::vector<uint8_t> old_file_; std::vector<uint8_t> new_file_; + size_t min_len_ = 0; // 0 means the default. FakePatchWriter patch_writer_; }; @@ -81,4 +82,25 @@ TEST_F(BsdiffTest, FileWithSmallErrorsTest) { EXPECT_EQ(0U, entry.extra_size); } +TEST_F(BsdiffTest, MinLengthConsideredTest) { + old_file_.resize(100); + GenerateRandomBuffer(&old_file_); + new_file_ = old_file_; + // Copy the first 10 bytes to the middle. + for (size_t i = 0; i < 10; i++) { + new_file_[50 + i] = old_file_[i]; + } + + min_len_ = 12; + RunBsdiff(); + + // We expect that the 10 bytes in the middle that match the beginning are + // ignored and just emitted as diff data because the min_len is bigger than + // 10. + EXPECT_EQ(1U, patch_writer_.entries().size()); + ControlEntry entry = patch_writer_.entries()[0]; + EXPECT_EQ(100U, entry.diff_size); + EXPECT_EQ(0U, entry.extra_size); +} + } // namespace bsdiff diff --git a/include/bsdiff/bsdiff.h b/include/bsdiff/bsdiff.h index 08cb688..b6475f4 100644 --- a/include/bsdiff/bsdiff.h +++ b/include/bsdiff/bsdiff.h @@ -34,6 +34,23 @@ int bsdiff(const uint8_t* old_buf, PatchWriterInterface* patch, SuffixArrayIndexInterface** sai_cache); +// The |min_length| parameter determines the required minimum length of a match +// to be considered instead of emitting mismatches. The minimum value is 9, +// since smaller matches are always ignored. If a smaller value is passed, the +// minimum value of 9 will be used instead. A very large value (past 30) will +// give increasingly bad results as you increase the minimum length since legit +// matches between the old and new data will be ignored. The exact best value +// depends on the data, but the sweet spot should be between 9 and 20 for the +// examples tested. +BSDIFF_EXPORT +int bsdiff(const uint8_t* old_buf, + size_t oldsize, + const uint8_t* new_buf, + size_t newsize, + size_t min_length, + PatchWriterInterface* patch, + SuffixArrayIndexInterface** sai_cache); + } // namespace bsdiff #endif // _BSDIFF_BSDIFF_H_ |