diff options
author | android-build-team Robot <android-build-team-robot@google.com> | 2018-02-14 08:22:22 +0000 |
---|---|---|
committer | android-build-team Robot <android-build-team-robot@google.com> | 2018-02-14 08:22:22 +0000 |
commit | 1b46ecc637b20328bf1520eab77a7f1f8f3374fe (patch) | |
tree | fbd6896513ee8d422f91ca6dbcbe5983932c975e | |
parent | 7583a7b8fca786075ad89d86da43865d95dad625 (diff) | |
parent | 0b6eab0f1421d6abfd701b751e90c2dec0e9bb51 (diff) | |
download | bsdiff-1b46ecc637b20328bf1520eab77a7f1f8f3374fe.tar.gz |
Snap for 4603395 from 0b6eab0f1421d6abfd701b751e90c2dec0e9bb51 to pi-release
Change-Id: I070f8c2b9816a2390e8996451675919ed17d1dc1
-rw-r--r-- | bsdiff.cc | 12 | ||||
-rw-r--r-- | bsdiff_arguments.cc | 25 | ||||
-rw-r--r-- | bsdiff_arguments.h | 17 | ||||
-rw-r--r-- | bsdiff_arguments_unittest.cc | 18 | ||||
-rw-r--r-- | bsdiff_main.cc | 6 | ||||
-rw-r--r-- | bsdiff_unittest.cc | 24 | ||||
-rw-r--r-- | include/bsdiff/bsdiff.h | 17 |
7 files changed, 104 insertions, 15 deletions
@@ -50,12 +50,20 @@ int bsdiff(const uint8_t* old_buf, size_t oldsize, const uint8_t* new_buf, size_t newsize, const char* patch_filename, SuffixArrayIndexInterface** sai_cache) { BsdiffPatchWriter patch(patch_filename); - return bsdiff(old_buf, oldsize, new_buf, newsize, &patch, sai_cache); + return bsdiff(old_buf, oldsize, new_buf, newsize, 0, &patch, + sai_cache); } int bsdiff(const uint8_t* old_buf, size_t oldsize, const uint8_t* new_buf, size_t newsize, PatchWriterInterface* patch, SuffixArrayIndexInterface** sai_cache) { + return bsdiff(old_buf, oldsize, new_buf, newsize, 0, patch, + sai_cache); +} + +int bsdiff(const uint8_t* old_buf, size_t oldsize, const uint8_t* new_buf, + size_t newsize, size_t min_length, PatchWriterInterface* patch, + SuffixArrayIndexInterface** sai_cache) { size_t scsc, scan; uint64_t pos=0; size_t len; @@ -112,7 +120,7 @@ int bsdiff(const uint8_t* old_buf, size_t oldsize, const uint8_t* new_buf, oldscore++; if(((len==oldscore) && (len!=0)) || - (len>oldscore+8)) break; + (len>oldscore+8 && len>=min_length)) break; if((scan+lastoffset<oldsize) && (old_buf[scan+lastoffset] == new_buf[scan])) diff --git a/bsdiff_arguments.cc b/bsdiff_arguments.cc index fabb7ef..f56c186 100644 --- a/bsdiff_arguments.cc +++ b/bsdiff_arguments.cc @@ -24,6 +24,7 @@ constexpr char kBsdiff40String[] = "bsdiff40"; const struct option OPTIONS[] = { {"format", required_argument, nullptr, 0}, + {"minlen", required_argument, nullptr, 0}, {"type", required_argument, nullptr, 0}, {"quality", required_argument, nullptr, 0}, {nullptr, 0, nullptr, 0}, @@ -58,11 +59,15 @@ bool BsdiffArguments::ParseCommandLine(int argc, char** argv) { return false; } - std::string name = OPTIONS[option_index].name; + string name = OPTIONS[option_index].name; if (name == "format") { if (!ParseBsdiffFormat(optarg, &format_)) { return false; } + } else if (name == "minlen") { + if (!ParseMinLength(optarg, &min_length_)) { + return false; + } } else if (name == "type") { if (!ParseCompressorType(optarg, &compressor_type_)) { return false; @@ -110,6 +115,24 @@ bool BsdiffArguments::ParseCompressorType(const string& str, return false; } +bool BsdiffArguments::ParseMinLength(const string& str, size_t* len) { + errno = 0; + char* end; + const char* s = str.c_str(); + long result = strtol(s, &end, 10); + if (errno != 0 || s == end || *end != '\0') { + return false; + } + + if (result < 0) { + std::cerr << "Minimum length must be non-negative: " << str << endl; + return false; + } + + *len = result; + return true; +} + bool BsdiffArguments::ParseBsdiffFormat(const string& str, BsdiffFormat* format) { string format_string = str; diff --git a/bsdiff_arguments.h b/bsdiff_arguments.h index 4d0e1e0..b330834 100644 --- a/bsdiff_arguments.h +++ b/bsdiff_arguments.h @@ -12,8 +12,6 @@ #include "bsdiff/constants.h" #include "bsdiff/patch_writer_interface.h" -using std::string; - namespace bsdiff { // Class to store the patch writer options about format, type and quality. @@ -35,6 +33,8 @@ class BsdiffArguments { // Getter functions. BsdiffFormat format() const { return format_; } + int min_length() const { return min_length_; } + CompressorType compressor_type() const { return compressor_type_; } int compression_quality() const { return compression_quality_; } @@ -44,13 +44,16 @@ class BsdiffArguments { bool ParseCommandLine(int argc, char** argv); // Parse the compression type from string. - static bool ParseCompressorType(const string& str, CompressorType* type); + static bool ParseCompressorType(const std::string& str, CompressorType* type); + + // Parse the minimum length parameter from string. + static bool ParseMinLength(const std::string& str, size_t* len); // Parse the bsdiff format from string. - static bool ParseBsdiffFormat(const string& str, BsdiffFormat* format); + static bool ParseBsdiffFormat(const std::string& str, BsdiffFormat* format); // Parse the compression quality (for brotli) from string. - static bool ParseQuality(const string& str, int* quality); + static bool ParseQuality(const std::string& str, int* quality); private: // Current format supported are the legacy "BSDIFF40" or "BSDF2". @@ -62,8 +65,10 @@ class BsdiffArguments { // The quality of compression, only valid when using brotli as the // compression algorithm. int compression_quality_; + + size_t min_length_{0}; }; } // namespace bsdiff -#endif //_BSDIFF_BSDIFF_ARGUMENTS_H_ +#endif // _BSDIFF_BSDIFF_ARGUMENTS_H_ diff --git a/bsdiff_arguments_unittest.cc b/bsdiff_arguments_unittest.cc index 15bfbf5..3f061c9 100644 --- a/bsdiff_arguments_unittest.cc +++ b/bsdiff_arguments_unittest.cc @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "bsdiff_arguments.h" +#include "bsdiff/bsdiff_arguments.h" #include <vector> @@ -46,6 +46,16 @@ TEST(BsdiffArgumentsTest, ParseQualityTest) { EXPECT_FALSE(BsdiffArguments::ParseQuality("aabb", &quality)); } +TEST(BsdiffArgumentsTest, ParseMinLengthTest) { + size_t len; + EXPECT_TRUE(BsdiffArguments::ParseMinLength("11", &len)); + EXPECT_EQ(11U, len); + + // Check the out of range quality values. + EXPECT_FALSE(BsdiffArguments::ParseMinLength("-1", &len)); + EXPECT_FALSE(BsdiffArguments::ParseMinLength("aabb", &len)); +} + TEST(BsdiffArgumentsTest, ArgumentsValidTest) { // Default arguments using BsdiffFormat::kLegacy and CompressorType::kBZ2 // should be valid. @@ -67,14 +77,16 @@ TEST(BsdiffArgumentsTest, ArgumentsValidTest) { TEST(BsdiffArgumentsTest, ParseArgumentsSmokeTest) { std::vector<const char*> args = {"bsdiff", "--format=bsdf2", "--type=brotli", - "--quality=9"}; + "--quality=9", "--minlen=12"}; BsdiffArguments arguments; - EXPECT_TRUE(arguments.ParseCommandLine(4, const_cast<char**>(args.data()))); + EXPECT_TRUE( + arguments.ParseCommandLine(args.size(), const_cast<char**>(args.data()))); EXPECT_EQ(BsdiffFormat::kBsdf2, arguments.format()); EXPECT_EQ(CompressorType::kBrotli, arguments.compressor_type()); EXPECT_EQ(9, arguments.compression_quality()); + EXPECT_EQ(12, arguments.min_length()); } } // namespace bsdiff diff --git a/bsdiff_main.cc b/bsdiff_main.cc index bce9453..1465441 100644 --- a/bsdiff_main.cc +++ b/bsdiff_main.cc @@ -85,8 +85,8 @@ int GenerateBsdiffFromFiles(const char* old_filename, return 1; } - return bsdiff::bsdiff(old_buf, oldsize, new_buf, newsize, patch_writer.get(), - nullptr); + return bsdiff::bsdiff(old_buf, oldsize, new_buf, newsize, + arguments.min_length(), patch_writer.get(), nullptr); } void PrintUsage(const std::string& proc_name) { @@ -94,6 +94,8 @@ void PrintUsage(const std::string& proc_name) { << " [options] oldfile newfile patchfile\n"; std::cerr << " --format <legacy|bsdiff40|bsdf2> The format of the bsdiff" " patch.\n" + << " --minlen LEN The minimum match length required " + "to consider a match in the algorithm.\n" << " --type <bz2|brotli> The algorithm to compress the " "patch, bsdf2 format only.\n" << " --quality Quality of the patch compression," diff --git a/bsdiff_unittest.cc b/bsdiff_unittest.cc index 6bd128d..ef4d3ef 100644 --- a/bsdiff_unittest.cc +++ b/bsdiff_unittest.cc @@ -33,11 +33,12 @@ class BsdiffTest : public testing::Test { void RunBsdiff() { EXPECT_EQ(0, bsdiff(old_file_.data(), old_file_.size(), new_file_.data(), - new_file_.size(), &patch_writer_, nullptr)); + new_file_.size(), min_len_, &patch_writer_, nullptr)); } std::vector<uint8_t> old_file_; std::vector<uint8_t> new_file_; + size_t min_len_ = 0; // 0 means the default. FakePatchWriter patch_writer_; }; @@ -81,4 +82,25 @@ TEST_F(BsdiffTest, FileWithSmallErrorsTest) { EXPECT_EQ(0U, entry.extra_size); } +TEST_F(BsdiffTest, MinLengthConsideredTest) { + old_file_.resize(100); + GenerateRandomBuffer(&old_file_); + new_file_ = old_file_; + // Copy the first 10 bytes to the middle. + for (size_t i = 0; i < 10; i++) { + new_file_[50 + i] = old_file_[i]; + } + + min_len_ = 12; + RunBsdiff(); + + // We expect that the 10 bytes in the middle that match the beginning are + // ignored and just emitted as diff data because the min_len is bigger than + // 10. + EXPECT_EQ(1U, patch_writer_.entries().size()); + ControlEntry entry = patch_writer_.entries()[0]; + EXPECT_EQ(100U, entry.diff_size); + EXPECT_EQ(0U, entry.extra_size); +} + } // namespace bsdiff diff --git a/include/bsdiff/bsdiff.h b/include/bsdiff/bsdiff.h index 08cb688..b6475f4 100644 --- a/include/bsdiff/bsdiff.h +++ b/include/bsdiff/bsdiff.h @@ -34,6 +34,23 @@ int bsdiff(const uint8_t* old_buf, PatchWriterInterface* patch, SuffixArrayIndexInterface** sai_cache); +// The |min_length| parameter determines the required minimum length of a match +// to be considered instead of emitting mismatches. The minimum value is 9, +// since smaller matches are always ignored. If a smaller value is passed, the +// minimum value of 9 will be used instead. A very large value (past 30) will +// give increasingly bad results as you increase the minimum length since legit +// matches between the old and new data will be ignored. The exact best value +// depends on the data, but the sweet spot should be between 9 and 20 for the +// examples tested. +BSDIFF_EXPORT +int bsdiff(const uint8_t* old_buf, + size_t oldsize, + const uint8_t* new_buf, + size_t newsize, + size_t min_length, + PatchWriterInterface* patch, + SuffixArrayIndexInterface** sai_cache); + } // namespace bsdiff #endif // _BSDIFF_BSDIFF_H_ |