summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorandroid-build-team Robot <android-build-team-robot@google.com>2018-02-14 08:22:22 +0000
committerandroid-build-team Robot <android-build-team-robot@google.com>2018-02-14 08:22:22 +0000
commit1b46ecc637b20328bf1520eab77a7f1f8f3374fe (patch)
treefbd6896513ee8d422f91ca6dbcbe5983932c975e
parent7583a7b8fca786075ad89d86da43865d95dad625 (diff)
parent0b6eab0f1421d6abfd701b751e90c2dec0e9bb51 (diff)
downloadbsdiff-1b46ecc637b20328bf1520eab77a7f1f8f3374fe.tar.gz
Snap for 4603395 from 0b6eab0f1421d6abfd701b751e90c2dec0e9bb51 to pi-release
Change-Id: I070f8c2b9816a2390e8996451675919ed17d1dc1
-rw-r--r--bsdiff.cc12
-rw-r--r--bsdiff_arguments.cc25
-rw-r--r--bsdiff_arguments.h17
-rw-r--r--bsdiff_arguments_unittest.cc18
-rw-r--r--bsdiff_main.cc6
-rw-r--r--bsdiff_unittest.cc24
-rw-r--r--include/bsdiff/bsdiff.h17
7 files changed, 104 insertions, 15 deletions
diff --git a/bsdiff.cc b/bsdiff.cc
index 01cd721..c775bdf 100644
--- a/bsdiff.cc
+++ b/bsdiff.cc
@@ -50,12 +50,20 @@ int bsdiff(const uint8_t* old_buf, size_t oldsize, const uint8_t* new_buf,
size_t newsize, const char* patch_filename,
SuffixArrayIndexInterface** sai_cache) {
BsdiffPatchWriter patch(patch_filename);
- return bsdiff(old_buf, oldsize, new_buf, newsize, &patch, sai_cache);
+ return bsdiff(old_buf, oldsize, new_buf, newsize, 0, &patch,
+ sai_cache);
}
int bsdiff(const uint8_t* old_buf, size_t oldsize, const uint8_t* new_buf,
size_t newsize, PatchWriterInterface* patch,
SuffixArrayIndexInterface** sai_cache) {
+ return bsdiff(old_buf, oldsize, new_buf, newsize, 0, patch,
+ sai_cache);
+}
+
+int bsdiff(const uint8_t* old_buf, size_t oldsize, const uint8_t* new_buf,
+ size_t newsize, size_t min_length, PatchWriterInterface* patch,
+ SuffixArrayIndexInterface** sai_cache) {
size_t scsc, scan;
uint64_t pos=0;
size_t len;
@@ -112,7 +120,7 @@ int bsdiff(const uint8_t* old_buf, size_t oldsize, const uint8_t* new_buf,
oldscore++;
if(((len==oldscore) && (len!=0)) ||
- (len>oldscore+8)) break;
+ (len>oldscore+8 && len>=min_length)) break;
if((scan+lastoffset<oldsize) &&
(old_buf[scan+lastoffset] == new_buf[scan]))
diff --git a/bsdiff_arguments.cc b/bsdiff_arguments.cc
index fabb7ef..f56c186 100644
--- a/bsdiff_arguments.cc
+++ b/bsdiff_arguments.cc
@@ -24,6 +24,7 @@ constexpr char kBsdiff40String[] = "bsdiff40";
const struct option OPTIONS[] = {
{"format", required_argument, nullptr, 0},
+ {"minlen", required_argument, nullptr, 0},
{"type", required_argument, nullptr, 0},
{"quality", required_argument, nullptr, 0},
{nullptr, 0, nullptr, 0},
@@ -58,11 +59,15 @@ bool BsdiffArguments::ParseCommandLine(int argc, char** argv) {
return false;
}
- std::string name = OPTIONS[option_index].name;
+ string name = OPTIONS[option_index].name;
if (name == "format") {
if (!ParseBsdiffFormat(optarg, &format_)) {
return false;
}
+ } else if (name == "minlen") {
+ if (!ParseMinLength(optarg, &min_length_)) {
+ return false;
+ }
} else if (name == "type") {
if (!ParseCompressorType(optarg, &compressor_type_)) {
return false;
@@ -110,6 +115,24 @@ bool BsdiffArguments::ParseCompressorType(const string& str,
return false;
}
+bool BsdiffArguments::ParseMinLength(const string& str, size_t* len) {
+ errno = 0;
+ char* end;
+ const char* s = str.c_str();
+ long result = strtol(s, &end, 10);
+ if (errno != 0 || s == end || *end != '\0') {
+ return false;
+ }
+
+ if (result < 0) {
+ std::cerr << "Minimum length must be non-negative: " << str << endl;
+ return false;
+ }
+
+ *len = result;
+ return true;
+}
+
bool BsdiffArguments::ParseBsdiffFormat(const string& str,
BsdiffFormat* format) {
string format_string = str;
diff --git a/bsdiff_arguments.h b/bsdiff_arguments.h
index 4d0e1e0..b330834 100644
--- a/bsdiff_arguments.h
+++ b/bsdiff_arguments.h
@@ -12,8 +12,6 @@
#include "bsdiff/constants.h"
#include "bsdiff/patch_writer_interface.h"
-using std::string;
-
namespace bsdiff {
// Class to store the patch writer options about format, type and quality.
@@ -35,6 +33,8 @@ class BsdiffArguments {
// Getter functions.
BsdiffFormat format() const { return format_; }
+ int min_length() const { return min_length_; }
+
CompressorType compressor_type() const { return compressor_type_; }
int compression_quality() const { return compression_quality_; }
@@ -44,13 +44,16 @@ class BsdiffArguments {
bool ParseCommandLine(int argc, char** argv);
// Parse the compression type from string.
- static bool ParseCompressorType(const string& str, CompressorType* type);
+ static bool ParseCompressorType(const std::string& str, CompressorType* type);
+
+ // Parse the minimum length parameter from string.
+ static bool ParseMinLength(const std::string& str, size_t* len);
// Parse the bsdiff format from string.
- static bool ParseBsdiffFormat(const string& str, BsdiffFormat* format);
+ static bool ParseBsdiffFormat(const std::string& str, BsdiffFormat* format);
// Parse the compression quality (for brotli) from string.
- static bool ParseQuality(const string& str, int* quality);
+ static bool ParseQuality(const std::string& str, int* quality);
private:
// Current format supported are the legacy "BSDIFF40" or "BSDF2".
@@ -62,8 +65,10 @@ class BsdiffArguments {
// The quality of compression, only valid when using brotli as the
// compression algorithm.
int compression_quality_;
+
+ size_t min_length_{0};
};
} // namespace bsdiff
-#endif //_BSDIFF_BSDIFF_ARGUMENTS_H_
+#endif // _BSDIFF_BSDIFF_ARGUMENTS_H_
diff --git a/bsdiff_arguments_unittest.cc b/bsdiff_arguments_unittest.cc
index 15bfbf5..3f061c9 100644
--- a/bsdiff_arguments_unittest.cc
+++ b/bsdiff_arguments_unittest.cc
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "bsdiff_arguments.h"
+#include "bsdiff/bsdiff_arguments.h"
#include <vector>
@@ -46,6 +46,16 @@ TEST(BsdiffArgumentsTest, ParseQualityTest) {
EXPECT_FALSE(BsdiffArguments::ParseQuality("aabb", &quality));
}
+TEST(BsdiffArgumentsTest, ParseMinLengthTest) {
+ size_t len;
+ EXPECT_TRUE(BsdiffArguments::ParseMinLength("11", &len));
+ EXPECT_EQ(11U, len);
+
+ // Check the out of range quality values.
+ EXPECT_FALSE(BsdiffArguments::ParseMinLength("-1", &len));
+ EXPECT_FALSE(BsdiffArguments::ParseMinLength("aabb", &len));
+}
+
TEST(BsdiffArgumentsTest, ArgumentsValidTest) {
// Default arguments using BsdiffFormat::kLegacy and CompressorType::kBZ2
// should be valid.
@@ -67,14 +77,16 @@ TEST(BsdiffArgumentsTest, ArgumentsValidTest) {
TEST(BsdiffArgumentsTest, ParseArgumentsSmokeTest) {
std::vector<const char*> args = {"bsdiff", "--format=bsdf2", "--type=brotli",
- "--quality=9"};
+ "--quality=9", "--minlen=12"};
BsdiffArguments arguments;
- EXPECT_TRUE(arguments.ParseCommandLine(4, const_cast<char**>(args.data())));
+ EXPECT_TRUE(
+ arguments.ParseCommandLine(args.size(), const_cast<char**>(args.data())));
EXPECT_EQ(BsdiffFormat::kBsdf2, arguments.format());
EXPECT_EQ(CompressorType::kBrotli, arguments.compressor_type());
EXPECT_EQ(9, arguments.compression_quality());
+ EXPECT_EQ(12, arguments.min_length());
}
} // namespace bsdiff
diff --git a/bsdiff_main.cc b/bsdiff_main.cc
index bce9453..1465441 100644
--- a/bsdiff_main.cc
+++ b/bsdiff_main.cc
@@ -85,8 +85,8 @@ int GenerateBsdiffFromFiles(const char* old_filename,
return 1;
}
- return bsdiff::bsdiff(old_buf, oldsize, new_buf, newsize, patch_writer.get(),
- nullptr);
+ return bsdiff::bsdiff(old_buf, oldsize, new_buf, newsize,
+ arguments.min_length(), patch_writer.get(), nullptr);
}
void PrintUsage(const std::string& proc_name) {
@@ -94,6 +94,8 @@ void PrintUsage(const std::string& proc_name) {
<< " [options] oldfile newfile patchfile\n";
std::cerr << " --format <legacy|bsdiff40|bsdf2> The format of the bsdiff"
" patch.\n"
+ << " --minlen LEN The minimum match length required "
+ "to consider a match in the algorithm.\n"
<< " --type <bz2|brotli> The algorithm to compress the "
"patch, bsdf2 format only.\n"
<< " --quality Quality of the patch compression,"
diff --git a/bsdiff_unittest.cc b/bsdiff_unittest.cc
index 6bd128d..ef4d3ef 100644
--- a/bsdiff_unittest.cc
+++ b/bsdiff_unittest.cc
@@ -33,11 +33,12 @@ class BsdiffTest : public testing::Test {
void RunBsdiff() {
EXPECT_EQ(0, bsdiff(old_file_.data(), old_file_.size(), new_file_.data(),
- new_file_.size(), &patch_writer_, nullptr));
+ new_file_.size(), min_len_, &patch_writer_, nullptr));
}
std::vector<uint8_t> old_file_;
std::vector<uint8_t> new_file_;
+ size_t min_len_ = 0; // 0 means the default.
FakePatchWriter patch_writer_;
};
@@ -81,4 +82,25 @@ TEST_F(BsdiffTest, FileWithSmallErrorsTest) {
EXPECT_EQ(0U, entry.extra_size);
}
+TEST_F(BsdiffTest, MinLengthConsideredTest) {
+ old_file_.resize(100);
+ GenerateRandomBuffer(&old_file_);
+ new_file_ = old_file_;
+ // Copy the first 10 bytes to the middle.
+ for (size_t i = 0; i < 10; i++) {
+ new_file_[50 + i] = old_file_[i];
+ }
+
+ min_len_ = 12;
+ RunBsdiff();
+
+ // We expect that the 10 bytes in the middle that match the beginning are
+ // ignored and just emitted as diff data because the min_len is bigger than
+ // 10.
+ EXPECT_EQ(1U, patch_writer_.entries().size());
+ ControlEntry entry = patch_writer_.entries()[0];
+ EXPECT_EQ(100U, entry.diff_size);
+ EXPECT_EQ(0U, entry.extra_size);
+}
+
} // namespace bsdiff
diff --git a/include/bsdiff/bsdiff.h b/include/bsdiff/bsdiff.h
index 08cb688..b6475f4 100644
--- a/include/bsdiff/bsdiff.h
+++ b/include/bsdiff/bsdiff.h
@@ -34,6 +34,23 @@ int bsdiff(const uint8_t* old_buf,
PatchWriterInterface* patch,
SuffixArrayIndexInterface** sai_cache);
+// The |min_length| parameter determines the required minimum length of a match
+// to be considered instead of emitting mismatches. The minimum value is 9,
+// since smaller matches are always ignored. If a smaller value is passed, the
+// minimum value of 9 will be used instead. A very large value (past 30) will
+// give increasingly bad results as you increase the minimum length since legit
+// matches between the old and new data will be ignored. The exact best value
+// depends on the data, but the sweet spot should be between 9 and 20 for the
+// examples tested.
+BSDIFF_EXPORT
+int bsdiff(const uint8_t* old_buf,
+ size_t oldsize,
+ const uint8_t* new_buf,
+ size_t newsize,
+ size_t min_length,
+ PatchWriterInterface* patch,
+ SuffixArrayIndexInterface** sai_cache);
+
} // namespace bsdiff
#endif // _BSDIFF_BSDIFF_H_