aboutsummaryrefslogtreecommitdiff
path: root/src/vcdiff_main.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/vcdiff_main.cc')
-rw-r--r--src/vcdiff_main.cc656
1 files changed, 656 insertions, 0 deletions
diff --git a/src/vcdiff_main.cc b/src/vcdiff_main.cc
new file mode 100644
index 0000000..c54053d
--- /dev/null
+++ b/src/vcdiff_main.cc
@@ -0,0 +1,656 @@
+// Copyright 2008 Google Inc.
+// Author: Lincoln Smith
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// A command-line interface to the open-vcdiff library.
+
+#include <config.h>
+#include <assert.h>
+#include <errno.h>
+#ifdef WIN32
+#include <fcntl.h>
+#include <io.h>
+#endif // WIN32
+#include <stdio.h>
+#include <string.h> // strerror
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+#include "gflags/gflags.h"
+#include "google/vcdecoder.h"
+#include "google/vcencoder.h"
+
+#ifndef HAS_GLOBAL_STRING
+using std::string;
+#endif // !HAS_GLOBAL_STRING
+using google::GetCommandLineFlagInfoOrDie;
+using google::ShowUsageWithFlagsRestrict;
+
+static const size_t kDefaultMaxTargetSize = 1 << 26; // 64 MB
+
+// Definitions of command-line flags
+DEFINE_string(dictionary, "",
+ "File containing dictionary data (required)");
+DEFINE_string(target, "",
+ "Target file (default is stdin for encode, stdout for decode");
+DEFINE_string(delta, "",
+ "Encoded delta file (default is stdout for encode, "
+ "stdin for decode");
+// --buffersize is the maximum allowable size of a target window.
+// This value may be increased if there is sufficient memory available.
+DEFINE_uint64(buffersize, 1 << 20, // 1 MB
+ "Buffer size for reading input file");
+DEFINE_bool(allow_vcd_target, true,
+ "If false, the decoder issues an error when the VCD_TARGET flag "
+ "is encountered");
+DEFINE_bool(checksum, false,
+ "Include an Adler32 checksum of the target data when encoding");
+DEFINE_bool(interleaved, false, "Use interleaved format");
+DEFINE_bool(json, false, "Output diff in the JSON format when encoding");
+DEFINE_bool(stats, false, "Report compression percentage");
+DEFINE_bool(target_matches, false, "Find duplicate strings in target data"
+ " as well as dictionary data");
+DEFINE_uint64(max_target_file_size, kDefaultMaxTargetSize,
+ "Maximum target file size allowed by decoder");
+DEFINE_uint64(max_target_window_size, kDefaultMaxTargetSize,
+ "Maximum target window size allowed by decoder");
+
+static const char* const kUsageString =
+ " {encode | delta | decode | patch }[ <options> ]\n"
+ "encode or delta: create delta file from dictionary and target file\n"
+ "decode or patch: reconstruct target file from dictionary and delta file";
+
+namespace open_vcdiff {
+
+class VCDiffFileBasedCoder {
+ public:
+ VCDiffFileBasedCoder();
+ ~VCDiffFileBasedCoder();
+
+ // Once the command-line arguments have been parsed, these functions
+ // will use the supplied options to carry out a file-based encode
+ // or decode operation.
+ bool Encode();
+ bool Decode();
+ bool DecodeAndCompare(); // for "vcdiff test"; compare target with original
+
+ private:
+ // Determines the size of the file. The given file must be an input file
+ // opened for reading only, not an input stream such as stdin. The function
+ // returns true and populates file_size if successful; otherwise, it returns
+ // false.
+ static bool FileSize(FILE* file, size_t* file_size);
+
+ // Opens a file for incremental reading. file_name is the name of the file
+ // to be opened. file_type should be a descriptive name (like "target") for
+ // use in log messages. If successful, returns true and sets *file to a
+ // valid input file, *buffer to a region of memory allocated using malloc()
+ // (so the caller must release it using free()), and buffer_size to the size
+ // of the buffer, which will not be larger than the size of the file, and
+ // will not be smaller than the --buffersize option. If the function fails,
+ // it outputs a log message and returns false.
+ bool OpenFileForReading(const string& file_name,
+ const char* file_type,
+ FILE** file,
+ std::vector<char>* buffer);
+
+ // Opens the dictionary file and reads it into a newly allocated buffer.
+ // If successful, returns true and populates dictionary_ with the dictionary
+ // contents; otherwise, returns false.
+ bool OpenDictionary();
+
+ // Opens the input file (the delta or target file) for reading.
+ // Allocates space for the input buffer. If successful,
+ // input_file_ will be valid and input_buffer_ will be allocated.
+ bool OpenInputFile() {
+ return OpenFileForReading(input_file_name_,
+ input_file_type_,
+ &input_file_,
+ &input_buffer_);
+ }
+
+ // Opens the output file (the target or delta file) for writing.
+ // If successful, output_file_ will be valid.
+ bool OpenOutputFile();
+
+ // Opens the output file (the target file) for comparison against the decoded
+ // output when using "vcdiff test".
+ bool OpenOutputFileForCompare() {
+ return OpenFileForReading(output_file_name_,
+ output_file_type_,
+ &output_file_,
+ &compare_buffer_);
+ }
+
+ // Reads as much input data as possible from the input file
+ // into input_buffer_. If successful, returns true and sets *bytes_read
+ // to the number of bytes read into input_buffer_. If an error occurs,
+ // writes an error log message and returns false.
+ bool ReadInput(size_t* bytes_read);
+
+ // Writes the contents of output to output_file_. If successful, returns
+ // true. If an error occurs, writes an error log message and returns false.
+ bool WriteOutput(const string& output);
+
+ // Reads a number of bytes from output_file_ equal to the size of output,
+ // and compares to make sure they match the contents of output. If the bytes
+ // do not match, or if end of file is reached before the expected number of
+ // bytes have been read, or a read error occurs, the function returns false;
+ // otherwise, returns true.
+ bool CompareOutput(const string& output);
+
+ // Dictionary contents. The entire dictionary file will be read into memory.
+ std::vector<char> dictionary_;
+
+ std::auto_ptr<open_vcdiff::HashedDictionary> hashed_dictionary_;
+
+ // These should be set to either "delta" or "target". They are only
+ // used in log messages such as "Error opening delta file..."
+ const char* input_file_type_;
+ const char* output_file_type_;
+
+ // The filenames used for input and output. Will be empty if stdin
+ // or stdout is being used.
+ string input_file_name_;
+ string output_file_name_;
+
+ // stdio-style file handles for the input and output files and the dictionary.
+ // When encoding, input_file_ is the target file and output_file_ is the delta
+ // file; when decoding, the reverse is true. The dictionary is always read
+ // from a file rather than from standard input.
+ FILE* input_file_;
+ FILE* output_file_;
+
+ // A memory buffer used to load the input file into memory. If the input
+ // comes from stdin because no input file was specified, then the size of
+ // input_buffer_ will be the value specified by the --buffersize option.
+ // If the input comes from a file, then the buffer will be allocated to match
+ // the file size, if possible. However, the buffer will not exceed
+ // --buffersize bytes in length.
+ std::vector<char> input_buffer_;
+
+ // A memory buffer used to load the output file into memory for comparison
+ // if "vcdiff test" is specified.
+ std::vector<char> compare_buffer_;
+
+ // Making these private avoids implicit copy constructor & assignment operator
+ VCDiffFileBasedCoder(const VCDiffFileBasedCoder&); // NOLINT
+ void operator=(const VCDiffFileBasedCoder&);
+};
+
+inline VCDiffFileBasedCoder::VCDiffFileBasedCoder()
+ : input_file_type_(""),
+ output_file_type_(""),
+ input_file_(NULL),
+ output_file_(NULL) { }
+
+VCDiffFileBasedCoder::~VCDiffFileBasedCoder() {
+ if (input_file_ && (input_file_ != stdin)) {
+ fclose(input_file_);
+ input_file_ = NULL;
+ }
+ if (output_file_ && (output_file_ != stdout)) {
+ fclose(output_file_);
+ output_file_ = NULL;
+ }
+}
+
+bool VCDiffFileBasedCoder::FileSize(FILE* file, size_t* file_size) {
+ long initial_position = ftell(file);
+ if (fseek(file, 0, SEEK_END) != 0) {
+ return false;
+ }
+ *file_size = static_cast<size_t>(ftell(file));
+ if (fseek(file, initial_position, SEEK_SET) != 0) {
+ return false;
+ }
+ return true;
+}
+
+bool VCDiffFileBasedCoder::OpenDictionary() {
+ assert(dictionary_.empty());
+ assert(!FLAGS_dictionary.empty());
+ FILE* dictionary_file = fopen(FLAGS_dictionary.c_str(), "rb");
+ if (!dictionary_file) {
+ std::cerr << "Error opening dictionary file '" << FLAGS_dictionary
+ << "': " << strerror(errno) << std::endl;
+ return false;
+ }
+ size_t dictionary_size = 0U;
+ if (!FileSize(dictionary_file, &dictionary_size)) {
+ std::cerr << "Error finding size of dictionary file '" << FLAGS_dictionary
+ << "': " << strerror(errno) << std::endl;
+ return false;
+ }
+ dictionary_.resize(dictionary_size);
+ if (dictionary_size > 0) {
+ if (fread(&dictionary_[0], 1, dictionary_size, dictionary_file)
+ != dictionary_size) {
+ std::cerr << "Unable to read dictionary file '" << FLAGS_dictionary
+ << "': " << strerror(errno) << std::endl;
+ fclose(dictionary_file);
+ dictionary_.clear();
+ return false;
+ }
+ }
+ fclose(dictionary_file);
+ return true;
+}
+
+bool VCDiffFileBasedCoder::OpenFileForReading(const string& file_name,
+ const char* file_type,
+ FILE** file,
+ std::vector<char>* buffer) {
+ assert(buffer->empty());
+ size_t buffer_size = 0U;
+ if (!*file && file_name.empty()) {
+#ifdef WIN32
+ _setmode(_fileno(stdin), _O_BINARY);
+#endif
+ *file = stdin;
+ buffer_size = static_cast<size_t>(FLAGS_buffersize);
+ } else {
+ if (!*file) {
+ *file = fopen(file_name.c_str(), "rb");
+ if (!*file) {
+ std::cerr << "Error opening " << file_type << " file '"
+ << file_name << "': " << strerror(errno) << std::endl;
+ return false;
+ }
+ }
+ size_t file_size = 0U;
+ if (!FileSize(*file, &file_size)) {
+ std::cerr << "Error finding size of " << file_type << " file '"
+ << file_name << "': " << strerror(errno) << std::endl;
+ return false;
+ }
+ buffer_size = static_cast<size_t>(FLAGS_buffersize);
+ if (file_size < buffer_size) {
+ // Allocate just enough memory to store the entire file
+ buffer_size = file_size;
+ }
+ }
+ buffer->resize(buffer_size);
+ return true;
+}
+
+// Opens the output file for streamed read operations using the
+// standard C I/O library, i.e., fopen(), fwrite(), fclose().
+// No output buffer is allocated because the encoded/decoded output
+// is constructed progressively using a std::string object
+// whose buffer is resized as needed.
+bool VCDiffFileBasedCoder::OpenOutputFile() {
+ if (output_file_name_.empty()) {
+#ifdef WIN32
+ _setmode(_fileno(stdout), _O_BINARY);
+#endif
+ output_file_ = stdout;
+ } else {
+ output_file_ = fopen(output_file_name_.c_str(), "wb");
+ if (!output_file_) {
+ std::cerr << "Error opening " << output_file_type_ << " file '"
+ << output_file_name_
+ << "': " << strerror(errno) << std::endl;
+ return false;
+ }
+ }
+ return true;
+}
+
+bool VCDiffFileBasedCoder::ReadInput(size_t* bytes_read) {
+ // Read from file or stdin
+ *bytes_read = fread(&input_buffer_[0], 1, input_buffer_.size(), input_file_);
+ if (ferror(input_file_)) {
+ std::cerr << "Error reading from " << input_file_type_ << " file '"
+ << input_file_name_
+ << "': " << strerror(errno) << std::endl;
+ return false;
+ }
+ return true;
+}
+
+bool VCDiffFileBasedCoder::WriteOutput(const string& output) {
+ if (!output.empty()) {
+ // Some new output has been generated and is ready to be written
+ // to the output file or to stdout.
+ fwrite(output.data(), 1, output.size(), output_file_);
+ if (ferror(output_file_)) {
+ std::cerr << "Error writing " << output.size() << " bytes to "
+ << output_file_type_ << " file '" << output_file_name_
+ << "': " << strerror(errno) << std::endl;
+ return false;
+ }
+ }
+ return true;
+}
+
+bool VCDiffFileBasedCoder::CompareOutput(const string& output) {
+ if (!output.empty()) {
+ size_t output_size = output.size();
+ // Some new output has been generated and is ready to be compared against
+ // the output file.
+ if (output_size > compare_buffer_.size()) {
+ compare_buffer_.resize(output_size);
+ }
+ size_t bytes_read = fread(&compare_buffer_[0],
+ 1,
+ output_size,
+ output_file_);
+ if (ferror(output_file_)) {
+ std::cerr << "Error reading from " << output_file_type_ << " file '"
+ << output_file_name_ << "': " << strerror(errno) << std::endl;
+ return false;
+ }
+ if (bytes_read < output_size) {
+ std::cerr << "Decoded target is longer than original target file"
+ << std::endl;
+ return false;
+ }
+ if (output.compare(0, output_size, &compare_buffer_[0], bytes_read) != 0) {
+ std::cerr << "Original target file does not match decoded target"
+ << std::endl;
+ return false;
+ }
+ }
+ return true;
+}
+
+bool VCDiffFileBasedCoder::Encode() {
+ input_file_type_ = "target";
+ input_file_name_ = FLAGS_target;
+ output_file_type_ = "delta";
+ output_file_name_ = FLAGS_delta;
+ if (!OpenDictionary() || !OpenInputFile() || !OpenOutputFile()) {
+ return false;
+ }
+ // Issue 6: Visual Studio STL produces a runtime exception
+ // if &dictionary_[0] is attempted for an empty dictionary.
+ if (dictionary_.empty()) {
+ hashed_dictionary_.reset(new open_vcdiff::HashedDictionary("", 0));
+ } else {
+ hashed_dictionary_.reset(
+ new open_vcdiff::HashedDictionary(&dictionary_[0],
+ dictionary_.size()));
+ }
+ if (!hashed_dictionary_->Init()) {
+ std::cerr << "Error initializing hashed dictionary" << std::endl;
+ return false;
+ }
+ VCDiffFormatExtensionFlags format_flags = open_vcdiff::VCD_STANDARD_FORMAT;
+ if (FLAGS_interleaved) {
+ format_flags |= open_vcdiff::VCD_FORMAT_INTERLEAVED;
+ }
+ if (FLAGS_checksum) {
+ format_flags |= open_vcdiff::VCD_FORMAT_CHECKSUM;
+ }
+ if (FLAGS_json) {
+ format_flags |= open_vcdiff::VCD_FORMAT_JSON;
+ }
+ open_vcdiff::VCDiffStreamingEncoder encoder(hashed_dictionary_.get(),
+ format_flags,
+ FLAGS_target_matches);
+ string output;
+ size_t input_size = 0;
+ size_t output_size = 0;
+ {
+ if (!encoder.StartEncoding(&output)) {
+ std::cerr << "Error during encoder initialization" << std::endl;
+ return false;
+ }
+ }
+ do {
+ size_t bytes_read = 0;
+ if (!WriteOutput(output) || !ReadInput(&bytes_read)) {
+ return false;
+ }
+ output_size += output.size();
+ output.clear();
+ if (bytes_read > 0) {
+ input_size += bytes_read;
+ if (!encoder.EncodeChunk(&input_buffer_[0], bytes_read, &output)) {
+ std::cerr << "Error trying to encode data chunk of length "
+ << bytes_read << std::endl;
+ return false;
+ }
+ }
+ } while (!feof(input_file_));
+ encoder.FinishEncoding(&output);
+ if (!WriteOutput(output)) {
+ return false;
+ }
+ output_size += output.size();
+ output.clear();
+ if (FLAGS_stats && (input_size > 0)) {
+ std::cerr << "Original size: " << input_size
+ << "\tCompressed size: " << output_size << " ("
+ << ((static_cast<double>(output_size) / input_size) * 100)
+ << "% of original)" << std::endl;
+ }
+ return true;
+}
+
+bool VCDiffFileBasedCoder::Decode() {
+ input_file_type_ = "delta";
+ input_file_name_ = FLAGS_delta;
+ output_file_type_ = "target";
+ output_file_name_ = FLAGS_target;
+ if (!OpenDictionary() || !OpenInputFile() || !OpenOutputFile()) {
+ return false;
+ }
+
+ open_vcdiff::VCDiffStreamingDecoder decoder;
+ decoder.SetMaximumTargetFileSize(
+ static_cast<size_t>(FLAGS_max_target_file_size));
+ decoder.SetMaximumTargetWindowSize(
+ static_cast<size_t>(FLAGS_max_target_window_size));
+ decoder.SetAllowVcdTarget(FLAGS_allow_vcd_target);
+ string output;
+ size_t input_size = 0;
+ size_t output_size = 0;
+ // Issue 6: Visual Studio STL produces a runtime exception
+ // if &dictionary_[0] is attempted for an empty dictionary.
+ if (dictionary_.empty()) {
+ decoder.StartDecoding("", 0);
+ } else {
+ decoder.StartDecoding(&dictionary_[0], dictionary_.size());
+ }
+
+ do {
+ size_t bytes_read = 0;
+ if (!ReadInput(&bytes_read)) {
+ return false;
+ }
+ if (bytes_read > 0) {
+ input_size += bytes_read;
+ if (!decoder.DecodeChunk(&input_buffer_[0], bytes_read, &output)) {
+ std::cerr << "Error trying to decode data chunk of length "
+ << bytes_read << std::endl;
+ return false;
+ }
+ }
+ if (!WriteOutput(output)) {
+ return false;
+ }
+ output_size += output.size();
+ output.clear();
+ } while (!feof(input_file_));
+ if (!decoder.FinishDecoding()) {
+ std::cerr << "Decode error; '" << FLAGS_delta
+ << " may not be a valid VCDIFF delta file" << std::endl;
+ return false;
+ }
+ if (!WriteOutput(output)) {
+ return false;
+ }
+ output_size += output.size();
+ output.clear();
+ if (FLAGS_stats && (output_size > 0)) {
+ std::cerr << "Decompressed size: " << output_size
+ << "\tCompressed size: " << input_size << " ("
+ << ((static_cast<double>(input_size) / output_size) * 100)
+ << "% of original)" << std::endl;
+ }
+ return true;
+}
+
+bool VCDiffFileBasedCoder::DecodeAndCompare() {
+ input_file_type_ = "delta";
+ input_file_name_ = FLAGS_delta;
+ output_file_type_ = "target";
+ output_file_name_ = FLAGS_target;
+ if (!OpenDictionary() || !OpenInputFile() || !OpenOutputFileForCompare()) {
+ return false;
+ }
+
+ open_vcdiff::VCDiffStreamingDecoder decoder;
+ decoder.SetMaximumTargetFileSize(
+ static_cast<size_t>(FLAGS_max_target_file_size));
+ decoder.SetMaximumTargetWindowSize(
+ static_cast<size_t>(FLAGS_max_target_window_size));
+ decoder.SetAllowVcdTarget(FLAGS_allow_vcd_target);
+ string output;
+ size_t input_size = 0;
+ size_t output_size = 0;
+ // Issue 6: Visual Studio STL produces a runtime exception
+ // if &dictionary_[0] is attempted for an empty dictionary.
+ if (dictionary_.empty()) {
+ decoder.StartDecoding("", 0);
+ } else {
+ decoder.StartDecoding(&dictionary_[0], dictionary_.size());
+ }
+
+ do {
+ size_t bytes_read = 0;
+ if (!ReadInput(&bytes_read)) {
+ return false;
+ }
+ if (bytes_read > 0) {
+ input_size += bytes_read;
+ if (!decoder.DecodeChunk(&input_buffer_[0], bytes_read, &output)) {
+ std::cerr << "Error trying to decode data chunk of length "
+ << bytes_read << std::endl;
+ return false;
+ }
+ }
+ if (!CompareOutput(output)) {
+ return false;
+ }
+ output_size += output.size();
+ output.clear();
+ } while (!feof(input_file_));
+ if (!decoder.FinishDecoding()) {
+ std::cerr << "Decode error; '" << FLAGS_delta
+ << " may not be a valid VCDIFF delta file" << std::endl;
+ return false;
+ }
+ if (!CompareOutput(output)) {
+ return false;
+ }
+ output_size += output.size();
+ output.clear();
+ if (fgetc(output_file_) != EOF) {
+ std::cerr << "Decoded target is shorter than original target file"
+ << std::endl;
+ return false;
+ }
+ if (ferror(output_file_)) {
+ std::cerr << "Error reading end-of-file indicator from target file"
+ << std::endl;
+ return false;
+ }
+ if (FLAGS_stats && (output_size > 0)) {
+ std::cerr << "Decompressed size: " << output_size
+ << "\tCompressed size: " << input_size << " ("
+ << ((static_cast<double>(input_size) / output_size) * 100)
+ << "% of original)" << std::endl;
+ }
+ return true;
+}
+
+} // namespace open_vcdiff
+
+int main(int argc, char** argv) {
+ const char* const command_name = argv[0];
+ google::SetUsageMessage(kUsageString);
+ google::ParseCommandLineFlags(&argc, &argv, true);
+ if (argc != 2) {
+ std::cerr << command_name << ": Must specify exactly one command option"
+ << std::endl;
+ ShowUsageWithFlagsRestrict(command_name, "vcdiff");
+ return 1;
+ }
+ const char* const command_option = argv[1];
+ if (FLAGS_dictionary.empty()) {
+ std::cerr << command_name << " " << command_option
+ << ": Must specify --dictionary <file-name>" << std::endl;
+ ShowUsageWithFlagsRestrict(command_name, "vcdiff");
+ return 1;
+ }
+ if (!GetCommandLineFlagInfoOrDie("buffersize").is_default &&
+ (FLAGS_buffersize == 0)) {
+ std::cerr << command_name << ": Option --buffersize cannot be 0"
+ << std::endl;
+ ShowUsageWithFlagsRestrict(command_name, "vcdiff");
+ return 1;
+ }
+ if ((strcmp(command_option, "encode") == 0) ||
+ (strcmp(command_option, "delta") == 0)) {
+ open_vcdiff::VCDiffFileBasedCoder coder;
+ if (!coder.Encode()) {
+ return 1;
+ }
+ // The destructor for VCDiffFileBasedCoder will clean up the open files
+ // and allocated memory.
+ } else if ((strcmp(command_option, "decode") == 0) ||
+ (strcmp(command_option, "patch") == 0)) {
+ open_vcdiff::VCDiffFileBasedCoder coder;
+ if (!coder.Decode()) {
+ return 1;
+ }
+ } else if ((strcmp(command_option, "test") == 0)) {
+ // "vcdiff test" does not appear in the usage string, but can be
+ // used for debugging. It encodes, then decodes, then compares the result
+ // with the original target. It expects the same arguments as
+ // "vcdiff encode", with the additional requirement that the --target
+ // and --delta file arguments must be specified, rather than using stdin
+ // or stdout. It produces a delta file just as for "vcdiff encode".
+ if (FLAGS_target.empty() || FLAGS_delta.empty()) {
+ std::cerr << command_name
+ << " test: Must specify both --target <file-name>"
+ " and --delta <file-name>" << std::endl;
+ return 1;
+ }
+ const string original_target(FLAGS_target);
+ // Put coder into a separate scope.
+ {
+ open_vcdiff::VCDiffFileBasedCoder coder;
+ if (!coder.Encode()) {
+ return 1;
+ }
+ }
+ {
+ open_vcdiff::VCDiffFileBasedCoder coder;
+ if (!coder.DecodeAndCompare()) {
+ return 1;
+ }
+ }
+ } else {
+ std::cerr << command_name << ": Unrecognized command option "
+ << command_option << std::endl;
+ ShowUsageWithFlagsRestrict(command_name, "vcdiff");
+ return 1;
+ }
+ return 0;
+}