diff options
author | openvcdiff <openvcdiff@132ac840-3546-0410-a738-d3f8764196be> | 2008-08-26 19:29:25 +0000 |
---|---|---|
committer | openvcdiff <openvcdiff@132ac840-3546-0410-a738-d3f8764196be> | 2008-08-26 19:29:25 +0000 |
commit | 311c71486f5f6074e5ba62a7f4c5397c8700b868 (patch) | |
tree | 3851b12e95a0f6a5a30deb52ae13ae7453f606bc /src | |
parent | a2f33801808f7704582f62e098c0aff24a22def5 (diff) | |
download | open-vcdiff-311c71486f5f6074e5ba62a7f4c5397c8700b868.tar.gz |
Mon, 16 Jun 2008 15:15:51 -0700 Google Inc. <opensource@google.com>
* open-vcdiff: initial release:
The open-vcdiff package provides an encoder and decoder for the VCDIFF format
described in RFC 3284 (http://www.ietf.org/rfc/rfc3284.txt).
git-svn-id: http://open-vcdiff.googlecode.com/svn/trunk@7 132ac840-3546-0410-a738-d3f8764196be
Diffstat (limited to 'src')
75 files changed, 34358 insertions, 0 deletions
diff --git a/src/addrcache.cc b/src/addrcache.cc new file mode 100644 index 0000000..2948266 --- /dev/null +++ b/src/addrcache.cc @@ -0,0 +1,331 @@ +// Copyright 2007 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Implementation of the Address Cache and Address Encoding +// algorithms described in sections 5.1 - 5.4 of RFC 3284 - +// The VCDIFF Generic Differencing and Compression Data Format. +// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html +// +// Assumptions: +// * The VCDAddress type is large enough to hold any offset within +// the source and target windows. The limit (for int32_t) is 2^31-1 bytes. +// The source (dictionary) should not approach this size limit; +// to compress a target file that is larger than +// INT_MAX - (dictionary size) bytes, the encoder must +// break it up into multiple target windows. + +#include <config.h> +#include "addrcache.h" +#include "logging.h" +#include "varint_bigendian.h" +#include "vcdiff_defs.h" // RESULT_ERROR + +namespace open_vcdiff { + +// The constructor does not initialize near_addresses_ and same_addresses_. +// Therefore, Init() must be called before any other method can be used. 
+// +// Arguments: +// near_cache_size: Size of the NEAR cache (number of 4-byte integers) +// same_cache_size: Size of the SAME cache (number of blocks of +// 256 4-byte integers per block) +// Because the mode is expressed as a byte value, +// near_cache_size + same_cache_size should not exceed 254. +// +VCDiffAddressCache::VCDiffAddressCache(int near_cache_size, + int same_cache_size) + : near_cache_size_(near_cache_size), + same_cache_size_(same_cache_size), + next_slot_(0) { } + +VCDiffAddressCache::VCDiffAddressCache() + : near_cache_size_(kDefaultNearCacheSize), + same_cache_size_(kDefaultSameCacheSize), + next_slot_(0) { } + +// Sets up data structures needed to call other methods. Operations that may +// fail at runtime (for example, validating the provided near_cache_size_ and +// same_cache_size_ parameters against their maximum allowed values) are +// confined to this routine in order to guarantee that the class constructor +// will never fail. Other methods (except the destructor) cannot be invoked +// until this method has been called successfully. After the object has been +// initialized and used, Init() can be called again to reset it to its initial +// state. +// +// Return value: "true" if initialization succeeded, "false" if it failed. +// No other method except the destructor may be invoked if this function +// returns false. The caller is responsible for checking the return value +// and providing an exit path in case of error. +// +bool VCDiffAddressCache::Init() { + // The mode is expressed as a byte value, so there is only room for 256 modes, + // including the two non-cached modes (SELF and HERE). Do not allow a larger + // number of modes to be defined. We do a separate sanity check for + // near_cache_size_ and same_cache_size_ because adding them together can + // cause an integer overflow if each is set to, say, INT_MAX. 
+ if ((near_cache_size_ > (VCD_MAX_MODES - 2)) || (near_cache_size_ < 0)) { + LOG(ERROR) << "Near cache size " << near_cache_size_ << " is invalid" + << LOG_ENDL; + return false; + } + if ((same_cache_size_ > (VCD_MAX_MODES - 2)) || (same_cache_size_ < 0)) { + LOG(ERROR) << "Same cache size " << same_cache_size_ << " is invalid" + << LOG_ENDL; + return false; + } + if ((near_cache_size_ + same_cache_size_) > VCD_MAX_MODES - 2) { + LOG(ERROR) << "Using near cache size " << near_cache_size_ + << " and same cache size " << same_cache_size_ + << " would exceed maximum number of COPY modes (" + << VCD_MAX_MODES << ")" << LOG_ENDL; + return false; + } + if (near_cache_size_ > 0) { + near_addresses_.assign(near_cache_size_, 0); + } + if (same_cache_size_ > 0) { + same_addresses_.assign(same_cache_size_ * 256, 0); + } + next_slot_ = 0; // in case Init() is called a second time to reinit + return true; +} + +// This method will be called whenever an address is calculated for an +// encoded or decoded COPY instruction, and will update the contents +// of the SAME and NEAR caches. It is vital that the use of +// UpdateCache (called cache_update in the RFC examples) exactly match +// the RFC standard, and that the same caching logic be used in the +// decoder as in the encoder, in order for the decoded addresses to +// match. +// +// Argument: +// address: This must be a valid address between 0 and +// (source window size + target window size). It is assumed that +// these bounds have been checked before calling UpdateCache. 
+// +void VCDiffAddressCache::UpdateCache(VCDAddress address) { + if (near_cache_size_ > 0) { + near_addresses_[next_slot_] = address; + next_slot_ = (next_slot_ + 1) % near_cache_size_; + } + if (same_cache_size_ > 0) { + same_addresses_[address % (same_cache_size_ * 256)] = address; + } +} + +// Determines the address mode that yields the most compact encoding +// of the given address value, stores the encoded address in +// *encoded_addr, and returns the mode used. The most compact encoding +// is found by looking for the numerically lowest encoded address. +// The Init() function must already have been called. +// +// Arguments: +// address: The address to be encoded. Must be a non-negative integer +// between 0 and (here_address - 1). +// here_address: The current location in the target data (i.e., the +// position just after the last encoded value.) Must be non-negative. +// encoded_addr: Points to a VCDAddress that will be replaced +// with the encoded representation of address. +// If WriteAddressAsVarintForMode returns true when passed +// the return value, then encoded_addr should be written +// into the delta file as a variable-length integer (Varint); +// otherwise, it should be written as a byte (unsigned char). +// +// Return value: A mode value between 0 and 255. The mode will tell +// how to interpret the next value in the address stream. +// The values 0 and 1 correspond to SELF and HERE addressing. +// +// The function is guaranteed to succeed unless the conditions on the arguments +// have not been met, in which case a LOG(DFATAL) message will be produced, +// 0 will be returned, and *encoded_addr will be replaced with 0. 
+// +unsigned char VCDiffAddressCache::EncodeAddress(VCDAddress address, + VCDAddress here_address, + VCDAddress* encoded_addr) { + if (address < 0) { + LOG(DFATAL) << "EncodeAddress was passed a negative address: " + << address << LOG_ENDL; + *encoded_addr = 0; + return 0; + } + if (address >= here_address) { + LOG(DFATAL) << "EncodeAddress was called with address (" << address + << ") < here_address (" << here_address << ")" << LOG_ENDL; + *encoded_addr = 0; + return 0; + } + // Try using the SAME cache. This method, if available, always + // results in the smallest encoding and takes priority over other modes. + if (same_cache_size() > 0) { + const VCDAddress same_cache_pos = + address % (same_cache_size() * 256); + if (SameAddress(same_cache_pos) == address) { + // This is the only mode for which a single byte will be written + // to the address stream instead of a variable-length integer. + UpdateCache(address); + *encoded_addr = same_cache_pos % 256; + return FirstSameMode() + (same_cache_pos / 256); // SAME mode + } + } + + // Try SELF mode + unsigned char best_mode = VCD_SELF_MODE; + VCDAddress best_encoded_address = address; + + // Try HERE mode + { + const VCDAddress here_encoded_address = here_address - address; + if (here_encoded_address < best_encoded_address) { + best_mode = VCD_HERE_MODE; + best_encoded_address = here_encoded_address; + } + } + + // Try using the NEAR cache + for (int i = 0; i < near_cache_size(); ++i) { + const VCDAddress near_encoded_address = address - NearAddress(i); + if ((near_encoded_address >= 0) && + (near_encoded_address < best_encoded_address)) { + best_mode = FirstNearMode() + i; + best_encoded_address = near_encoded_address; + } + } + + UpdateCache(address); + *encoded_addr = best_encoded_address; + return best_mode; +} + +// Increments *byte_pointer and returns the byte it pointed to before the +// increment. The caller must check bounds to ensure that *byte_pointer +// points to a valid address in memory. 
+static unsigned char ParseByte(const char** byte_pointer) { + unsigned char byte_value = static_cast<unsigned char>(**byte_pointer); + ++(*byte_pointer); + return byte_value; +} + +// Checks the given decoded address for validity. Returns true if the +// address is valid; otherwise, prints an error message to the log and +// returns false. +static bool IsDecodedAddressValid(VCDAddress decoded_address, + VCDAddress here_address) { + if (decoded_address < 0) { + LOG(ERROR) << "Decoded address " << decoded_address << " is invalid" + << LOG_ENDL; + return false; + } else if (decoded_address >= here_address) { + LOG(ERROR) << "Decoded address (" << decoded_address + << ") is beyond location in target file (" << here_address + << ")" << LOG_ENDL; + return false; + } + return true; +} + +// Interprets the next value in the address_stream using the provided mode, +// which may need to access the SAME or NEAR address cache. Returns the +// decoded address. +// The Init() function must already have been called. +// +// Arguments: +// here_address: The current location in the source + target data (i.e., the +// location into which the COPY instruction will copy.) By definition, +// all addresses between 0 and (here_address - 1) are valid, and +// any other address is invalid. +// mode: A byte value between 0 and (near_cache_size_ + same_cache_size_ + 1) +// which tells how to interpret the next value in the address stream. +// The values 0 and 1 correspond to SELF and HERE addressing. +// The validity of "mode" should already have been checked before +// calling this function. +// address_stream: Points to a pointer holding the position +// in the "Addresses section for COPYs" part of the input data. +// That section must already have been uncompressed +// using a secondary decompressor (if necessary.) +// This is an IN/OUT argument; the value of *address_stream will be +// incremented by the size of an integer, or (if the SAME cache +// was used) by the size of a byte (1). 
+// address_stream_end: Points to the position just after the end of +// the address stream buffer. All addresses between *address_stream +// and address_stream_end should contain valid address data. +// +// Return value: If the input conditions were met, and the address section +// of the input data contains properly encoded addresses that match +// the instructions section, then an integer between 0 and here_address - 1 +// will be returned, representing the address from which data should +// be copied from the source or target window into the output stream. +// If an invalid address value is found in address_stream, then +// RESULT_ERROR will be returned. If the limit address_stream_end +// is reached before the address can be decoded, then +// RESULT_END_OF_DATA will be returned. If more streamed data +// is expected, this means that the consumer should block and wait +// for more data before continuing to decode. If no more data is expected, +// this return value signals an error condition. 
+// +VCDAddress VCDiffAddressCache::DecodeAddress(VCDAddress here_address, + unsigned char mode, + const char** address_stream, + const char* address_stream_end) { + if (here_address < 0) { + LOG(DFATAL) << "DecodeAddress was passed a negative value" + " for here_address: " << here_address << LOG_ENDL; + return RESULT_ERROR; + } + const char* new_address_pos = *address_stream; + if (new_address_pos >= address_stream_end) { + return RESULT_END_OF_DATA; + } + VCDAddress decoded_address; + if (IsSameMode(mode)) { + // SAME mode expects a byte value as the encoded address + unsigned char encoded_address = ParseByte(&new_address_pos); + decoded_address = DecodeSameAddress(mode, encoded_address); + } else { + // All modes except SAME mode expect a VarintBE as the encoded address + int32_t encoded_address = VarintBE<int32_t>::Parse(address_stream_end, + &new_address_pos); + switch (encoded_address) { + case RESULT_ERROR: + LOG(ERROR) << "Found invalid variable-length integer " + "as encoded address value" << LOG_ENDL; + return RESULT_ERROR; + case RESULT_END_OF_DATA: + return RESULT_END_OF_DATA; + default: + break; + } + if (IsSelfMode(mode)) { + decoded_address = DecodeSelfAddress(encoded_address); + } else if (IsHereMode(mode)) { + decoded_address = DecodeHereAddress(encoded_address, here_address); + } else if (IsNearMode(mode)) { + decoded_address = DecodeNearAddress(mode, encoded_address); + } else { + LOG(DFATAL) << "Invalid mode value (" << static_cast<int>(mode) + << ") passed to DecodeAddress; maximum mode value = " + << static_cast<int>(LastMode()) << LOG_ENDL; + return RESULT_ERROR; + } + } + // Check for an out-of-bounds address (corrupt/malicious data) + if (!IsDecodedAddressValid(decoded_address, here_address)) { + return RESULT_ERROR; + } + *address_stream = new_address_pos; + UpdateCache(decoded_address); + return decoded_address; +} + +} // namespace open_vcdiff diff --git a/src/addrcache.h b/src/addrcache.h new file mode 100644 index 0000000..f2862e8 --- 
/dev/null +++ b/src/addrcache.h @@ -0,0 +1,218 @@ +// Copyright 2007 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Classes to implement the Address Cache and Address Encoding +// algorithms described in sections 5.1 - 5.4 of RFC 3284 - +// The VCDIFF Generic Differencing and Compression Data Format. +// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html + +#ifndef OPEN_VCDIFF_ADDRCACHE_H_ +#define OPEN_VCDIFF_ADDRCACHE_H_ + +#include <config.h> +#include <cstring> // memset +#include <vector> +#include "vcdiff_defs.h" // VCDAddress + +namespace open_vcdiff { + +// Implements the "same" and "near" caches +// as described in RFC 3284, section 5. The "near" cache allows +// efficient reuse of one of the last four referenced addresses +// plus a small offset, and the "same" cache allows efficient reuse +// of an exact recent address distinguished by its lowest-order bits. +// +// NOT threadsafe. +// +class VCDiffAddressCache { + public: + // The default cache sizes specified in the RFC + static const int kDefaultNearCacheSize = 4; + static const int kDefaultSameCacheSize = 3; + + VCDiffAddressCache(int near_cache_size, int same_cache_size); + + // This version of the constructor uses the default values + // kDefaultNearCacheSize and kDefaultSameCacheSize. + VCDiffAddressCache(); + + // Initializes the object before use. 
This method must be called after + // constructing a VCDiffAddressCache object, before any other method may be + // called. This is because Init() validates near_cache_size_ and + // same_cache_size_ before initializing the same and near caches. After the + // object has been initialized and used, Init() can be called again to reset + // it to its initial state. + // + bool Init(); + + int near_cache_size() const { return near_cache_size_; } + + int same_cache_size() const { return same_cache_size_; } + + // Returns the first mode number that represents one of the NEAR modes. + // The number of NEAR modes is near_cache_size. Each NEAR mode refers to + // an element of the near_addresses_ array, where a recently-referenced + // address is stored. + // + static const unsigned char FirstNearMode() { + return VCD_FIRST_NEAR_MODE; + } + + // Returns the first mode number that represents one of the SAME modes. + // The number of SAME modes is same_cache_size. Each SAME mode refers to + // a block of 256 elements of the same_addresses_ array; the lowest-order + // 8 bits of the address are used to find the element of this block that + // may match the desired address value. + // + const unsigned char FirstSameMode() const { + return VCD_FIRST_NEAR_MODE + near_cache_size(); + } + + // Returns the maximum valid mode number, which happens to be + // the last SAME mode. + // + const unsigned char LastMode() const { + return FirstSameMode() + same_cache_size() - 1; + } + + static const unsigned char DefaultLastMode() { + return VCD_FIRST_NEAR_MODE + + kDefaultNearCacheSize + kDefaultSameCacheSize - 1; + } + + // See the definition of enum VCDiffModes in vcdiff_defs.h, + // as well as section 5.3 of the RFC, for a description of + // each address mode type (SELF, HERE, NEAR, and SAME). 
+ static bool IsSelfMode(unsigned char mode) { + return mode == VCD_SELF_MODE; + } + + static bool IsHereMode(unsigned char mode) { + return mode == VCD_HERE_MODE; + } + + bool IsNearMode(unsigned char mode) const { + return (mode >= FirstNearMode()) && (mode < FirstSameMode()); + } + + bool IsSameMode(unsigned char mode) const { + return (mode >= FirstSameMode()) && (mode <= LastMode()); + } + + static VCDAddress DecodeSelfAddress(int32_t encoded_address) { + return encoded_address; + } + + static VCDAddress DecodeHereAddress(int32_t encoded_address, + VCDAddress here_address) { + return here_address - encoded_address; + } + + VCDAddress DecodeNearAddress(unsigned char mode, + int32_t encoded_address) const { + return NearAddress(mode - FirstNearMode()) + encoded_address; + } + + VCDAddress DecodeSameAddress(unsigned char mode, + unsigned char encoded_address) const { + return SameAddress(((mode - FirstSameMode()) * 256) + encoded_address); + } + + // Returns true if, when using the given mode, an encoded address + // should be written to the delta file as a variable-length integer; + // returns false if the encoded address should be written + // as a byte value (unsigned char). + bool WriteAddressAsVarintForMode(unsigned char mode) const { + return !IsSameMode(mode); + } + + // An accessor for an element of the near_addresses_ array. + // No bounds checking is performed; the caller must ensure that + // Init() has already been called, and that + // 0 <= pos < near_cache_size_ + // + VCDAddress NearAddress(int pos) const { + return near_addresses_[pos]; + } + + // An accessor for an element of the same_addresses_ array. 
+ // No bounds checking is performed; the caller must ensure that + // Init() has already been called, and that + // 0 <= pos < (same_cache_size_ * 256) + // + VCDAddress SameAddress(int pos) const { + return same_addresses_[pos]; + } + + // This method will be called whenever an address is calculated for an + // encoded or decoded COPY instruction, and will update the contents + // of the SAME and NEAR caches. + // + void UpdateCache(VCDAddress address); + + // Determines the address mode that yields the most compact encoding + // of the given address value. The most compact encoding + // is found by looking for the numerically lowest encoded address. + // Sets *encoded_addr to the encoded representation of the address + // and returns the mode used. + // + // The caller should pass the return value to the method + // WriteAddressAsVarintForMode() to determine whether encoded_addr + // should be written to the delta file as a variable-length integer + // or as a byte (unsigned char). + // + unsigned char EncodeAddress(VCDAddress address, + VCDAddress here_address, + VCDAddress* encoded_addr); + + // Interprets the next value in the address_stream using the provided mode, + // which may need to access the SAME or NEAR address cache. Returns the + // decoded address, or one of the following values: + // RESULT_ERROR: An invalid address value was found in address_stream. + // RESULT_END_OF_DATA: The limit address_stream_end was reached before + // the address could be decoded. If more streamed data is expected, + // this means that the consumer should block and wait for more data + // before continuing to decode. If no more data is expected, this + // return value signals an error condition. + // + // If successful, *address_stream will be incremented past the decoded address + // position. If RESULT_ERROR or RESULT_END_OF_DATA is returned, + // then the value of *address_stream will not have changed. 
+ // + VCDAddress DecodeAddress(VCDAddress here_address, + unsigned char mode, + const char** address_stream, + const char* address_stream_end); + + private: + // The number of addresses to be kept in the NEAR cache. + const int near_cache_size_; + // The number of blocks of 256 addresses to store in the SAME cache. + const int same_cache_size_; + // The next position in the NEAR cache to which an address will be written. + int next_slot_; + // NEAR cache contents + std::vector<VCDAddress> near_addresses_; + // SAME cache contents + std::vector<VCDAddress> same_addresses_; + + // Making these private avoids implicit copy constructor & assignment operator + VCDiffAddressCache(const VCDiffAddressCache&); // NOLINT + void operator=(const VCDiffAddressCache&); +}; + +} // namespace open_vcdiff + +#endif // OPEN_VCDIFF_ADDRCACHE_H_ diff --git a/src/addrcache_test.cc b/src/addrcache_test.cc new file mode 100644 index 0000000..2e7632d --- /dev/null +++ b/src/addrcache_test.cc @@ -0,0 +1,670 @@ +// Copyright 2007 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <config.h> +#include "addrcache.h" +#include <stdint.h> // uint32_t +#include <climits> // INT_MAX, INT_MIN +#include <cstdlib> // rand, srand +#include <string> +#include <vector> +#include "logging.h" +#include "testing.h" +#include "varint_bigendian.h" +#include "vcdiff_defs.h" // RESULT_ERROR + +namespace open_vcdiff { + +using std::string; + +namespace { + +// Provides an address_stream_ buffer and functions to manually encode +// values into the buffer, and to manually decode and verify test results +// from the buffer. +// +class VCDiffAddressCacheTest : public testing::Test { + public: + VCDiffAddressCacheTest() : decode_position_(NULL), + decode_position_end_(NULL), + verify_encode_position_(NULL), + last_encode_size_(0), + last_decode_position_(NULL) { } + + virtual ~VCDiffAddressCacheTest() { } + + virtual void SetUp() { + EXPECT_TRUE(cache_.Init()); + } + + // Benchmarks for timing encode/decode operations + void BM_Setup(int test_size, bool print_stats); + void BM_CacheEncode(int iterations, int test_size); + void BM_CacheDecode(int iterations, int test_size); + + protected: + virtual void TestBody() { } // to allow instantiation of this class + + void BeginDecode() { + decode_position_ = address_stream_.data(); + EXPECT_TRUE(decode_position_ != NULL); + last_decode_position_ = decode_position_; + decode_position_end_ = decode_position_ + address_stream_.size(); + } + + void ExpectEncodedSizeInBytes(int n) { + EXPECT_EQ(last_encode_size_ + n, address_stream_.size()); + last_encode_size_ = address_stream_.size(); + } + + void ExpectDecodedSizeInBytes(int n) { + EXPECT_EQ(last_decode_position_ + n, decode_position_); + last_decode_position_ = decode_position_; + } + + void ManualEncodeVarint(VCDAddress value) { + VarintBE<VCDAddress>::AppendToString(value, &address_stream_); + } + + void ManualEncodeByte(unsigned char byte) { + address_stream_.push_back(byte); + } + + void ExpectEncodedVarint(VCDAddress expected_value, int expected_size) { + 
if (!verify_encode_position_) { + verify_encode_position_ = address_stream_.data(); + } + EXPECT_EQ(expected_size, VarintBE<VCDAddress>::Length(expected_value)); + VCDAddress output_val = VarintBE<VCDAddress>::Parse( + address_stream_.data() + address_stream_.size(), + &verify_encode_position_); + EXPECT_EQ(expected_value, output_val); + } + + void ExpectEncodedByte(unsigned char expected_value) { + if (!verify_encode_position_) { + verify_encode_position_ = address_stream_.data(); + } + EXPECT_EQ(expected_value, *verify_encode_position_); + ++verify_encode_position_; + } + + void TestEncode(VCDAddress address, + VCDAddress here_address, + unsigned char mode, + int size) { + VCDAddress encoded_addr = 0; + EXPECT_EQ(mode, cache_.EncodeAddress(address, here_address, &encoded_addr)); + if (cache_.WriteAddressAsVarintForMode(mode)) { + ManualEncodeVarint(encoded_addr); + } else { + EXPECT_GT(256, encoded_addr); + ManualEncodeByte(static_cast<unsigned char>(encoded_addr)); + } + ExpectEncodedSizeInBytes(size); + } + + VCDiffAddressCache cache_; + string address_stream_; + const char* decode_position_; + const char* decode_position_end_; + string large_address_stream_; + std::vector<unsigned char> mode_stream_; + std::vector<VCDAddress> verify_stream_; + + private: + const char* verify_encode_position_; + string::size_type last_encode_size_; + const char* last_decode_position_; +}; + +#ifdef GTEST_HAS_DEATH_TEST +// This synonym is needed for the tests that use ASSERT_DEATH +typedef VCDiffAddressCacheTest VCDiffAddressCacheDeathTest; +#endif // GTEST_HAS_DEATH_TEST + +// Having either or both cache size == 0 is acceptable +TEST_F(VCDiffAddressCacheTest, ZeroCacheSizes) { + VCDiffAddressCache zero_cache(0, 0); + EXPECT_TRUE(zero_cache.Init()); +} + +TEST_F(VCDiffAddressCacheTest, NegativeCacheSizes) { + VCDiffAddressCache negative_cache(-1, -1); // The constructor must not fail + EXPECT_FALSE(negative_cache.Init()); +} + +TEST_F(VCDiffAddressCacheTest, 
OnlySameCacheSizeIsNegative) { + VCDiffAddressCache negative_cache(0, -1); // The constructor must not fail + EXPECT_FALSE(negative_cache.Init()); +} + +TEST_F(VCDiffAddressCacheTest, ExtremePositiveCacheSizes) { + // The constructor must not fail + VCDiffAddressCache int_max_cache(INT_MAX, INT_MAX); + EXPECT_FALSE(int_max_cache.Init()); +} + +TEST_F(VCDiffAddressCacheTest, ExtremeNegativeCacheSizes) { + // The constructor must not fail + VCDiffAddressCache int_min_cache(INT_MIN, INT_MIN); + EXPECT_FALSE(int_min_cache.Init()); +} + +// VCD_MAX_MODES is the maximum number of modes, including SELF and HERE modes. +// So neither the NEAR cache nor the SAME cache can be larger than +// (VCD_MAX_MODES - 2). +TEST_F(VCDiffAddressCacheTest, NearCacheSizeIsTooBig) { + VCDiffAddressCache negative_cache(VCD_MAX_MODES - 1, 0); + EXPECT_FALSE(negative_cache.Init()); +} + +TEST_F(VCDiffAddressCacheTest, SameCacheSizeIsTooBig) { + VCDiffAddressCache negative_cache(0, VCD_MAX_MODES - 1); + EXPECT_FALSE(negative_cache.Init()); +} + +TEST_F(VCDiffAddressCacheTest, CombinedSizesAreTooBig) { + VCDiffAddressCache negative_cache((VCD_MAX_MODES / 2), + (VCD_MAX_MODES / 2) - 1); + EXPECT_FALSE(negative_cache.Init()); +} + +TEST_F(VCDiffAddressCacheTest, MaxLegalNearCacheSize) { + VCDiffAddressCache negative_cache(VCD_MAX_MODES - 2, 0); + EXPECT_TRUE(negative_cache.Init()); +} + +TEST_F(VCDiffAddressCacheTest, MaxLegalSameCacheSize) { + VCDiffAddressCache negative_cache(0, VCD_MAX_MODES - 2); + EXPECT_TRUE(negative_cache.Init()); +} + +TEST_F(VCDiffAddressCacheTest, MaxLegalCombinedSizes) { + VCDiffAddressCache negative_cache((VCD_MAX_MODES / 2) - 1, + (VCD_MAX_MODES / 2) - 1); + EXPECT_TRUE(negative_cache.Init()); +} + +TEST_F(VCDiffAddressCacheTest, DestroyWithoutInitialization) { + VCDiffAddressCache no_init_cache(4, 3); + // Should be destroyed without crashing +} + +TEST_F(VCDiffAddressCacheTest, DestroyDefaultWithoutInitialization) { + VCDiffAddressCache no_init_cache; + // Should 
be destroyed without crashing +} + +TEST_F(VCDiffAddressCacheTest, CacheContentsInitiallyZero) { + VCDAddress test_address = 0; + // Check that caches are initially set to zero + for (test_address = 0; test_address < 4; ++test_address) { + EXPECT_EQ(0, cache_.NearAddress(test_address)); + } + for (test_address = 0; test_address < 256 * 3; ++test_address) { + EXPECT_EQ(0, cache_.SameAddress(test_address)); + } +} + +// Inserts values 1, 2, ... , 10 into the cache and tests its entire +// contents for consistency. +// +TEST_F(VCDiffAddressCacheTest, InsertFirstTen) { + VCDAddress test_address = 0; + for (test_address = 1; test_address <= 10; ++test_address) { + cache_.UpdateCache(test_address); + } + EXPECT_EQ(9, cache_.NearAddress(0)); // slot 0: 1 => 5 => 9 + EXPECT_EQ(10, cache_.NearAddress(1)); // slot 1: 2 => 6 => 10 + EXPECT_EQ(7, cache_.NearAddress(2)); // slot 2: 3 => 7 + EXPECT_EQ(8, cache_.NearAddress(3)); // slot 3: 4 => 8 + EXPECT_EQ(0, cache_.SameAddress(0)); + for (test_address = 1; test_address <= 10; ++test_address) { + EXPECT_EQ(test_address, cache_.SameAddress(test_address)); + } + for (test_address = 11; test_address < 256 * 3; ++test_address) { + EXPECT_EQ(0, cache_.SameAddress(test_address)); + } +} + +TEST_F(VCDiffAddressCacheTest, InsertIntMax) { + cache_.UpdateCache(INT_MAX); + EXPECT_EQ(INT_MAX, cache_.NearAddress(0)); + EXPECT_EQ(INT_MAX, cache_.SameAddress(INT_MAX % (256 * 3))); + EXPECT_EQ(0, cache_.SameAddress((INT_MAX - 256) % (256 * 3))); + EXPECT_EQ(0, cache_.SameAddress((INT_MAX - 512) % (256 * 3))); +} + +// Exercises all four addressing mode types by encoding five values +// with EncodeAddress. +// Checks to see that the proper mode was selected in each case, +// and that the encoding is correct. 
+// +TEST_F(VCDiffAddressCacheTest, EncodeAddressModes) { + TestEncode(0x0000FFFF, 0x10000000, VCD_SELF_MODE, 3); + TestEncode(0x10000000, 0x10000010, VCD_HERE_MODE, 1); + TestEncode(0x10000004, 0x10000020, cache_.FirstNearMode() + 0x01, 1); + TestEncode(0x0FFFFFFE, 0x10000030, VCD_HERE_MODE, 1); + TestEncode(0x10000004, 0x10000040, cache_.FirstSameMode() + 0x01, 1); + ExpectEncodedVarint(0xFFFF, 3); // SELF mode: addr 0x0000FFFF + ExpectEncodedVarint(0x10, 1); // HERE mode: here - 0x10 = 0x10000000 + ExpectEncodedVarint(0x04, 1); // NEAR cache #1: + // last addr + 0x4 = 0x10000004 + ExpectEncodedVarint(0x32, 1); // HERE mode: here - 0x32 = 0x0FFFFFFE + ExpectEncodedByte(0x04); // SAME cache #1: 0x10000004 hits +} + +// Exercises all four addressing mode types by manually encoding six values +// and calling DecodeAddress on each one. +// +TEST_F(VCDiffAddressCacheTest, DecodeAddressModes) { + ManualEncodeVarint(0xCAFE); + ManualEncodeVarint(0xCAFE); + ManualEncodeVarint(0x1000); + ManualEncodeByte(0xFE); // SAME mode uses a byte, not a Varint + ManualEncodeVarint(0xFE); + ManualEncodeVarint(0x1000); + BeginDecode(); + EXPECT_EQ(0xCAFE, + cache_.DecodeAddress(0x10000, + VCD_SELF_MODE, + &decode_position_, + decode_position_end_)); + ExpectDecodedSizeInBytes(VarintBE<VCDAddress>::Length(0xCAFE)); + EXPECT_EQ(0x20000 - 0xCAFE, + cache_.DecodeAddress(0x20000, + VCD_HERE_MODE, + &decode_position_, + decode_position_end_)); + ExpectDecodedSizeInBytes(VarintBE<VCDAddress>::Length(0xCAFE)); + EXPECT_EQ(0xDAFE, + cache_.DecodeAddress(0x30000, + cache_.FirstNearMode(), + &decode_position_, + decode_position_end_)); + ExpectDecodedSizeInBytes(VarintBE<VCDAddress>::Length(0x1000)); + EXPECT_EQ(0xCAFE, + cache_.DecodeAddress(0x40000, + cache_.FirstSameMode() + (0xCA % 3), + &decode_position_, + decode_position_end_)); + ExpectDecodedSizeInBytes(sizeof(unsigned char)); // a byte, not a Varint + EXPECT_EQ(0xFE, + cache_.DecodeAddress(0x50000, + VCD_SELF_MODE, + &decode_position_, 
+ decode_position_end_)); + ExpectDecodedSizeInBytes(VarintBE<VCDAddress>::Length(0xFE)); + // NEAR mode #0 has been overwritten by fifth computed addr (wrap around) + EXPECT_EQ(0x10FE, + cache_.DecodeAddress(0x60000, + cache_.FirstNearMode(), + &decode_position_, + decode_position_end_)); + ExpectDecodedSizeInBytes(VarintBE<VCDAddress>::Length(0x1000)); +} + +// Test with both cache sizes == 0. The encoder should not choose +// a SAME or NEAR mode under these conditions. +TEST_F(VCDiffAddressCacheTest, EncodeAddressZeroCacheSizes) { + VCDAddress encoded_addr = 0; + VCDiffAddressCache zero_cache(0, 0); + EXPECT_TRUE(zero_cache.Init()); + EXPECT_EQ(VCD_SELF_MODE, + zero_cache.EncodeAddress(0x0000FFFF, 0x10000000, &encoded_addr)); + EXPECT_EQ(0xFFFF, encoded_addr); + EXPECT_EQ(VCD_HERE_MODE, + zero_cache.EncodeAddress(0x10000000, 0x10000010, &encoded_addr)); + EXPECT_EQ(0x10, encoded_addr); + EXPECT_EQ(VCD_HERE_MODE, + zero_cache.EncodeAddress(0x10000004, 0x10000020, &encoded_addr)); + EXPECT_EQ(0x1C, encoded_addr); + EXPECT_EQ(VCD_HERE_MODE, + zero_cache.EncodeAddress(0x0FFFFFFE, 0x10000030, &encoded_addr)); + EXPECT_EQ(0x32, encoded_addr); + EXPECT_EQ(VCD_HERE_MODE, + zero_cache.EncodeAddress(0x10000004, 0x10000040, &encoded_addr)); + EXPECT_EQ(0x3C, encoded_addr); +} + +TEST_F(VCDiffAddressCacheTest, DecodeAddressZeroCacheSizes) { + VCDiffAddressCache zero_cache(0, 0); + EXPECT_TRUE(zero_cache.Init()); + ManualEncodeVarint(0xCAFE); + ManualEncodeVarint(0xCAFE); + ManualEncodeVarint(0xDAFE); + BeginDecode(); + EXPECT_EQ(0xCAFE, zero_cache.DecodeAddress(0x10000, + VCD_SELF_MODE, + &decode_position_, + decode_position_end_)); + ExpectDecodedSizeInBytes(VarintBE<VCDAddress>::Length(0xCAFE)); + EXPECT_EQ(0x20000 - 0xCAFE, zero_cache.DecodeAddress(0x20000, + VCD_HERE_MODE, + &decode_position_, + decode_position_end_)); + ExpectDecodedSizeInBytes(VarintBE<VCDAddress>::Length(0xCAFE)); + EXPECT_EQ(0xDAFE, zero_cache.DecodeAddress(0x30000, + VCD_SELF_MODE, + 
&decode_position_, + decode_position_end_)); + ExpectDecodedSizeInBytes(VarintBE<VCDAddress>::Length(0xDAFE)); +} + +#ifdef GTEST_HAS_DEATH_TEST +TEST_F(VCDiffAddressCacheDeathTest, EncodeNegativeAddress) { + VCDAddress dummy_encoded_address = 0; + EXPECT_DEBUG_DEATH(cache_.EncodeAddress(-1, -1, &dummy_encoded_address), + "negative"); +} + +TEST_F(VCDiffAddressCacheDeathTest, EncodeAddressPastHereAddress) { + VCDAddress dummy_encoded_address = 0; + EXPECT_DEBUG_DEATH(cache_.EncodeAddress(0x100, 0x100, &dummy_encoded_address), + "address.*<.*here_address"); + EXPECT_DEBUG_DEATH(cache_.EncodeAddress(0x200, 0x100, &dummy_encoded_address), + "address.*<.*here_address"); +} + +TEST_F(VCDiffAddressCacheDeathTest, DecodeInvalidMode) { + ManualEncodeVarint(0xCAFE); + BeginDecode(); + EXPECT_DEBUG_DEATH(EXPECT_EQ(RESULT_ERROR, + cache_.DecodeAddress(0x10000000, + cache_.LastMode() + 1, + &decode_position_, + decode_position_end_)), + "mode"); + EXPECT_DEBUG_DEATH(EXPECT_EQ(RESULT_ERROR, + cache_.DecodeAddress(0x10000000, + 0xFF, + &decode_position_, + decode_position_end_)), + "mode"); + ExpectDecodedSizeInBytes(0); // Should not modify decode_position_ +} + +TEST_F(VCDiffAddressCacheDeathTest, DecodeZeroOrNegativeHereAddress) { + ManualEncodeVarint(0xCAFE); + ManualEncodeVarint(0xCAFE); + BeginDecode(); + // Using a Debug build, the check will fail; using a Release build, + // the check will not occur, and the SELF mode does not depend on + // the value of here_address, so DecodeAddress() will succeed. + EXPECT_DEBUG_DEATH(cache_.DecodeAddress(-1, + VCD_SELF_MODE, + &decode_position_, + decode_position_end_), + "negative"); + // A zero value for here_address should not kill the decoder, + // but instead should return an error value. A delta file may contain + // a window that has no source segment and that (erroneously) + // uses a COPY instruction as its first instruction. This should + // cause an error to be reported, not a debug check failure. 
+ EXPECT_EQ(RESULT_ERROR, cache_.DecodeAddress(0, + VCD_SELF_MODE, + &decode_position_, + decode_position_end_)); +} +#endif // GTEST_HAS_DEATH_TEST + +TEST_F(VCDiffAddressCacheTest, DecodeAddressPastHereAddress) { + ManualEncodeVarint(0xCAFE); + BeginDecode(); + EXPECT_EQ(RESULT_ERROR, cache_.DecodeAddress(0x1000, + VCD_SELF_MODE, + &decode_position_, + decode_position_end_)); + ExpectDecodedSizeInBytes(0); // Should not modify decode_position_ +} + +TEST_F(VCDiffAddressCacheTest, HereModeAddressTooLarge) { + ManualEncodeVarint(0x10001); // here_address + 1 + BeginDecode(); + EXPECT_EQ(RESULT_ERROR, cache_.DecodeAddress(0x10000, + VCD_HERE_MODE, + &decode_position_, + decode_position_end_)); + ExpectDecodedSizeInBytes(0); // Should not modify decode_position_ +} + +TEST_F(VCDiffAddressCacheTest, NearModeAddressOverflow) { + ManualEncodeVarint(0xCAFE); + ManualEncodeVarint(0x7FFFFFFF); + BeginDecode(); + EXPECT_EQ(0xCAFE, cache_.DecodeAddress(0x10000, + VCD_SELF_MODE, + &decode_position_, + decode_position_end_)); + ExpectDecodedSizeInBytes(VarintBE<VCDAddress>::Length(0xCAFE)); + // Now decode a NEAR mode address of base address 0xCAFE + // (the first decoded address) + offset 0x7FFFFFFF. This will cause + // an integer overflow and should signal an error. + EXPECT_EQ(RESULT_ERROR, cache_.DecodeAddress(0x10000000, + cache_.FirstNearMode(), + &decode_position_, + decode_position_end_)); + ExpectDecodedSizeInBytes(0); // Should not modify decode_position_ +} + +// A Varint should contain at most 9 bytes that have their continuation bit +// (the uppermost, or 7 bit) set. A longer string of bytes that all have +// bit 7 set is not a valid Varint. Try to parse such a string as a Varint +// and confirm that it does not run off the end of the input buffer and +// it returns an error value (RESULT_ERROR). 
+// +TEST_F(VCDiffAddressCacheTest, DecodeInvalidVarint) { + address_stream_.clear(); + // Write 512 0xFE bytes + address_stream_.append(512, static_cast<char>(0xFE)); + BeginDecode(); + EXPECT_EQ(RESULT_ERROR, cache_.DecodeAddress(0x10000000, + VCD_SELF_MODE, + &decode_position_, + decode_position_end_)); + ExpectDecodedSizeInBytes(0); // Should not modify decode_position_ +} + +// If only part of a Varint appears in the data to be decoded, +// then DecodeAddress should return RESULT_END_OF_DATA, +// which means that the Varint *may* be valid if there is more +// data expected to be returned. +// +TEST_F(VCDiffAddressCacheTest, DecodePartialVarint) { + address_stream_.clear(); + ManualEncodeByte(0xFE); + ManualEncodeByte(0xFE); + ManualEncodeByte(0xFE); + BeginDecode(); + EXPECT_EQ(RESULT_END_OF_DATA, + cache_.DecodeAddress(0x10000000, + VCD_SELF_MODE, + &decode_position_, + decode_position_end_)); + ExpectDecodedSizeInBytes(0); // Should not modify decode_position_ + // Now add the missing last byte (supposedly read from a stream of data) + // and verify that the Varint is now valid. 
+ ManualEncodeByte(0x01); // End the Varint with an additional byte + BeginDecode(); // Reset read position to start of data + EXPECT_EQ(0xFDFBF01, + cache_.DecodeAddress(0x10000000, + VCD_SELF_MODE, + &decode_position_, + decode_position_end_)); + ExpectDecodedSizeInBytes(4); // ManualEncodeByte was called for 4 byte values +} + +#ifdef GTEST_HAS_DEATH_TEST +TEST_F(VCDiffAddressCacheDeathTest, DecodeBadMode) { + ManualEncodeVarint(0xCAFE); + BeginDecode(); + EXPECT_DEBUG_DEATH(EXPECT_EQ(RESULT_ERROR, + cache_.DecodeAddress(0x10000, + cache_.LastMode() + 1, + &decode_position_, + decode_position_end_)), + "maximum"); + ExpectDecodedSizeInBytes(0); +} +#endif // GTEST_HAS_DEATH_TEST + +TEST_F(VCDiffAddressCacheTest, DecodeInvalidHereAddress) { + ManualEncodeVarint(0x10001); // offset larger than here_address + BeginDecode(); + EXPECT_EQ(RESULT_ERROR, cache_.DecodeAddress(0x10000, + VCD_HERE_MODE, + &decode_position_, + decode_position_end_)); + ExpectDecodedSizeInBytes(0); +} + +TEST_F(VCDiffAddressCacheTest, DecodeInvalidNearAddress) { + ManualEncodeVarint(0xCAFE); + ManualEncodeVarint(INT_MAX); // offset will cause integer overflow + BeginDecode(); + EXPECT_EQ(0xCAFE, + cache_.DecodeAddress(0x10000, + VCD_SELF_MODE, + &decode_position_, + decode_position_end_)); + ExpectDecodedSizeInBytes(VarintBE<VCDAddress>::Length(0xCAFE)); + EXPECT_EQ(RESULT_ERROR, cache_.DecodeAddress(0x10000, + cache_.FirstNearMode(), + &decode_position_, + decode_position_end_)); + ExpectDecodedSizeInBytes(0); +} + +void VCDiffAddressCacheTest::BM_Setup(int test_size, bool print_stats) { + mode_stream_.resize(test_size); + verify_stream_.resize(test_size); + VCDAddress here_address = 1; + srand(1); + for (int i = 0; i < test_size; ++i) { + verify_stream_[i] = PortableRandomInRange(here_address - 1); + here_address += 4; + } + BM_CacheEncode(1, test_size); // populate large_address_stream_, mode_stream_ + const size_t encoded_size_ = large_address_stream_.size(); + if (print_stats) { + // 
Count the percentages of modes used + int self_count = 0, here_count = 0, near_count = 0, same_count = 0; + for (int i = 0; i < test_size; ++i) { + if (mode_stream_[i] == VCD_SELF_MODE) { + ++self_count; + } else if (mode_stream_[i] == VCD_HERE_MODE) { + ++here_count; + } else if (mode_stream_[i] < cache_.FirstSameMode()) { + ++near_count; + } else { + ++same_count; + } + } + const int original_bytes = test_size * sizeof(verify_stream_[0]); + const int mode_bytes = test_size * sizeof(mode_stream_[0]); + double percent_compression = + ((1 - (static_cast<double>(encoded_size_ + mode_bytes) / original_bytes)) + * 100); + LOG(INFO) << "Encoded " << test_size << " addresses (" + << original_bytes << " bytes) into " + << encoded_size_ << " address bytes + " + << mode_bytes << " mode bytes: " + << percent_compression << "% compression" << LOG_ENDL; + LOG(INFO) << "SELF mode: " << self_count << " (" + << ((static_cast<double>(self_count) / test_size) * 100) << "%); " + << "HERE mode: " << here_count << " (" + << ((static_cast<double>(here_count) / test_size) * 100) << "%); " + << "NEAR mode: " << near_count << " (" + << ((static_cast<double>(near_count) / test_size) * 100) << "%); " + << "SAME mode: " << same_count << " (" + << ((static_cast<double>(same_count) / test_size) * 100) << "%)" + << LOG_ENDL; + } +} + +void VCDiffAddressCacheTest::BM_CacheEncode(int iterations, int test_size) { + VCDAddress here_address = 1; + VCDAddress encoded_addr = 0; + for (int test_iteration = 0; test_iteration < iterations; ++test_iteration) { + cache_.Init(); + large_address_stream_.clear(); + here_address = 1; + for (int i = 0; i < test_size; ++i) { + const unsigned char mode = cache_.EncodeAddress(verify_stream_[i], + here_address, + &encoded_addr); + if (cache_.WriteAddressAsVarintForMode(mode)) { + VarintBE<VCDAddress>::AppendToString(encoded_addr, + &large_address_stream_); + } else { + EXPECT_GT(256, encoded_addr); + large_address_stream_.push_back( + static_cast<unsigned 
char>(encoded_addr)); + } + mode_stream_[i] = mode; + here_address += 4; + } + } +} + +void VCDiffAddressCacheTest::BM_CacheDecode(int iterations, int test_size) { + VCDAddress here_address = 1; + for (int test_iteration = 0; test_iteration < iterations; ++test_iteration) { + cache_.Init(); + const char* large_decode_pointer = large_address_stream_.data(); + const char* const end_of_encoded_data = + large_decode_pointer + large_address_stream_.size(); + here_address = 1; + for (int i = 0; i < test_size; ++i) { + EXPECT_EQ(verify_stream_[i], + cache_.DecodeAddress(here_address, + mode_stream_[i], + &large_decode_pointer, + end_of_encoded_data)); + here_address += 4; + } + EXPECT_EQ(end_of_encoded_data, large_decode_pointer); + } +} + +TEST_F(VCDiffAddressCacheTest, PerformanceTest) { + const int test_size = 20 * 1024; // 20K random encode/decode operations + const int num_iterations = 40; // run test 40 times and take average + BM_Setup(test_size, true); + { + CycleTimer encode_timer; + encode_timer.Start(); + BM_CacheEncode(num_iterations, test_size); + encode_timer.Stop(); + double encode_time_in_ms = + static_cast<double>(encode_timer.GetInUsec()) / 1000; + LOG(INFO) << "Time to encode: " + << (encode_time_in_ms / num_iterations) << " ms" << LOG_ENDL; + } + { + CycleTimer decode_timer; + decode_timer.Start(); + BM_CacheDecode(num_iterations, test_size); + decode_timer.Stop(); + double decode_time_in_ms = + static_cast<double>(decode_timer.GetInUsec()) / 1000; + LOG(INFO) << "Time to decode: " + << (decode_time_in_ms / num_iterations) << " ms" << LOG_ENDL; + } +} + +} // unnamed namespace +} // namespace open_vcdiff diff --git a/src/adler32.c b/src/adler32.c new file mode 100644 index 0000000..b8f37bb --- /dev/null +++ b/src/adler32.c @@ -0,0 +1,189 @@ +/* adler32.c -- compute the Adler-32 checksum of a data stream + * Copyright (C) 1995-2004 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#define 
ZLIB_INTERNAL +#include "zlib.h" + +#define BASE 65521UL /* largest prime smaller than 65536 */ +#define NMAX 5552 +/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ + +#define DO1(buf,i) {adler += (buf)[i]; sum2 += adler;} +#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); +#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); +#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); +#define DO16(buf) DO8(buf,0); DO8(buf,8); + +/* use NO_DIVIDE if your processor does not do division in hardware */ +#ifdef NO_DIVIDE +# define MOD(a) \ + do { \ + if (a >= (BASE << 16)) a -= (BASE << 16); \ + if (a >= (BASE << 15)) a -= (BASE << 15); \ + if (a >= (BASE << 14)) a -= (BASE << 14); \ + if (a >= (BASE << 13)) a -= (BASE << 13); \ + if (a >= (BASE << 12)) a -= (BASE << 12); \ + if (a >= (BASE << 11)) a -= (BASE << 11); \ + if (a >= (BASE << 10)) a -= (BASE << 10); \ + if (a >= (BASE << 9)) a -= (BASE << 9); \ + if (a >= (BASE << 8)) a -= (BASE << 8); \ + if (a >= (BASE << 7)) a -= (BASE << 7); \ + if (a >= (BASE << 6)) a -= (BASE << 6); \ + if (a >= (BASE << 5)) a -= (BASE << 5); \ + if (a >= (BASE << 4)) a -= (BASE << 4); \ + if (a >= (BASE << 3)) a -= (BASE << 3); \ + if (a >= (BASE << 2)) a -= (BASE << 2); \ + if (a >= (BASE << 1)) a -= (BASE << 1); \ + if (a >= BASE) a -= BASE; \ + } while (0) +# define MOD4(a) \ + do { \ + if (a >= (BASE << 4)) a -= (BASE << 4); \ + if (a >= (BASE << 3)) a -= (BASE << 3); \ + if (a >= (BASE << 2)) a -= (BASE << 2); \ + if (a >= (BASE << 1)) a -= (BASE << 1); \ + if (a >= BASE) a -= BASE; \ + } while (0) +#else +# define MOD(a) a %= BASE +# define MOD4(a) a %= BASE +#endif + +/* ========================================================================= */ + +/* + The adler32 code below computes, in effect, + + uLong high = 0; + uLong low = 1; + for (j = 0; j < len; j++) { + low = (low + buf[j]) % BASE; + high = (high + low) % BASE; + } + checksum = (high << 16) | low; + + Both 16-bit halves of the checksum are between 0 and BASE-1 
(inclusive). + Hence, the minimum possible checksum value is 0, and the maximum is + ((BASE-1) << 16) | (BASE-1). Applications may have reserved values + outside this range to carry special meanings. + + NOTE: If adler32() is changed in ANY way, be absolutely sure that the + change will NOT cause checksums previously stored to not match the data + they were originally intended to match, or expand the range in such a + way that values reserved by applications to carry special meanings now + become checksums of valid data. Also, be sure to change adler32_range() + accordingly. + + This explanation and adler32_range() are not part of original software + distribution. They are added at Google (2006) in accordance with the + copyright notice in zlib.h, which permits alteration and redistribution + of the original software provided, among other things, that altered + source versions must be plainly marked as such and not misrepresented as + being the original software. +*/ + +void ZEXPORT adler32_range(min, max) + uLong *min; + uLong *max; +{ + *min = 0L; + *max = ((BASE-1) << 16) | (BASE-1); +} + +uLong ZEXPORT adler32(adler, buf, len) + uLong adler; + const Bytef *buf; + uInt len; +{ + unsigned long sum2; + unsigned n; + + /* split Adler-32 into component sums */ + sum2 = (adler >> 16) & 0xffff; + adler &= 0xffff; + + /* in case user likes doing a byte at a time, keep it fast */ + if (len == 1) { + adler += buf[0]; + if (adler >= BASE) + adler -= BASE; + sum2 += adler; + if (sum2 >= BASE) + sum2 -= BASE; + return adler | (sum2 << 16); + } + + /* initial Adler-32 value (deferred check for len == 1 speed) */ + if (buf == Z_NULL) + return 1L; + + /* in case short lengths are provided, keep it somewhat fast */ + if (len < 16) { + while (len--) { + adler += *buf++; + sum2 += adler; + } + if (adler >= BASE) + adler -= BASE; + MOD4(sum2); /* only added so many BASE's */ + return adler | (sum2 << 16); + } + + /* do length NMAX blocks -- requires just one modulo operation */ + 
while (len >= NMAX) { + len -= NMAX; + n = NMAX / 16; /* NMAX is divisible by 16 */ + do { + DO16(buf); /* 16 sums unrolled */ + buf += 16; + } while (--n); + MOD(adler); + MOD(sum2); + } + + /* do remaining bytes (less than NMAX, still just one modulo) */ + if (len) { /* avoid modulos if none remaining */ + while (len >= 16) { + len -= 16; + DO16(buf); + buf += 16; + } + while (len--) { + adler += *buf++; + sum2 += adler; + } + MOD(adler); + MOD(sum2); + } + + /* return recombined sums */ + return adler | (sum2 << 16); +} + +/* ========================================================================= */ +uLong ZEXPORT adler32_combine(adler1, adler2, len2) + uLong adler1; + uLong adler2; + z_off_t len2; +{ + unsigned long sum1; + unsigned long sum2; + unsigned rem; + + /* the derivation of this formula is left as an exercise for the reader */ + rem = (unsigned)(len2 % BASE); + sum1 = adler1 & 0xffff; + sum2 = rem * sum1; + MOD(sum2); + sum1 += (adler2 & 0xffff) + BASE - 1; + sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem; + if (sum1 >= BASE) sum1 -= BASE; + if (sum1 >= BASE) sum1 -= BASE; + if (sum2 >= (BASE << 1)) sum2 -= (BASE << 1); + if (sum2 >= BASE) sum2 -= BASE; + return sum1 | (sum2 << 16); +} diff --git a/src/blockhash.cc b/src/blockhash.cc new file mode 100644 index 0000000..7dbec8d --- /dev/null +++ b/src/blockhash.cc @@ -0,0 +1,439 @@ +// Copyright 2006, 2008 Google Inc. +// Authors: Chandra Chereddi, Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include <config.h> +#include "blockhash.h" +#include "compile_assert.h" +#include <stdint.h> // uint32_t +#include "logging.h" +#include "rolling_hash.h" + +namespace open_vcdiff { + +typedef unsigned long uword_t; // a machine word NOLINT + +BlockHash::BlockHash(const char* source_data, + size_t source_size, + int starting_offset) + : source_data_(source_data), + source_size_(source_size), + hash_table_mask_(0), + starting_offset_(starting_offset), + last_block_added_(-1) { +} + +BlockHash::~BlockHash() { } + +// kBlockSize must be at least 2 to be meaningful. Since it's a compile-time +// constant, check its value at compile time rather than wasting CPU cycles +// on runtime checks. +COMPILE_ASSERT(BlockHash::kBlockSize >= 2, kBlockSize_must_be_at_least_2); + +// kBlockSize is required to be a power of 2 because multiplication +// (n * kBlockSize), division (n / kBlockSize) and MOD (n % kBlockSize) +// are commonly-used operations. If kBlockSize is a compile-time +// constant and a power of 2, the compiler can convert these three operations +// into bit-shift (>> or <<) and bitwise-AND (&) operations, which are much +// more efficient than executing full integer multiply, divide, or remainder +// instructions. 
+COMPILE_ASSERT((BlockHash::kBlockSize & (BlockHash::kBlockSize - 1)) == 0, + kBlockSize_must_be_a_power_of_2); + +bool BlockHash::Init(bool populate_hash_table) { + if (!hash_table_.empty() || + !next_block_table_.empty() || + !last_block_table_.empty()) { + LOG(DFATAL) << "Init() called twice for same BlockHash object" << LOG_ENDL; + return false; + } + const size_t table_size = CalcTableSize(source_size_); + if (table_size == 0) { + LOG(DFATAL) << "Error finding table size for source size " << source_size_ + << LOG_ENDL; + return false; + } + // Since table_size is a power of 2, (table_size - 1) is a bit mask + // containing all the bits below table_size. + hash_table_mask_ = static_cast<uint32_t>(table_size - 1); + hash_table_.resize(table_size, -1); + next_block_table_.resize(GetNumberOfBlocks(), -1); + last_block_table_.resize(GetNumberOfBlocks(), -1); + if (populate_hash_table) { + AddAllBlocks(); + } + return true; +} + +const BlockHash* BlockHash::CreateDictionaryHash(const char* dictionary_data, + size_t dictionary_size) { + BlockHash* new_dictionary_hash = new BlockHash(dictionary_data, + dictionary_size, + 0); + if (!new_dictionary_hash->Init(/* populate_hash_table = */ true)) { + delete new_dictionary_hash; + return NULL; + } else { + return new_dictionary_hash; + } +} + +BlockHash* BlockHash::CreateTargetHash(const char* target_data, + size_t target_size, + size_t dictionary_size) { + BlockHash* new_target_hash = new BlockHash(target_data, + target_size, + static_cast<int>(dictionary_size)); + if (!new_target_hash->Init(/* populate_hash_table = */ false)) { + delete new_target_hash; + return NULL; + } else { + return new_target_hash; + } +} + +// Returns zero if an error occurs. +const size_t BlockHash::CalcTableSize(const size_t dictionary_size) { + // Overallocate the hash table by making it the same size (in bytes) + // as the source data. 
This is a trade-off between space and time: + // the empty entries in the hash table will reduce the + // probability of a hash collision to (sizeof(int) / kblockSize), + // and so save time comparing false matches. + const size_t min_size = (dictionary_size / sizeof(int)) + 1; // NOLINT + size_t table_size = 1; + // Find the smallest power of 2 that is >= min_size, and assign + // that value to table_size. + while (table_size < min_size) { + table_size <<= 1; + // Guard against an infinite loop + if (table_size <= 0) { + LOG(DFATAL) << "Internal error: CalcTableSize(dictionary_size = " + << dictionary_size + << "): resulting table_size " << table_size + << " is zero or negative" << LOG_ENDL; + return 0; + } + } + // Check size sanity + if ((table_size & (table_size - 1)) != 0) { + LOG(DFATAL) << "Internal error: CalcTableSize(dictionary_size = " + << dictionary_size + << "): resulting table_size " << table_size + << " is not a power of 2" << LOG_ENDL; + return 0; + } + // The loop above tries to find the smallest power of 2 that is >= min_size. + // That value must lie somewhere between min_size and (min_size * 2), + // except for the case (dictionary_size == 0, table_size == 1). + if ((dictionary_size > 0) && (table_size > (min_size * 2))) { + LOG(DFATAL) << "Internal error: CalcTableSize(dictionary_size = " + << dictionary_size + << "): resulting table_size " << table_size + << " is too large" << LOG_ENDL; + return 0; + } + return table_size; +} + +// If the hash value is already available from the rolling hash, +// call this function to save time. +void BlockHash::AddBlock(uint32_t hash_value) { + if (hash_table_.empty()) { + LOG(DFATAL) << "BlockHash::AddBlock() called before BlockHash::Init()" + << LOG_ENDL; + return; + } + // The initial value of last_block_added_ is -1. 
+ int block_number = last_block_added_ + 1; + const int total_blocks = + static_cast<int>(source_size_ / kBlockSize); // round down + if (block_number >= total_blocks) { + LOG(DFATAL) << "BlockHash::AddBlock() called" + " with block number " << block_number + << " that is past last block " << (total_blocks - 1) + << LOG_ENDL; + return; + } + if (next_block_table_[block_number] != -1) { + LOG(DFATAL) << "Internal error in BlockHash::AddBlock(): " + "block number = " << block_number + << ", next block should be -1 but is " + << next_block_table_[block_number] << LOG_ENDL; + return; + } + const uint32_t hash_table_index = GetHashTableIndex(hash_value); + const int first_matching_block = hash_table_[hash_table_index]; + if (first_matching_block < 0) { + // This is the first entry with this hash value + hash_table_[hash_table_index] = block_number; + last_block_table_[block_number] = block_number; + } else { + // Add this entry at the end of the chain of matching blocks + const int last_matching_block = last_block_table_[first_matching_block]; + if (next_block_table_[last_matching_block] != -1) { + LOG(DFATAL) << "Internal error in BlockHash::AddBlock(): " + "first matching block = " << first_matching_block + << ", last matching block = " << last_matching_block + << ", next block should be -1 but is " + << next_block_table_[last_matching_block] << LOG_ENDL; + return; + } + next_block_table_[last_matching_block] = block_number; + last_block_table_[first_matching_block] = block_number; + } + last_block_added_ = block_number; +} + +void BlockHash::AddAllBlocks() { + AddAllBlocksThroughIndex(static_cast<int>(source_size_)); +} + +void BlockHash::AddAllBlocksThroughIndex(int end_index) { + if (end_index > static_cast<int>(source_size_)) { + LOG(DFATAL) << "BlockHash::AddAllBlocksThroughIndex() called" + " with index " << end_index + << " higher than end index " << source_size_ << LOG_ENDL; + return; + } + const int last_index_added = last_block_added_ * kBlockSize; + if 
(end_index <= last_index_added) { + LOG(DFATAL) << "BlockHash::AddAllBlocksThroughIndex() called" + " with index " << end_index + << " <= last index added ( " << last_index_added + << ")" << LOG_ENDL; + return; + } + int end_limit = end_index; + // Don't allow reading any indices at or past source_size_. + // The Hash function extends (kBlockSize - 1) bytes past the index, + // so leave a margin of that size. + int last_legal_hash_index = static_cast<int>(source_size() - kBlockSize); + if (end_limit > last_legal_hash_index) { + end_limit = last_legal_hash_index + 1; + } + const char* block_ptr = source_data() + NextIndexToAdd(); + const char* const end_ptr = source_data() + end_limit; + while (block_ptr < end_ptr) { + AddBlock(RollingHash<kBlockSize>::Hash(block_ptr)); + block_ptr += kBlockSize; + } +} + +COMPILE_ASSERT((BlockHash::kBlockSize % sizeof(uword_t)) == 0, + kBlockSize_must_be_a_multiple_of_machine_word_size); + +// A recursive template to compare a fixed number +// of (possibly unaligned) machine words starting +// at addresses block1 and block2. Returns true or false +// depending on whether an exact match was found. +template<int number_of_words> +inline bool CompareWholeWordValues(const char* block1, + const char* block2) { + return CompareWholeWordValues<1>(block1, block2) && + CompareWholeWordValues<number_of_words - 1>(block1 + sizeof(uword_t), + block2 + sizeof(uword_t)); +} + +// The base of the recursive template: compare one pair of machine words. +template<> +inline bool CompareWholeWordValues<1>(const char* word1, + const char* word2) { + uword_t aligned_word1, aligned_word2; + memcpy(&aligned_word1, word1, sizeof(aligned_word1)); + memcpy(&aligned_word2, word2, sizeof(aligned_word2)); + return aligned_word1 == aligned_word2; +} + +// A block must be composed of an integral number of machine words +// (uword_t values.) This function takes advantage of that fact +// by comparing the blocks as series of (possibly unaligned) word values. 
+// A word-sized comparison can be performed as a single +// machine instruction. Comparing words instead of bytes means that, +// on a 64-bit platform, this function will use 8 times fewer test-and-branch +// instructions than a byte-by-byte comparison. Even with the extra +// cost of the calls to memcpy, this method is still at least twice as fast +// as memcmp (measured using gcc on a 64-bit platform, with a block size +// of 32.) For blocks with identical contents (a common case), this method +// is over six times faster than memcmp. +inline bool BlockCompareWordsInline(const char* block1, const char* block2) { + static const size_t kWordsPerBlock = BlockHash::kBlockSize / sizeof(uword_t); + return CompareWholeWordValues<kWordsPerBlock>(block1, block2); +} + +bool BlockHash::BlockCompareWords(const char* block1, const char* block2) { + return BlockCompareWordsInline(block1, block2); +} + +inline bool BlockContentsMatchInline(const char* block1, const char* block2) { + // Optimize for mismatch in first byte. Since this function is called only + // when the hash values of the two blocks match, it is very likely that either + // the blocks are identical, or else the first byte does not match. 
+ if (*block1 != *block2) { + return false; + } +#ifdef VCDIFF_USE_BLOCK_COMPARE_WORDS + return BlockCompareWordsInline(block1, block2); +#else // !VCDIFF_USE_BLOCK_COMPARE_WORDS + return memcmp(block1, block2, BlockHash::kBlockSize) == 0; +#endif // VCDIFF_USE_BLOCK_COMPARE_WORDS +} + +bool BlockHash::BlockContentsMatch(const char* block1, const char* block2) { + return BlockContentsMatchInline(block1, block2); +} + +inline int BlockHash::SkipNonMatchingBlocks(int block_number, + const char* block_ptr) const { + int probes = 0; + while ((block_number >= 0) && + !BlockContentsMatchInline(block_ptr, + &source_data_[block_number * kBlockSize])) { + if (++probes > kMaxProbes) { + return -1; // Avoid too much chaining + } + block_number = next_block_table_[block_number]; + } + return block_number; +} + +// Init() must have been called and returned true before using +// FirstMatchingBlock or NextMatchingBlock. No check is performed +// for this condition; the code will crash if this condition is violated. +inline int BlockHash::FirstMatchingBlockInline(uint32_t hash_value, + const char* block_ptr) const { + return SkipNonMatchingBlocks(hash_table_[GetHashTableIndex(hash_value)], + block_ptr); +} + +int BlockHash::FirstMatchingBlock(uint32_t hash_value, + const char* block_ptr) const { + return FirstMatchingBlockInline(hash_value, block_ptr); +} + +int BlockHash::NextMatchingBlock(int block_number, + const char* block_ptr) const { + if (static_cast<size_t>(block_number) >= GetNumberOfBlocks()) { + LOG(DFATAL) << "NextMatchingBlock called for invalid block number " + << block_number << LOG_ENDL; + return -1; + } + return SkipNonMatchingBlocks(next_block_table_[block_number], block_ptr); +} + +// Keep a count of the number of matches found. This will throttle the +// number of iterations in FindBestMatch. 
For example, if the entire +// dictionary is made up of spaces (' ') and the search string is also +// made up of spaces, there will be one match for each block in the +// dictionary. +inline bool BlockHash::TooManyMatches(int* match_counter) { + ++(*match_counter); + return (*match_counter) > kMaxMatchesToCheck; +} + +// Returns the number of bytes to the left of source_match_start +// that match the corresponding bytes to the left of target_match_start. +// Will not examine more than max_bytes bytes, which is to say that +// the return value will be in the range [0, max_bytes] inclusive. +int BlockHash::MatchingBytesToLeft(const char* source_match_start, + const char* target_match_start, + int max_bytes) { + const char* source_ptr = source_match_start; + const char* target_ptr = target_match_start; + int bytes_found = 0; + while (bytes_found < max_bytes) { + --source_ptr; + --target_ptr; + if (*source_ptr != *target_ptr) { + break; + } + ++bytes_found; + } + return bytes_found; +} + +// Returns the number of bytes starting at source_match_end +// that match the corresponding bytes starting at target_match_end. +// Will not examine more than max_bytes bytes, which is to say that +// the return value will be in the range [0, max_bytes] inclusive. +int BlockHash::MatchingBytesToRight(const char* source_match_end, + const char* target_match_end, + int max_bytes) { + const char* source_ptr = source_match_end; + const char* target_ptr = target_match_end; + int bytes_found = 0; + while ((bytes_found < max_bytes) && (*source_ptr == *target_ptr)) { + ++bytes_found; + ++source_ptr; + ++target_ptr; + } + return bytes_found; +} + +// No NULL checks are performed on the pointer arguments. The caller +// must guarantee that none of the arguments is NULL, or a crash will occur. +// +// The vast majority of calls to FindBestMatch enter the loop *zero* times, +// which is to say that most candidate blocks find no matches in the dictionary. 
+// The important sections for optimization are therefore the code outside the +// loop and the code within the loop conditions. Keep this to a minimum. +void BlockHash::FindBestMatch(uint32_t hash_value, + const char* target_candidate_start, + const char* target_start, + size_t target_size, + Match* best_match) const { + int match_counter = 0; + for (int block_number = FirstMatchingBlockInline(hash_value, + target_candidate_start); + (block_number >= 0) && !TooManyMatches(&match_counter); + block_number = NextMatchingBlock(block_number, target_candidate_start)) { + int source_match_offset = block_number * kBlockSize; + const int source_match_end = source_match_offset + kBlockSize; + + int target_match_offset = + static_cast<int>(target_candidate_start - target_start); + const int target_match_end = target_match_offset + kBlockSize; + + size_t match_size = kBlockSize; + { + // Extend match start towards beginning of unencoded data + const int limit_bytes_to_left = std::min(source_match_offset, + target_match_offset); + const int matching_bytes_to_left = + MatchingBytesToLeft(source_data_ + source_match_offset, + target_start + target_match_offset, + limit_bytes_to_left); + source_match_offset -= matching_bytes_to_left; + target_match_offset -= matching_bytes_to_left; + match_size += matching_bytes_to_left; + } + { + // Extend match end towards end of unencoded data + const size_t source_bytes_to_right = source_size_ - source_match_end; + const size_t target_bytes_to_right = target_size - target_match_end; + const size_t limit_bytes_to_right = std::min(source_bytes_to_right, + target_bytes_to_right); + match_size += + MatchingBytesToRight(source_data_ + source_match_end, + target_start + target_match_end, + static_cast<int>(limit_bytes_to_right)); + } + // Update in/out parameter if the best match found was better + // than any match already stored in *best_match. 
+ best_match->ReplaceIfBetterMatch(match_size, + source_match_offset + starting_offset_, + target_match_offset); + } +} + +} // namespace open_vcdiff diff --git a/src/blockhash.h b/src/blockhash.h new file mode 100644 index 0000000..6b3b02e --- /dev/null +++ b/src/blockhash.h @@ -0,0 +1,507 @@ +// Copyright 2006 Google Inc. +// Authors: Sanjay Ghemawat, Jeff Dean, Chandra Chereddi, Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Implementation of the Bentley/McIlroy algorithm for finding differences. +// Bentley, McIlroy. DCC 1999. Data Compression Using Long Common Strings. +// http://citeseer.ist.psu.edu/555557.html + +#ifndef OPEN_VCDIFF_BLOCKHASH_H_ +#define OPEN_VCDIFF_BLOCKHASH_H_ + +#include <config.h> +#include <stdint.h> // uint32_t +#include <cstddef> // size_t +#include <vector> + +namespace open_vcdiff { + +// A generic hash table which will be used to keep track of byte runs +// of size kBlockSize in both the incrementally processed target data +// and the preprocessed source dictionary. +// +// A custom hash table implementation is used instead of the standard +// hash_map template because we know that there will be exactly one +// entry in the BlockHash corresponding to each kBlockSize bytes +// in the source data, which makes certain optimizations possible: +// * The memory for the hash table and for all hash entries can be allocated +// in one step rather than incrementally for each insert operation. 
+// * A single integer can be used to represent both +// the index of the next hash entry in the chain +// and the position of the entry within the source data +// (== kBlockSize * block_number). This greatly reduces the size +// of a hash entry. +// +class BlockHash { + public: + // Block size as per Bentley/McIlroy; must be a power of two. + // + // Using (for example) kBlockSize = 4 guarantees that no match smaller + // than size 4 will be identified, that some matches having sizes + // 4, 5, or 6 may be identified, and that all matches + // having size 7 or greater will be identified (because any string of + // 7 bytes must contain a complete aligned block of 4 bytes.) + // + // Increasing kBlockSize by a factor of two will halve the amount of + // memory needed for the next block table, and will halve the setup time + // for a new BlockHash. However, it also doubles the minimum + // match length that is guaranteed to be found in FindBestMatch(), + // so that function will be less effective in finding matches. + // + // Computational effort in FindBestMatch (which is the inner loop of + // the encoding algorithm) will be proportional to the number of + // matches found, and a low value of kBlockSize will waste time + // tracking down small matches. On the other hand, if this value + // is set too high, no matches will be found at all. + // + // It is suggested that different values of kBlockSize be tried against + // a representative data set to find the best tradeoff between + // memory/CPU and the effectiveness of FindBestMatch(). + // + // If you change kBlockSize to a smaller value, please increase + // kMaxMatchesToCheck accordingly. + static const int kBlockSize = 32; + + // This class is used to store the best match found by FindBestMatch() + // and return it to the caller. 
+ class Match { + public: + Match() : size_(0), source_offset_(-1), target_offset_(-1) { } + + void ReplaceIfBetterMatch(size_t candidate_size, + int candidate_source_offset, + int candidate_target_offset) { + if (candidate_size > size_) { + size_ = candidate_size; + source_offset_ = candidate_source_offset; + target_offset_ = candidate_target_offset; + } + } + + size_t size() const { return size_; } + int source_offset() const { return source_offset_; } + int target_offset() const { return target_offset_; } + + private: + // The size of the best (longest) match passed to ReplaceIfBetterMatch(). + size_t size_; + + // The source offset of the match, including the starting_offset_ + // of the BlockHash for which the match was found. + int source_offset_; + + // The target offset of the match. An offset of 0 corresponds to the + // data at target_start, which is an argument of FindBestMatch(). + int target_offset_; + + // Making these private avoids implicit copy constructor + // & assignment operator + Match(const Match&); // NOLINT + void operator=(const Match&); + }; + + // A BlockHash is created using a buffer of source data. The hash table + // will contain one entry for each kBlockSize-byte block in the + // source data. + // + // See the comments for starting_offset_, below, for a description of + // the starting_offset argument. For a hash of source (dictionary) data, + // starting_offset_ will be zero; for a hash of previously encoded + // target data, starting_offset_ will be equal to the dictionary size. + // + BlockHash(const char* source_data, size_t source_size, int starting_offset); + + ~BlockHash(); + + // Initializes the object before use. + // This method must be called after constructing a BlockHash object, + // and before any other method may be called. This is because + // Init() dynamically allocates hash_table_ and next_block_table_. 
+ // Returns true if initialization succeeded, or false if an error occurred, + // in which case no other method except the destructor may then be used + // on the object. + // + // If populate_hash_table is true, then AddAllBlocks() will be called + // to populate the hash table. If populate_hash_table is false, then + // classes that inherit from BlockHash are expected to call AddBlock() + // to incrementally populate individual blocks of data. + // + bool Init(bool populate_hash_table); + + // In the context of the open-vcdiff encoder, BlockHash is used for two + // purposes: to hash the source (dictionary) data, and to hash + // the previously encoded target data. The main differences between + // a dictionary BlockHash and a target BlockHash are as follows: + // + // 1. The best_match->source_offset() returned from FindBestMatch() + // for a target BlockHash is computed in the following manner: + // the starting offset of the first byte in the target data + // is equal to the dictionary size. FindBestMatch() will add + // starting_offset_ to any best_match->source_offset() value it returns, + // in order to produce the correct offset value for a target BlockHash. + // 2. For a dictionary BlockHash, the entire data set is hashed at once + // when Init() is called with the parameter populate_hash_table = true. + // For a target BlockHash, because the previously encoded target data + // includes only the data seen up to the current encoding position, + // the data blocks are hashed incrementally as the encoding position + // advances, using AddOneIndexHash() and AddAllBlocksThroughIndex(). + // + // The following two factory functions can be used to create BlockHash + // objects for each of these two purposes. Each factory function calls + // the object constructor and also calls Init(). If an error occurs, + // NULL is returned; otherwise a valid BlockHash object is returned. 
+ // Since a dictionary BlockHash is not expected to be modified after + // initialization, a const object is returned. + // The caller is responsible for deleting the returned object + // (using the C++ delete operator) once it is no longer needed. + static const BlockHash* CreateDictionaryHash(const char* dictionary_data, + size_t dictionary_size); + static BlockHash* CreateTargetHash(const char* target_data, + size_t target_size, + size_t dictionary_size); + + // This function will be called to add blocks incrementally to the target hash + // as the encoding position advances through the target data. It will be + // called for every kBlockSize-byte block in the target data, regardless + // of whether the block is aligned evenly on a block boundary. The + // BlockHash will only store hash entries for the evenly-aligned blocks. + // + void AddOneIndexHash(int index, uint32_t hash_value) { + if (index == NextIndexToAdd()) { + AddBlock(hash_value); + } + } + + // Calls AddBlock() for each kBlockSize-byte block in the range + // (last_block_added_ * kBlockSize, end_index), exclusive of the endpoints. + // If end_index <= the last index added (last_block_added_ * kBlockSize), + // this function does nothing. + // + // A partial block beginning anywhere up to (end_index - 1) is also added, + // unless it extends outside the end of the source data. Like AddAllBlocks(), + // this function computes the hash value for each of the blocks in question + // from scratch, so it is not a good option if the hash values have already + // been computed for some other purpose. + // + // Example: assume kBlockSize = 4, last_block_added_ = 1, and there are + // 14 bytes of source data. + // If AddAllBlocksThroughIndex(9) is invoked, then it will call AddBlock() + // only for block number 2 (at index 8). 
+ // If, after that, AddAllBlocksThroughIndex(14) is invoked, it will not call + // AddBlock() at all, because block 3 (beginning at index 12) would + // fall outside the range of source data. + // + // VCDiffEngine::Encode (in vcdiffengine.cc) uses this function to + // add a whole range of data to a target hash when a COPY instruction + // is generated. + void AddAllBlocksThroughIndex(int end_index); + + // FindBestMatch takes a position within the unencoded target data + // (target_candidate_start) and the hash value of the kBlockSize bytes + // beginning at that position (hash_value). It attempts to find a matching + // set of bytes within the source (== dictionary) data, expanding + // the match both below and above the target block. It cannot expand + // the match outside the bounds of the source data, or below + // target_start within the target data, or past + // the end limit of (target_start + target_length). + // + // target_candidate_start is the start of the candidate block within the + // target data for which a match will be sought, while + // target_start (which is <= target_candidate_start) + // is the start of the target data that has yet to be encoded. + // + // If a match is found whose size is greater than the size + // of best_match, this function populates *best_match with the + // size, source_offset, and target_offset of the match found. + // best_match->source_offset() will contain the index of the start of the + // matching source data, plus starting_offset_ + // (see description of starting_offset_ for details); + // best_match->target_offset() will contain the offset of the match + // beginning with target_start = offset 0, such that + // 0 <= best_match->target_offset() + // <= (target_candidate_start - target_start); + // and best_match->size() will contain the size of the match. + // If no such match is found, this function leaves *best_match unmodified. 
+ // + // On calling FindBestMatch(), best_match must + // point to a valid Match object, and cannot be NULL. + // The same Match object can be passed + // when calling FindBestMatch() on a different BlockHash object + // for the same candidate data block, in order to find + // the best match possible across both objects. For example: + // + // open_vcdiff::BlockHash::Match best_match; + // uint32_t hash_value = + // RollingHash<BlockHash::kBlockSize>::Hash(target_candidate_start); + // bh1.FindBestMatch(hash_value, + // target_candidate_start, + // target_start, + // target_length, + // &best_match); + // bh2.FindBestMatch(hash_value, + // target_candidate_start, + // target_start, + // target_length, + // &best_match); + // if (best_match.size() > 0) { + // // a match was found; its size, source offset, and target offset + // // can be found in best_match + // } + // + // hash_value is passed as a separate parameter from target_candidate_start, + // (rather than calculated within FindBestMatch) in order to take + // advantage of the rolling hash, which quickly calculates the hash value + // of the block starting at target_candidate_start based on + // the known hash value of the block starting at (target_candidate_start - 1). + // See vcdiffengine.cc for more details. + // + // Example: + // kBlockSize: 4 + // target text: "ANDREW LLOYD WEBBER" + // 1^ 5^2^ 3^ + // dictionary: "INSURANCE : LLOYDS OF LONDON" + // 4^ + // hashed dictionary blocks: + // "INSU", "RANC", "E : ", "LLOY", "DS O", "F LO", "NDON" + // + // 1: target_start (beginning of unencoded data) + // 2: target_candidate_start (for the block "LLOY") + // 3: target_start + target_length (points one byte beyond the last byte of data.) 
+ // 4: best_match->source_offset() (after calling FindBestMatch) + // 5: best_match->target_offset() (after calling FindBestMatch) + // + // Under these conditions, FindBestMatch will find a matching + // hashed dictionary block for "LLOY", and will extend the beginning of + // this match backwards by one byte, and the end of the match forwards + // by one byte, finding that the best match is " LLOYD" + // with best_match->source_offset() = 11 + // (offset of " LLOYD" in the source string), + // best_match->target_offset() = 6 + // (offset of " LLOYD" in the target string), + // and best_match->size() = 6. + // + void FindBestMatch(uint32_t hash_value, + const char* target_candidate_start, + const char* target_start, + size_t target_size, + Match* best_match) const; + + protected: + // FindBestMatch() will not process more than this number + // of matching hash entries. + // + // It is necessary to have a limit on the maximum number of matches + // that will be checked in order to avoid the worst-case performance + // possible if, for example, all the blocks in the dictionary have + // the same hash value. See the unit test SearchStringFindsTooManyMatches + // for an example of such a case. The encoder uses a loop in + // VCDiffEngine::Encode over each target byte, containing a loop in + // BlockHash::FindBestMatch over the number of matches (up to a maximum + // of the number of source blocks), containing two loops that extend + // the match forwards and backwards up to the number of source bytes. + // Total complexity in the worst case is + // O([target size] * source_size_ * source_size_) + // Placing a limit on the possible number of matches checked changes this to + // O([target size] * source_size_ * kMaxMatchesToCheck) + // + // In empirical testing on real HTML text, using a block size of 4, + // the number of true matches per call to FindBestMatch() did not exceed 78; + // with a block size of 32, the number of matches did not exceed 3. 
+ // + // The expected number of true matches scales super-linearly + // with the inverse of kBlockSize, but here a linear scale is used + // for block sizes smaller than 32. + static const int kMaxMatchesToCheck = (kBlockSize >= 32) ? 8 : + (8 * (32 / kBlockSize)); + + // Do not skip more than this number of non-matching hash collisions + // to find the next matching entry in the hash chain. + static const int kMaxProbes = 16; + + // Internal routine which calculates a hash table size based on kBlockSize and + // the dictionary_size. Will return a power of two if successful, or 0 if an + // internal error occurs. Some calculations (such as GetHashTableIndex()) + // depend on the table size being a power of two. + static const size_t CalcTableSize(const size_t dictionary_size); + + const size_t GetNumberOfBlocks() const { + return source_size_ / kBlockSize; + } + + // Use the lowest-order bits of the hash value + // as the index into the hash table. + uint32_t GetHashTableIndex(uint32_t hash_value) const { + return hash_value & hash_table_mask_; + } + + // The index within source_data_ of the next block + // for which AddBlock() should be called. + int NextIndexToAdd() const { + return (last_block_added_ + 1) * kBlockSize; + } + + static inline bool TooManyMatches(int* match_counter); + + const char* const source_data() { return source_data_; } + const size_t source_size() { return source_size_; } + + // Adds an entry to the hash table for one block of source data of length + // kBlockSize, starting at source_data_[block_number * kBlockSize], + // where block_number is always (last_block_added_ + 1). That is, + // AddBlock() must be called once for each block in source_data_ + // in increasing order. + void AddBlock(uint32_t hash_value); + + // Calls AddBlock() for each complete kBlockSize-byte block between + // source_data_ and (source_data_ + source_size_). It is equivalent + // to calling AddAllBlocksThroughIndex(source_size_). 
+ // This function is called when Init(true) is invoked. + void AddAllBlocks(); + + // Returns true if the contents of the kBlockSize-byte block + // beginning at block1 are identical to the contents of + // the block beginning at block2; false otherwise. + static bool BlockContentsMatch(const char* block1, const char* block2); + + // Compares each machine word of the two (possibly unaligned) blocks, rather + // than each byte, thus reducing the number of test-and-branch instructions + // executed. Returns a boolean (do the blocks match?) rather than + // the signed byte difference returned by memcmp. + // + // BlockContentsMatch will use either this function or memcmp to do its work, + // depending on which is faster for a particular architecture. + // + // For gcc on x86-based architectures, this function has been shown to run + // about twice as fast as the library function memcmp(), and between five and + // nine times faster than the assembly instructions (repz and cmpsb) that gcc + // uses by default for builtin memcmp. On other architectures, or using + // other compilers, this function has not been shown to be faster than memcmp. + static bool BlockCompareWords(const char* block1, const char* block2); + + // Finds the first block number within the hashed data + // that represents a match for the given hash value. + // Returns -1 if no match was found. + // + // Init() must have been called and returned true before using + // FirstMatchingBlock or NextMatchingBlock. No check is performed + // for this condition; the code will crash if this condition is violated. + // + // The hash table is initially populated with -1 (not found) values, + // so if this function is called before the hash table has been populated + // using AddAllBlocks() or AddBlock(), it will simply return -1 + // for any value of hash_value. 
+ int FirstMatchingBlock(uint32_t hash_value, const char* block_ptr) const; + + // Given a block number returned by FirstMatchingBlock() + // or by a previous call to NextMatchingBlock(), returns + // the next block number that matches the same hash value. + // Returns -1 if no match was found. + int NextMatchingBlock(int block_number, const char* block_ptr) const; + + // Inline version of FirstMatchingBlock. This saves the cost of a function + // call when this routine is called from within the module. The external + // (non-inlined) version is called only by unit tests. + inline int FirstMatchingBlockInline(uint32_t hash_value, + const char* block_ptr) const; + + // Walk through the hash entry chain, skipping over any false matches + // (for which the lowest bits of the fingerprints match, + // but the actual block data does not.) Returns the block number of + // the first true match found, or -1 if no true match was found. + // If block_number is a matching block, the function will return block_number + // without skipping to the next block. + int SkipNonMatchingBlocks(int block_number, const char* block_ptr) const; + + // Returns the number of bytes to the left of source_match_start + // that match the corresponding bytes to the left of target_match_start. + // Will not examine more than max_bytes bytes, which is to say that + // the return value will be in the range [0, max_bytes] inclusive. + static int MatchingBytesToLeft(const char* source_match_start, + const char* target_match_start, + int max_bytes); + + // Returns the number of bytes starting at source_match_end + // that match the corresponding bytes starting at target_match_end. + // Will not examine more than max_bytes bytes, which is to say that + // the return value will be in the range [0, max_bytes] inclusive. 
+ static int MatchingBytesToRight(const char* source_match_end, + const char* target_match_end, + int max_bytes); + + // The protected functions BlockContentsMatch, FirstMatchingBlock, + // NextMatchingBlock, MatchingBytesToLeft, and MatchingBytesToRight + // should be made accessible to unit tests. + friend class BlockHashTest; + + private: + const char* const source_data_; + const size_t source_size_; + + // The size of this array is determined using CalcTableSize(). It has at + // least one element for each kBlockSize-byte block in the source data. + // GetHashTableIndex() returns an index into this table for a given hash + // value. The value of each element of hash_table_ is the lowest block + // number in the source data whose hash value would return the same value from + // GetHashTableIndex(), or -1 if there is no matching block. This value can + // then be used as an index into next_block_table_ to retrieve the entire set + // of matching block numbers. + std::vector<int> hash_table_; + + // An array containing one element for each source block. Each element is + // either -1 (== not found) or the index of the next block whose hash value + // would produce a matching result from GetHashTableIndex(). + std::vector<int> next_block_table_; + + // This vector has the same size as next_block_table_. For every block number + // B that is referenced in hash_table_, last_block_table_[B] will contain + // the maximum block number that has the same GetHashTableIndex() value + // as block B. This number may be B itself. For a block number B' that + // is not referenced in hash_table_, the value of last_block_table_[B'] is -1. + // This table is used only while populating the hash table, not while looking + // up hash values in the table. Keeping track of the last block number in the + // chain allows us to construct the block chains as FIFO rather than LIFO + // lists, so that the match with the lowest index is returned first. 
This + // should result in a more compact encoding because the VCDIFF format favors + // smaller index values and repeated index values. + std::vector<int> last_block_table_; + + // Performing a bitwise AND with hash_table_mask_ will produce a value ranging + // from 0 to (the number of elements in hash_table_) - 1. + uint32_t hash_table_mask_; + + // The offset of the first byte of source data (the data at source_data_[0]). + // For the purpose of computing offsets, the source data and target data + // are considered to be concatenated -- not literally in a single memory + // buffer, but conceptually as described in the RFC. + // The first byte of the previously encoded target data + // has an offset that is equal to dictionary_size, i.e., just after + // the last byte of source data. + // For a hash of source (dictionary) data, starting_offset_ will be zero; + // for a hash of previously encoded target data, starting_offset_ will be + // equal to the dictionary size. + const int starting_offset_; + + // The number of the last block added by AddBlock(). This determines the + // block number for successive calls to AddBlock(), and is also + // used to determine the starting block for AddAllBlocksThroughIndex(). + int last_block_added_; + + // Making these private avoids implicit copy constructor & assignment operator + BlockHash(const BlockHash&); // NOLINT + void operator=(const BlockHash&); +}; + +} // namespace open_vcdiff + +#endif // OPEN_VCDIFF_BLOCKHASH_H_ diff --git a/src/blockhash_test.cc b/src/blockhash_test.cc new file mode 100644 index 0000000..a0ddecb --- /dev/null +++ b/src/blockhash_test.cc @@ -0,0 +1,930 @@ +// Copyright 2008 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <config.h> +#include "blockhash.h" +#include <climits> // INT_MIN +#include <memory> // auto_ptr +#include "encodetable.h" +#include "logging.h" +#include "rolling_hash.h" +#include "testing.h" + +namespace open_vcdiff { + +const int kBlockSize = BlockHash::kBlockSize; + +class BlockHashTest : public testing::Test { + protected: + static const int kTimingTestSize = 1 << 21; // 2M + static const int kTimingTestIterations = 32; + + BlockHashTest() { + dh_.reset(BlockHash::CreateDictionaryHash(sample_text, + strlen(sample_text))); + th_.reset(BlockHash::CreateTargetHash(sample_text, strlen(sample_text), 0)); + EXPECT_TRUE(dh_.get() != NULL); + EXPECT_TRUE(th_.get() != NULL); + } + + // BlockHashTest is a friend to BlockHash. Expose the protected functions + // that will be tested by the children of BlockHashTest. 
+ static bool BlockContentsMatch(const char* block1, const char* block2) { + return BlockHash::BlockContentsMatch(block1, block2); + } + + int FirstMatchingBlock(const BlockHash& block_hash, + uint32_t hash_value, + const char* block_ptr) const { + return block_hash.FirstMatchingBlock(hash_value, block_ptr); + } + + int NextMatchingBlock(const BlockHash& block_hash, + int block_number, + const char* block_ptr) const { + return block_hash.NextMatchingBlock(block_number, block_ptr); + } + + static int MatchingBytesToLeft(const char* source_match_start, + const char* target_match_start, + int max_bytes) { + return BlockHash::MatchingBytesToLeft(source_match_start, + target_match_start, + max_bytes); + } + + static int MatchingBytesToRight(const char* source_match_end, + const char* target_match_end, + int max_bytes) { + return BlockHash::MatchingBytesToRight(source_match_end, + target_match_end, + max_bytes); + } + + static int StringLengthAsInt(const char* s) { + return static_cast<int>(strlen(s)); + } + + void InitBlocksToDifferAtNthByte(int n) { + CHECK(n < kBlockSize); + memset(compare_buffer_1_, 0xBE, kTimingTestSize); + memset(compare_buffer_2_, 0xBE, kTimingTestSize); + for (int index = n; index < kTimingTestSize; index += kBlockSize) { + compare_buffer_1_[index] = 0x00; + compare_buffer_2_[index] = 0x01; + } + } + + void TestAndPrintTimesForCompareFunctions(bool should_be_identical); + + void TimingTestForBlocksThatDifferAtByte(int n) { + InitBlocksToDifferAtNthByte(n); + LOG(INFO) << "Comparing blocks that differ at byte " << n << LOG_ENDL; + TestAndPrintTimesForCompareFunctions(false); + } + + // Copy sample_text_without_spaces and search_string_without_spaces + // into newly allocated sample_text and search_string buffers, + // but pad them with space characters so that every character + // in sample_text_without_spaces matches (kBlockSize - 1) + // space characters in sample_text, followed by that character. 
+ // For example: + // Since sample_text_without_spaces begins "The only thing"..., + // if kBlockSize is 4, then 3 space characters will be inserted + // between each letter of sample_text, as follows: + // " T h e o n l y t h i n g"... + // This makes testing simpler, because finding a kBlockSize-byte match + // between the sample text and search string only depends on the + // trailing letter in each block. + static void MakeEachLetterABlock(const char* string_without_spaces, + const char** result) { + const size_t length_without_spaces = strlen(string_without_spaces); + char* padded_text = new char[(kBlockSize * length_without_spaces) + 1]; + memset(padded_text, ' ', kBlockSize * length_without_spaces); + char* padded_text_ptr = padded_text + (kBlockSize - 1); + for (size_t i = 0; i < length_without_spaces; ++i) { + *padded_text_ptr = string_without_spaces[i]; + padded_text_ptr += kBlockSize; + } + padded_text[kBlockSize * length_without_spaces] = '\0'; + *result = padded_text; + } + + static void SetUpTestCase() { + MakeEachLetterABlock(sample_text_without_spaces, &sample_text); + MakeEachLetterABlock(search_string_without_spaces, &search_string); + MakeEachLetterABlock(search_string_altered_without_spaces, + &search_string_altered); + MakeEachLetterABlock(search_to_end_without_spaces, &search_to_end_string); + MakeEachLetterABlock(search_to_beginning_without_spaces, + &search_to_beginning_string); + MakeEachLetterABlock(sample_text_many_matches_without_spaces, + &sample_text_many_matches); + MakeEachLetterABlock(search_string_many_matches_without_spaces, + &search_string_many_matches); + MakeEachLetterABlock("y", &test_string_y); + MakeEachLetterABlock("e", &test_string_e); + char* new_test_string_unaligned_e = new char[kBlockSize]; + memset(new_test_string_unaligned_e, ' ', kBlockSize); + new_test_string_unaligned_e[kBlockSize - 2] = 'e'; + test_string_unaligned_e = new_test_string_unaligned_e; + char* new_test_string_all_Qs = new char[kBlockSize]; + 
memset(new_test_string_all_Qs, 'Q', kBlockSize); + test_string_all_Qs = new_test_string_all_Qs; + hashed_y = RollingHash<kBlockSize>::Hash(test_string_y); + hashed_e = RollingHash<kBlockSize>::Hash(test_string_e); + hashed_f = + RollingHash<kBlockSize>::Hash(&search_string[index_of_f_in_fearsome]); + hashed_unaligned_e = RollingHash<kBlockSize>::Hash(test_string_unaligned_e); + hashed_all_Qs = RollingHash<kBlockSize>::Hash(test_string_all_Qs); + } + + static void TearDownTestCase() { + delete[] sample_text; + delete[] search_string; + delete[] search_string_altered; + delete[] search_to_end_string; + delete[] search_to_beginning_string; + delete[] sample_text_many_matches; + delete[] search_string_many_matches; + delete[] test_string_y; + delete[] test_string_e; + delete[] test_string_unaligned_e; + delete[] test_string_all_Qs; + } + + // Each block in the sample text and search string is kBlockSize bytes long, + // and consists of (kBlockSize - 1) space characters + // followed by a single letter of text. + + // Block numbers of certain characters within the sample text: + // All six occurrences of "e", in order. + static const int block_of_first_e = 2; + static const int block_of_second_e = 16; + static const int block_of_third_e = 21; + static const int block_of_fourth_e = 27; + static const int block_of_fifth_e = 35; + static const int block_of_sixth_e = 42; + + static const int block_of_y_in_only = 7; + // The block number is multiplied by kBlockSize to arrive at the + // index, which points to the (kBlockSize - 1) space characters before + // the letter specified. + // Indices of certain characters within the sample text. 
+ static const int index_of_first_e = block_of_first_e * kBlockSize; + static const int index_of_fourth_e = block_of_fourth_e * kBlockSize; + static const int index_of_sixth_e = block_of_sixth_e * kBlockSize; + static const int index_of_y_in_only = block_of_y_in_only * kBlockSize; + static const int index_of_space_before_fear_is_fear = 25 * kBlockSize; + static const int index_of_longest_match_ear_is_fear = 27 * kBlockSize; + static const int index_of_i_in_fear_is_fear = 31 * kBlockSize; + static const int index_of_space_before_fear_itself = 33 * kBlockSize; + static const int index_of_space_before_itself = 38 * kBlockSize; + static const int index_of_ababc = 4 * kBlockSize; + + // Indices of certain characters within the search strings. + static const int index_of_second_w_in_what_we = 5 * kBlockSize; + static const int index_of_second_e_in_what_we_hear = 9 * kBlockSize; + static const int index_of_f_in_fearsome = 16 * kBlockSize; + static const int index_of_space_in_eat_itself = 12 * kBlockSize; + static const int index_of_i_in_itself = 13 * kBlockSize; + static const int index_of_t_in_use_the = 4 * kBlockSize; + static const int index_of_o_in_online = 8 * kBlockSize; + + static const char sample_text_without_spaces[]; + static const char search_string_without_spaces[]; + static const char search_string_altered_without_spaces[]; + static const char search_to_end_without_spaces[]; + static const char search_to_beginning_without_spaces[]; + static const char sample_text_many_matches_without_spaces[]; + static const char search_string_many_matches_without_spaces[]; + + static const char* sample_text; + static const char* search_string; + static const char* search_string_altered; + static const char* search_to_end_string; + static const char* search_to_beginning_string; + static const char* sample_text_many_matches; + static const char* search_string_many_matches; + + static const char* test_string_y; + static const char* test_string_e; + static const char* 
test_string_all_Qs; + static const char* test_string_unaligned_e; + + static uint32_t hashed_y; + static uint32_t hashed_e; + static uint32_t hashed_f; + static uint32_t hashed_unaligned_e; + static uint32_t hashed_all_Qs; + + // Boost scoped_ptr, if available, could be used instead of std::auto_ptr. + std::auto_ptr<const BlockHash> dh_; // hash table is populated at startup + std::auto_ptr<BlockHash> th_; // hash table not populated; + // used to test incremental adds + + BlockHash::Match best_match_; + char* compare_buffer_1_; + char* compare_buffer_2_; + int prime_result_; +}; + +#ifdef GTEST_HAS_DEATH_TEST +typedef BlockHashTest BlockHashDeathTest; +#endif // GTEST_HAS_DEATH_TEST + +// The C++ standard requires a separate definition of these static const values, +// even though their initializers are given within the class definition. +const int BlockHashTest::block_of_first_e; +const int BlockHashTest::block_of_second_e; +const int BlockHashTest::block_of_third_e; +const int BlockHashTest::block_of_fourth_e; +const int BlockHashTest::block_of_fifth_e; +const int BlockHashTest::block_of_sixth_e; +const int BlockHashTest::block_of_y_in_only; +const int BlockHashTest::index_of_first_e; +const int BlockHashTest::index_of_fourth_e; +const int BlockHashTest::index_of_sixth_e; +const int BlockHashTest::index_of_y_in_only; +const int BlockHashTest::index_of_space_before_fear_is_fear; +const int BlockHashTest::index_of_longest_match_ear_is_fear; +const int BlockHashTest::index_of_i_in_fear_is_fear; +const int BlockHashTest::index_of_space_before_fear_itself; +const int BlockHashTest::index_of_space_before_itself; +const int BlockHashTest::index_of_ababc; +const int BlockHashTest::index_of_second_w_in_what_we; +const int BlockHashTest::index_of_second_e_in_what_we_hear; +const int BlockHashTest::index_of_f_in_fearsome; +const int BlockHashTest::index_of_space_in_eat_itself; +const int BlockHashTest::index_of_i_in_itself; +const int BlockHashTest::index_of_t_in_use_the; 
+const int BlockHashTest::index_of_o_in_online; + +const char BlockHashTest::sample_text_without_spaces[] = + "The only thing we have to fear is fear itself"; + +const char BlockHashTest::search_string_without_spaces[] = + "What we hear is fearsome"; + +const char BlockHashTest::search_string_altered_without_spaces[] = + "Vhat ve hear is fearsomm"; + +const char BlockHashTest::search_to_end_without_spaces[] = + "Pop will eat itself, eventually"; + +const char BlockHashTest::search_to_beginning_without_spaces[] = + "Use The online dictionary"; + +const char BlockHashTest::sample_text_many_matches_without_spaces[] = + "ababababcab"; + +const char BlockHashTest::search_string_many_matches_without_spaces[] = + "ababc"; + +const char* BlockHashTest::sample_text = NULL; +const char* BlockHashTest::search_string = NULL; +const char* BlockHashTest::search_string_altered = NULL; +const char* BlockHashTest::search_to_end_string = NULL; +const char* BlockHashTest::search_to_beginning_string = NULL; +const char* BlockHashTest::sample_text_many_matches = NULL; +const char* BlockHashTest::search_string_many_matches = NULL; + +const char* BlockHashTest::test_string_y = NULL; +const char* BlockHashTest::test_string_e = NULL; +const char* BlockHashTest::test_string_unaligned_e = NULL; +const char* BlockHashTest::test_string_all_Qs = NULL; + +uint32_t BlockHashTest::hashed_y = 0; +uint32_t BlockHashTest::hashed_e = 0; +uint32_t BlockHashTest::hashed_f = 0; +uint32_t BlockHashTest::hashed_unaligned_e = 0; +uint32_t BlockHashTest::hashed_all_Qs = 0; + +void BlockHashTest::TestAndPrintTimesForCompareFunctions( + bool should_be_identical) { + CHECK(compare_buffer_1_ != NULL); + CHECK(compare_buffer_2_ != NULL); + // Prime the memory cache. 
+ prime_result_ = + memcmp(compare_buffer_1_, compare_buffer_2_, kTimingTestSize); + const char* const block1_limit = + &compare_buffer_1_[kTimingTestSize - kBlockSize]; + int block_compare_words_result = 0; + CycleTimer block_compare_words_timer; + block_compare_words_timer.Start(); + for (int i = 0; i < kTimingTestIterations; ++i) { + const char* block1 = compare_buffer_1_; + const char* block2 = compare_buffer_2_; + while (block1 < block1_limit) { + if (!BlockHash::BlockCompareWords(block1, block2)) { + ++block_compare_words_result; + } + block1 += kBlockSize; + block2 += kBlockSize; + } + } + block_compare_words_timer.Stop(); + double time_for_block_compare_words = + static_cast<double>(block_compare_words_timer.GetInUsec()) + / ((kTimingTestSize / kBlockSize) * kTimingTestIterations); + int block_contents_match_result = 0; + CycleTimer block_contents_match_timer; + block_contents_match_timer.Start(); + for (int i = 0; i < kTimingTestIterations; ++i) { + const char* block1 = compare_buffer_1_; + const char* block2 = compare_buffer_2_; + while (block1 < block1_limit) { + if (!BlockHash::BlockContentsMatch(block1, block2)) { + ++block_contents_match_result; + } + block1 += kBlockSize; + block2 += kBlockSize; + } + } + block_contents_match_timer.Stop(); + double time_for_block_contents_match = + static_cast<double>(block_contents_match_timer.GetInUsec()) + / ((kTimingTestSize / kBlockSize) * kTimingTestIterations); + EXPECT_EQ(block_contents_match_result, block_compare_words_result); + if (should_be_identical) { + CHECK_EQ(0, block_compare_words_result); + } else { + CHECK_GT(block_compare_words_result, 0); + } + LOG(INFO) << "BlockHash::BlockCompareWords: " + << time_for_block_compare_words << " us per operation" << LOG_ENDL; + LOG(INFO) << "BlockHash::BlockContentsMatch: " + << time_for_block_contents_match << " us per operation" << LOG_ENDL; + if (time_for_block_compare_words > 0) { + double percent_change = + (((time_for_block_contents_match - 
time_for_block_compare_words) + / time_for_block_compare_words) * 100.0); + if (percent_change >= 0.0) { + LOG(INFO) << "BlockContentsMatch is " << percent_change << "%" + << " SLOWER than BlockCompareWords" << LOG_ENDL; + } else { + LOG(INFO) << "BlockContentsMatch is " << (-percent_change) << "%" + << " FASTER than BlockCompareWords" << LOG_ENDL; + } + } +#ifdef NDEBUG + // Only check timings for optimized build. There's plenty of margin: this + // check will fail only if BlockContentsMatch is at least twice as slow as + // BlockCompareWords. + EXPECT_GT(time_for_block_compare_words * 2.0, time_for_block_contents_match); +#endif // NDEBUG +} + +// The two strings passed to BlockHash::MatchingBytesToLeft do have matching +// characters -- in fact, they're the same string -- but since max_bytes is zero +// or negative, BlockHash::MatchingBytesToLeft should not read from the strings +// and should return 0. +TEST_F(BlockHashTest, MaxBytesZeroDoesNothing) { + EXPECT_EQ(0, MatchingBytesToLeft( + &search_string[index_of_f_in_fearsome], + &search_string[index_of_f_in_fearsome], + 0)); + EXPECT_EQ(0, MatchingBytesToRight( + &search_string[index_of_f_in_fearsome], + &search_string[index_of_f_in_fearsome], + 0)); +} + +TEST_F(BlockHashTest, MaxBytesNegativeDoesNothing) { + EXPECT_EQ(0, MatchingBytesToLeft( + &search_string[index_of_f_in_fearsome], + &search_string[index_of_f_in_fearsome], + -1)); + EXPECT_EQ(0, MatchingBytesToLeft( + &search_string[index_of_f_in_fearsome], + &search_string[index_of_f_in_fearsome], + INT_MIN)); + EXPECT_EQ(0, MatchingBytesToRight( + &search_string[index_of_f_in_fearsome], + &search_string[index_of_f_in_fearsome], + -1)); + EXPECT_EQ(0, MatchingBytesToRight( + &search_string[index_of_f_in_fearsome], + &search_string[index_of_f_in_fearsome], + INT_MIN)); +} + +TEST_F(BlockHashTest, MaxBytesOneMatch) { + EXPECT_EQ(1, MatchingBytesToLeft( + &search_string[index_of_f_in_fearsome], + &search_string[index_of_f_in_fearsome], + 1)); + EXPECT_EQ(1, 
MatchingBytesToRight( + &search_string[index_of_f_in_fearsome], + &search_string[index_of_f_in_fearsome], + 1)); +} + +TEST_F(BlockHashTest, MaxBytesOneNoMatch) { + EXPECT_EQ(0, MatchingBytesToLeft( + &search_string[index_of_f_in_fearsome], + &search_string[index_of_second_e_in_what_we_hear], + 1)); + EXPECT_EQ(0, MatchingBytesToRight( + &search_string[index_of_f_in_fearsome], + &search_string[index_of_second_e_in_what_we_hear - 1], + 1)); +} + +TEST_F(BlockHashTest, LeftLimitedByMaxBytes) { + // The number of bytes that match between the original "we hear is fearsome" + // and the altered "ve hear is fearsome". + const int expected_length = kBlockSize * StringLengthAsInt("e hear is "); + const int max_bytes = expected_length - 1; + EXPECT_EQ(max_bytes, MatchingBytesToLeft( + &search_string[index_of_f_in_fearsome], + &search_string_altered[index_of_f_in_fearsome], + max_bytes)); +} + +TEST_F(BlockHashTest, LeftNotLimited) { + // The number of bytes that match between the original "we hear is fearsome" + // and the altered "ve hear is fearsome". + const int expected_length = kBlockSize * StringLengthAsInt("e hear is "); + const int max_bytes = expected_length + 1; + EXPECT_EQ(expected_length, MatchingBytesToLeft( + &search_string[index_of_f_in_fearsome], + &search_string_altered[index_of_f_in_fearsome], + max_bytes)); + EXPECT_EQ(expected_length, MatchingBytesToLeft( + &search_string[index_of_f_in_fearsome], + &search_string_altered[index_of_f_in_fearsome], + INT_MAX)); +} + +TEST_F(BlockHashTest, RightLimitedByMaxBytes) { + // The number of bytes that match between the original "fearsome" + // and the altered "fearsomm". 
+ const int expected_length = (kBlockSize * StringLengthAsInt("fearsom")) + + (kBlockSize - 1); // spacing between letters + const int max_bytes = expected_length - 1; + EXPECT_EQ(max_bytes, MatchingBytesToRight( + &search_string[index_of_f_in_fearsome], + &search_string_altered[index_of_f_in_fearsome], + max_bytes)); +} + +TEST_F(BlockHashTest, RightNotLimited) { + // The number of bytes that match between the original "we hear is fearsome" + // and the altered "ve hear is fearsome". + const int expected_length = (kBlockSize * StringLengthAsInt("fearsom")) + + (kBlockSize - 1); // spacing between letters + const int max_bytes = expected_length + 1; + EXPECT_EQ(expected_length, MatchingBytesToRight( + &search_string[index_of_f_in_fearsome], + &search_string_altered[index_of_f_in_fearsome], + max_bytes)); + EXPECT_EQ(expected_length, MatchingBytesToRight( + &search_string[index_of_f_in_fearsome], + &search_string_altered[index_of_f_in_fearsome], + INT_MAX)); +} + +// If this test fails in a non-x86 or non-gcc environment, consider adding +// -DVCDIFF_USE_BLOCK_COMPARE_WORDS to AM_CXXFLAGS in Makefile.am and +// Makefile.in, and reconstructing the Makefile. That will cause blockhash.cc +// to use a special implementation (BlockCompareWords) to compare blocks +// rather than using standard memcmp. +TEST_F(BlockHashTest, BlockContentsMatchIsAsFastAsBlockCompareWords) { + compare_buffer_1_ = new char[kTimingTestSize]; + compare_buffer_2_ = new char[kTimingTestSize]; + + // The value 0xBE is arbitrarily chosen. First test with identical contents + // in the buffers, so that the comparison functions cannot short-circuit + // and will return true. 
+ memset(compare_buffer_1_, 0xBE, kTimingTestSize); + memset(compare_buffer_2_, 0xBE, kTimingTestSize); + LOG(INFO) << "Comparing " + << (kTimingTestSize / kBlockSize) << " identical values:" + << LOG_ENDL; + TestAndPrintTimesForCompareFunctions(true); + + // Now change one value in the middle of one buffer, so that the contents + // are no longer the same. + compare_buffer_1_[kTimingTestSize / 2] = 0x00; + LOG(INFO) << "Comparing " + << ((kTimingTestSize / kBlockSize) - 1) << " identical values" + << " and one mismatch:" << LOG_ENDL; + TestAndPrintTimesForCompareFunctions(false); + + // Set one of the bytes of each block to differ so that + // none of the compare operations will return true, and run timing tests. + // In practice, BlockHash::BlockContentsMatch will only be called + // for two blocks whose hash values match, and the two most important + // cases are: (1) the blocks are identical, or (2) none of their bytes match. + TimingTestForBlocksThatDifferAtByte(0); + TimingTestForBlocksThatDifferAtByte(1); + TimingTestForBlocksThatDifferAtByte(kBlockSize / 2); + TimingTestForBlocksThatDifferAtByte(kBlockSize - 1); + + delete[] compare_buffer_1_; + delete[] compare_buffer_2_; +} + +TEST_F(BlockHashTest, FindFailsBeforeHashing) { + EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y)); +} + +TEST_F(BlockHashTest, HashOneFindOne) { + for (int i = 0; i <= index_of_y_in_only; ++i) { + th_->AddOneIndexHash(i, RollingHash<kBlockSize>::Hash(&sample_text[i])); + } + EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*th_, hashed_y, + test_string_y)); + EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_y_in_only, test_string_y)); +} + +TEST_F(BlockHashTest, HashAllFindOne) { + EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*dh_, hashed_y, + test_string_y)); + EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_y_in_only, test_string_y)); +} + +TEST_F(BlockHashTest, NonMatchingTextNotFound) { + EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_all_Qs, 
test_string_all_Qs)); +} + +// Search for unaligned text. The test string is contained in the +// sample text (unlike the non-matching string in NonMatchingTextNotFound, +// above), but it is not aligned on a block boundary. FindMatchingBlock +// will only work if the test string is aligned on a block boundary. +// +// " T h e o n l y" +// ^^^^ Here is the test string +// +TEST_F(BlockHashTest, UnalignedTextNotFound) { + EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_unaligned_e, + test_string_unaligned_e)); +} + +TEST_F(BlockHashTest, FindSixMatches) { + EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e, + test_string_e)); + EXPECT_EQ(block_of_second_e, NextMatchingBlock(*dh_, block_of_first_e, + test_string_e)); + EXPECT_EQ(block_of_third_e, NextMatchingBlock(*dh_, block_of_second_e, + test_string_e)); + EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*dh_, block_of_third_e, + test_string_e)); + EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*dh_, block_of_fourth_e, + test_string_e)); + EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*dh_, block_of_fifth_e, + test_string_e)); + EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_sixth_e, test_string_e)); + + // Starting over gives same result + EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e, + test_string_e)); +} + +TEST_F(BlockHashTest, AddRangeFindThreeMatches) { + // Add hash values only for those characters before the fourth instance + // of "e" in the sample text. Tests that the ending index + // of AddAllBlocksThroughIndex() is not inclusive: only three matches + // for "e" should be found. 
+ th_->AddAllBlocksThroughIndex(index_of_fourth_e); + EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, + test_string_e)); + EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, + test_string_e)); + EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, + test_string_e)); + EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e)); + + // Starting over gives same result + EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, + test_string_e)); +} + +// Try indices that are not even multiples of the block size. +// Add three ranges and verify the results after each +// call to AddAllBlocksThroughIndex(). +TEST_F(BlockHashTest, AddRangeWithUnalignedIndices) { + th_->AddAllBlocksThroughIndex(index_of_first_e + 1); + EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, + test_string_e)); + EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_first_e, test_string_e)); + + // Starting over gives same result + EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, + test_string_e)); + + // Add the second range to expand the result set + th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3); + EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, + test_string_e)); + EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, + test_string_e)); + EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, + test_string_e)); + EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e)); + + // Starting over gives same result + EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, + test_string_e)); + + // Add the third range to expand the result set + th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1); + + EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, + test_string_e)); + EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, + test_string_e)); + EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, 
block_of_second_e, + test_string_e)); + EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e, + test_string_e)); + EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e)); + + // Starting over gives same result + EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, + test_string_e)); +} + +#ifdef GTEST_HAS_DEATH_TEST +TEST_F(BlockHashDeathTest, AddingRangesInDescendingOrderNoEffect) { + th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1); + + EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, + test_string_e)); + EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, + test_string_e)); + EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, + test_string_e)); + EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e, + test_string_e)); + EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e)); + + // Starting over gives same result + EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, + test_string_e)); + + // These calls will produce DFATAL error messages, and will do nothing, + // since the ranges have already been added. 
+ EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3), + "<"); + EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_first_e + 1), + "<"); + + EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, + test_string_e)); + EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, + test_string_e)); + EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, + test_string_e)); + EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e, + test_string_e)); + EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e)); + + // Starting over gives same result + EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, + test_string_e)); +} +#endif // GTEST_HAS_DEATH_TEST + +TEST_F(BlockHashTest, AddEntireRangeFindSixMatches) { + th_->AddAllBlocksThroughIndex(StringLengthAsInt(sample_text)); + EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, + test_string_e)); + EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, + test_string_e)); + EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, + test_string_e)); + EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e, + test_string_e)); + EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*th_, block_of_fourth_e, + test_string_e)); + EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*th_, block_of_fifth_e, + test_string_e)); + EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_sixth_e, test_string_e)); + + // Starting over gives same result + EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, + test_string_e)); +} + +TEST_F(BlockHashTest, ZeroSizeSourceAccepted) { + BlockHash zero_sized_hash(sample_text, 0, 0); + EXPECT_EQ(true, zero_sized_hash.Init(true)); + EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y)); +} + +#ifdef GTEST_HAS_DEATH_TEST +TEST_F(BlockHashDeathTest, BadNextMatchingBlockReturnsNoMatch) { + EXPECT_DEBUG_DEATH(EXPECT_EQ(-1, NextMatchingBlock(*dh_, 
0xFFFFFFFE, " ")), + "invalid"); +} + +TEST_F(BlockHashDeathTest, CallingInitTwiceIsIllegal) { + BlockHash bh(sample_text, strlen(sample_text), 0); + EXPECT_TRUE(bh.Init(false)); + EXPECT_DEBUG_DEATH(EXPECT_FALSE(bh.Init(false)), "twice"); +} + +TEST_F(BlockHashDeathTest, CallingAddBlockBeforeInitIsIllegal) { + BlockHash bh(sample_text, strlen(sample_text), 0); + EXPECT_DEBUG_DEATH(bh.AddAllBlocksThroughIndex(index_of_first_e), + "called before"); +} + +TEST_F(BlockHashDeathTest, AddAllBlocksThroughIndexOutOfRange) { + EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(strlen(sample_text) + 1), + "higher than end"); +} +#endif // GTEST_HAS_DEATH_TEST + +TEST_F(BlockHashTest, UnknownFingerprintReturnsNoMatch) { + EXPECT_EQ(-1, FirstMatchingBlock(*dh_, 0xFAFAFAFA, "FAFA")); +} + +TEST_F(BlockHashTest, FindBestMatch) { + dh_->FindBestMatch(hashed_f, + &search_string[index_of_f_in_fearsome], + search_string, + strlen(search_string), + &best_match_); + EXPECT_EQ(index_of_longest_match_ear_is_fear, best_match_.source_offset()); + EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset()); + // The match includes the spaces after the final character, + // which is why (kBlockSize - 1) is added to the expected best size. + EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1), + best_match_.size()); +} + +TEST_F(BlockHashTest, FindBestMatchWithStartingOffset) { + BlockHash th2(sample_text, strlen(sample_text), 0x10000); + th2.Init(true); // hash all blocks + th2.FindBestMatch(hashed_f, + &search_string[index_of_f_in_fearsome], + search_string, + strlen(search_string), + &best_match_); + // Offset should begin with dictionary_size + EXPECT_EQ(0x10000 + (index_of_longest_match_ear_is_fear), + best_match_.source_offset()); + EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset()); + // The match includes the spaces after the final character, + // which is why (kBlockSize - 1) is added to the expected best size. 
+ EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1), + best_match_.size()); +} + +TEST_F(BlockHashTest, BestMatchReachesEndOfDictionary) { + // Hash the "i" in "fear itself" + uint32_t hash_value = RollingHash<kBlockSize>::Hash( + &search_to_end_string[index_of_i_in_itself]); + dh_->FindBestMatch(hash_value, + &search_to_end_string[index_of_i_in_itself], + search_to_end_string, + strlen(search_to_end_string), + &best_match_); + EXPECT_EQ(index_of_space_before_itself, best_match_.source_offset()); + EXPECT_EQ(index_of_space_in_eat_itself, best_match_.target_offset()); + EXPECT_EQ(strlen(" itself") * kBlockSize, best_match_.size()); +} + +TEST_F(BlockHashTest, BestMatchReachesStartOfDictionary) { + // Hash the "i" in "fear itself" + uint32_t hash_value = RollingHash<kBlockSize>::Hash( + &search_to_beginning_string[index_of_o_in_online]); + dh_->FindBestMatch(hash_value, + &search_to_beginning_string[index_of_o_in_online], + search_to_beginning_string, + strlen(search_to_beginning_string), + &best_match_); + EXPECT_EQ(0, best_match_.source_offset()); // beginning of dictionary + EXPECT_EQ(index_of_t_in_use_the, best_match_.target_offset()); + // The match includes the spaces after the final character, + // which is why (kBlockSize - 1) is added to the expected best size. 
+ EXPECT_EQ((strlen("The onl") * kBlockSize) + (kBlockSize - 1), + best_match_.size()); +} + +TEST_F(BlockHashTest, BestMatchWithManyMatches) { + BlockHash many_matches_hash(sample_text_many_matches, + strlen(sample_text_many_matches), + 0); + EXPECT_TRUE(many_matches_hash.Init(true)); + // Hash the " a" at the beginning of the search string "ababc" + uint32_t hash_value = + RollingHash<kBlockSize>::Hash(search_string_many_matches); + many_matches_hash.FindBestMatch(hash_value, + search_string_many_matches, + search_string_many_matches, + strlen(search_string_many_matches), + &best_match_); + EXPECT_EQ(index_of_ababc, best_match_.source_offset()); + EXPECT_EQ(0, best_match_.target_offset()); + EXPECT_EQ(strlen(search_string_many_matches), best_match_.size()); +} + +TEST_F(BlockHashTest, HashCollisionFindsNoMatch) { + char* collision_search_string = new char[strlen(search_string) + 1]; + memcpy(collision_search_string, search_string, strlen(search_string) + 1); + char* fearsome_location = &collision_search_string[index_of_f_in_fearsome]; + + // Tweak the collision string so that it has the same hash value + // but different text. The last four characters of the search string + // should be " f", and the bytes given below have the same hash value + // as those characters. + CHECK_GE(kBlockSize, 4); + fearsome_location[kBlockSize - 4] = 0x84; + fearsome_location[kBlockSize - 3] = 0xF1; + fearsome_location[kBlockSize - 2] = 0x51; + fearsome_location[kBlockSize - 1] = 0x00; + EXPECT_EQ(hashed_f, RollingHash<kBlockSize>::Hash(fearsome_location)); + EXPECT_NE(0, memcmp(&search_string[index_of_f_in_fearsome], + fearsome_location, + kBlockSize)); + // No match should be found this time. 
+ dh_->FindBestMatch(hashed_f, + fearsome_location, + collision_search_string, + strlen(search_string), // since collision_search_string has embedded \0 + &best_match_); + EXPECT_EQ(-1, best_match_.source_offset()); + EXPECT_EQ(-1, best_match_.target_offset()); + EXPECT_EQ(0U, best_match_.size()); + delete[] collision_search_string; +} + +// If the footprint passed to FindBestMatch does not actually match +// the search string, it should not find any matches. +TEST_F(BlockHashTest, WrongFootprintFindsNoMatch) { + dh_->FindBestMatch(hashed_e, // Using hashed value of "e" instead of "f"! + &search_string[index_of_f_in_fearsome], + search_string, + strlen(search_string), + &best_match_); + EXPECT_EQ(-1, best_match_.source_offset()); + EXPECT_EQ(-1, best_match_.target_offset()); + EXPECT_EQ(0U, best_match_.size()); +} + +// Use a dictionary containing 1M copies of the letter 'Q', +// and target data that also contains 1M Qs. If FindBestMatch +// is not throttled to find a maximum number of matches, this +// will take a very long time -- several seconds at least. +// If this test appears to hang, it is because the throttling code +// (see BlockHash::kMaxMatchesToCheck for details) is not working. 
+TEST_F(BlockHashTest, SearchStringFindsTooManyMatches) { + const int kTestSize = 1 << 20; // 1M + char* huge_dictionary = new char[kTestSize]; + memset(huge_dictionary, 'Q', kTestSize); + BlockHash huge_bh(huge_dictionary, kTestSize, 0); + EXPECT_TRUE(huge_bh.Init(/* populate_hash_table = */ true)); + char* huge_target = new char[kTestSize]; + memset(huge_target, 'Q', kTestSize); + CycleTimer timer; + timer.Start(); + huge_bh.FindBestMatch(hashed_all_Qs, + huge_target + (kTestSize / 2), // middle of target + huge_target, + kTestSize, + &best_match_); + timer.Stop(); + double elapsed_time_in_us = static_cast<double>(timer.GetInUsec()); + LOG(INFO) << "Time to search for best match with 1M matches: " + << elapsed_time_in_us << " us" << LOG_ENDL; + // All blocks match the candidate block. FindBestMatch should have checked + // a certain number of matches before giving up. The best match + // should include at least half the source and target, since the candidate + // block was in the middle of the target data. + EXPECT_GT((kTestSize / 2), best_match_.source_offset()); + EXPECT_GT((kTestSize / 2), best_match_.target_offset()); + EXPECT_LT(static_cast<size_t>(kTestSize / 2), best_match_.size()); + EXPECT_GT(1000000, elapsed_time_in_us); // < 1 second + delete[] huge_target; + delete[] huge_dictionary; +} + +#ifdef GTEST_HAS_DEATH_TEST +TEST_F(BlockHashDeathTest, AddTooManyBlocks) { + for (int i = 0; i < StringLengthAsInt(sample_text_without_spaces); ++i) { + th_->AddOneIndexHash(i * kBlockSize, hashed_e); + } + // Didn't expect another block to be added + EXPECT_DEBUG_DEATH(th_->AddOneIndexHash(StringLengthAsInt(sample_text), + hashed_e), + "AddBlock"); +} +#endif // GTEST_HAS_DEATH_TEST + +} // namespace open_vcdiff diff --git a/src/checksum.h b/src/checksum.h new file mode 100644 index 0000000..0d315a1 --- /dev/null +++ b/src/checksum.h @@ -0,0 +1,48 @@ +// Copyright 2008 Google Inc. 
+// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// A wrapper for the adler32() function from zlib. This can be replaced +// with another checksum implementation if desired. + +#ifndef OPEN_VCDIFF_CHECKSUM_H_ +#define OPEN_VCDIFF_CHECKSUM_H_ + +#include <config.h> +#include "zlib.h" + +namespace open_vcdiff { + +typedef uLong VCDChecksum; + +const VCDChecksum kNoPartialChecksum = 0; + +inline VCDChecksum ComputeAdler32(const char* buffer, + size_t size) { + return adler32(kNoPartialChecksum, + reinterpret_cast<const Bytef*>(buffer), + static_cast<uInt>(size)); +} + +inline VCDChecksum UpdateAdler32(VCDChecksum partial_checksum, + const char* buffer, + size_t size) { + return adler32(partial_checksum, + reinterpret_cast<const Bytef*>(buffer), + static_cast<uInt>(size)); +} + +} // namespace open_vcdiff + +#endif // OPEN_VCDIFF_CHECKSUM_H_ diff --git a/src/codetable.cc b/src/codetable.cc new file mode 100644 index 0000000..117ed7e --- /dev/null +++ b/src/codetable.cc @@ -0,0 +1,279 @@ +// Copyright 2008 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// codetable.cc: +// Classes to implement the Code Table +// described in sections 5.5, 5.6 and 7 of +// RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format. +// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html + +#include <config.h> +#include "addrcache.h" +#include "codetable.h" +#include "logging.h" +#include "vcdiff_defs.h" // VCD_MAX_MODES + +namespace open_vcdiff { + +const char* VCDiffInstructionName(VCDiffInstructionType inst) { + switch (inst) { + case VCD_NOOP: + return "NOOP"; + case VCD_ADD: + return "ADD"; + case VCD_RUN: + return "RUN"; + case VCD_COPY: + return "COPY"; + default: + LOG(ERROR) << "Unexpected instruction type " << inst << LOG_ENDL; + return ""; + } +} + +// This is the default code table defined in the RFC, section 5.6. +// Using a static struct means that the compiler will do the work of +// laying out the values in memory rather than having to use loops to do so +// at runtime. The letters "N", "A", "R", and "C" are defined as VCD_NOOP, +// VCD_ADD, VCD_RUN, and VCD_COPY respectively (see the definition of +// struct VCDiffCodeTableData), which allows for a compact +// representation of the code table data. 
//
// The initializer is positional: the six brace-enclosed lists below fill the
// struct's arrays in the order inst1, inst2, size1, size2, mode1, mode2, and
// each list has one entry per opcode (0-255).  Reading the columns together:
//   opcode 0         RUN, size 0
//   opcodes 1-18     ADD, sizes 0-17
//   opcodes 19-162   COPY, modes 0-8, sizes 0 and 4-18 for each mode
//   opcodes 163-246  ADD (sizes 1-4) followed by COPY (sizes 4-6; size 4 only
//                    for modes 6-8)
//   opcodes 247-255  COPY (size 4, modes 0-8) followed by ADD (size 1)
//
const VCDiffCodeTableData VCDiffCodeTableData::kDefaultCodeTableData =
    // inst1
    { { R,  // opcode 0
        A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,  // opcodes 1-18
        C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C,  // opcodes 19-34
        C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C,  // opcodes 35-50
        C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C,  // opcodes 51-66
        C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C,  // opcodes 67-82
        C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C,  // opcodes 83-98
        C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C,  // opcodes 99-114
        C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C,  // opcodes 115-130
        C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C,  // opcodes 131-146
        C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C,  // opcodes 147-162
        A, A, A, A, A, A, A, A, A, A, A, A,  // opcodes 163-174
        A, A, A, A, A, A, A, A, A, A, A, A,  // opcodes 175-186
        A, A, A, A, A, A, A, A, A, A, A, A,  // opcodes 187-198
        A, A, A, A, A, A, A, A, A, A, A, A,  // opcodes 199-210
        A, A, A, A, A, A, A, A, A, A, A, A,  // opcodes 211-222
        A, A, A, A, A, A, A, A, A, A, A, A,  // opcodes 223-234
        A, A, A, A,  // opcodes 235-238
        A, A, A, A,  // opcodes 239-242
        A, A, A, A,  // opcodes 243-246
        C, C, C, C, C, C, C, C, C },  // opcodes 247-255
    // inst2
      { N,  // opcode 0
        N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,  // opcodes 1-18
        N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,  // opcodes 19-34
        N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,  // opcodes 35-50
        N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,  // opcodes 51-66
        N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,  // opcodes 67-82
        N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,  // opcodes 83-98
        N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,  // opcodes 99-114
        N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,  // opcodes 115-130
        N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,  // opcodes 131-146
        N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,  // opcodes 147-162
        C, C, C, C, C, C, C, C, C, C, C, C,  // opcodes 163-174
        C, C, C, C, C, C, C, C, C, C, C, C,  // opcodes 175-186
        C, C, C, C, C, C, C, C, C, C, C, C,  // opcodes 187-198
        C, C, C, C, C, C, C, C, C, C, C, C,  // opcodes 199-210
        C, C, C, C, C, C, C, C, C, C, C, C,  // opcodes 211-222
        C, C, C, C, C, C, C, C, C, C, C, C,  // opcodes 223-234
        C, C, C, C,  // opcodes 235-238
        C, C, C, C,  // opcodes 239-242
        C, C, C, C,  // opcodes 243-246
        A, A, A, A, A, A, A, A, A },  // opcodes 247-255
    // size1
      { 0,  // opcode 0
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,  // 1-18
        0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,  // 19-34
        0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,  // 35-50
        0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,  // 51-66
        0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,  // 67-82
        0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,  // 83-98
        0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,  // 99-114
        0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,  // 115-130
        0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,  // 131-146
        0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,  // 147-162
        1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4,  // opcodes 163-174
        1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4,  // opcodes 175-186
        1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4,  // opcodes 187-198
        1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4,  // opcodes 199-210
        1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4,  // opcodes 211-222
        1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4,  // opcodes 223-234
        1, 2, 3, 4,  // opcodes 235-238
        1, 2, 3, 4,  // opcodes 239-242
        1, 2, 3, 4,  // opcodes 243-246
        4, 4, 4, 4, 4, 4, 4, 4, 4 },  // opcodes 247-255
    // size2
      { 0,  // opcode 0
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 1-18
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 19-34
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 35-50
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 51-66
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 67-82
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 83-98
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 99-114
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 115-130
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 131-146
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 147-162
        4, 5, 6, 4, 5, 6, 4, 5, 6, 4, 5, 6,  // opcodes 163-174
        4, 5, 6, 4, 5, 6, 4, 5, 6, 4, 5, 6,  // opcodes 175-186
        4, 5, 6, 4, 5, 6, 4, 5, 6, 4, 5, 6,  // opcodes 187-198
        4, 5, 6, 4, 5, 6, 4, 5, 6, 4, 5, 6,  // opcodes 199-210
        4, 5, 6, 4, 5, 6, 4, 5, 6, 4, 5, 6,  // opcodes 211-222
        4, 5, 6, 4, 5, 6, 4, 5, 6, 4, 5, 6,  // opcodes 223-234
        4, 4, 4, 4,  // opcodes 235-238
        4, 4, 4, 4,  // opcodes 239-242
        4, 4, 4, 4,  // opcodes 243-246
        1, 1, 1, 1, 1, 1, 1, 1, 1 },  // opcodes 247-255
    // mode1
      { 0,  // opcode 0
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 1-18
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 19-34
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // opcodes 35-50
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // opcodes 51-66
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,  // opcodes 67-82
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // opcodes 83-98
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,  // opcodes 99-114
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,  // opcodes 115-130
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,  // opcodes 131-146
        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,  // opcodes 147-162
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 163-174
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 175-186
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 187-198
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 199-210
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 211-222
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 223-234
        0, 0, 0, 0,  // opcodes 235-238
        0, 0, 0, 0,  // opcodes 239-242
        0, 0, 0, 0,  // opcodes 243-246
        0, 1, 2, 3, 4, 5, 6, 7, 8 },  // opcodes 247-255
    // mode2
      { 0,  // opcode 0
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 1-18
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 19-34
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 35-50
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 51-66
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 67-82
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 83-98
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 99-114
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 115-130
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 131-146
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 147-162
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // opcodes 163-174
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // opcodes 175-186
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // opcodes 187-198
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,  // opcodes 199-210
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // opcodes 211-222
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,  // opcodes 223-234
        6, 6, 6, 6,  // opcodes 235-238
        7, 7, 7, 7,  // opcodes 239-242
        8, 8, 8, 8,  // opcodes 243-246
        0, 0, 0, 0, 0, 0, 0, 0, 0 } };  // opcodes 247-255

// Checks one of the two instructions stored for a single opcode.
//   opcode:           index of the code table entry; used only in log messages
//   inst, size, mode: the values stored for the instruction being checked
//   max_mode:         largest mode value that is accepted
//   first_or_second:  text inserted into the log messages to say which of the
//                     opcode's two instructions is being checked
// Every rule is evaluated even after an earlier one has failed, and each
// violation is reported through LOG(ERROR).
// Returns true if no rule was violated, false otherwise.
bool VCDiffCodeTableData::ValidateOpcode(int opcode,
                                         unsigned char inst,
                                         unsigned char size,
                                         unsigned char mode,
                                         unsigned char max_mode,
                                         const char* first_or_second) {
  bool no_errors_found = true;
  // Check upper limits of inst and mode.  inst, size, and mode are
  // unsigned, so there is no lower limit on them.
  if (inst > VCD_LAST_INSTRUCTION_TYPE) {
    LOG(ERROR) << "VCDiff: Bad code table; opcode " << opcode << " has invalid "
               << first_or_second << " instruction type "
               << static_cast<int>(inst) << LOG_ENDL;
    no_errors_found = false;
  }
  if (mode > max_mode) {
    LOG(ERROR) << "VCDiff: Bad code table; opcode " << opcode << " has invalid "
               << first_or_second << " mode "
               << static_cast<int>(mode) << LOG_ENDL;
    no_errors_found = false;
  }
  // A NOOP instruction must have size 0
  // (and mode 0, which is included in the next rule)
  if ((inst == VCD_NOOP) && (size != 0)) {
    LOG(ERROR) << "VCDiff: Bad code table; opcode " << opcode << " has "
               << first_or_second << " instruction NOOP with nonzero size "
               << static_cast<int>(size) << LOG_ENDL;
    no_errors_found = false;
  }
  // A nonzero mode can only be used with a COPY instruction
  if ((inst != VCD_COPY) && (mode != 0)) {
    LOG(ERROR) << "VCDiff: Bad code table; opcode " << opcode
               << " has non-COPY "
               << first_or_second << " instruction with nonzero mode "
               << static_cast<int>(mode) << LOG_ENDL;
    no_errors_found = false;
  }
  return no_errors_found;
}

// If an error is found while validating, continue to validate the rest
// of the code table so that all validation errors will appear in
// the error log.  Otherwise the user would have to fix a single error
// and then rerun validation to find the next error.
+// +bool VCDiffCodeTableData::Validate(unsigned char max_mode) const { + const int kNumberOfTypesAndModes = VCD_LAST_INSTRUCTION_TYPE + max_mode + 1; + bool hasOpcodeForTypeAndMode[VCD_LAST_INSTRUCTION_TYPE + VCD_MAX_MODES]; + bool no_errors_found = true; + for (int i = 0; i < kNumberOfTypesAndModes; ++i) { + hasOpcodeForTypeAndMode[i] = false; + } + for (int i = 0; i < kCodeTableSize; ++i) { + no_errors_found = + ValidateOpcode(i, inst1[i], size1[i], mode1[i], max_mode, "first") + && no_errors_found; // use as 2nd operand to avoid short-circuit + no_errors_found = + ValidateOpcode(i, inst2[i], size2[i], mode2[i], max_mode, "second") + && no_errors_found; + // A valid code table must have an opcode to encode every possible + // combination of inst and mode with size=0 as its first instruction, + // and NOOP as its second instruction. If this condition fails, + // then there exists a set of input instructions that cannot be encoded. + if ((size1[i] == 0) && + (inst2[i] == VCD_NOOP) && + ((static_cast<int>(inst1[i]) + static_cast<int>(mode1[i])) + < kNumberOfTypesAndModes)) { + hasOpcodeForTypeAndMode[inst1[i] + mode1[i]] = true; + } + } + for (int i = 0; i < kNumberOfTypesAndModes; ++i) { + if (i == VCD_NOOP) continue; + if (!hasOpcodeForTypeAndMode[i]) { + if (i >= VCD_COPY) { + LOG(ERROR) << "VCDiff: Bad code table; there is no opcode for inst " + "COPY, size 0, mode " << (i - VCD_COPY) << LOG_ENDL; + } else { + LOG(ERROR) << "VCDiff: Bad code table; there is no opcode for inst " + << VCDiffInstructionName(static_cast<VCDiffInstructionType>(i)) + << ", size 0, mode 0" << LOG_ENDL; + } + no_errors_found = false; + } + } + return no_errors_found; +} + +bool VCDiffCodeTableData::Validate() const { + return Validate(VCDiffAddressCache::DefaultLastMode()); +} + +} // namespace open_vcdiff diff --git a/src/codetable.h b/src/codetable.h new file mode 100644 index 0000000..5beaa52 --- /dev/null +++ b/src/codetable.h @@ -0,0 +1,127 @@ +// Copyright 2008 Google Inc. 
// Author: Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Classes to implement the Code Table
// described in sections 5.5, 5.6 and 7 of
// RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html

#ifndef OPEN_VCDIFF_CODETABLE_H_
#define OPEN_VCDIFF_CODETABLE_H_

#include <config.h>
#include <stdint.h>             // uint16_t

namespace open_vcdiff {

// The instruction types from section 5.5 (mistakenly labeled 5.4) of the RFC.
//
enum VCDiffInstructionType {
  VCD_NOOP = 0,
  VCD_ADD  = 1,
  VCD_RUN  = 2,
  VCD_COPY = 3,
  VCD_LAST_INSTRUCTION_TYPE = VCD_COPY,
  // The following values are not true instruction types, but rather
  // special condition values for functions that return VCDiffInstructionType.
  VCD_INSTRUCTION_ERROR = 4,
  VCD_INSTRUCTION_END_OF_DATA = 5
};

// Returns "NOOP", "ADD", "RUN" or "COPY" for the four real instruction types.
// Any other value is logged as an error and yields an empty string.
const char* VCDiffInstructionName(VCDiffInstructionType inst);

// OpcodeOrNone: An opcode is a value between 0-255.  There is not room
// in a single byte to express all these values plus a "no opcode found"
// value.  So use a 16-bit integer to hold either an opcode or kNoOpcode.
//
typedef uint16_t OpcodeOrNone;
const OpcodeOrNone kNoOpcode = 0x100;  // outside the opcode range 0x00 - 0xFF

// struct VCDiffCodeTableData:
//
// A representation of the VCDiff code table as six 256-byte arrays
// as described in Section 7 of RFC 3284.  Each instruction code
// can represent up to two delta instructions, which is why inst,
// size, and mode each appear twice.  Each of the two delta instructions
// has the following three attributes:
// * inst (NOOP, ADD, RUN, or COPY)
// * size (0-255 bytes) of the data to be copied; if this value is zero, then
//   the size will be encoded separately from the instruction code, as a Varint
// * mode (SELF, HERE, NEAR(n), or SAME(n)), only used for COPY instructions
//
// Every valid code table should contain AT LEAST the following instructions:
//     inst1=ADD  size1=0 mode1=X inst2=NOOP size2=X mode2=X
//     inst1=RUN  size1=0 mode1=X inst2=NOOP size2=X mode2=X
//     inst1=COPY size1=0 mode1=N inst2=NOOP size2=X mode2=X (for all N)
// ... where X represents a "don't care" value which will not be read,
// and N stands for every possible COPY mode between 0 and max_mode
// (same_cache_size + near_cache_size + 1) inclusive.
// Without these instructions, it will be impossible to guarantee that
// all ADD, RUN, and COPY encoding requests can be fulfilled.
//
// Note that although the "X" fields are not read when decoding, Validate()
// is stricter: it rejects a NOOP instruction with a nonzero size and any
// non-COPY instruction with a nonzero mode.
//
struct VCDiffCodeTableData {
  // Number of opcodes, and therefore the length of each array below.
  static const int kCodeTableSize = 256;

  // The default code table (RFC 3284 section 5.6); defined in codetable.cc.
  static const VCDiffCodeTableData kDefaultCodeTableData;

  // Validates that the data contained in the VCDiffCodeTableData structure
  // does not violate certain assumptions.  Returns true if none of these
  // assumptions are violated, or false if an unexpected value is found.
  // This function should be called on any non-default code table that is
  // received as part of an encoded transmission.
  // max_mode is the maximum value for the mode of a COPY instruction;
  // this is equal to same_cache_size + near_cache_size + 1.
  //
  bool Validate(unsigned char max_mode) const;

  // This version of Validate() assumes that the default address cache sizes
  // are being used, and calculates max_mode based on that assumption.
  bool Validate() const;

  // The names of these elements are taken from RFC 3284 section 5.4
  // (Instruction Codes), which contains the following specification:
  //
  // Each instruction code entry contains six fields, each of which is a single
  // byte with an unsigned value:
  // +-----------------------------------------------+
  // | inst1 | size1 | mode1 | inst2 | size2 | mode2 |
  // +-----------------------------------------------+
  //
  // The declaration order below (inst1, inst2, size1, size2, mode1, mode2)
  // differs from the RFC's field order shown above.  It must not be changed:
  // kDefaultCodeTableData is filled by a positional brace initializer that
  // lists the arrays in exactly this order.
  //
  unsigned char inst1[kCodeTableSize];  // from enum VCDiffInstructionType
  unsigned char inst2[kCodeTableSize];  // from enum VCDiffInstructionType
  unsigned char size1[kCodeTableSize];
  unsigned char size2[kCodeTableSize];
  unsigned char mode1[kCodeTableSize];  // from enum VCDiffModes
  unsigned char mode2[kCodeTableSize];  // from enum VCDiffModes

 private:
  // Single-letter abbreviations that make it easier to read
  // the default code table data.
  static const VCDiffInstructionType N = VCD_NOOP;
  static const VCDiffInstructionType A = VCD_ADD;
  static const VCDiffInstructionType R = VCD_RUN;
  static const VCDiffInstructionType C = VCD_COPY;

  // Checks a single (inst, size, mode) triple belonging to |opcode| and logs
  // each rule it violates; first_or_second only labels the log messages.
  // Returns true if the triple is acceptable.
  static bool ValidateOpcode(int opcode,
                             unsigned char inst,
                             unsigned char size,
                             unsigned char mode,
                             unsigned char max_mode,
                             const char* first_or_second);
};

}  // namespace open_vcdiff

#endif  // OPEN_VCDIFF_CODETABLE_H_

// ---------------------------------------------------------------------------
// diff --git a/src/codetable_test.cc b/src/codetable_test.cc
// new file mode 100644
// index 0000000..8e716fb
// --- /dev/null
// +++ b/src/codetable_test.cc
// @@ -0,0 +1,254 @@
// ---------------------------------------------------------------------------
// Copyright 2008 Google Inc.
// Author: Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Unit tests for struct VCDiffCodeTableData, found in codetable.h.

#include <config.h>
#include "codetable.h"
#include "addrcache.h"
#include "testing.h"

namespace open_vcdiff {
namespace {

// Fixture: every test starts from a private copy of the default code table
// (code_table_data_), alters one field, and checks how Validate() reacts.
class CodeTableTest : public testing::Test {
 protected:
  CodeTableTest()
  : code_table_data_(VCDiffCodeTableData::kDefaultCodeTableData) { }

  virtual ~CodeTableTest() { }

  virtual void SetUp() {
    // Default code table must pass
    EXPECT_TRUE(ValidateCodeTable());
  }

  // Stores one entry of the exercise code table.  A NOOP instruction is
  // always stored with size 0, whatever size was requested, because
  // a NOOP with a nonzero size would not validate.
  static void AddExerciseOpcode(unsigned char inst1,
                                unsigned char mode1,
                                unsigned char size1,
                                unsigned char inst2,
                                unsigned char mode2,
                                unsigned char size2,
                                int opcode) {
    g_exercise_code_table_->inst1[opcode] = inst1;
    g_exercise_code_table_->mode1[opcode] = mode1;
    g_exercise_code_table_->size1[opcode] = (inst1 == VCD_NOOP) ? 0 : size1;
    g_exercise_code_table_->inst2[opcode] = inst2;
    g_exercise_code_table_->mode2[opcode] = mode2;
    g_exercise_code_table_->size2[opcode] = (inst2 == VCD_NOOP) ? 0 : size2;
  }

  // Builds the exercise code table once for the whole test case.
  // inst_mode1 / inst_mode2 enumerate NOOP, ADD, RUN and then COPY with
  // modes 0 through kLastExerciseMode; values above VCD_COPY are split into
  // (VCD_COPY, mode).  Four opcodes are emitted per pair, one for each
  // combination of sizes 0 and 255.
  static void SetUpTestCase() {
    g_exercise_code_table_ = new VCDiffCodeTableData;
    int opcode = 0;
    for (unsigned char inst_mode1 = 0;
         inst_mode1 <= VCD_LAST_INSTRUCTION_TYPE + kLastExerciseMode;
         ++inst_mode1) {
      unsigned char inst1 = inst_mode1;
      unsigned char mode1 = 0;
      if (inst_mode1 > VCD_COPY) {
        inst1 = VCD_COPY;
        mode1 = inst_mode1 - VCD_COPY;
      }
      for (unsigned char inst_mode2 = 0;
           inst_mode2 <= VCD_LAST_INSTRUCTION_TYPE + kLastExerciseMode;
           ++inst_mode2) {
        unsigned char inst2 = inst_mode2;
        unsigned char mode2 = 0;
        if (inst_mode2 > VCD_COPY) {
          inst2 = VCD_COPY;
          mode2 = inst_mode2 - VCD_COPY;
        }
        AddExerciseOpcode(inst1, mode1, 0, inst2, mode2, 0, opcode++);
        AddExerciseOpcode(inst1, mode1, 0, inst2, mode2, 255, opcode++);
        AddExerciseOpcode(inst1, mode1, 255, inst2, mode2, 0, opcode++);
        AddExerciseOpcode(inst1, mode1, 255, inst2, mode2, 255, opcode++);
      }
    }
    // This is a CHECK rather than an EXPECT because it validates only
    // the logic of the test, not of the code being tested.
    CHECK_EQ(VCDiffCodeTableData::kCodeTableSize, opcode);

    EXPECT_TRUE(VCDiffCodeTableData::kDefaultCodeTableData.Validate());
    EXPECT_TRUE(g_exercise_code_table_->Validate(kLastExerciseMode));
  }

  static void TearDownTestCase() {
    delete g_exercise_code_table_;
  }

  // Confirms that |opcode| in the table under test currently encodes the
  // single instruction (inst, size, mode), i.e. that its second instruction
  // is a NOOP with size 0 and mode 0.  Used to pin down the starting state
  // before a test modifies the entry.
  void VerifyInstruction(unsigned char opcode,
                         unsigned char inst,
                         unsigned char size,
                         unsigned char mode) {
    EXPECT_EQ(inst, code_table_data_.inst1[opcode]);
    EXPECT_EQ(size, code_table_data_.size1[opcode]);
    EXPECT_EQ(mode, code_table_data_.mode1[opcode]);
    EXPECT_EQ(VCD_NOOP, code_table_data_.inst2[opcode]);
    EXPECT_EQ(0, code_table_data_.size2[opcode]);
    EXPECT_EQ(0, code_table_data_.mode2[opcode]);
  }

  // Validates the table under test using the default maximum mode.
  bool ValidateCodeTable() {
    return code_table_data_.Validate();
  }

  // This value is designed so that the total number of inst values and modes
  // will equal 8 (VCD_NOOP, VCD_ADD, VCD_RUN, VCD_COPY modes 0 - 4).
  // Eight combinations of inst and mode, times two possible size values,
  // squared (because there are two instructions per opcode), makes
  // exactly 256 possible instruction combinations, which fits kCodeTableSize
  // (the number of opcodes in the table.)
  static const int kLastExerciseMode = 4;

  // A code table that exercises as many combinations as possible:
  // 2 instructions, each is a NOOP, ADD, RUN, or one of 5 copy modes
  // (== 8 total combinations of inst and mode), and each has
  // size == 0 or 255 (2 possibilities.)
  static VCDiffCodeTableData* g_exercise_code_table_;

  // The code table used by the current test.
  VCDiffCodeTableData code_table_data_;
};

VCDiffCodeTableData* CodeTableTest::g_exercise_code_table_ = NULL;

// These tests make sure that ValidateCodeTable() catches particular
// error conditions in a custom code table.

// All possible combinations of inst and mode should have an opcode with size 0.
TEST_F(CodeTableTest, MissingCopyMode) {
  VerifyInstruction(/* opcode */ 131, VCD_COPY, /* size */ 0, /* mode */ 7);
  code_table_data_.size1[131] = 0xFF;
  // Now there is no opcode expressing COPY with mode 7 and size 0.
  EXPECT_FALSE(ValidateCodeTable());
}

TEST_F(CodeTableTest, MissingAdd) {
  VerifyInstruction(/* opcode */ 1, VCD_ADD, /* size */ 0, /* mode */ 0);
  code_table_data_.size1[1] = 0xFF;  // Add size 0 => size 255
  // Now there is no opcode expressing ADD with size 0.
  EXPECT_FALSE(ValidateCodeTable());
}

TEST_F(CodeTableTest, MissingRun) {
  VerifyInstruction(/* opcode */ 0, VCD_RUN, /* size */ 0, /* mode */ 0);
  code_table_data_.size1[0] = 0xFF;  // Run size 0 => size 255
  // Now there is no opcode expressing RUN with size 0.
  EXPECT_FALSE(ValidateCodeTable());
}

// An instruction type beyond VCD_LAST_INSTRUCTION_TYPE must be rejected.
TEST_F(CodeTableTest, BadOpcode) {
  VerifyInstruction(/* opcode */ 0, VCD_RUN, /* size */ 0, /* mode */ 0);
  code_table_data_.inst1[0] = VCD_LAST_INSTRUCTION_TYPE + 1;
  EXPECT_FALSE(ValidateCodeTable());
  code_table_data_.inst1[0] = 0xFF;
  EXPECT_FALSE(ValidateCodeTable());
}

// A COPY mode beyond the default last mode must be rejected.
TEST_F(CodeTableTest, BadMode) {
  VerifyInstruction(/* opcode */ 131, VCD_COPY, /* size */ 0, /* mode */ 7);
  code_table_data_.mode1[131] = VCDiffAddressCache::DefaultLastMode() + 1;
  EXPECT_FALSE(ValidateCodeTable());
  code_table_data_.mode1[131] = 0xFF;
  EXPECT_FALSE(ValidateCodeTable());
}

// Only COPY may carry a nonzero mode.
TEST_F(CodeTableTest, AddWithNonzeroMode) {
  VerifyInstruction(/* opcode */ 1, VCD_ADD, /* size */ 0, /* mode */ 0);
  code_table_data_.mode1[1] = 1;
  EXPECT_FALSE(ValidateCodeTable());
}

TEST_F(CodeTableTest, RunWithNonzeroMode) {
  VerifyInstruction(/* opcode */ 0, VCD_RUN, /* size */ 0, /* mode */ 0);
  code_table_data_.mode1[0] = 1;
  EXPECT_FALSE(ValidateCodeTable());
}

// Opcode 20 is first turned into a valid NOOP (the table must still pass),
// then given a nonzero mode, which must fail.
TEST_F(CodeTableTest, NoOpWithNonzeroMode) {
  VerifyInstruction(/* opcode */ 20, VCD_COPY, /* size */ 4, /* mode */ 0);
  code_table_data_.inst1[20] = VCD_NOOP;
  code_table_data_.mode1[20] = 0;
  code_table_data_.size1[20] = 0;
  EXPECT_TRUE(ValidateCodeTable());
  code_table_data_.mode1[20] = 1;
  EXPECT_FALSE(ValidateCodeTable());
}

// Same setup as above, but the NOOP is given a nonzero size instead.
TEST_F(CodeTableTest, NoOpWithNonzeroSize) {
  VerifyInstruction(/* opcode */ 20, VCD_COPY, /* size */ 4, /* mode */ 0);
  code_table_data_.inst1[20] = VCD_NOOP;
  code_table_data_.mode1[20] = 0;
  code_table_data_.size1[20] = 0;
  EXPECT_TRUE(ValidateCodeTable());
  code_table_data_.size1[20] = 1;
  EXPECT_FALSE(ValidateCodeTable());
}

// The remaining tests repeat the checks for the second instruction of
// an opcode.
TEST_F(CodeTableTest, BadSecondOpcode) {
  VerifyInstruction(/* opcode */ 20, VCD_COPY, /* size */ 4, /* mode */ 0);
  code_table_data_.inst2[20] = VCD_LAST_INSTRUCTION_TYPE + 1;
  EXPECT_FALSE(ValidateCodeTable());
  code_table_data_.inst2[20] = 0xFF;
  EXPECT_FALSE(ValidateCodeTable());
}
// A second-instruction COPY is accepted with mode 0 but rejected once its
// mode exceeds the default last mode.
TEST_F(CodeTableTest, BadSecondMode) {
  VerifyInstruction(/* opcode */ 20, VCD_COPY, /* size */ 4, /* mode */ 0);
  code_table_data_.inst2[20] = VCD_COPY;
  EXPECT_TRUE(ValidateCodeTable());
  code_table_data_.mode2[20] = VCDiffAddressCache::DefaultLastMode() + 1;
  EXPECT_FALSE(ValidateCodeTable());
  code_table_data_.mode2[20] = 0xFF;
  EXPECT_FALSE(ValidateCodeTable());
}

// A second-instruction ADD is fine with mode 0 and invalid with mode 1.
TEST_F(CodeTableTest, AddSecondWithNonzeroMode) {
  VerifyInstruction(/* opcode */ 20, VCD_COPY, /* size */ 4, /* mode */ 0);
  code_table_data_.inst2[20] = VCD_ADD;
  EXPECT_TRUE(ValidateCodeTable());
  code_table_data_.mode2[20] = 1;
  EXPECT_FALSE(ValidateCodeTable());
}

// Likewise for a second-instruction RUN.
TEST_F(CodeTableTest, RunSecondWithNonzeroMode) {
  VerifyInstruction(/* opcode */ 20, VCD_COPY, /* size */ 4, /* mode */ 0);
  code_table_data_.inst2[20] = VCD_RUN;
  EXPECT_TRUE(ValidateCodeTable());
  code_table_data_.mode2[20] = 1;
  EXPECT_FALSE(ValidateCodeTable());
}

// Opcode 20 already has NOOP as its second instruction; giving that NOOP
// a nonzero mode must fail validation.
TEST_F(CodeTableTest, SecondNoOpWithNonzeroMode) {
  VerifyInstruction(/* opcode */ 20, VCD_COPY, /* size */ 4, /* mode */ 0);
  EXPECT_EQ(VCD_NOOP, code_table_data_.inst2[20]);
  code_table_data_.mode2[20] = 1;
  EXPECT_FALSE(ValidateCodeTable());
}

// ... and so must giving it a nonzero size.
TEST_F(CodeTableTest, SecondNoOpWithNonzeroSize) {
  VerifyInstruction(/* opcode */ 20, VCD_COPY, /* size */ 4, /* mode */ 0);
  EXPECT_EQ(VCD_NOOP, code_table_data_.inst2[20]);
  code_table_data_.size2[20] = 1;
  EXPECT_FALSE(ValidateCodeTable());
}

// The exercise table built in SetUpTestCase() must validate when the
// maximum mode is kLastExerciseMode.
TEST_F(CodeTableTest, ValidateExerciseCodeTable) {
  EXPECT_TRUE(g_exercise_code_table_->Validate(kLastExerciseMode));
}

}  // unnamed namespace
}  // namespace open_vcdiff

// ---------------------------------------------------------------------------
// diff --git a/src/compile_assert.h b/src/compile_assert.h
// new file mode 100644
// index 0000000..f4ba293
// --- /dev/null
// +++ b/src/compile_assert.h
// @@ -0,0 +1,78 @@
// ---------------------------------------------------------------------------
// Copyright 2008 Google Inc.
// Authors: Zhanyong Wan, Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef OPEN_VCDIFF_COMPILE_ASSERT_H_
#define OPEN_VCDIFF_COMPILE_ASSERT_H_

#include <config.h>

// The COMPILE_ASSERT macro can be used to verify that a compile-time
// expression is true. For example, you could use it to verify the
// size of a static array:
//
//   COMPILE_ASSERT(ARRAYSIZE(content_type_names) == CONTENT_NUM_TYPES,
//                  content_type_names_incorrect_size);
//
// or to make sure a struct is smaller than a certain size:
//
//   COMPILE_ASSERT(sizeof(foo) < 128, foo_too_large);
//
// For the second argument to COMPILE_ASSERT, the programmer should supply
// a variable name that meets C++ naming rules, but that provides
// a description of the compile-time rule that has been violated.
// (In the example above, the name used is "foo_too_large".)
// If the expression is false, most compilers will issue a warning/error
// containing the name of the variable.
// This refinement (adding a descriptive variable name argument)
// is what differentiates COMPILE_ASSERT from Boost static asserts.

// Empty helper type.  Its only purpose is to take |expr| as a template
// argument, which forces |expr| to be a compile-time constant.
template <bool>
struct CompileAssert {
};

// Expands to a typedef named |msg| for an array of CompileAssert<...> with
// 1 element when |expr| is true and -1 elements (ill-formed) when it is false.
// Because the expansion is a typedef, |msg| must be unique within its scope.
#define COMPILE_ASSERT(expr, msg) \
  typedef CompileAssert<static_cast<bool>(expr)> \
      msg[static_cast<bool>(expr) ? 1 : -1]

// Implementation details of COMPILE_ASSERT:
//
// - COMPILE_ASSERT works by defining an array type that has -1
//   elements (and thus is invalid) when the expression is false.
//
// - The simpler definition
//
//     #define COMPILE_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
//
//   does not work, as gcc supports variable-length arrays whose sizes
//   are determined at run-time (this is gcc's extension and not part
//   of the C++ standard).  As a result, gcc fails to reject the
//   following code with the simple definition:
//
//     int foo;
//     COMPILE_ASSERT(foo, msg);  // not supposed to compile as foo is
//                                // not a compile-time constant.
//
// - By using the type CompileAssert<(static_cast<bool>(expr))>, we ensure that
//   expr is a compile-time constant.  (Template arguments must be
//   determined at compile-time.)
//
// - The array size is (static_cast<bool>(expr) ? 1 : -1), instead of simply
//
//     ((expr) ? 1 : -1).
//
//   This is to avoid running into a bug in MS VC 7.1, which
//   causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.

#endif  // OPEN_VCDIFF_COMPILE_ASSERT_H_

// ---------------------------------------------------------------------------
// diff --git a/src/config.h.in b/src/config.h.in
// new file mode 100644
// index 0000000..004b792
// --- /dev/null
// +++ b/src/config.h.in
// @@ -0,0 +1,125 @@
// ---------------------------------------------------------------------------
/* src/config.h.in.  Generated from configure.ac by autoheader.  */

/* Define to 1 if you have the <dlfcn.h> header file. */
#undef HAVE_DLFCN_H

/* Define to 1 if you have the <ext/rope> header file. */
#undef HAVE_EXT_ROPE

/* Define to 1 if you have the <fnmatch.h> header file. */
#undef HAVE_FNMATCH_H

/* Define to 1 if you have the <getopt.h> header file. */
#undef HAVE_GETOPT_H

/* Define to 1 if you have the `gettimeofday' function. */
#undef HAVE_GETTIMEOFDAY

/* Define to 1 if you have the `InitializeCriticalSection' function. */
#undef HAVE_INITIALIZECRITICALSECTION

/* Define to 1 if you have the `InterlockedCompareExchange' function. */
#undef HAVE_INTERLOCKEDCOMPAREEXCHANGE

/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H

/* Define to 1 if you have the <malloc.h> header file. */
#undef HAVE_MALLOC_H

/* Define to 1 if you have the `memalign' function. */
#undef HAVE_MEMALIGN

/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H

/* Define to 1 if you have the `mprotect' function. */
#undef HAVE_MPROTECT

/* Define to 1 if you have the `posix_memalign' function. */
#undef HAVE_POSIX_MEMALIGN

/* Define if you have POSIX threads libraries and header files. */
#undef HAVE_PTHREAD

/* Define to 1 if you have the `QueryPerformanceCounter' function. */
#undef HAVE_QUERYPERFORMANCECOUNTER

/* Define to 1 if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H

/* Define to 1 if you have the <stdlib.h> header file. */
#undef HAVE_STDLIB_H

/* Define to 1 if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H

/* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H

/* Define to 1 if you have the `strtoll' function. */
#undef HAVE_STRTOLL

/* Define to 1 if you have the `strtoq' function. */
#undef HAVE_STRTOQ

/* Define to 1 if you have the <sys/mman.h> header file. */
#undef HAVE_SYS_MMAN_H

/* Define to 1 if you have the <sys/stat.h> header file. */
#undef HAVE_SYS_STAT_H

/* Define to 1 if you have the <sys/time.h> header file. */
#undef HAVE_SYS_TIME_H

/* Define to 1 if you have the <sys/types.h> header file. */
#undef HAVE_SYS_TYPES_H

/* Define to 1 if the system has the type `uint16_t'. */
#undef HAVE_UINT16_T

/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H

/* Define to 1 if the system has the type `u_int16_t'. */
#undef HAVE_U_INT16_T

/* Define to 1 if you have the <windows.h> header file. */
#undef HAVE_WINDOWS_H

/* define if your compiler has __attribute__ */
#undef HAVE___ATTRIBUTE__

/* Define to 1 if the system has the type `__int16'. */
#undef HAVE___INT16

/* Name of package */
#undef PACKAGE

/* Define to the address where bug reports for this package should be sent. */
#undef PACKAGE_BUGREPORT

/* Define to the full name of this package. */
#undef PACKAGE_NAME

/* Define to the full name and version of this package. */
#undef PACKAGE_STRING

/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME

/* Define to the version of this package. */
#undef PACKAGE_VERSION

/* Define to necessary symbol if this constant uses a non-standard name on
   your system. */
#undef PTHREAD_CREATE_JOINABLE

/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS

/* Use custom compare function instead of memcmp */
#undef VCDIFF_USE_BLOCK_COMPARE_WORDS

/* Version number of package */
#undef VERSION

// ---------------------------------------------------------------------------
// diff --git a/src/decodetable.cc b/src/decodetable.cc
// new file mode 100644
// index 0000000..ebcec6b
// --- /dev/null
// +++ b/src/decodetable.cc
// @@ -0,0 +1,114 @@
// ---------------------------------------------------------------------------
// Copyright 2008 Google Inc.
// Author: Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// VCDiffCodeTableReader is a class to interpret a stream of opcodes
// as VCDIFF instruction types, based on a VCDiffCodeTableData structure.
+ +#include <config.h> +#include "decodetable.h" +#include "codetable.h" +#include "logging.h" +#include "varint_bigendian.h" +#include "vcdiff_defs.h" + +namespace open_vcdiff { + +VCDiffCodeTableReader::VCDiffCodeTableReader() + : code_table_data_(&VCDiffCodeTableData::kDefaultCodeTableData), + non_default_code_table_data_(NULL), + instructions_and_sizes_(NULL), + instructions_and_sizes_end_(NULL), + last_instruction_start_(NULL), + pending_second_instruction_(kNoOpcode), + last_pending_second_instruction_(kNoOpcode) { +} + +bool VCDiffCodeTableReader::UseCodeTable( + const VCDiffCodeTableData& code_table_data, unsigned char max_mode) { + if (!code_table_data.Validate(max_mode)) return false; + if (!non_default_code_table_data_.get()) { + non_default_code_table_data_.reset(new VCDiffCodeTableData); + } + *non_default_code_table_data_ = code_table_data; + code_table_data_ = non_default_code_table_data_.get(); + return true; +} + +VCDiffInstructionType VCDiffCodeTableReader::GetNextInstruction( + int32_t* size, + unsigned char* mode) { + if (!instructions_and_sizes_) { + LOG(ERROR) << "Internal error: GetNextInstruction() called before Init()" + << LOG_ENDL; + return VCD_INSTRUCTION_ERROR; + } + last_instruction_start_ = *instructions_and_sizes_; + last_pending_second_instruction_ = pending_second_instruction_; + unsigned char opcode = 0; + unsigned char instruction_type = VCD_NOOP; + int32_t instruction_size = 0; + unsigned char instruction_mode = 0; + do { + if (pending_second_instruction_ != kNoOpcode) { + // There is a second instruction left over + // from the most recently processed opcode. 
+ opcode = static_cast<unsigned char>(pending_second_instruction_); + pending_second_instruction_ = kNoOpcode; + instruction_type = code_table_data_->inst2[opcode]; + instruction_size = code_table_data_->size2[opcode]; + instruction_mode = code_table_data_->mode2[opcode]; + break; + } + if (*instructions_and_sizes_ >= instructions_and_sizes_end_) { + // Ran off end of instruction stream + return VCD_INSTRUCTION_END_OF_DATA; + } + opcode = **instructions_and_sizes_; + if (code_table_data_->inst2[opcode] != VCD_NOOP) { + // This opcode contains two instructions; process the first one now, and + // save a pointer to the second instruction, which should be returned + // by the next call to GetNextInstruction + pending_second_instruction_ = **instructions_and_sizes_; + } + ++(*instructions_and_sizes_); + instruction_type = code_table_data_->inst1[opcode]; + instruction_size = code_table_data_->size1[opcode]; + instruction_mode = code_table_data_->mode1[opcode]; + // This do-while loop is necessary in case inst1 == VCD_NOOP for an opcode + // that was actually used in the encoding. That case is unusual, but it + // is not prohibited by the standard. + } while (instruction_type == VCD_NOOP); + if (instruction_size == 0) { + // Parse the size as a Varint in the instruction stream. 
+ switch (*size = VarintBE<int32_t>::Parse(instructions_and_sizes_end_, + instructions_and_sizes_)) { + case RESULT_ERROR: + LOG(ERROR) << "Instruction size is not a valid variable-length integer" + << LOG_ENDL; + return VCD_INSTRUCTION_ERROR; + case RESULT_END_OF_DATA: + UnGetInstruction(); // Rewind to instruction start + return VCD_INSTRUCTION_END_OF_DATA; + default: + break; // Successfully parsed Varint + } + } else { + *size = instruction_size; + } + *mode = instruction_mode; + return static_cast<VCDiffInstructionType>(instruction_type); +} + +}; // namespace open_vcdiff diff --git a/src/decodetable.h b/src/decodetable.h new file mode 100644 index 0000000..3ba997a --- /dev/null +++ b/src/decodetable.h @@ -0,0 +1,152 @@ +// Copyright 2008 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_VCDIFF_DECODETABLE_H_ +#define OPEN_VCDIFF_DECODETABLE_H_ + +#include <config.h> +#include <stdint.h> // int32_t +#include <cstddef> // NULL +#include <memory> // auto_ptr +#include "codetable.h" // VCDiffInstructi... +#include "logging.h" + +namespace open_vcdiff { + +// This class is used by the decoder. It can use a standard or +// non-standard code table, and will translate the opcodes in the code table +// into delta instructions. +// +// NOT threadsafe. +// +class VCDiffCodeTableReader { + public: + // When constructed, the object will be set up to use the default code table. 
+ // If a non-default code table is to be used, then UseCodeTable() + // should be called after the VCDiffCodeTableReader has been constructed. + // In any case, the Init() method must be called before GetNextInstruction() + // may be used. + // + VCDiffCodeTableReader(); + + // Sets up a non-standard code table. The caller + // may free the memory occupied by the argument code table after + // passing it to this method, because the argument code table + // allocates space to store a copy of it. + // UseCodeTable() may be called either before or after calling Init(). + // Returns true if the code table was accepted, or false if the + // argument did not appear to be a valid code table. + // + bool UseCodeTable(const VCDiffCodeTableData& code_table_data, + unsigned char max_mode); + + // Defines the buffer containing the instructions and sizes. + // This method must be called before GetNextInstruction() may be used. + // Init() may be called any number of times to reset the state of + // the object. + // + void Init(const char** instructions_and_sizes, + const char* instructions_and_sizes_end) { + instructions_and_sizes_ = instructions_and_sizes; + instructions_and_sizes_end_ = instructions_and_sizes_end; + last_instruction_start_ = NULL; + pending_second_instruction_ = kNoOpcode; + last_pending_second_instruction_ = kNoOpcode; + } + + // Updates the pointers to the buffer containing the instructions and sizes, + // but leaves the rest of the reader state intact, so that (for example) + // any pending second instruction or unread instruction will still be + // read when requested. NOTE: UnGetInstruction() will not work immediately + // after using UpdatePointers(); GetNextInstruction() must be called first. 
+ // + void UpdatePointers(const char** instructions_and_sizes, + const char* instructions_and_sizes_end) { + instructions_and_sizes_ = instructions_and_sizes; + instructions_and_sizes_end_ = instructions_and_sizes_end; + last_instruction_start_ = *instructions_and_sizes; + // pending_second_instruction_ is unchanged + last_pending_second_instruction_ = pending_second_instruction_; + } + + // Returns the next instruction from the stream of opcodes, + // or VCD_INSTRUCTION_END_OF_DATA if the end of the opcode stream is reached, + // or VCD_INSTRUCTION_ERROR if an error occurred. + // In the first of these cases, increments *instructions_and_sizes_ + // past the values it reads, and populates *size + // with the corresponding size for the returned instruction; + // otherwise, the value of *size is undefined, and is not + // guaranteed to be preserved. + // If the instruction returned is VCD_COPY, *mode will + // be populated with the copy mode; otherwise, the value of *mode + // is undefined, and is not guaranteed to be preserved. + // Any occurrences of VCD_NOOP in the opcode stream + // are skipped over and ignored, not returned. + // If Init() was not called before calling this method, then + // VCD_INSTRUCTION_ERROR will be returned. + // + VCDiffInstructionType GetNextInstruction(int32_t* size, unsigned char* mode); + + // Puts a single instruction back onto the front of the + // instruction stream. The next call to GetNextInstruction() + // will return the same value that was returned by the last + // call. Calling UnGetInstruction() more than once before calling + // GetNextInstruction() will have no additional effect; you can + // only rewind one instruction. 
+ // + void UnGetInstruction() { + if (last_instruction_start_) { + if (last_instruction_start_ > *instructions_and_sizes_) { + LOG(DFATAL) << "Internal error: last_instruction_start past end of " + "instructions_and_sizes in UnGetInstruction" << LOG_ENDL; + } + *instructions_and_sizes_ = last_instruction_start_; + if ((pending_second_instruction_ != kNoOpcode) && + (last_pending_second_instruction_ != kNoOpcode)) { + LOG(DFATAL) << "Internal error: two pending instructions in a row " + "in UnGetInstruction" << LOG_ENDL; + } + pending_second_instruction_ = last_pending_second_instruction_; + } + } + + private: + // A pointer to the code table. This is the object that will be used + // to interpret opcodes in GetNextInstruction(). + const VCDiffCodeTableData* code_table_data_; + + // If the default code table is not being used, then space for the + // code table data will be allocated using this pointer and freed + // when the VCDiffCodeTableReader is destroyed. This will keep the + // code that uses the object from having to worry about memory + // management for the non-standard code table, whose contents have + // been read as part of the encoded data file/stream. + // + std::auto_ptr<VCDiffCodeTableData> non_default_code_table_data_; + + const char** instructions_and_sizes_; + const char* instructions_and_sizes_end_; + const char* last_instruction_start_; + OpcodeOrNone pending_second_instruction_; + OpcodeOrNone last_pending_second_instruction_; + + // Making these private avoids implicit copy constructor & assignment operator + VCDiffCodeTableReader(const VCDiffCodeTableReader&); + void operator=(const VCDiffCodeTableReader&); +}; + +}; // namespace open_vcdiff + +#endif // OPEN_VCDIFF_DECODETABLE_H_ diff --git a/src/decodetable_test.cc b/src/decodetable_test.cc new file mode 100644 index 0000000..214cb52 --- /dev/null +++ b/src/decodetable_test.cc @@ -0,0 +1,464 @@ +// Copyright 2008 Google Inc. 
// Test fixture for VCDiffCodeTableReader.  Each test gets a fresh reader
// that has been Init()ed on a zero-filled buffer of
// instruction_buffer_size bytes; tests write opcodes (and Varint sizes)
// into that buffer and then read them back through the reader.
class DecodeTableTest : public testing::Test {
 protected:
  // Allocates the instruction buffer and attaches the reader to all of it.
  DecodeTableTest()
  : instructions_and_sizes_(instruction_buffer_size),
    found_size_(0),
    found_mode_(0) {
    instructions_and_sizes_ptr_ = &instructions_and_sizes_[0];
    reader_.Init(&instructions_and_sizes_ptr_,
                 instructions_and_sizes_ptr_ + instruction_buffer_size);
  }

  // Fills in entry `opcode` of the exercise code table with the given pair
  // of instructions.  A VCD_NOOP instruction always gets size 0, whatever
  // size was passed in.
  static void AddExerciseOpcode(unsigned char inst1,
                                unsigned char mode1,
                                unsigned char size1,
                                unsigned char inst2,
                                unsigned char mode2,
                                unsigned char size2,
                                int opcode) {
    g_exercise_code_table_->inst1[opcode] = inst1;
    g_exercise_code_table_->mode1[opcode] = mode1;
    g_exercise_code_table_->size1[opcode] = (inst1 == VCD_NOOP) ? 0 : size1;
    g_exercise_code_table_->inst2[opcode] = inst2;
    g_exercise_code_table_->mode2[opcode] = mode2;
    g_exercise_code_table_->size2[opcode] = (inst2 == VCD_NOOP) ? 0 : size2;
  }

  // Builds the exercise code table shared by all tests: for every ordered
  // pair of (instruction, mode) combinations, four consecutive opcodes are
  // added covering sizes {0, 255} x {0, 255}.  Also checks that both the
  // default table and the exercise table pass Validate().
  static void SetUpTestCase() {
    g_exercise_code_table_ = new VCDiffCodeTableData;
    int opcode = 0;
    for (unsigned char inst_mode1 = 0;
         inst_mode1 <= VCD_LAST_INSTRUCTION_TYPE + kLastExerciseMode;
         ++inst_mode1) {
      // Values above VCD_COPY stand for VCD_COPY with a nonzero mode.
      unsigned char inst1 = inst_mode1;
      unsigned char mode1 = 0;
      if (inst_mode1 > VCD_COPY) {
        inst1 = VCD_COPY;
        mode1 = inst_mode1 - VCD_COPY;
      }
      for (unsigned char inst_mode2 = 0;
           inst_mode2 <= VCD_LAST_INSTRUCTION_TYPE + kLastExerciseMode;
           ++inst_mode2) {
        unsigned char inst2 = inst_mode2;
        unsigned char mode2 = 0;
        if (inst_mode2 > VCD_COPY) {
          inst2 = VCD_COPY;
          mode2 = inst_mode2 - VCD_COPY;
        }
        AddExerciseOpcode(inst1, mode1, 0, inst2, mode2, 0, opcode++);
        AddExerciseOpcode(inst1, mode1, 0, inst2, mode2, 255, opcode++);
        AddExerciseOpcode(inst1, mode1, 255, inst2, mode2, 0, opcode++);
        AddExerciseOpcode(inst1, mode1, 255, inst2, mode2, 255, opcode++);
      }
    }
    // Sanity check on the test itself: the loops must fill the whole table.
    if (VCDiffCodeTableData::kCodeTableSize != opcode) {
      LOG(FATAL) << "Internal error in test: The nested loops in "
                    "DecodeTableTest::SetUpTestCase should iterate through "
                    "exactly " << VCDiffCodeTableData::kCodeTableSize
                 << " opcodes.";
      return;
    }
    EXPECT_TRUE(VCDiffCodeTableData::kDefaultCodeTableData.Validate());
    EXPECT_TRUE(g_exercise_code_table_->Validate(kLastExerciseMode));
  }

  // Releases the exercise code table allocated in SetUpTestCase().
  static void TearDownTestCase() {
    delete g_exercise_code_table_;
  }

  // Reads one instruction from reader_ and checks it against the expected
  // inst/mode/size.  An expected size of 0 means the size was encoded
  // separately in the stream, in which case it must equal 1000 + opcode
  // (the value written by ExerciseCodeTableReader).
  void VerifyInstModeSize(unsigned char inst,
                          unsigned char mode,
                          unsigned char size,
                          unsigned char opcode) {
    if (inst == VCD_NOOP) return;  // GetNextInstruction skips NOOPs
    int32_t found_size = 0;
    unsigned char found_mode = 0;
    unsigned char found_inst = reader_.GetNextInstruction(&found_size,
                                                          &found_mode);
    EXPECT_EQ(inst, found_inst);
    EXPECT_EQ(mode, found_mode);
    if (size == 0) {
      EXPECT_EQ(1000 + opcode, found_size);
    } else {
      EXPECT_EQ(size, found_size);
    }
  }

  // Checks that the first instruction of `opcode` in the exercise table
  // matches the expected values, then that the reader returns it.
  void VerifyInstModeSize1(unsigned char inst,
                           unsigned char mode,
                           unsigned char size,
                           unsigned char opcode) {
    if (inst == VCD_NOOP) size = 0;  // mirrors AddExerciseOpcode
    EXPECT_EQ(g_exercise_code_table_->inst1[opcode], inst);
    EXPECT_EQ(g_exercise_code_table_->mode1[opcode], mode);
    EXPECT_EQ(g_exercise_code_table_->size1[opcode], size);
    VerifyInstModeSize(inst, mode, size, opcode);
  }

  // Same as VerifyInstModeSize1, for the second instruction of `opcode`.
  void VerifyInstModeSize2(unsigned char inst,
                           unsigned char mode,
                           unsigned char size,
                           unsigned char opcode) {
    if (inst == VCD_NOOP) size = 0;  // mirrors AddExerciseOpcode
    EXPECT_EQ(g_exercise_code_table_->inst2[opcode], inst);
    EXPECT_EQ(g_exercise_code_table_->mode2[opcode], mode);
    EXPECT_EQ(g_exercise_code_table_->size2[opcode], size);
    VerifyInstModeSize(inst, mode, size, opcode);
  }

  // This value is designed so that the total number of inst values and modes
  // will equal 8 (VCD_NOOP, VCD_ADD, VCD_RUN, VCD_COPY modes 0 - 4).
  // Eight combinations of inst and mode, times two possible size values,
  // squared (because there are two instructions per opcode), makes
  // exactly 256 possible instruction combinations, which fits kCodeTableSize
  // (the number of opcodes in the table.)
  static const int kLastExerciseMode = 4;

  // The buffer size (in bytes) needed to store kCodeTableSize opcodes plus
  // up to kCodeTableSize VarintBE-encoded size values.
  static const int instruction_buffer_size;

  // A code table that exercises as many combinations as possible:
  // 2 instructions, each is a NOOP, ADD, RUN, or one of 5 copy modes
  // (== 8 total combinations of inst and mode), and each has
  // size == 0 or 255 (2 possibilities.)
  static VCDiffCodeTableData* g_exercise_code_table_;

  // The reader under test.
  VCDiffCodeTableReader reader_;

  // A buffer to which instructions and sizes will be added manually
  // in order to exercise VCDiffCodeTableReader.
  std::vector<char> instructions_and_sizes_;

  // The buffer pointer used by the VCDiffCodeTableReader.
  const char* instructions_and_sizes_ptr_;

  // The size and mode returned by GetNextInstruction().
  int32_t found_size_;
  unsigned char found_mode_;
};

VCDiffCodeTableData* DecodeTableTest::g_exercise_code_table_ = NULL;

// One byte per opcode plus the largest possible Varint for each.
const int DecodeTableTest::instruction_buffer_size =
    VCDiffCodeTableData::kCodeTableSize *
        (1 + (VarintBE<VCDAddress>::kMaxBytes));

// Opcode 1 followed by a Varint: expected to decode as ADD with the
// separately encoded size 257.
TEST_F(DecodeTableTest, ReadAdd) {
  instructions_and_sizes_[0] = 1;
  VarintBE<VCDAddress>::Encode(257, &instructions_and_sizes_[1]);
  unsigned char found_inst = reader_.GetNextInstruction(&found_size_,
                                                        &found_mode_);
  EXPECT_EQ(VCD_ADD, found_inst);
  EXPECT_EQ(257, found_size_);
  EXPECT_EQ(0, found_mode_);
}

// Opcode 0 followed by a Varint: expected to decode as RUN with the
// separately encoded size 111.
TEST_F(DecodeTableTest, ReadRun) {
  instructions_and_sizes_[0] = 0;
  VarintBE<VCDAddress>::Encode(111, &instructions_and_sizes_[1]);
  unsigned char found_inst = reader_.GetNextInstruction(&found_size_,
                                                        &found_mode_);
  EXPECT_EQ(VCD_RUN, found_inst);
  EXPECT_EQ(111, found_size_);
  EXPECT_EQ(0, found_mode_);
}

// Opcode 58: expected to decode as COPY with implicit size 10 and mode 2.
TEST_F(DecodeTableTest, ReadCopy) {
  instructions_and_sizes_[0] = 58;
  instructions_and_sizes_[1] = 0;
  unsigned char found_inst = reader_.GetNextInstruction(&found_size_,
                                                        &found_mode_);
  EXPECT_EQ(VCD_COPY, found_inst);
  EXPECT_EQ(10, found_size_);
  EXPECT_EQ(2, found_mode_);
}

// Opcode 175 is a double-instruction opcode: the first read must return
// ADD size 1, and the second read COPY size 4 mode 1.
TEST_F(DecodeTableTest, ReadAddCopy) {
  instructions_and_sizes_[0] = 175;
  instructions_and_sizes_[1] = 0;
  unsigned char found_inst = reader_.GetNextInstruction(&found_size_,
                                                        &found_mode_);
  EXPECT_EQ(VCD_ADD, found_inst);
  EXPECT_EQ(1, found_size_);
  EXPECT_EQ(0, found_mode_);
  found_inst = reader_.GetNextInstruction(&found_size_, &found_mode_);
  EXPECT_EQ(VCD_COPY, found_inst);
  EXPECT_EQ(4, found_size_);
  EXPECT_EQ(1, found_mode_);
}

// Opcode 255 is a double-instruction opcode: the first read must return
// COPY size 4 mode 8, and the second read ADD size 1.
TEST_F(DecodeTableTest, ReadCopyAdd) {
  instructions_and_sizes_[0] = 255;
  instructions_and_sizes_[1] = 0;
  unsigned char found_inst = reader_.GetNextInstruction(&found_size_,
                                                        &found_mode_);
  EXPECT_EQ(VCD_COPY, found_inst);
  EXPECT_EQ(4, found_size_);
  EXPECT_EQ(8, found_mode_);
  found_mode_ = 0;  // make sure the next read really rewrites the mode
  found_inst = reader_.GetNextInstruction(&found_size_, &found_mode_);
  EXPECT_EQ(VCD_ADD, found_inst);
  EXPECT_EQ(1, found_size_);
  EXPECT_EQ(0, found_mode_);
}

// After UnGetInstruction(), an ADD with a separately encoded size must be
// returned a second time with the same size.
TEST_F(DecodeTableTest, UnGetAdd) {
  instructions_and_sizes_[0] = 1;
  // NOTE(review): this byte is overwritten by the Encode() call below, which
  // writes starting at index 1 -- possibly a sentinel intended for a later
  // index; confirm.
  instructions_and_sizes_[1] = 255;
  VarintBE<VCDAddress>::Encode(257, &instructions_and_sizes_[1]);
  unsigned char found_inst = reader_.GetNextInstruction(&found_size_,
                                                        &found_mode_);
  EXPECT_EQ(VCD_ADD, found_inst);
  EXPECT_EQ(257, found_size_);
  EXPECT_EQ(0, found_mode_);
  reader_.UnGetInstruction();
  found_size_ = 0;  // make sure the re-read really rewrites the size
  found_inst = reader_.GetNextInstruction(&found_size_, &found_mode_);
  EXPECT_EQ(VCD_ADD, found_inst);
  EXPECT_EQ(257, found_size_);
  EXPECT_EQ(0, found_mode_);
}

// After UnGetInstruction(), a single-instruction COPY opcode must be
// returned a second time rather than the bytes that follow it.
TEST_F(DecodeTableTest, UnGetCopy) {
  instructions_and_sizes_[0] = 58;
  instructions_and_sizes_[1] = 0;
  instructions_and_sizes_[2] = 255;
  unsigned char found_inst = reader_.GetNextInstruction(&found_size_,
                                                        &found_mode_);
  EXPECT_EQ(VCD_COPY, found_inst);
  EXPECT_EQ(10, found_size_);
  EXPECT_EQ(2, found_mode_);
  reader_.UnGetInstruction();
  found_size_ = 0;
  found_mode_ = 0;
  found_inst = reader_.GetNextInstruction(&found_size_, &found_mode_);
  EXPECT_EQ(VCD_COPY, found_inst);
  EXPECT_EQ(10, found_size_);
  EXPECT_EQ(2, found_mode_);
}

// Un-getting the first half of a double-instruction opcode must replay the
// first instruction and then still deliver the second one.
TEST_F(DecodeTableTest, UnGetCopyAdd) {
  instructions_and_sizes_[0] = 255;
  instructions_and_sizes_[1] = 0;
  unsigned char found_inst = reader_.GetNextInstruction(&found_size_,
                                                        &found_mode_);
  EXPECT_EQ(VCD_COPY, found_inst);
  EXPECT_EQ(4, found_size_);
  EXPECT_EQ(8, found_mode_);
  reader_.UnGetInstruction();
  found_mode_ = 0;
  found_inst = reader_.GetNextInstruction(&found_size_, &found_mode_);
  EXPECT_EQ(VCD_COPY, found_inst);
  EXPECT_EQ(4, found_size_);
  EXPECT_EQ(8, found_mode_);
  found_mode_ = 0;
  found_inst = reader_.GetNextInstruction(&found_size_, &found_mode_);
  EXPECT_EQ(VCD_ADD, found_inst);
  EXPECT_EQ(1, found_size_);
  EXPECT_EQ(0, found_mode_);
}
// Calling UnGetInstruction() twice in a row must rewind by only one
// instruction: the COPY is replayed once, then the ADD follows.
TEST_F(DecodeTableTest, UnGetTwice) {
  instructions_and_sizes_[0] = 255;
  instructions_and_sizes_[1] = 0;
  unsigned char found_inst = reader_.GetNextInstruction(&found_size_,
                                                        &found_mode_);
  EXPECT_EQ(VCD_COPY, found_inst);
  EXPECT_EQ(4, found_size_);
  EXPECT_EQ(8, found_mode_);
  reader_.UnGetInstruction();
  reader_.UnGetInstruction();
  found_mode_ = 0;
  found_inst = reader_.GetNextInstruction(&found_size_, &found_mode_);
  EXPECT_EQ(VCD_COPY, found_inst);
  EXPECT_EQ(4, found_size_);
  EXPECT_EQ(8, found_mode_);
  found_mode_ = 0;
  found_inst = reader_.GetNextInstruction(&found_size_, &found_mode_);
  EXPECT_EQ(VCD_ADD, found_inst);
  EXPECT_EQ(1, found_size_);
  EXPECT_EQ(0, found_mode_);
}

// UnGetInstruction() before anything has been read must be a no-op: the
// first read still returns the first instruction in the buffer.
TEST_F(DecodeTableTest, UnGetBeforeGet) {
  instructions_and_sizes_[0] = 255;
  instructions_and_sizes_[1] = 0;
  reader_.UnGetInstruction();
  unsigned char found_inst = reader_.GetNextInstruction(&found_size_,
                                                        &found_mode_);
  EXPECT_EQ(VCD_COPY, found_inst);
  EXPECT_EQ(4, found_size_);
  EXPECT_EQ(8, found_mode_);
}

// Same as UnGetCopyAdd, but for the ADD + COPY double-instruction opcode 175.
TEST_F(DecodeTableTest, UnGetAddCopy) {
  instructions_and_sizes_[0] = 175;
  instructions_and_sizes_[1] = 0;
  unsigned char found_inst = reader_.GetNextInstruction(&found_size_,
                                                        &found_mode_);
  EXPECT_EQ(VCD_ADD, found_inst);
  EXPECT_EQ(1, found_size_);
  EXPECT_EQ(0, found_mode_);
  reader_.UnGetInstruction();
  found_inst = reader_.GetNextInstruction(&found_size_, &found_mode_);
  EXPECT_EQ(VCD_ADD, found_inst);
  EXPECT_EQ(1, found_size_);
  EXPECT_EQ(0, found_mode_);
  found_inst = reader_.GetNextInstruction(&found_size_, &found_mode_);
  EXPECT_EQ(VCD_COPY, found_inst);
  EXPECT_EQ(4, found_size_);
  EXPECT_EQ(1, found_mode_);
}

// Simulates input arriving a few bytes at a time.  Before each step the
// reader is re-Init()ed with an end pointer a little further along; a read
// that runs out of data must return VCD_INSTRUCTION_END_OF_DATA and leave
// the buffer pointer at the start of the incomplete instruction.
TEST_F(DecodeTableTest, ReReadIncomplete) {
  instructions_and_sizes_[0] = 175;  // Add(1) + Copy1(4)
  instructions_and_sizes_[1] = 1;    // Add(0)
  instructions_and_sizes_[2] = 111;  // with size 111
  instructions_and_sizes_[3] = 255;  // Copy8(4) + Add(1)

  reader_.Init(&instructions_and_sizes_ptr_,
               instructions_and_sizes_ptr_ + 0);  // 0 bytes available
  EXPECT_EQ(VCD_INSTRUCTION_END_OF_DATA,
            reader_.GetNextInstruction(&found_size_, &found_mode_));
  EXPECT_EQ(&instructions_and_sizes_[0], instructions_and_sizes_ptr_);

  reader_.Init(&instructions_and_sizes_ptr_,
               instructions_and_sizes_ptr_ + 1);  // 1 more byte available
  EXPECT_EQ(VCD_ADD, reader_.GetNextInstruction(&found_size_, &found_mode_));
  EXPECT_EQ(1, found_size_);
  EXPECT_EQ(0, found_mode_);
  EXPECT_EQ(VCD_COPY, reader_.GetNextInstruction(&found_size_, &found_mode_));
  EXPECT_EQ(4, found_size_);
  EXPECT_EQ(1, found_mode_);
  EXPECT_EQ(VCD_INSTRUCTION_END_OF_DATA,
            reader_.GetNextInstruction(&found_size_, &found_mode_));
  EXPECT_EQ(&instructions_and_sizes_[1], instructions_and_sizes_ptr_);

  reader_.Init(&instructions_and_sizes_ptr_,
               instructions_and_sizes_ptr_ + 1);  // 1 more byte available
  // The opcode is available, but the separately encoded size is not
  EXPECT_EQ(VCD_INSTRUCTION_END_OF_DATA,
            reader_.GetNextInstruction(&found_size_, &found_mode_));
  // The reader must have rewound to the opcode, not stopped after it.
  EXPECT_EQ(&instructions_and_sizes_[1], instructions_and_sizes_ptr_);

  reader_.Init(&instructions_and_sizes_ptr_,
               instructions_and_sizes_ptr_ + 2);  // 2 more bytes available
  EXPECT_EQ(VCD_ADD, reader_.GetNextInstruction(&found_size_, &found_mode_));
  EXPECT_EQ(111, found_size_);
  EXPECT_EQ(0, found_mode_);
  EXPECT_EQ(VCD_INSTRUCTION_END_OF_DATA,
            reader_.GetNextInstruction(&found_size_, &found_mode_));
  EXPECT_EQ(&instructions_and_sizes_[3], instructions_and_sizes_ptr_);

  reader_.Init(&instructions_and_sizes_ptr_,
               instructions_and_sizes_ptr_ + 1);  // 1 more byte available
  EXPECT_EQ(VCD_COPY, reader_.GetNextInstruction(&found_size_, &found_mode_));
  EXPECT_EQ(4, found_size_);
  EXPECT_EQ(8, found_mode_);
  EXPECT_EQ(VCD_ADD, reader_.GetNextInstruction(&found_size_, &found_mode_));
  EXPECT_EQ(1, found_size_);
  EXPECT_EQ(0, found_mode_);
  EXPECT_EQ(VCD_INSTRUCTION_END_OF_DATA,
            reader_.GetNextInstruction(&found_size_, &found_mode_));
  EXPECT_EQ(&instructions_and_sizes_[4], instructions_and_sizes_ptr_);
}

// Writes every opcode of the exercise code table into the buffer, in order,
// each followed by a Varint (1000 + opcode) for every instruction in it that
// needs a separately encoded size.  Then switches the reader to the exercise
// table and checks that the instructions come back in exactly that order.
TEST_F(DecodeTableTest, ExerciseCodeTableReader) {
  char* instruction_ptr = &instructions_and_sizes_[0];
  for (int opcode = 0; opcode < VCDiffCodeTableData::kCodeTableSize; ++opcode) {
    *instruction_ptr = opcode;
    ++instruction_ptr;
    if ((g_exercise_code_table_->inst1[opcode] != VCD_NOOP) &&
        (g_exercise_code_table_->size1[opcode] == 0)) {
      // A separately-encoded size value
      int encoded_size = VarintBE<VCDAddress>::Encode(1000 + opcode,
                                                      instruction_ptr);
      EXPECT_LT(0, encoded_size);
      instruction_ptr += encoded_size;
    }
    if ((g_exercise_code_table_->inst2[opcode] != VCD_NOOP) &&
        (g_exercise_code_table_->size2[opcode] == 0)) {
      // A separately-encoded size value for the second instruction
      int encoded_size = VarintBE<VCDAddress>::Encode(1000 + opcode,
                                                      instruction_ptr);
      EXPECT_LT(0, encoded_size);
      instruction_ptr += encoded_size;
    }
  }
  EXPECT_TRUE(reader_.UseCodeTable(*g_exercise_code_table_, kLastExerciseMode));
  int opcode = 0;
  // This loop has the same bounds as the one in SetUpTestCase.
  // Iterate over the instruction types and make sure that the opcodes,
  // interpreted in order, return exactly those instruction types.
  for (unsigned char inst_mode1 = 0;
       inst_mode1 <= VCD_LAST_INSTRUCTION_TYPE + kLastExerciseMode;
       ++inst_mode1) {
    unsigned char inst1 = inst_mode1;
    unsigned char mode1 = 0;
    if (inst_mode1 > VCD_COPY) {
      inst1 = VCD_COPY;
      mode1 = inst_mode1 - VCD_COPY;
    }
    for (unsigned char inst_mode2 = 0;
         inst_mode2 <= VCD_LAST_INSTRUCTION_TYPE + kLastExerciseMode;
         ++inst_mode2) {
      unsigned char inst2 = inst_mode2;
      unsigned char mode2 = 0;
      if (inst_mode2 > VCD_COPY) {
        inst2 = VCD_COPY;
        mode2 = inst_mode2 - VCD_COPY;
      }
      // Four opcodes per pair, in the same size order as SetUpTestCase:
      // (0, 0), (0, 255), (255, 0), (255, 255).
      VerifyInstModeSize1(inst1, mode1, 0, opcode);
      VerifyInstModeSize2(inst2, mode2, 0, opcode);
      ++opcode;
      VerifyInstModeSize1(inst1, mode1, 0, opcode);
      VerifyInstModeSize2(inst2, mode2, 255, opcode);
      ++opcode;
      VerifyInstModeSize1(inst1, mode1, 255, opcode);
      VerifyInstModeSize2(inst2, mode2, 0, opcode);
      ++opcode;
      VerifyInstModeSize1(inst1, mode1, 255, opcode);
      VerifyInstModeSize2(inst2, mode2, 255, opcode);
      ++opcode;
    }
  }
  // Sanity check on the test itself: every opcode must have been visited.
  if (VCDiffCodeTableData::kCodeTableSize != opcode) {
    LOG(FATAL) << "Internal error in test: The nested loops in "
                  "DecodeTableTest::ExerciseCodeTableReader should iterate "
                  "through exactly " << VCDiffCodeTableData::kCodeTableSize
               << " opcodes.";
    return;
  }
}
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include <config.h> +#include "encodetable.h" +#include <string> +#include "addrcache.h" +#include "codetable.h" +#include "instruction_map.h" +#include "logging.h" +#include "google/output_string.h" +#include "varint_bigendian.h" +#include "vcdiff_defs.h" + +namespace open_vcdiff { + +// VCDiffCodeTableWriter members and methods + +// If interleaved is true, the encoder writes each delta file window +// by interleaving instructions and sizes with their corresponding +// addresses and data, rather than placing these elements into three +// separate sections. This facilitates providing partially +// decoded results when only a portion of a delta file window +// is received (e.g. when HTTP over TCP is used as the +// transmission protocol.) The interleaved format is +// not consistent with the VCDIFF draft standard. +// +VCDiffCodeTableWriter::VCDiffCodeTableWriter(bool interleaved) + : max_mode_(VCDiffAddressCache::DefaultLastMode()), + dictionary_size_(0), + target_length_(0), + code_table_data_(&VCDiffCodeTableData::kDefaultCodeTableData), + instruction_map_(NULL), + last_opcode_index_(-1), + add_checksum_(false), + checksum_(0), + match_counts_(kMaxMatchSize, 0) { + InitSectionPointers(interleaved); +} + +VCDiffCodeTableWriter::VCDiffCodeTableWriter( + bool interleaved, + int near_cache_size, + int same_cache_size, + const VCDiffCodeTableData& code_table_data, + unsigned char max_mode) + : max_mode_(max_mode), + address_cache_(near_cache_size, same_cache_size), + dictionary_size_(0), + target_length_(0), + code_table_data_(&code_table_data), + instruction_map_(NULL), + last_opcode_index_(-1), + add_checksum_(false), + checksum_(0) { + InitSectionPointers(interleaved); +} + +VCDiffCodeTableWriter::~VCDiffCodeTableWriter() { + if (code_table_data_ != &VCDiffCodeTableData::kDefaultCodeTableData) { + delete instruction_map_; + } +} + +void 
VCDiffCodeTableWriter::InitSectionPointers(bool interleaved) { + if (interleaved) { + data_for_add_and_run_ = &instructions_and_sizes_; + addresses_for_copy_ = &instructions_and_sizes_; + } else { + data_for_add_and_run_ = &separate_data_for_add_and_run_; + addresses_for_copy_ = &separate_addresses_for_copy_; + } +} + +bool VCDiffCodeTableWriter::Init(size_t dictionary_size) { + dictionary_size_ = dictionary_size; + if (!instruction_map_) { + if (code_table_data_ == &VCDiffCodeTableData::kDefaultCodeTableData) { + instruction_map_ = VCDiffInstructionMap::GetDefaultInstructionMap(); + } else { + instruction_map_ = new VCDiffInstructionMap(*code_table_data_, max_mode_); + } + if (!instruction_map_) { + return false; + } + } + if (!address_cache_.Init()) { + return false; + } + target_length_ = 0; + last_opcode_index_ = -1; + return true; +} + +// The VCDiff format allows each opcode to represent either +// one or two delta instructions. This function will first +// examine the opcode generated by the last call to EncodeInstruction. +// If that opcode was a single-instruction opcode, this function checks +// whether there is a compound (double-instruction) opcode that can +// combine that single instruction with the instruction that is now +// being added, and so save a byte of space. In that case, the +// single-instruction opcode at position last_opcode_index_ will be +// overwritten with the new double-instruction opcode. +// +// In the majority of cases, no compound opcode will be possible, +// and a new single-instruction opcode will be appended to +// instructions_and_sizes_, followed by a representation of its size +// if the opcode does not implicitly give the instruction size. +// +// As an example, say instructions_and_sizes_ contains 10 bytes, the last +// of which contains the opcode 0x02 (ADD size 1). Because that was the +// most recently added opcode, last_opcode_index_ has the value 10. 
+// EncodeInstruction is then called with inst = VCD_COPY, size = 4, mode = 0. +// The function will replace the old opcode 0x02 with the double-instruction +// opcode 0xA3 (ADD size 1 + COPY size 4 mode 0). +// +// All of the double-instruction opcodes in the standard code table +// have implicit sizes, meaning that the size of the instruction will not +// need to be written to the instructions_and_sizes_ string separately +// from the opcode. If a custom code table were used that did not have +// this property, then instructions_and_sizes_ might contain a +// double-instruction opcode (say, COPY size 0 mode 0 + ADD size 0) +// followed by the size of the COPY, then by the size of the ADD. +// If using the SDCH interleaved format, the address of the COPY instruction +// would follow its size, so the ordering would be +// [Compound Opcode][Size of COPY][Address of COPY][Size of ADD] +// +void VCDiffCodeTableWriter::EncodeInstruction(VCDiffInstructionType inst, + size_t size, + unsigned char mode) { + if (!instruction_map_) { + LOG(DFATAL) << "EncodeInstruction() called without calling Init()" + << LOG_ENDL; + return; + } + if (last_opcode_index_ >= 0) { + const unsigned char last_opcode = + instructions_and_sizes_[last_opcode_index_]; + // The encoding engine should not generate two ADD instructions in a row. + // This won't cause a failure, but it's inefficient encoding and probably + // represents a bug in the higher-level logic of the encoder. 
+ if ((inst == VCD_ADD) && + (code_table_data_->inst1[last_opcode] == VCD_ADD)) { + LOG(WARNING) << "EncodeInstruction() called for two ADD instructions" + " in a row" << LOG_ENDL; + } + OpcodeOrNone compound_opcode = kNoOpcode; + if (size <= UCHAR_MAX) { + compound_opcode = + instruction_map_->LookupSecondOpcode(last_opcode, + inst, + static_cast<unsigned char>(size), + mode); + if (compound_opcode != kNoOpcode) { + instructions_and_sizes_[last_opcode_index_] = + static_cast<unsigned char>(compound_opcode); + last_opcode_index_ = -1; + return; + } + } + // Try finding a compound opcode with size 0. + compound_opcode = instruction_map_->LookupSecondOpcode(last_opcode, + inst, + 0, + mode); + if (compound_opcode != kNoOpcode) { + instructions_and_sizes_[last_opcode_index_] = + static_cast<unsigned char>(compound_opcode); + last_opcode_index_ = -1; + AppendSizeToString(size, &instructions_and_sizes_); + return; + } + } + OpcodeOrNone opcode = kNoOpcode; + if (size <= UCHAR_MAX) { + opcode = + instruction_map_->LookupFirstOpcode(inst, + static_cast<unsigned char>(size), + mode); + if (opcode != kNoOpcode) { + instructions_and_sizes_.push_back(static_cast<char>(opcode)); + last_opcode_index_ = static_cast<int>(instructions_and_sizes_.size() - 1); + return; + } + } + // There should always be an opcode with size 0. 
+ opcode = instruction_map_->LookupFirstOpcode(inst, 0, mode); + if (opcode == kNoOpcode) { + LOG(DFATAL) << "No matching opcode found for inst " << inst + << ", mode " << mode << ", size 0" << LOG_ENDL; + return; + } + instructions_and_sizes_.push_back(static_cast<char>(opcode)); + last_opcode_index_ = static_cast<int>(instructions_and_sizes_.size() - 1); + AppendSizeToString(size, &instructions_and_sizes_); +} + +void VCDiffCodeTableWriter::Add(const char* data, size_t size) { + EncodeInstruction(VCD_ADD, size); + data_for_add_and_run_->append(data, size); + target_length_ += size; +} + +void VCDiffCodeTableWriter::Copy(int32_t offset, size_t size) { + if (!instruction_map_) { + LOG(DFATAL) << "VCDiffCodeTableWriter::Copy() called without calling Init()" + << LOG_ENDL; + return; + } + // If a single interleaved stream of encoded values is used + // instead of separate sections for instructions, addresses, and data, + // then the string instructions_and_sizes_ may be the same as + // addresses_for_copy_. The address should therefore be encoded + // *after* the instruction and its size. 
+ int32_t encoded_addr = 0; + const unsigned char mode = address_cache_.EncodeAddress( + offset, + static_cast<VCDAddress>(dictionary_size_ + target_length_), + &encoded_addr); + EncodeInstruction(VCD_COPY, size, mode); + if (address_cache_.WriteAddressAsVarintForMode(mode)) { + VarintBE<int32_t>::AppendToString(encoded_addr, addresses_for_copy_); + } else { + addresses_for_copy_->push_back(static_cast<unsigned char>(encoded_addr)); + } + target_length_ += size; + if (size >= match_counts_.size()) { + match_counts_.resize(size * 2, 0); // Be generous to avoid resizing again + } + ++match_counts_[size]; +} + +void VCDiffCodeTableWriter::Run(size_t size, unsigned char byte) { + EncodeInstruction(VCD_RUN, size); + data_for_add_and_run_->push_back(byte); + target_length_ += size; +} + +size_t VCDiffCodeTableWriter::CalculateLengthOfSizeAsVarint(size_t size) { + return VarintBE<int32_t>::Length(static_cast<int32_t>(size)); +} + +void VCDiffCodeTableWriter::AppendSizeToString(size_t size, string* out) { + VarintBE<int32_t>::AppendToString(static_cast<int32_t>(size), out); +} + +void VCDiffCodeTableWriter::AppendSizeToOutputString( + size_t size, + OutputStringInterface* out) { + VarintBE<int32_t>::AppendToOutputString(static_cast<int32_t>(size), out); +} + +// This calculation must match the items added between "Start of Delta Encoding" +// and "End of Delta Encoding" in Output(), below. 
+size_t VCDiffCodeTableWriter::CalculateLengthOfTheDeltaEncoding() const { + size_t length_of_the_delta_encoding = + CalculateLengthOfSizeAsVarint(target_length_) + + 1 + // Delta_Indicator + CalculateLengthOfSizeAsVarint(separate_data_for_add_and_run_.size()) + + CalculateLengthOfSizeAsVarint(instructions_and_sizes_.size()) + + CalculateLengthOfSizeAsVarint(separate_addresses_for_copy_.size()) + + separate_data_for_add_and_run_.size() + + instructions_and_sizes_.size() + + separate_addresses_for_copy_.size(); + if (add_checksum_) { + length_of_the_delta_encoding += + VarintBE<int64_t>::Length(static_cast<int64_t>(checksum_)); + } + return length_of_the_delta_encoding; +} + +void VCDiffCodeTableWriter::Output(OutputStringInterface* out) { + if (instructions_and_sizes_.empty()) { + LOG(WARNING) << "Empty input; no delta window produced" << LOG_ENDL; + } else { + const size_t length_of_the_delta_encoding = + CalculateLengthOfTheDeltaEncoding(); + const size_t delta_window_size = + length_of_the_delta_encoding + + 1 + // Win_Indicator + CalculateLengthOfSizeAsVarint(dictionary_size_) + + CalculateLengthOfSizeAsVarint(0) + + CalculateLengthOfSizeAsVarint(length_of_the_delta_encoding); + // append() will be called many times on the output string; make sure + // the output string is resized only once at most. + out->ReserveAdditionalBytes(delta_window_size); + + // Add first element: Win_Indicator + if (add_checksum_) { + out->push_back(VCD_SOURCE | VCD_CHECKSUM); + } else { + out->push_back(VCD_SOURCE); + } + // Source segment size: dictionary size + AppendSizeToOutputString(dictionary_size_, out); + // Source segment position: 0 (start of dictionary) + AppendSizeToOutputString(0, out); + + // [Here is where a secondary compressor would be used + // if the encoder and decoder supported that feature.] 
+ + AppendSizeToOutputString(length_of_the_delta_encoding, out); + // Start of Delta Encoding + const size_t size_before_delta_encoding = out->size(); + AppendSizeToOutputString(target_length_, out); + out->push_back(0x00); // Delta_Indicator: no compression + AppendSizeToOutputString(separate_data_for_add_and_run_.size(), out); + AppendSizeToOutputString(instructions_and_sizes_.size(), out); + AppendSizeToOutputString(separate_addresses_for_copy_.size(), out); + if (add_checksum_) { + // The checksum is a 32-bit *unsigned* integer. VarintBE requires a + // signed type, so use a 64-bit signed integer to store the checksum. + VarintBE<int64_t>::AppendToOutputString(static_cast<int64_t>(checksum_), + out); + } + out->append(separate_data_for_add_and_run_.data(), + separate_data_for_add_and_run_.size()); + out->append(instructions_and_sizes_.data(), + instructions_and_sizes_.size()); + out->append(separate_addresses_for_copy_.data(), + separate_addresses_for_copy_.size()); + // End of Delta Encoding + const size_t size_after_delta_encoding = out->size(); + if (length_of_the_delta_encoding != + (size_after_delta_encoding - size_before_delta_encoding)) { + LOG(DFATAL) << "Internal error: calculated length of the delta encoding (" + << length_of_the_delta_encoding + << ") does not match actual length (" + << (size_after_delta_encoding - size_before_delta_encoding) + << LOG_ENDL; + } + separate_data_for_add_and_run_.clear(); + instructions_and_sizes_.clear(); + separate_addresses_for_copy_.clear(); + if (target_length_ == 0) { + LOG(WARNING) << "Empty target window" << LOG_ENDL; + } + } + + // Reset state for next window; assume we are using same code table + // and dictionary. The caller will have to invoke Init() if a different + // dictionary is used. + // + // Notably, Init() calls address_cache_.Init(). This resets the address + // cache between delta windows, as required by RFC section 5.1. 
+ if (!Init(dictionary_size_)) { + LOG(DFATAL) << "Internal error: calling Init() to reset " + "VCDiffCodeTableWriter state failed" << LOG_ENDL; + } +} + +}; // namespace open_vcdiff diff --git a/src/encodetable.h b/src/encodetable.h new file mode 100644 index 0000000..b14c795 --- /dev/null +++ b/src/encodetable.h @@ -0,0 +1,246 @@ +// Copyright 2008 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_VCDIFF_ENCODETABLE_H_ +#define OPEN_VCDIFF_ENCODETABLE_H_ + +#include <config.h> +#include <stdint.h> // int32_t +#include <cstddef> // size_t +#include <string> +#include <vector> +#include "addrcache.h" +#include "checksum.h" +#include "codetable.h" + +namespace open_vcdiff { + +using std::string; + +class OutputStringInterface; +class VCDiffInstructionMap; + +// The method calls after construction *must* conform +// to the following pattern: +// {{Add|Copy|Run}* [AddChecksum] Output}* +// +// When Output has been called in this sequence, a complete target window +// (as defined in RFC 3284 section 4.3) will have been appended to +// out (unless no calls to Add, Run, or Copy were made, in which +// case Output will do nothing.) The output will not be available for use +// until after each call to Output(). +// +// This class can also write a compressed +// representation of a non-standard code table to an string so that it +// can be sent with the data that was encoded using that code table. 
//
// NOT threadsafe.
//
class VCDiffCodeTableWriter {
 public:
  // This constructor uses the default code table.
  // If interleaved is true, the encoder writes each delta file window
  // by interleaving instructions and sizes with their corresponding
  // addresses and data, rather than placing these elements into three
  // separate sections.  This facilitates providing partially
  // decoded results when only a portion of a delta file window
  // is received (e.g. when HTTP over TCP is used as the
  // transmission protocol.)  The interleaved format is
  // not consistent with the VCDIFF draft standard.
  //
  explicit VCDiffCodeTableWriter(bool interleaved);

  // Uses a non-standard code table and non-standard cache sizes.  The caller
  // must guarantee that code_table_data remains allocated for the lifetime of
  // the VCDiffCodeTableWriter object.  Note that this is different from how
  // VCDiffCodeTableReader::UseCodeTable works.  It is assumed that a given
  // encoder will use either the default code table or a statically-defined
  // non-standard code table, whereas the decoder must have the ability to read
  // an arbitrary non-standard code table from a delta file and discard it once
  // the file has been decoded.
  //
  VCDiffCodeTableWriter(bool interleaved,
                        int near_cache_size,
                        int same_cache_size,
                        const VCDiffCodeTableData& code_table_data,
                        unsigned char max_mode);

  ~VCDiffCodeTableWriter();

  // Initializes the constructed object for use.
  // This method must be called after a VCDiffCodeTableWriter is constructed
  // and before any of its other methods can be called.  It will return
  // false if there was an error initializing the object, or true if it
  // was successful.  After the object has been initialized and used,
  // Init() can be called again to restore the initial state of the object.
  //
  bool Init(size_t dictionary_size);

  // Returns the number of target bytes encoded so far (see target_length_).
  size_t target_length() const { return target_length_; }

  // Encode an ADD opcode with the "size" bytes starting at data
  //
  void Add(const char* data, size_t size);

  // Encode a COPY opcode with args "offset" (into dictionary) and "size" bytes.
  // NOTE(review): "offset" is handed to the address cache as the address to
  // encode, so it presumably may also refer to previously encoded target
  // data located after the dictionary -- confirm against the callers.
  //
  void Copy(int32_t offset, size_t size);

  // Encode a RUN opcode for "size" copies of the value "byte".
  //
  void Run(size_t size, unsigned char byte);

  // Stores "checksum" and sets the flag that requests a checksum to be
  // written.  See the comments on add_checksum_ and checksum_ below.
  void AddChecksum(VCDChecksum checksum) {
    add_checksum_ = true;
    checksum_ = checksum;
  }

  // Finishes encoding and appends the encoded delta window to the output
  // string.  The output string is not null-terminated and may contain embedded
  // '\0' characters.
  void Output(OutputStringInterface* out);

  // Read-only access to the match-length histogram (see match_counts_).
  const std::vector<int>& match_counts() const { return match_counts_; }

 private:
  // This is an estimate of the longest match size the encoder expects to find.
  // It is used to determine the initial size of the vector match_counts_.
  // If it is too large, then some space will be wasted on vector elements
  // that are not used.  If it is too small, then some time will be wasted
  // expanding match_counts_ to accommodate larger match sizes.
  static const size_t kMaxMatchSize = 2000;

  // The maximum value for the mode of a COPY instruction.
  const unsigned char max_mode_;

  // If interleaved is true, sets data_for_add_and_run_ and
  // addresses_for_copy_ to point at instructions_and_sizes_,
  // so that instructions, sizes, addresses and data will be
  // combined into a single interleaved stream.
  // If interleaved is false, sets data_for_add_and_run_ and
  // addresses_for_copy_ to point at their corresponding
  // separate_... strings, so that the three sections will
  // be generated separately from one another.
  //
  void InitSectionPointers(bool interleaved);

  // Determines the best opcode to encode an instruction, and appends
  // or substitutes that opcode and its size into the
  // instructions_and_sizes_ string.
  //
  void EncodeInstruction(VCDiffInstructionType inst,
                         size_t size,
                         unsigned char mode);

  // Convenience overload for instructions that carry no mode: forwards to
  // the three-argument version with mode 0.
  void EncodeInstruction(VCDiffInstructionType inst, size_t size) {
    return EncodeInstruction(inst, size, 0);
  }

  // Calculates the number of bytes needed to store the given size value as a
  // variable-length integer (VarintBE).
  static size_t CalculateLengthOfSizeAsVarint(size_t size);

  // Appends the size value to the string as a variable-length integer.
  static void AppendSizeToString(size_t size, string* out);

  // Appends the size value to the output string as a variable-length integer.
  static void AppendSizeToOutputString(size_t size, OutputStringInterface* out);

  // Calculates the "Length of the delta encoding" field for the delta window
  // header, based on the sizes of the sections and of the other header
  // elements.
  size_t CalculateLengthOfTheDeltaEncoding() const;

  // None of the following 'string' objects are null-terminated.

  // A series of instruction opcodes, each of which may be followed
  // by one or two Varint values representing the size parameters
  // of the first and second instruction in the opcode.
  string instructions_and_sizes_;

  // A series of data arguments (byte values) used for ADD and RUN
  // instructions.  Depending on whether interleaved output is used
  // for streaming or not, the pointer may point to
  // separate_data_for_add_and_run_ or to instructions_and_sizes_.
  string *data_for_add_and_run_;
  string separate_data_for_add_and_run_;

  // A series of Varint addresses used for COPY instructions.
  // For the SAME mode, a byte value is stored instead of a Varint.
  // Depending on whether interleaved output is used
  // for streaming or not, the pointer may point to
  // separate_addresses_for_copy_ or to instructions_and_sizes_.
  string *addresses_for_copy_;
  string separate_addresses_for_copy_;

  // Encodes COPY addresses into a (mode, encoded address) pair.
  VCDiffAddressCache address_cache_;

  // The dictionary size passed to the most recent call to Init().
  size_t dictionary_size_;

  // The number of bytes of target data that has been encoded so far.
  // Each time Add(), Copy(), or Run() is called, this will be incremented.
  // The target length is used to compute HERE mode addresses
  // for COPY instructions, and is also written into the header
  // of the delta window when Output() is called.
  //
  size_t target_length_;

  // The code table in use; not owned by this object (see the constructor
  // comment about the lifetime of code_table_data).
  const VCDiffCodeTableData* code_table_data_;

  // The instruction map facilitates finding an opcode quickly given an
  // instruction inst, size, and mode.  This is an alternate representation
  // of the same information that is found in code_table_data_.
  //
  const VCDiffInstructionMap* instruction_map_;

  // The zero-based index within instructions_and_sizes_ of the byte
  // that contains the last single-instruction opcode generated by
  // EncodeInstruction().  (See that function for exhaustive details.)
  // It is necessary to use an index rather than a pointer for this value
  // because instructions_and_sizes_ may be resized, which would invalidate
  // any pointers into its data buffer.  The value -1 is reserved to mean that
  // either no opcodes have been generated yet, or else the last opcode
  // generated was a double-instruction opcode.
  //
  int last_opcode_index_;

  // If true, an Adler32 checksum of the target window data will be written as
  // a variable-length integer, just after the size of the addresses section.
  //
  bool add_checksum_;

  // The checksum to be written to the current target window,
  // if add_checksum_ is true.
  // This will not be calculated based on the individual calls to Add(), Run(),
  // and Copy(), which would be unnecessarily expensive.  Instead, the code
  // that uses the VCDiffCodeTableWriter object is expected to calculate
  // the checksum all at once and to call AddChecksum() with that value.
  // Must be called sometime before calling Output(), though it can be called
  // either before or after the calls to Add(), Run(), and Copy().
  //
  VCDChecksum checksum_;

  // The value of match_counts_[n] is equal to the number of matches
  // of length n (that is, COPY instructions of size n) found so far.
  std::vector<int> match_counts_;

  // Making these private avoids implicit copy constructor & assignment operator
  // (they are declared here so that the object cannot be copied by clients).
  VCDiffCodeTableWriter(const VCDiffCodeTableWriter&);  // NOLINT
  void operator=(const VCDiffCodeTableWriter&);
};

};  // namespace open_vcdiff

#endif  // OPEN_VCDIFF_ENCODETABLE_H_
diff --git a/src/encodetable_test.cc b/src/encodetable_test.cc
new file mode 100644
index 0000000..50a74ca
--- /dev/null
+++ b/src/encodetable_test.cc
@@ -0,0 +1,508 @@
// Copyright 2008 Google Inc.
// Author: Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Unit tests for the class VCDiffCodeTableWriter, found in encodetable.h.
+ +#include <config.h> +#include "encodetable.h" +#include <algorithm> +#include <cstring> // strlen +#include <string> +#include <vector> +#include "addrcache.h" // VCDiffAddressCache::kDefaultNearCacheSize +#include "checksum.h" +#include "codetable.h" +#include "google/output_string.h" +#include "testing.h" +#include "vcdiff_defs.h" + +namespace open_vcdiff { +namespace { + +using std::string; + +class CodeTableWriterTest : public testing::Test { + protected: + // Remove all of the functions below that are not useful for this + // test fixture. + + CodeTableWriterTest() + : standard_writer(false), + interleaved_writer(true), + exercise_writer(true, + VCDiffAddressCache::kDefaultNearCacheSize, + VCDiffAddressCache::kDefaultSameCacheSize, + *g_exercise_code_table_, kLastExerciseMode), + output_string(&out), + out_index(0) { } + + virtual ~CodeTableWriterTest() { } + + static void AddExerciseOpcode(unsigned char inst1, + unsigned char mode1, + unsigned char size1, + unsigned char inst2, + unsigned char mode2, + unsigned char size2, + int opcode) { + g_exercise_code_table_->inst1[opcode] = inst1; + g_exercise_code_table_->mode1[opcode] = mode1; + g_exercise_code_table_->size1[opcode] = (inst1 == VCD_NOOP) ? 0 : size1; + g_exercise_code_table_->inst2[opcode] = inst2; + g_exercise_code_table_->mode2[opcode] = mode2; + g_exercise_code_table_->size2[opcode] = (inst2 == VCD_NOOP) ? 
0 : size2; + } + + static void SetUpTestCase() { + g_exercise_code_table_ = new VCDiffCodeTableData; + int opcode = 0; + for (unsigned char inst_mode1 = 0; + inst_mode1 <= VCD_LAST_INSTRUCTION_TYPE + kLastExerciseMode; + ++inst_mode1) { + unsigned char inst1 = inst_mode1; + unsigned char mode1 = 0; + if (inst_mode1 > VCD_COPY) { + inst1 = VCD_COPY; + mode1 = inst_mode1 - VCD_COPY; + } + for (unsigned char inst_mode2 = 0; + inst_mode2 <= VCD_LAST_INSTRUCTION_TYPE + kLastExerciseMode; + ++inst_mode2) { + unsigned char inst2 = inst_mode2; + unsigned char mode2 = 0; + if (inst_mode2 > VCD_COPY) { + inst2 = VCD_COPY; + mode2 = inst_mode2 - VCD_COPY; + } + AddExerciseOpcode(inst1, mode1, 0, inst2, mode2, 0, opcode++); + AddExerciseOpcode(inst1, mode1, 0, inst2, mode2, 255, opcode++); + AddExerciseOpcode(inst1, mode1, 255, inst2, mode2, 0, opcode++); + AddExerciseOpcode(inst1, mode1, 255, inst2, mode2, 255, opcode++); + } + } + // This is a CHECK rather than an EXPECT because it validates only + // the logic of the test, not of the code being tested. 
+ CHECK_EQ(VCDiffCodeTableData::kCodeTableSize, opcode); + + EXPECT_TRUE(g_exercise_code_table_->Validate(kLastExerciseMode)); + } + + static void TearDownTestCase() { + delete g_exercise_code_table_; + } + + void ExpectByte(unsigned char b) { + EXPECT_EQ(b, static_cast<unsigned char>(out[out_index])); + ++out_index; + } + + void ExpectString(const char* s) { + const size_t size = strlen(s); // don't include terminating NULL char + EXPECT_EQ(string(s, size), + string(out.data() + out_index, size)); + out_index += size; + } + + void ExpectNoMoreBytes() { + EXPECT_EQ(out_index, out.size()); + } + + static bool AnyMatch(int match_count) { return match_count != 0; } + + static void ExpectNoMatchesForWriter(const VCDiffCodeTableWriter& writer) { + const std::vector<int>& match_counts = writer.match_counts(); + EXPECT_TRUE(find_if(match_counts.begin(), match_counts.end(), AnyMatch) + == match_counts.end()); + } + + void ExpectNoMatches() const { + ExpectNoMatchesForWriter(standard_writer); + ExpectNoMatchesForWriter(interleaved_writer); + ExpectNoMatchesForWriter(exercise_writer); + } + + // This value is designed so that the total number of inst values and modes + // will equal 8 (VCD_NOOP, VCD_ADD, VCD_RUN, VCD_COPY modes 0 - 4). + // Eight combinations of inst and mode, times two possible size values, + // squared (because there are two instructions per opcode), makes + // exactly 256 possible instruction combinations, which fits kCodeTableSize + // (the number of opcodes in the table.) + static const int kLastExerciseMode = 4; + + // A code table that exercises as many combinations as possible: + // 2 instructions, each is a NOOP, ADD, RUN, or one of 5 copy modes + // (== 8 total combinations of inst and mode), and each has + // size == 0 or 255 (2 possibilities.) + static VCDiffCodeTableData* g_exercise_code_table_; + + // The code table writer for standard encoding, default code table. 
+ VCDiffCodeTableWriter standard_writer; + + // The code table writer for interleaved encoding, default code table. + VCDiffCodeTableWriter interleaved_writer; + + // The code table writer corresponding to g_exercise_code_table_ + // (interleaved encoding). + VCDiffCodeTableWriter exercise_writer; + + // Destination for VCDiffCodeTableWriter::Output() + string out; + OutputString<string> output_string; + size_t out_index; +}; + +VCDiffCodeTableData* CodeTableWriterTest::g_exercise_code_table_; + +#ifdef GTEST_HAS_DEATH_TEST +typedef CodeTableWriterTest CodeTableWriterDeathTest; +#endif // GTEST_HAS_DEATH_TEST + +#ifdef GTEST_HAS_DEATH_TEST +TEST_F(CodeTableWriterDeathTest, WriterAddWithoutInit) { +#ifndef NDEBUG + // This condition is only checked in the debug build. + EXPECT_DEBUG_DEATH(standard_writer.Add("Hello", 5), + "Init"); +#endif // !NDEBUG +} + +TEST_F(CodeTableWriterDeathTest, WriterRunWithoutInit) { +#ifndef NDEBUG + // This condition is only checked in the debug build. + EXPECT_DEBUG_DEATH(standard_writer.Run(3, 'a'), + "Init"); +#endif // !NDEBUG +} + +TEST_F(CodeTableWriterDeathTest, WriterCopyWithoutInit) { +#ifndef NDEBUG + // This condition is only checked in the debug build. + EXPECT_DEBUG_DEATH(standard_writer.Copy(6, 5), + "Init"); +#endif // !NDEBUG +} +#endif // GTEST_HAS_DEATH_TEST + +// Output() without Init() is harmless, but will produce no output. +TEST_F(CodeTableWriterTest, WriterOutputWithoutInit) { + standard_writer.Output(&output_string); + EXPECT_TRUE(out.empty()); +} + +TEST_F(CodeTableWriterTest, WriterEncodeNothing) { + EXPECT_TRUE(standard_writer.Init(0)); + standard_writer.Output(&output_string); + // The writer should know not to append a delta file window + // if nothing was encoded. 
+ EXPECT_TRUE(out.empty()); + + out.clear(); + EXPECT_TRUE(interleaved_writer.Init(0x10)); + interleaved_writer.Output(&output_string); + EXPECT_TRUE(out.empty()); + + out.clear(); + EXPECT_TRUE(exercise_writer.Init(0x20)); + exercise_writer.Output(&output_string); + EXPECT_TRUE(out.empty()); + + ExpectNoMatches(); +} + +TEST_F(CodeTableWriterTest, StandardWriterEncodeAdd) { + EXPECT_TRUE(standard_writer.Init(0x11)); + standard_writer.Add("foo", 3); + standard_writer.Output(&output_string); + ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary) + ExpectByte(0x11); // Source segment size: dictionary length + ExpectByte(0x00); // Source segment position: start of dictionary + ExpectByte(0x09); // Length of the delta encoding + ExpectByte(0x03); // Size of the target window + ExpectByte(0x00); // Delta_indicator (no compression) + ExpectByte(0x03); // length of data for ADDs and RUNs + ExpectByte(0x01); // length of instructions section + ExpectByte(0x00); // length of addresses for COPYs + ExpectString("foo"); + ExpectByte(0x04); // ADD(3) opcode + ExpectNoMoreBytes(); + ExpectNoMatches(); +} + +TEST_F(CodeTableWriterTest, ExerciseWriterEncodeAdd) { + EXPECT_TRUE(exercise_writer.Init(0x11)); + exercise_writer.Add("foo", 3); + exercise_writer.Output(&output_string); + ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary) + ExpectByte(0x11); // Source segment size: dictionary length + ExpectByte(0x00); // Source segment position: start of dictionary + ExpectByte(0x0A); // Length of the delta encoding + ExpectByte(0x03); // Size of the target window + ExpectByte(0x00); // Delta_indicator (no compression) + ExpectByte(0x00); // length of data for ADDs and RUNs + ExpectByte(0x05); // length of instructions section + ExpectByte(0x00); // length of addresses for COPYs + ExpectByte(0x04); // Opcode: NOOP + ADD(0) + ExpectByte(0x03); // Size of ADD (3) + ExpectString("foo"); + ExpectNoMatches(); +} + +TEST_F(CodeTableWriterTest, 
StandardWriterEncodeRun) { + EXPECT_TRUE(standard_writer.Init(0x11)); + standard_writer.Run(3, 'a'); + standard_writer.Output(&output_string); + ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary) + ExpectByte(0x11); // Source segment size: dictionary length + ExpectByte(0x00); // Source segment position: start of dictionary + ExpectByte(0x08); // Length of the delta encoding + ExpectByte(0x03); // Size of the target window + ExpectByte(0x00); // Delta_indicator (no compression) + ExpectByte(0x01); // length of data for ADDs and RUNs + ExpectByte(0x02); // length of instructions section + ExpectByte(0x00); // length of addresses for COPYs + ExpectByte('a'); + ExpectByte(0x00); // RUN(0) opcode + ExpectByte(0x03); // Size of RUN (3) + ExpectNoMoreBytes(); + ExpectNoMatches(); +} + +TEST_F(CodeTableWriterTest, ExerciseWriterEncodeRun) { + EXPECT_TRUE(exercise_writer.Init(0x11)); + exercise_writer.Run(3, 'a'); + exercise_writer.Output(&output_string); + ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary) + ExpectByte(0x11); // Source segment size: dictionary length + ExpectByte(0x00); // Source segment position: start of dictionary + ExpectByte(0x08); // Length of the delta encoding + ExpectByte(0x03); // Size of the target window + ExpectByte(0x00); // Delta_indicator (no compression) + ExpectByte(0x00); // length of data for ADDs and RUNs + ExpectByte(0x03); // length of instructions section + ExpectByte(0x00); // length of addresses for COPYs + ExpectByte(0x08); // Opcode: NOOP + RUN(0) + ExpectByte(0x03); // Size of RUN (3) + ExpectByte('a'); + ExpectNoMoreBytes(); + ExpectNoMatches(); +} + +TEST_F(CodeTableWriterTest, StandardWriterEncodeCopy) { + EXPECT_TRUE(standard_writer.Init(0x11)); + standard_writer.Copy(2, 8); + standard_writer.Copy(2, 8); + standard_writer.Output(&output_string); + ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary) + ExpectByte(0x11); // Source segment size: dictionary length + ExpectByte(0x00); 
// Source segment position: start of dictionary + ExpectByte(0x09); // Length of the delta encoding + ExpectByte(0x10); // Size of the target window + ExpectByte(0x00); // Delta_indicator (no compression) + ExpectByte(0x00); // length of data for ADDs and RUNs + ExpectByte(0x02); // length of instructions section + ExpectByte(0x02); // length of addresses for COPYs + ExpectByte(0x18); // COPY mode SELF, size 8 + ExpectByte(0x78); // COPY mode SAME(0), size 8 + ExpectByte(0x02); // COPY address (2) + ExpectByte(0x02); // COPY address (2) + ExpectNoMoreBytes(); + EXPECT_LE(9U, standard_writer.match_counts().size()); + EXPECT_EQ(0, standard_writer.match_counts()[0]); + EXPECT_EQ(0, standard_writer.match_counts()[1]); + EXPECT_EQ(0, standard_writer.match_counts()[2]); + EXPECT_EQ(0, standard_writer.match_counts()[3]); + EXPECT_EQ(0, standard_writer.match_counts()[4]); + EXPECT_EQ(0, standard_writer.match_counts()[5]); + EXPECT_EQ(0, standard_writer.match_counts()[6]); + EXPECT_EQ(0, standard_writer.match_counts()[7]); + EXPECT_EQ(2, standard_writer.match_counts()[8]); +} + +// The exercise code table can't be used to test how the code table +// writer encodes COPY instructions because the code table writer +// always uses the default cache sizes, which exceed the maximum mode +// used in the exercise table. 
+TEST_F(CodeTableWriterTest, InterleavedWriterEncodeCopy) { + EXPECT_TRUE(interleaved_writer.Init(0x11)); + interleaved_writer.Copy(2, 8); + interleaved_writer.Copy(2, 8); + interleaved_writer.Output(&output_string); + ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary) + ExpectByte(0x11); // Source segment size: dictionary length + ExpectByte(0x00); // Source segment position: start of dictionary + ExpectByte(0x09); // Length of the delta encoding + ExpectByte(0x10); // Size of the target window + ExpectByte(0x00); // Delta_indicator (no compression) + ExpectByte(0x00); // length of data for ADDs and RUNs + ExpectByte(0x04); // length of instructions section + ExpectByte(0x00); // length of addresses for COPYs + ExpectByte(0x18); // COPY mode SELF, size 8 + ExpectByte(0x02); // COPY address (2) + ExpectByte(0x78); // COPY mode SAME(0), size 8 + ExpectByte(0x02); // COPY address (2) + ExpectNoMoreBytes(); + EXPECT_LE(9U, interleaved_writer.match_counts().size()); + EXPECT_EQ(0, interleaved_writer.match_counts()[0]); + EXPECT_EQ(0, interleaved_writer.match_counts()[1]); + EXPECT_EQ(0, interleaved_writer.match_counts()[2]); + EXPECT_EQ(0, interleaved_writer.match_counts()[3]); + EXPECT_EQ(0, interleaved_writer.match_counts()[4]); + EXPECT_EQ(0, interleaved_writer.match_counts()[5]); + EXPECT_EQ(0, interleaved_writer.match_counts()[6]); + EXPECT_EQ(0, interleaved_writer.match_counts()[7]); + EXPECT_EQ(2, interleaved_writer.match_counts()[8]); +} + +TEST_F(CodeTableWriterTest, StandardWriterEncodeCombo) { + EXPECT_TRUE(standard_writer.Init(0x11)); + standard_writer.Add("rayo", 4); + standard_writer.Copy(2, 5); + standard_writer.Copy(0, 4); + standard_writer.Add("X", 1); + standard_writer.Output(&output_string); + ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary) + ExpectByte(0x11); // Source segment size: dictionary length + ExpectByte(0x00); // Source segment position: start of dictionary + ExpectByte(0x0E); // Length of the delta 
encoding + ExpectByte(0x0E); // Size of the target window + ExpectByte(0x00); // Delta_indicator (no compression) + ExpectByte(0x05); // length of data for ADDs and RUNs + ExpectByte(0x02); // length of instructions section + ExpectByte(0x02); // length of addresses for COPYs + ExpectString("rayoX"); + ExpectByte(0xAD); // Combo: Add size 4 + COPY mode SELF, size 5 + ExpectByte(0xFD); // Combo: COPY mode SAME(0), size 4 + Add size 1 + ExpectByte(0x02); // COPY address (2) + ExpectByte(0x00); // COPY address (0) + ExpectNoMoreBytes(); + EXPECT_LE(6U, standard_writer.match_counts().size()); + EXPECT_EQ(0, standard_writer.match_counts()[0]); + EXPECT_EQ(0, standard_writer.match_counts()[1]); + EXPECT_EQ(0, standard_writer.match_counts()[2]); + EXPECT_EQ(0, standard_writer.match_counts()[3]); + EXPECT_EQ(1, standard_writer.match_counts()[4]); + EXPECT_EQ(1, standard_writer.match_counts()[5]); +} + +TEST_F(CodeTableWriterTest, InterleavedWriterEncodeCombo) { + EXPECT_TRUE(interleaved_writer.Init(0x11)); + interleaved_writer.Add("rayo", 4); + interleaved_writer.Copy(2, 5); + interleaved_writer.Copy(0, 4); + interleaved_writer.Add("X", 1); + interleaved_writer.Output(&output_string); + ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary) + ExpectByte(0x11); // Source segment size: dictionary length + ExpectByte(0x00); // Source segment position: start of dictionary + ExpectByte(0x0E); // Length of the delta encoding + ExpectByte(0x0E); // Size of the target window + ExpectByte(0x00); // Delta_indicator (no compression) + ExpectByte(0x00); // length of data for ADDs and RUNs + ExpectByte(0x09); // length of instructions section + ExpectByte(0x00); // length of addresses for COPYs + ExpectByte(0xAD); // Combo: Add size 4 + COPY mode SELF, size 5 + ExpectString("rayo"); + ExpectByte(0x02); // COPY address (2) + ExpectByte(0xFD); // Combo: COPY mode SAME(0), size 4 + Add size 1 + ExpectByte(0x00); // COPY address (0) + ExpectByte('X'); + ExpectNoMoreBytes(); + 
EXPECT_LE(6U, interleaved_writer.match_counts().size()); + EXPECT_EQ(0, interleaved_writer.match_counts()[0]); + EXPECT_EQ(0, interleaved_writer.match_counts()[1]); + EXPECT_EQ(0, interleaved_writer.match_counts()[2]); + EXPECT_EQ(0, interleaved_writer.match_counts()[3]); + EXPECT_EQ(1, interleaved_writer.match_counts()[4]); + EXPECT_EQ(1, interleaved_writer.match_counts()[5]); +} + +TEST_F(CodeTableWriterTest, InterleavedWriterEncodeComboWithChecksum) { + EXPECT_TRUE(interleaved_writer.Init(0x11)); + const VCDChecksum checksum = 0xFFFFFFFF; // would be negative if signed + interleaved_writer.AddChecksum(checksum); + interleaved_writer.Add("rayo", 4); + interleaved_writer.Copy(2, 5); + interleaved_writer.Copy(0, 4); + interleaved_writer.Add("X", 1); + interleaved_writer.Output(&output_string); + ExpectByte(VCD_SOURCE | VCD_CHECKSUM); // Win_Indicator + ExpectByte(0x11); // Source segment size: dictionary length + ExpectByte(0x00); // Source segment position: start of dictionary + ExpectByte(0x13); // Length of the delta encoding + ExpectByte(0x0E); // Size of the target window + ExpectByte(0x00); // Delta_indicator (no compression) + ExpectByte(0x00); // length of data for ADDs and RUNs + ExpectByte(0x09); // length of instructions section + ExpectByte(0x00); // length of addresses for COPYs + ExpectByte(0x8F); // checksum byte 1 + ExpectByte(0xFF); // checksum byte 2 + ExpectByte(0xFF); // checksum byte 3 + ExpectByte(0xFF); // checksum byte 4 + ExpectByte(0x7F); // checksum byte 5 + ExpectByte(0xAD); // Combo: Add size 4 + COPY mode SELF, size 5 + ExpectString("rayo"); + ExpectByte(0x02); // COPY address (2) + ExpectByte(0xFD); // Combo: COPY mode SAME(0), size 4 + Add size 1 + ExpectByte(0x00); // COPY address (0) + ExpectByte('X'); + ExpectNoMoreBytes(); +} + +TEST_F(CodeTableWriterTest, ReallyBigDictionary) { + EXPECT_TRUE(interleaved_writer.Init(0x3FFFFFFF)); + interleaved_writer.Copy(2, 8); + interleaved_writer.Copy(0x3FFFFFFE, 8); + 
interleaved_writer.Output(&output_string); + ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary) + ExpectByte(0x83); // Source segment size: dictionary length (1) + ExpectByte(0xFF); // Source segment size: dictionary length (2) + ExpectByte(0xFF); // Source segment size: dictionary length (3) + ExpectByte(0xFF); // Source segment size: dictionary length (4) + ExpectByte(0x7F); // Source segment size: dictionary length (5) + ExpectByte(0x00); // Source segment position: start of dictionary + ExpectByte(0x09); // Length of the delta encoding + ExpectByte(0x10); // Size of the target window + ExpectByte(0x00); // Delta_indicator (no compression) + ExpectByte(0x00); // length of data for ADDs and RUNs + ExpectByte(0x04); // length of instructions section + ExpectByte(0x00); // length of addresses for COPYs + ExpectByte(0x18); // COPY mode SELF, size 8 + ExpectByte(0x02); // COPY address (2) + ExpectByte(0x28); // COPY mode HERE, size 8 + ExpectByte(0x09); // COPY address (9) + ExpectNoMoreBytes(); + EXPECT_LE(9U, interleaved_writer.match_counts().size()); + EXPECT_EQ(0, interleaved_writer.match_counts()[0]); + EXPECT_EQ(0, interleaved_writer.match_counts()[1]); + EXPECT_EQ(0, interleaved_writer.match_counts()[2]); + EXPECT_EQ(0, interleaved_writer.match_counts()[3]); + EXPECT_EQ(0, interleaved_writer.match_counts()[4]); + EXPECT_EQ(0, interleaved_writer.match_counts()[5]); + EXPECT_EQ(0, interleaved_writer.match_counts()[6]); + EXPECT_EQ(0, interleaved_writer.match_counts()[7]); + EXPECT_EQ(2, interleaved_writer.match_counts()[8]); +} + +#ifdef GTEST_HAS_DEATH_TEST +TEST_F(CodeTableWriterDeathTest, DictionaryTooBig) { + EXPECT_TRUE(interleaved_writer.Init(0x7FFFFFFF)); + interleaved_writer.Copy(2, 8); + EXPECT_DEBUG_DEATH(interleaved_writer.Copy(0x7FFFFFFE, 8), + "address.*<.*here_address"); +} +#endif // GTEST_HAS_DEATH_TEST + +} // unnamed namespace +} // namespace open_vcdiff diff --git a/src/gflags.cc b/src/gflags.cc new file mode 100644 index 
0000000..be6f661 --- /dev/null +++ b/src/gflags.cc @@ -0,0 +1,1984 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Ray Sidney +// Revamped and reorganized by Craig Silverstein +// +// This file contains the implementation of all our command line flags +// stuff. Here's how everything fits together +// +// * FlagRegistry owns CommandLineFlags owns FlagValue. 
+// * FlagSaver holds a FlagRegistry (saves it at construct time, +// restores it at destroy time). +// * CommandLineFlagParser lives outside that hierarchy, but works on +// CommandLineFlags (modifying the FlagValues). +// * Free functions like SetCommandLineOption() work via one of the +// above (such as CommandLineFlagParser). +// +// In more detail: +// +// -- The main classes that hold flag data: +// +// FlagValue holds the current value of a flag. It's +// pseudo-templatized: every operation on a FlagValue is typed. It +// also deals with storage-lifetime issues (so flag values don't go +// away in a destructor), which is why we need a whole class to hold a +// variable's value. +// +// CommandLineFlag is all the information about a single command-line +// flag. It has a FlagValue for the flag's current value, but also +// the flag's name, type, etc. +// +// FlagRegistry is a collection of CommandLineFlags. There's the +// global registry, which is where flags defined via DEFINE_foo() +// live. But it's possible to define your own flag, manually, in a +// different registry you create. (In practice, multiple registries +// are used only by FlagSaver). +// +// A given FlagValue is owned by exactly one CommandLineFlag. A given +// CommandLineFlag is owned by exactly one FlagRegistry. FlagRegistry +// has a lock; any operation that writes to a FlagValue or +// CommandLineFlag owned by that registry must acquire the +// FlagRegistry lock before doing so. +// +// --- Some other classes and free functions: +// +// CommandLineFlagInfo is a client-exposed version of CommandLineFlag. +// Once it's instantiated, it has no dependencies or relationships +// with any other part of this file. +// +// FlagRegisterer is the helper class used by the DEFINE_* macros to +// allow work to be done at global initialization time. +// +// CommandLineFlagParser is the class that reads from the commandline +// and instantiates flag values based on that. 
It needs to poke into +// the innards of the FlagValue->CommandLineFlag->FlagRegistry class +// hierarchy to do that. It's careful to acquire the FlagRegistry +// lock before doing any writing or other non-const actions. +// +// GetCommandLineOption is just a hook into registry routines to +// retrieve a flag based on its name. SetCommandLineOption, on the +// other hand, hooks into CommandLineFlagParser. Other API functions +// are, similarly, mostly hooks into the functionality described above. + +#include "config.h" +// This comes first to ensure we define __STDC_FORMAT_MACROS in time. +#ifdef HAVE_INTTYPES_H +#ifndef __STDC_FORMAT_MACROS +# define __STDC_FORMAT_MACROS 1 // gcc requires this to get PRId64, etc. +#endif +#include <inttypes.h> +#endif // HAVE_INTTYPES_H +#include <stdio.h> // for snprintf +#include <ctype.h> +#include <errno.h> +#include <string.h> +#include <assert.h> +#ifdef HAVE_FNMATCH_H +#include <fnmatch.h> +#endif // HAVE_FNMATCH_H +#ifdef HAVE_PTHREAD +#include <pthread.h> +#endif // HAVE_PTHREAD +#ifdef HAVE_WINDOWS_H +#include <windows.h> +#endif // HAVE_WINDOWS_H +#include <iostream> // for cerr +#include <string> +#include <map> +#include <vector> +#include <utility> // for pair<> +#include <algorithm> +#include "gflags/gflags.h" + +#ifndef PATH_SEPARATOR +#define PATH_SEPARATOR '/' +#endif + +// Work properly if either strtoll or strtoq is on this system +#ifdef HAVE_STRTOLL +# define strtoint64 strtoll +# define strtouint64 strtoull +#elif HAVE_STRTOQ +# define strtoint64 strtoq +# define strtouint64 strtouq +#else +// Neither strtoll nor strtoq are defined. I hope strtol works! +# define strtoint64 strtol +# define strtouint64 strtoul +#endif + +// If we have inttypes.h, it will have defined PRId32/etc for us. If +// not, take our best guess. 
+#ifndef PRId32 +# define PRId32 "d" +#endif +#ifndef PRId64 +# define PRId64 "lld" +#endif +#ifndef PRIu64 +# define PRIu64 "llu" +#endif + +using std::string; +using std::map; +using std::vector; +using std::pair; +using std::cerr; +using std::sort; + +// Special flags, type 1: the 'recursive' flags. They set another flag's val. +DEFINE_string(flagfile, "", + "load flags from file"); +DEFINE_string(fromenv, "", + "set flags from the environment" + " [use 'export FLAGS_flag1=value']"); +DEFINE_string(tryfromenv, "", + "set flags from the environment if present"); + +// Special flags, type 2: the 'parsing' flags. They modify how we parse. +DEFINE_string(undefok, "", + "comma-separated list of flag names that it is okay to specify " + "on the command line even if the program does not define a flag " + "with that name. IMPORTANT: flags in this list that have " + "arguments MUST use the flag=value format"); + +namespace google { + +// The help message indicating that the commandline flag has been +// 'stripped'. It will not show up when doing "-help" and its +// variants. The flag is stripped if STRIP_FLAG_HELP is set to 1 +// before including gflags/gflags.h. + +// This is used by this file, and also in commandlineflags_reporting.cc +const char kStrippedFlagHelp[] = "\001\002\003\004 (unknown) \004\003\002\001"; + +// This is used by the unittest to test error-exit code +void (*commandlineflags_exitfunc)(int) = &exit; // from stdlib.h + +namespace { + +// There are also 'reporting' flags, in commandlineflags_reporting.cc. + +static const char kError[] = "ERROR: "; + +// Indicates that undefined options are to be ignored. +// Enables deferred processing of flags in dynamically loaded libraries. +static bool allow_command_line_reparsing = false; + +static bool logging_is_probably_set_up = false; + +// This is a 'prototype' validate-function. 'Real' validate +// functions, take a flag-value as an argument: ValidateFn(bool) or +// ValidateFn(uint64). 
However, for easier storage, we strip off this +// argument and then restore it when actually calling the function on +// a flag value. +typedef bool (*ValidateFnProto)(); + + +// -------------------------------------------------------------------- +// FlagValue +// This represent the value a single flag might have. The major +// functionality is to convert from a string to an object of a +// given type, and back. Thread-compatible. +// -------------------------------------------------------------------- + +class FlagValue { + public: + FlagValue(void* valbuf, const char* type); + ~FlagValue(); + + bool ParseFrom(const char* spec); + string ToString() const; + + private: + friend class CommandLineFlag; // for many things, including Validate() + friend class ::google::FlagSaverImpl; // calls New() + friend class FlagRegistry; // checks value_buffer_ for flags_by_ptr_ map + template <typename T> friend T GetFromEnv(const char*, const char*, T); + friend bool TryParseLocked(const class CommandLineFlag*, FlagValue*, + const char*, string*); // for New(), CopyFrom() + + enum ValueType {FV_BOOL, FV_INT32, FV_INT64, FV_UINT64, FV_DOUBLE, FV_STRING}; + + const char* TypeName() const; + bool Equal(const FlagValue& x) const; + FlagValue* New() const; // creates a new one with default value + void CopyFrom(const FlagValue& x); + + // Calls the given validate-fn on value_buffer_, and returns + // whatever it returns. But first casts validate_fn_proto to a + // function that takes our value as an argument (eg void + // (*validate_fn)(bool) for a bool flag). + bool Validate(const char* flagname, ValidateFnProto validate_fn_proto) const; + + void* value_buffer_; // points to the buffer holding our data + ValueType type_; // how to interpret value_ + + FlagValue(const FlagValue&); // no copying! + void operator=(const FlagValue&); +}; + + +// This could be a templated method of FlagValue, but doing so adds to the +// size of the .o. 
Since there's no type-safety here anyway, macro is ok. +#define VALUE_AS(type) *reinterpret_cast<type*>(value_buffer_) +#define OTHER_VALUE_AS(fv, type) *reinterpret_cast<type*>(fv.value_buffer_) +#define SET_VALUE_AS(type, value) VALUE_AS(type) = (value) + +FlagValue::FlagValue(void* valbuf, const char* type) : value_buffer_(valbuf) { + if (strcmp(type, "bool") == 0) type_ = FV_BOOL; + else if (strcmp(type, "int32") == 0) type_ = FV_INT32; + else if (strcmp(type, "int64") == 0) type_ = FV_INT64; + else if (strcmp(type, "uint64") == 0) type_ = FV_UINT64; + else if (strcmp(type, "double") == 0) type_ = FV_DOUBLE; + else if (strcmp(type, "string") == 0) type_ = FV_STRING; + else assert(false); // Unknown typename +} + +FlagValue::~FlagValue() { + switch (type_) { + case FV_BOOL: delete reinterpret_cast<bool*>(value_buffer_); break; + case FV_INT32: delete reinterpret_cast<int32*>(value_buffer_); break; + case FV_INT64: delete reinterpret_cast<int64*>(value_buffer_); break; + case FV_UINT64: delete reinterpret_cast<uint64*>(value_buffer_); break; + case FV_DOUBLE: delete reinterpret_cast<double*>(value_buffer_); break; + case FV_STRING: delete reinterpret_cast<string*>(value_buffer_); break; + } +} + +bool FlagValue::ParseFrom(const char* value) { + if (type_ == FV_BOOL) { + const char* kTrue[] = { "1", "t", "true", "y", "yes" }; + const char* kFalse[] = { "0", "f", "false", "n", "no" }; + for (size_t i = 0; i < sizeof(kTrue)/sizeof(*kTrue); ++i) { + if (strcasecmp(value, kTrue[i]) == 0) { + SET_VALUE_AS(bool, true); + return true; + } else if (strcasecmp(value, kFalse[i]) == 0) { + SET_VALUE_AS(bool, false); + return true; + } + } + return false; // didn't match a legal input + + } else if (type_ == FV_STRING) { + SET_VALUE_AS(string, value); + return true; + } + + // OK, it's likely to be numeric, and we'll be using a strtoXXX method. + if (value[0] == '\0') // empty-string is only allowed for string type. 
+ return false; + char* end; + // Leading 0x puts us in base 16. But leading 0 does not put us in base 8! + // It caused too many bugs when we had that behavior. + int base = 10; // by default + if (value[0] == '0' && (value[1] == 'x' || value[1] == 'X')) + base = 16; + errno = 0; + + switch (type_) { + case FV_INT32: { + const int64 r = strtoint64(value, &end, base); + if (errno || end != value + strlen(value)) return false; // bad parse + if (static_cast<int32>(r) != r) // worked, but number out of range + return false; + SET_VALUE_AS(int32, r); + return true; + } + case FV_INT64: { + const int64 r = strtoint64(value, &end, base); + if (errno || end != value + strlen(value)) return false; // bad parse + SET_VALUE_AS(int64, r); + return true; + } + case FV_UINT64: { + while (*value == ' ') value++; + if (*value == '-') return false; // negative number + const uint64 r = strtouint64(value, &end, base); + if (errno || end != value + strlen(value)) return false; // bad parse + SET_VALUE_AS(uint64, r); + return true; + } + case FV_DOUBLE: { + const double r = strtod(value, &end); + if (errno || end != value + strlen(value)) return false; // bad parse + SET_VALUE_AS(double, r); + return true; + } + default: { + assert(false); // unknown type + return false; + } + } +} + +string FlagValue::ToString() const { + char intbuf[64]; // enough to hold even the biggest number + switch (type_) { + case FV_BOOL: + return VALUE_AS(bool) ? 
"true" : "false"; + case FV_INT32: + snprintf(intbuf, sizeof(intbuf), "%"PRId32, VALUE_AS(int32)); + return intbuf; + case FV_INT64: + snprintf(intbuf, sizeof(intbuf), "%"PRId64, VALUE_AS(int64)); + return intbuf; + case FV_UINT64: + snprintf(intbuf, sizeof(intbuf), "%"PRIu64, VALUE_AS(uint64)); + return intbuf; + case FV_DOUBLE: + snprintf(intbuf, sizeof(intbuf), "%.17g", VALUE_AS(double)); + return intbuf; + case FV_STRING: + return VALUE_AS(string); + default: + assert(false); + return ""; // unknown type + } +} + +bool FlagValue::Validate(const char* flagname, + ValidateFnProto validate_fn_proto) const { + switch (type_) { + case FV_BOOL: + return reinterpret_cast<bool (*)(const char*, bool)>( + validate_fn_proto)(flagname, VALUE_AS(bool)); + case FV_INT32: + return reinterpret_cast<bool (*)(const char*, int32)>( + validate_fn_proto)(flagname, VALUE_AS(int32)); + case FV_INT64: + return reinterpret_cast<bool (*)(const char*, int64)>( + validate_fn_proto)(flagname, VALUE_AS(int64)); + case FV_UINT64: + return reinterpret_cast<bool (*)(const char*, uint64)>( + validate_fn_proto)(flagname, VALUE_AS(uint64)); + case FV_DOUBLE: + return reinterpret_cast<bool (*)(const char*, double)>( + validate_fn_proto)(flagname, VALUE_AS(double)); + case FV_STRING: + return reinterpret_cast<bool (*)(const char*, const string&)>( + validate_fn_proto)(flagname, VALUE_AS(string)); + default: + assert(false); // unknown type + return false; + } +} + +const char* FlagValue::TypeName() const { + switch (type_) { + case FV_BOOL: return "bool"; + case FV_INT32: return "int32"; + case FV_INT64: return "int64"; + case FV_UINT64: return "uint64"; + case FV_DOUBLE: return "double"; + case FV_STRING: return "string"; + default: assert(false); return ""; // unknown type + } +} + +bool FlagValue::Equal(const FlagValue& x) const { + if (type_ != x.type_) + return false; + switch (type_) { + case FV_BOOL: return VALUE_AS(bool) == OTHER_VALUE_AS(x, bool); + case FV_INT32: return VALUE_AS(int32) == 
OTHER_VALUE_AS(x, int32); + case FV_INT64: return VALUE_AS(int64) == OTHER_VALUE_AS(x, int64); + case FV_UINT64: return VALUE_AS(uint64) == OTHER_VALUE_AS(x, uint64); + case FV_DOUBLE: return VALUE_AS(double) == OTHER_VALUE_AS(x, double); + case FV_STRING: return VALUE_AS(string) == OTHER_VALUE_AS(x, string); + default: assert(false); return false; // unknown type + } +} + +FlagValue* FlagValue::New() const { + switch (type_) { + case FV_BOOL: return new FlagValue(new bool(false), "bool"); + case FV_INT32: return new FlagValue(new int32(0), "int32"); + case FV_INT64: return new FlagValue(new int64(0), "int64"); + case FV_UINT64: return new FlagValue(new uint64(0), "uint64"); + case FV_DOUBLE: return new FlagValue(new double(0.0), "double"); + case FV_STRING: return new FlagValue(new string, "string"); + default: assert(false); return NULL; // unknown type + } +} + +void FlagValue::CopyFrom(const FlagValue& x) { + assert(type_ == x.type_); + switch (type_) { + case FV_BOOL: SET_VALUE_AS(bool, OTHER_VALUE_AS(x, bool)); break; + case FV_INT32: SET_VALUE_AS(int32, OTHER_VALUE_AS(x, int32)); break; + case FV_INT64: SET_VALUE_AS(int64, OTHER_VALUE_AS(x, int64)); break; + case FV_UINT64: SET_VALUE_AS(uint64, OTHER_VALUE_AS(x, uint64)); break; + case FV_DOUBLE: SET_VALUE_AS(double, OTHER_VALUE_AS(x, double)); break; + case FV_STRING: SET_VALUE_AS(string, OTHER_VALUE_AS(x, string)); break; + default: assert(false); // unknown type + } +} + +// -------------------------------------------------------------------- +// CommandLineFlag +// This represents a single flag, including its name, description, +// default value, and current value. Mostly this serves as a +// struct, though it also knows how to register itself. +// All CommandLineFlags are owned by a (exactly one) +// FlagRegistry. If you wish to modify fields in this class, you +// should acquire the FlagRegistry lock for the registry that owns +// this flag. 
+// -------------------------------------------------------------------- + +class CommandLineFlag { + public: + // Note: we take over memory-ownership of current_val and default_val. + CommandLineFlag(const char* name, const char* help, const char* filename, + FlagValue* current_val, FlagValue* default_val); + ~CommandLineFlag(); + + const char* name() const { return name_; } + const char* help() const { return help_; } + const char* filename() const { return file_; } + const char* CleanFileName() const; // nixes irrelevant prefix such as homedir + string current_value() const { return current_->ToString(); } + string default_value() const { return defvalue_->ToString(); } + const char* type_name() const { return defvalue_->TypeName(); } + ValidateFnProto validate_function() const { return validate_fn_proto_; } + + void FillCommandLineFlagInfo(struct CommandLineFlagInfo* result); + + // If validate_fn_proto_ is non-NULL, calls it on value, returns result. + bool Validate(const FlagValue& value) const; + bool ValidateCurrent() const { return Validate(*current_); } + + private: + // for SetFlagLocked() and setting flags_by_ptr_ + friend class FlagRegistry; + friend class ::google::FlagSaverImpl; // for cloning the values + friend bool GetCommandLineOption(const char*, string*, bool*); + // set validate_fn + friend bool AddFlagValidator(const void*, ValidateFnProto); + + // This copies all the non-const members: modified, processed, defvalue, etc. + void CopyFrom(const CommandLineFlag& src); + + void UpdateModifiedBit(); + + const char* const name_; // Flag name + const char* const help_; // Help message + const char* const file_; // Which file did this come from? + bool modified_; // Set after default assignment? + FlagValue* defvalue_; // Default value for flag + FlagValue* current_; // Current value for flag + // This is a casted, 'generic' version of validate_fn, which actually + // takes a flag-value as an arg (void (*validate_fn)(bool), say). 
+ // When we pass this to current_->Validate(), it will cast it back to + // the proper type. This may be NULL to mean we have no validate_fn. + ValidateFnProto validate_fn_proto_; + + CommandLineFlag(const CommandLineFlag&); // no copying! + void operator=(const CommandLineFlag&); +}; + +CommandLineFlag::CommandLineFlag(const char* name, const char* help, + const char* filename, + FlagValue* current_val, FlagValue* default_val) + : name_(name), help_(help), file_(filename), modified_(false), + defvalue_(default_val), current_(current_val), validate_fn_proto_(NULL) { +} + +CommandLineFlag::~CommandLineFlag() { + delete current_; + delete defvalue_; +} + +const char* CommandLineFlag::CleanFileName() const { + // Compute top-level directory & file that this appears in + // search full path backwards. + // Stop going backwards at kRootDir; and skip by the first slash. + static const char kRootDir[] = ""; // can set this to root directory, + // e.g. "myproject" + + if (sizeof(kRootDir)-1 == 0) // no prefix to strip + return filename(); + + const char* clean_name = filename() + strlen(filename()) - 1; + while ( clean_name > filename() ) { + if (*clean_name == PATH_SEPARATOR) { + if (strncmp(clean_name, kRootDir, sizeof(kRootDir)-1) == 0) { + // ".../myproject/base/logging.cc" ==> "base/logging.cc" + clean_name += sizeof(kRootDir)-1; // past "/myproject/" + break; + } + } + --clean_name; + } + while ( *clean_name == PATH_SEPARATOR ) ++clean_name; // Skip any slashes + return clean_name; +} + +void CommandLineFlag::FillCommandLineFlagInfo( + CommandLineFlagInfo* result) { + result->name = name(); + result->type = type_name(); + result->description = help(); + result->current_value = current_value(); + result->default_value = default_value(); + result->filename = CleanFileName(); + UpdateModifiedBit(); + result->is_default = !modified_; + result->has_validator_fn = validate_function() != NULL; +} + +void CommandLineFlag::UpdateModifiedBit() { + // Update the "modified" bit 
in case somebody bypassed the + // Flags API and wrote directly through the FLAGS_name variable. + if (!modified_ && !current_->Equal(*defvalue_)) { + modified_ = true; + } +} + +void CommandLineFlag::CopyFrom(const CommandLineFlag& src) { + // Note we only copy the non-const members; others are fixed at construct time + if (modified_ != src.modified_) modified_ = src.modified_; + if (!current_->Equal(*src.current_)) current_->CopyFrom(*src.current_); + if (!defvalue_->Equal(*src.defvalue_)) defvalue_->CopyFrom(*src.defvalue_); + if (validate_fn_proto_ != src.validate_fn_proto_) + validate_fn_proto_ = src.validate_fn_proto_; +} + +bool CommandLineFlag::Validate(const FlagValue& value) const { + if (validate_function() == NULL) + return true; + else + return value.Validate(name(), validate_function()); +} + + +// -------------------------------------------------------------------- +// FlagRegistry +// A FlagRegistry singleton object holds all flag objects indexed +// by their names so that if you know a flag's name (as a C +// string), you can access or set it. If the function is named +// FooLocked(), you must own the registry lock before calling +// the function; otherwise, you should *not* hold the lock, and +// the function will acquire it itself if needed. +// -------------------------------------------------------------------- + +struct StringCmp { // Used by the FlagRegistry map class to compare char*'s + bool operator() (const char* s1, const char* s2) const { + return (strcmp(s1, s2) < 0); + } +}; + +class FlagRegistry { + public: + FlagRegistry(); + ~FlagRegistry(); + + void Lock(); + void Unlock(); + + // Store a flag in this registry. Takes ownership of the given pointer. + void RegisterFlag(CommandLineFlag* flag); + + // Returns the flag object for the specified name, or NULL if not found. + CommandLineFlag* FindFlagLocked(const char* name); + + // Returns the flag object whose current-value is stored at flag_ptr. 
+ // That is, for whom current_->value_buffer_ == flag_ptr + CommandLineFlag* FindFlagViaPtrLocked(const void* flag_ptr); + + // A fancier form of FindFlag that works correctly if name is of the + // form flag=value. In that case, we set key to point to flag, and + // modify v to point to the value (if present), and return the flag + // with the given name. If the flag does not exist, returns NULL + // and sets error_message. + CommandLineFlag* SplitArgumentLocked(const char* argument, + string* key, const char** v, + string* error_message); + + // Set the value of a flag. If the flag was successfully set to + // value, set msg to indicate the new flag-value, and return true. + // Otherwise, set msg to indicate the error, leave flag unchanged, + // and return false. msg can be NULL. + bool SetFlagLocked(CommandLineFlag* flag, const char* value, + FlagSettingMode set_mode, string* msg); + + static FlagRegistry* GlobalRegistry(); // returns a singleton registry + + private: + friend class ::google::FlagSaverImpl; // reads all the flags in order to copy them + friend class CommandLineFlagParser; // for ValidateAllFlags + friend void ::google::GetAllFlags(vector<CommandLineFlagInfo>*); + + // The map from name to flag, for FindFlagLocked(). + typedef map<const char*, CommandLineFlag*, StringCmp> FlagMap; + typedef FlagMap::iterator FlagIterator; + typedef FlagMap::const_iterator FlagConstIterator; + FlagMap flags_; + + // The map from current-value pointer to flag, fo FindFlagViaPtrLocked(). + typedef map<const void*, CommandLineFlag*> FlagPtrMap; + FlagPtrMap flags_by_ptr_; + +#if defined(HAVE_PTHREAD) + pthread_mutex_t lock_; +#elif defined(HAVE_INITIALIZECRITICALSECTION) + CRITICAL_SECTION lock_; +#else +#error "Need to define a mutual-exclusion object for your system" +#endif + + static FlagRegistry* global_registry_; // a singleton registry + + // If global_registry_ has not yet been initialized, this function allocates + // a new global registry. 
If InterlockedCompareExchange() is available, it + // does so in a thread-safe manner; otherwise, single-threaded execution (or + // serialization using pthread_once) is assumed. + static void InitGlobalRegistry(); + + // Disallow + FlagRegistry(const FlagRegistry&); + FlagRegistry& operator=(const FlagRegistry&); +}; + + +#if defined(HAVE_PTHREAD) +// The pthread.h header is available. The pthreads library may or may not be +// linked in with -lpthread. If that library is not linked in, then it is +// assumed that all operations involving command-line flags will be +// single-threaded. + +#define SAFE_PTHREAD(fncall) do { if ((fncall) != 0) abort(); } while (0) + +inline FlagRegistry::FlagRegistry() { + SAFE_PTHREAD(pthread_mutex_init(&lock_, NULL)); +} +inline FlagRegistry::~FlagRegistry() { + SAFE_PTHREAD(pthread_mutex_destroy(&lock_)); +} + +inline void FlagRegistry::Lock() { + SAFE_PTHREAD(pthread_mutex_lock(&lock_)); +} +inline void FlagRegistry::Unlock() { + SAFE_PTHREAD(pthread_mutex_unlock(&lock_)); +} + +// We want to use pthread_once here, for safety, but have to worry about +// whether libpthread is linked in or not. We declare a weak version of +// the function, so we'll always compile (if the weak version is the only +// one that ends up existing, then pthread_once will be equal to NULL). +#ifdef HAVE___ATTRIBUTE__ + // __THROW is defined in glibc systems. It means, counter-intuitively, + // "This function will never throw an exception." It's an optional + // optimization tool, but we may need to use it to match glibc prototypes. 
+# ifndef __THROW // I guess we're not on a glibc system +# define __THROW // __THROW is just an optimization, so ok to make it "" +# endif +extern "C" int pthread_once(pthread_once_t *, void (*)(void)) + __THROW __attribute__((weak)); +#endif + +FlagRegistry* FlagRegistry::GlobalRegistry() { + if (pthread_once) { // means we're linked with -lpthread + static pthread_once_t global_registry_once = PTHREAD_ONCE_INIT; + pthread_once(&global_registry_once, &InitGlobalRegistry); + } else { + // Assume single-threaded execution. + InitGlobalRegistry(); + } + return global_registry_; +} + +#elif defined(HAVE_INITIALIZECRITICALSECTION) +// The Windows version of the thread-safe code uses EnterCriticalSection and +// LeaveCriticalSection to serialize access to the registry. + +#ifndef HAVE_INTERLOCKEDCOMPAREEXCHANGE +// InitializeCriticalSection is available, but InterlockedCompareExchange +// is not. On a Windows system both should be available, and on Unix, neither +// one should be available. +#error "Please check settings for HAVE_INTERLOCKED... and HAVE_INITIALIZE..." 
+#endif // !HAVE_INTERLOCKEDCOMPAREEXCHANGE + +inline FlagRegistry::FlagRegistry() { InitializeCriticalSection(&lock_); } +inline FlagRegistry::~FlagRegistry() { DeleteCriticalSection(&lock_); } + +inline void FlagRegistry::Lock() { EnterCriticalSection(&lock_); } +inline void FlagRegistry::Unlock() { LeaveCriticalSection(&lock_); } + +FlagRegistry* FlagRegistry::GlobalRegistry() { + InitGlobalRegistry(); + return global_registry_; +} +#endif // !HAVE_PTHREAD && HAVE_INITIALIZECRITICALSECTION + +// Get the singleton FlagRegistry object +FlagRegistry* FlagRegistry::global_registry_ = NULL; + +void FlagRegistry::InitGlobalRegistry() { + if (!global_registry_) { +#ifdef HAVE_INTERLOCKEDCOMPAREEXCHANGE + FlagRegistry* new_global_registry = new FlagRegistry; + if (InterlockedCompareExchangePointer( + reinterpret_cast<void* volatile *>(&global_registry_), + new_global_registry, + NULL) != NULL) { + // Some other thread initialized global_registry_ first. + delete new_global_registry; + } +#else // !HAVE_INTERLOCKEDCOMPAREEXCHANGE + // Assume single-threaded execution, or else that this function call was + // serialized using pthread_once. + global_registry_ = new FlagRegistry; +#endif // HAVE_INTERLOCKEDCOMPAREEXCHANGE + } +} + +void FlagRegistry::RegisterFlag(CommandLineFlag* flag) { + Lock(); + pair<FlagIterator, bool> ins = + flags_.insert(pair<const char*, CommandLineFlag*>(flag->name(), flag)); + if (ins.second == false) { // means the name was already in the map + if (strcmp(ins.first->second->filename(), flag->filename()) != 0) { + fprintf(stderr, + "ERROR: flag '%s' was defined more than once " + "(in files '%s' and '%s').\n", + flag->name(), + ins.first->second->filename(), + flag->filename()); + } else { + fprintf(stderr, + "ERROR: something wrong with flag '%s' in file '%s'. 
" + "One possibility: file '%s' is being linked both statically " + "and dynamically into this executable.\n", + flag->name(), + flag->filename(), flag->filename()); + } + commandlineflags_exitfunc(1); // almost certainly exit() + } + // Also add to the flags_by_ptr_ map. + flags_by_ptr_[flag->current_->value_buffer_] = flag; + Unlock(); +} + +CommandLineFlag* FlagRegistry::FindFlagLocked(const char* name) { + FlagConstIterator i = flags_.find(name); + if (i == flags_.end()) { + return NULL; + } else { + return i->second; + } +} + +CommandLineFlag* FlagRegistry::FindFlagViaPtrLocked(const void* flag_ptr) { + FlagPtrMap::const_iterator i = flags_by_ptr_.find(flag_ptr); + if (i == flags_by_ptr_.end()) { + return NULL; + } else { + return i->second; + } +} + +CommandLineFlag* FlagRegistry::SplitArgumentLocked(const char* arg, + string* key, + const char** v, + string* error_message) { + // Find the flag object for this option + const char* flag_name; + const char* value = strchr(arg, '='); + if (value == NULL) { + key->assign(arg); + *v = NULL; + } else { + // Strip out the "=value" portion from arg + key->assign(arg, value-arg); + *v = ++value; // advance past the '=' + } + flag_name = key->c_str(); + + CommandLineFlag* flag = FindFlagLocked(flag_name); + + if (flag == NULL) { + // If we can't find the flag-name, then we should return an error. + // The one exception is if 1) the flag-name is 'nox', 2) there + // exists a flag named 'x', and 3) 'x' is a boolean flag. + // In that case, we want to return flag 'x'. + if (!(flag_name[0] == 'n' && flag_name[1] == 'o')) { + // flag-name is not 'nox', so we're not in the exception case. + *error_message = (string(kError) + + "unknown command line flag '" + *key + "'\n"); + return NULL; + } + flag = FindFlagLocked(flag_name+2); + if (flag == NULL) { + // No flag named 'x' exists, so we're not in the exception case. 
+ *error_message = (string(kError) + + "unknown command line flag '" + *key + "'\n"); + return NULL; + } + if (strcmp(flag->type_name(), "bool") != 0) { + // 'x' exists but is not boolean, so we're not in the exception case. + *error_message = (string(kError) + + "boolean value (" + *key + ") specified for " + + flag->type_name() + " command line flag\n"); + return NULL; + } + // We're in the exception case! + // Make up a fake value to replace the "no" we stripped out + key->assign(flag_name+2); // the name without the "no" + *v = "0"; + } + + // Assign a value if this is a boolean flag + if (*v == NULL && strcmp(flag->type_name(), "bool") == 0) { + *v = "1"; // the --nox case was already handled, so this is the --x case + } + + return flag; +} + +bool TryParseLocked(const CommandLineFlag* flag, FlagValue* flag_value, + const char* value, string* msg) { + // Use tenative_value, not flag_value, until we know value is valid. + FlagValue* tentative_value = flag_value->New(); + if (!tentative_value->ParseFrom(value)) { + if (msg) { + *msg += (string(kError) + "illegal value '" + value + + + "' specified for " + flag->type_name() + " flag '" + + flag->name() + "'\n"); + } + delete tentative_value; + return false; + } else if (!flag->Validate(*tentative_value)) { + if (msg) { + *msg += (string(kError) + "failed validation of new value " + + "'" + tentative_value->ToString() + "' for flag '" + + + flag->name() + "'\n"); + } + delete tentative_value; + return false; + } else { + flag_value->CopyFrom(*tentative_value); + if (msg) { + *msg += (string(flag->name()) + " set to " + flag_value->ToString() + + "\n"); + } + delete tentative_value; + return true; + } +} + +bool FlagRegistry::SetFlagLocked(CommandLineFlag* flag, + const char* value, + FlagSettingMode set_mode, + string* msg) { + flag->UpdateModifiedBit(); + switch (set_mode) { + case SET_FLAGS_VALUE: { + // set or modify the flag's value + if (!TryParseLocked(flag, flag->current_, value, msg)) + return false; + 
flag->modified_ = true; + break; + } + case SET_FLAG_IF_DEFAULT: { + // set the flag's value, but only if it hasn't been set by someone else + if (!flag->modified_) { + if (!TryParseLocked(flag, flag->current_, value, msg)) + return false; + flag->modified_ = true; + } else { + *msg = string(flag->name()) + " set to " + flag->current_value(); + } + break; + } + case SET_FLAGS_DEFAULT: { + // modify the flag's default-value + if (!TryParseLocked(flag, flag->defvalue_, value, msg)) + return false; + if (!flag->modified_) { + // Need to set both defvalue *and* current, in this case + TryParseLocked(flag, flag->current_, value, NULL); + } + break; + } + default: { + // unknown set_mode + assert(false); + return false; + } + } + + return true; +} + +class FlagRegistryLock { + public: + explicit FlagRegistryLock(FlagRegistry* fr) : fr_(fr) { fr_->Lock(); } + ~FlagRegistryLock() { fr_->Unlock(); } + private: + FlagRegistry *const fr_; +}; + +// -------------------------------------------------------------------- +// CommandLineFlagParser +// Parsing is done in two stages. In the first, we go through +// argv. For every flag-like arg we can make sense of, we parse +// it and set the appropriate FLAGS_* variable. For every flag- +// like arg we can't make sense of, we store it in a vector, +// along with an explanation of the trouble. In stage 2, we +// handle the 'reporting' flags like --help and --mpm_version. +// (This is via a call to HandleCommandLineHelpFlags(), in +// gflags_reporting.cc.) +// An optional stage 3 prints out the error messages. +// This is a bit of a simplification. For instance, --flagfile +// is handled as soon as it's seen in stage 1, not in stage 2. 
+// -------------------------------------------------------------------- + +class CommandLineFlagParser { + public: + // The argument is the flag-registry to register the parsed flags in + explicit CommandLineFlagParser(FlagRegistry* reg) : registry_(reg) {} + ~CommandLineFlagParser() {} + + // Stage 1: Every time this is called, it reads all flags in argv. + // However, it ignores all flags that have been successfully set + // before. Typically this is only called once, so this 'reparsing' + // behavior isn't important. It can be useful when trying to + // reparse after loading a dll, though. + uint32 ParseNewCommandLineFlags(int* argc, char*** argv, bool remove_flags); + + // Stage 2: print reporting info and exit, if requested. + // In gflags_reporting.cc:HandleCommandLineHelpFlags(). + + // Stage 3: validate all the commandline flags that have validators + // registered. + void ValidateAllFlags(); + + // Stage 4: report any errors and return true if any were found. + bool ReportErrors(); + + // Set a particular command line option. "newval" is a string + // describing the new value that the option has been set to. If + // option_name does not specify a valid option name, or value is not + // a valid value for option_name, newval is empty. Does recursive + // processing for --flagfile and --fromenv. Returns the new value + // if everything went ok, or empty-string if not. (Actually, the + // return-string could hold many flag/value pairs due to --flagfile.) + // NB: Must have called registry_->Lock() before calling this function. + string ProcessSingleOptionLocked(CommandLineFlag* flag, + const char* value, + FlagSettingMode set_mode); + + // Set a whole batch of command line options as specified by contentdata, + // which is in flagfile format (and probably has been read from a flagfile). + // Returns the new value if everything went ok, or empty-string if + // not. (Actually, the return-string could hold many flag/value + // pairs due to --flagfile.) 
+ // NB: Must have called registry_->Lock() before calling this function. + string ProcessOptionsFromStringLocked(const string& contentdata, + FlagSettingMode set_mode); + + // These are the 'recursive' flags, defined at the top of this file. + // Whenever we see these flags on the commandline, we must take action. + // These are called by ProcessSingleOptionLocked and, similarly, return + // new values if everything went ok, or the empty-string if not. + string ProcessFlagfileLocked(const string& flagval, FlagSettingMode set_mode); + // diff fromenv/tryfromenv + string ProcessFromenvLocked(const string& flagval, FlagSettingMode set_mode, + bool errors_are_fatal); + + private: + FlagRegistry* const registry_; + map<string, string> error_flags_; // map from name to error message + // This could be a set<string>, but we reuse the map to minimize the .o size + map<string, string> undefined_names_; // --[flag] name was not registered +}; + + +// Parse a list of (comma-separated) flags. +static void ParseFlagList(const char* value, vector<string>* flags) { + for (const char *p = value; p && *p; value = p) { + p = strchr(value, ','); + int len; + if (p) { + len = static_cast<int>(p - value); + p++; + } else { + len = static_cast<int>(strlen(value)); + } + + if (len == 0) { + fprintf(stderr, "ERROR: empty flaglist entry\n"); + commandlineflags_exitfunc(1); // almost certainly exit() + } + if (value[0] == '-') { + fprintf(stderr, "ERROR: flag \"%*s\" begins with '-'\n", len, value); + commandlineflags_exitfunc(1); + } + + flags->push_back(string(value, len)); + } +} + +// Snarf an entire file into a C++ string. This is just so that we +// can do all the I/O in one place and not worry about it everywhere. +// Plus, it's convenient to have the whole file contents at hand. +// Adds a newline at the end of the file. 
+#define PFATAL(s) do { perror(s); commandlineflags_exitfunc(1); } while (0) + +static string ReadFileIntoString(const char* filename) { + const int kBufSize = 8092; + char buffer[kBufSize]; + string s; + FILE* fp = fopen(filename, "r"); + if (!fp) PFATAL(filename); + size_t n; + while ( (n=fread(buffer, 1, kBufSize, fp)) > 0 ) { + if (ferror(fp)) PFATAL(filename); + s.append(buffer, n); + } + fclose(fp); + return s; +} + +uint32 CommandLineFlagParser::ParseNewCommandLineFlags(int* argc, char*** argv, + bool remove_flags) { + const char *program_name = strrchr((*argv)[0], PATH_SEPARATOR); // nix path + program_name = (program_name == NULL ? (*argv)[0] : program_name+1); + + int first_nonopt = *argc; // for non-options moved to the end + + registry_->Lock(); + for (int i = 1; i < first_nonopt; i++) { + char* arg = (*argv)[i]; + + // Like getopt(), we permute non-option flags to be at the end. + if (arg[0] != '-' || // must be a program argument + (arg[0] == '-' && arg[1] == '\0')) { // "-" is an argument, not a flag + memmove((*argv) + i, (*argv) + i+1, (*argc - (i+1)) * sizeof((*argv)[i])); + (*argv)[*argc-1] = arg; // we go last + first_nonopt--; // we've been pushed onto the stack + i--; // to undo the i++ in the loop + continue; + } + + if (arg[0] == '-') arg++; // allow leading '-' + if (arg[0] == '-') arg++; // or leading '--' + + // -- alone means what it does for GNU: stop options parsing + if (*arg == '\0') { + first_nonopt = i+1; + break; + } + + // Find the flag object for this option + string key; + const char* value; + string error_message; + CommandLineFlag* flag = registry_->SplitArgumentLocked(arg, &key, &value, + &error_message); + if (flag == NULL) { + undefined_names_[key] = ""; // value isn't actually used + error_flags_[key] = error_message; + continue; + } + + if (value == NULL) { + // Boolean options are always assigned a value by SplitArgumentLocked() + assert(strcmp(flag->type_name(), "bool") != 0); + if (i+1 >= first_nonopt) { + // This 
flag needs a value, but there is nothing available + error_flags_[key] = (string(kError) + "flag '" + (*argv)[i] + "'" + + " is missing its argument"); + if (flag->help() && flag->help()[0] > '\001') { + // Be useful in case we have a non-stripped description. + error_flags_[key] += string("; flag description: ") + flag->help(); + } + error_flags_[key] += "\n"; + break; // we treat this as an unrecoverable error + } else { + value = (*argv)[++i]; // read next arg for value + } + } + + // TODO(csilvers): only set a flag if we hadn't set it before here + ProcessSingleOptionLocked(flag, value, SET_FLAGS_VALUE); + } + registry_->Unlock(); + + if (remove_flags) { // Fix up argc and argv by removing command line flags + (*argv)[first_nonopt-1] = (*argv)[0]; + (*argv) += (first_nonopt-1); + (*argc) -= (first_nonopt-1); + first_nonopt = 1; // because we still don't count argv[0] + } + + logging_is_probably_set_up = true; // because we've parsed --logdir, etc. + + return first_nonopt; +} + +string CommandLineFlagParser::ProcessFlagfileLocked(const string& flagval, + FlagSettingMode set_mode) { + if (flagval.empty()) + return ""; + + string msg; + vector<string> filename_list; + ParseFlagList(flagval.c_str(), &filename_list); // take a list of filenames + for (size_t i = 0; i < filename_list.size(); ++i) { + const char* file = filename_list[i].c_str(); + msg += ProcessOptionsFromStringLocked(ReadFileIntoString(file), set_mode); + } + return msg; +} + +string CommandLineFlagParser::ProcessFromenvLocked(const string& flagval, + FlagSettingMode set_mode, + bool errors_are_fatal) { + if (flagval.empty()) + return ""; + + string msg; + vector<string> flaglist; + ParseFlagList(flagval.c_str(), &flaglist); + + for (size_t i = 0; i < flaglist.size(); ++i) { + const char* flagname = flaglist[i].c_str(); + CommandLineFlag* flag = registry_->FindFlagLocked(flagname); + if (flag == NULL) { + error_flags_[flagname] = (string(kError) + "unknown command line flag" + + " '" + flagname + "'" 
+ + " (via --fromenv or --tryfromenv)\n"); + undefined_names_[flagname] = ""; + continue; + } + + const string envname = string("FLAGS_") + string(flagname); + const char* envval = getenv(envname.c_str()); + if (!envval) { + if (errors_are_fatal) { + error_flags_[flagname] = (string(kError) + envname + + " not found in environment\n"); + } + continue; + } + + // Avoid infinite recursion. + if ((strcmp(envval, "fromenv") == 0) || + (strcmp(envval, "tryfromenv") == 0)) { + error_flags_[flagname] = (string(kError) + "infinite recursion on " + + "environment flag '" + envval + "'\n"); + continue; + } + + msg += ProcessSingleOptionLocked(flag, envval, set_mode); + } + return msg; +} + +string CommandLineFlagParser::ProcessSingleOptionLocked( + CommandLineFlag* flag, const char* value, FlagSettingMode set_mode) { + string msg; + if (value && !registry_->SetFlagLocked(flag, value, set_mode, &msg)) { + error_flags_[flag->name()] = msg; + return ""; + } + + // The recursive flags, --flagfile and --fromenv and --tryfromenv, + // must be dealt with as soon as they're seen. They will emit + // messages of their own. + if (strcmp(flag->name(), "flagfile") == 0) { + msg += ProcessFlagfileLocked(FLAGS_flagfile, set_mode); + + } else if (strcmp(flag->name(), "fromenv") == 0) { + // last arg indicates envval-not-found is fatal (unlike in --tryfromenv) + msg += ProcessFromenvLocked(FLAGS_fromenv, set_mode, true); + + } else if (strcmp(flag->name(), "tryfromenv") == 0) { + msg += ProcessFromenvLocked(FLAGS_tryfromenv, set_mode, false); + } + + return msg; +} + +void CommandLineFlagParser::ValidateAllFlags() { + FlagRegistryLock frl(registry_); + for (FlagRegistry::FlagConstIterator i = registry_->flags_.begin(); + i != registry_->flags_.end(); ++i) { + if (!i->second->ValidateCurrent()) { + // only set a message if one isn't already there. (If there's + // an error message, our job is done, even if it's not exactly + // the same error.) 
+ if (error_flags_[i->second->name()].empty()) + error_flags_[i->second->name()] = (string(kError) + + "--" + i->second->name() + + " must be set on the commandline" + + " (default value fails validation)"); + } + } +} + +bool CommandLineFlagParser::ReportErrors() { + // error_flags_ indicates errors we saw while parsing. + // But we ignore undefined-names if ok'ed by --undef_ok + if (!FLAGS_undefok.empty()) { + vector<string> flaglist; + ParseFlagList(FLAGS_undefok.c_str(), &flaglist); + for (size_t i = 0; i < flaglist.size(); ++i) + if (undefined_names_.find(flaglist[i]) != undefined_names_.end()) { + error_flags_[flaglist[i]] = ""; // clear the error message + } + } + // Likewise, if they decided to allow reparsing, all undefined-names + // are ok; we just silently ignore them now, and hope that a future + // parse will pick them up somehow. + if (allow_command_line_reparsing) { + for (map<string, string>::const_iterator it = undefined_names_.begin(); + it != undefined_names_.end(); ++it) + error_flags_[it->first] = ""; // clear the error message + } + + bool found_error = false; + for (map<string, string>::const_iterator it = error_flags_.begin(); + it != error_flags_.end(); ++it) { + if (!it->second.empty()) { + fprintf(stderr, "%s", it->second.c_str()); + found_error = true; + } + } + return found_error; +} + +string CommandLineFlagParser::ProcessOptionsFromStringLocked( + const string& contentdata, FlagSettingMode set_mode) { + string retval; + const char* flagfile_contents = contentdata.c_str(); + bool flags_are_relevant = true; // set to false when filenames don't match + bool in_filename_section = false; + + const char* line_end = flagfile_contents; + // We read this file a line at a time. + for (; line_end; flagfile_contents = line_end + 1) { + while (*flagfile_contents && isspace(*flagfile_contents)) + ++flagfile_contents; + line_end = strchr(flagfile_contents, '\n'); + size_t len = line_end ? 
static_cast<size_t>(line_end - flagfile_contents) + : strlen(flagfile_contents); + string line(flagfile_contents, len); + + // Each line can be one of four things: + // 1) A comment line -- we skip it + // 2) An empty line -- we skip it + // 3) A list of filenames -- starts a new filenames+flags section + // 4) A --flag=value line -- apply if previous filenames match + if (line.empty() || line[0] == '#') { + // comment or empty line; just ignore + + } else if (line[0] == '-') { // flag + in_filename_section = false; // instead, it was a flag-line + if (!flags_are_relevant) // skip this flag; applies to someone else + continue; + + const char* name_and_val = line.c_str() + 1; // skip the leading - + if (*name_and_val == '-') + name_and_val++; // skip second - too + string key; + const char* value; + string error_message; + CommandLineFlag* flag = registry_->SplitArgumentLocked(name_and_val, + &key, &value, + &error_message); + // By API, errors parsing flagfile lines are silently ignored. + if (flag == NULL) { + // "WARNING: flagname '" + key + "' not found\n" + } else if (value == NULL) { + // "WARNING: flagname '" + key + "' missing a value\n" + } else { + retval += ProcessSingleOptionLocked(flag, value, set_mode); + } + + } else { // a filename! 
+ if (!in_filename_section) { // start over: assume filenames don't match + in_filename_section = true; + flags_are_relevant = false; + } + + // Split the line up at spaces into glob-patterns + const char* space = line.c_str(); // just has to be non-NULL + for (const char* word = line.c_str(); *space; word = space+1) { + if (flags_are_relevant) // we can stop as soon as we match + break; + space = strchr(word, ' '); + if (space == NULL) + space = word + strlen(word); + const string glob(word, space - word); + // We try matching both against the full argv0 and basename(argv0) +#ifdef HAVE_FNMATCH_H + if (fnmatch(glob.c_str(), + ProgramInvocationName(), + FNM_PATHNAME) == 0 || + fnmatch(glob.c_str(), + ProgramInvocationShortName(), + FNM_PATHNAME) == 0) { +#else // !HAVE_FNMATCH_H + if ((glob == ProgramInvocationName()) || + (glob == ProgramInvocationShortName())) { +#endif // HAVE_FNMATCH_H + flags_are_relevant = true; + } + } + } + } + return retval; +} + +// -------------------------------------------------------------------- +// GetFromEnv() +// AddFlagValidator() +// These are helper functions for routines like BoolFromEnv() and +// RegisterFlagValidator, defined below. They're defined here so +// they can live in the unnamed namespace (which makes friendship +// declarations for these classes possible). +// -------------------------------------------------------------------- + +template<typename T> +T GetFromEnv(const char *varname, const char* type, T dflt) { + const char* const valstr = getenv(varname); + if (!valstr) + return dflt; + FlagValue ifv(new T, type); + if (!ifv.ParseFrom(valstr)) { + fprintf(stderr, "ERROR: error parsing env variable '%s' with value '%s'\n", + varname, valstr); + commandlineflags_exitfunc(1); + } + return OTHER_VALUE_AS(ifv, T); +} + +bool AddFlagValidator(const void* flag_ptr, ValidateFnProto validate_fn_proto) { + // We want a lock around this routine, in case two threads try to + // add a validator (hopefully the same one!) 
at once. We could use + // our own thread, but we need to loook at the registry anyway, so + // we just steal that one. + FlagRegistry* const registry = FlagRegistry::GlobalRegistry(); + FlagRegistryLock frl(registry); + // First, find the flag whose current-flag storage is 'flag'. + // This is the CommandLineFlag whose current_->value_buffer_ == flag + CommandLineFlag* flag = registry->FindFlagViaPtrLocked(flag_ptr); + if (!flag) { + // WARNING << "Ignoring RegisterValidateFunction() for flag pointer " + // << flag_ptr << ": no flag found at that address"; + return false; + } else if (validate_fn_proto == flag->validate_function()) { + return true; // ok to register the same function over and over again + } else if (validate_fn_proto != NULL && flag->validate_function() != NULL) { + // WARNING << "Ignoring RegisterValidateFunction() for flag '" + // << flag->name() << "': validate-fn already registered"; + return false; + } else { + flag->validate_fn_proto_ = validate_fn_proto; + return true; + } +} + +} // end unnamed namespaces + + +// Now define the functions that are exported via the .h file + +// -------------------------------------------------------------------- +// FlagRegisterer +// This class exists merely to have a global constructor (the +// kind that runs before main(), that goes an initializes each +// flag that's been declared. Note that it's very important we +// don't have a destructor that deletes flag_, because that would +// cause us to delete current_storage/defvalue_storage as well, +// which can cause a crash if anything tries to access the flag +// values in a global destructor. +// -------------------------------------------------------------------- + +// TODO(csilvers): When we're ready to have this error be a fatal one, +// change this to give a compilation error (via COMPILE_ASSERT(false)). +bool FlagsTypeWarn(const char *name) { + cerr << "Flag " << name << " is of type bool, but its default" + << " value is not a boolean. 
NOTE: This will soon be a" + << " compilations error!"; + return false; +} + +FlagRegisterer::FlagRegisterer(const char* name, const char* type, + const char* help, const char* filename, + void* current_storage, void* defvalue_storage) { + if (help == NULL) + help = ""; + // FlagValue expects the type-name to not include any namespace + // components, so we get rid of those, if any. + if (strchr(type, ':')) + type = strrchr(type, ':') + 1; + FlagValue* current = new FlagValue(current_storage, type); + FlagValue* defvalue = new FlagValue(defvalue_storage, type); + // Importantly, flag_ will never be deleted, so storage is always good. + CommandLineFlag* flag = new CommandLineFlag(name, help, filename, + current, defvalue); + FlagRegistry::GlobalRegistry()->RegisterFlag(flag); // default registry +} + +// -------------------------------------------------------------------- +// GetAllFlags() +// The main way the FlagRegistry class exposes its data. This +// returns, as strings, all the info about all the flags in +// the main registry, sorted first by filename they are defined +// in, and then by flagname. 
+// -------------------------------------------------------------------- + +struct FilenameFlagnameCmp { + bool operator()(const CommandLineFlagInfo& a, + const CommandLineFlagInfo& b) const { + int cmp = strcmp(a.filename.c_str(), b.filename.c_str()); + if (cmp == 0) + cmp = strcmp(a.name.c_str(), b.name.c_str()); // secondary sort key + return cmp < 0; + } +}; + +void GetAllFlags(vector<CommandLineFlagInfo>* OUTPUT) { + FlagRegistry* const registry = FlagRegistry::GlobalRegistry(); + registry->Lock(); + for (FlagRegistry::FlagConstIterator i = registry->flags_.begin(); + i != registry->flags_.end(); ++i) { + CommandLineFlagInfo fi; + i->second->FillCommandLineFlagInfo(&fi); + OUTPUT->push_back(fi); + } + registry->Unlock(); + // Now sort the flags, first by filename they occur in, then alphabetically + sort(OUTPUT->begin(), OUTPUT->end(), FilenameFlagnameCmp()); +} + +// -------------------------------------------------------------------- +// SetArgv() +// GetArgvs() +// GetArgv() +// GetArgv0() +// ProgramInvocationName() +// ProgramInvocationShortName() +// SetUsageMessage() +// ProgramUsage() +// Functions to set and get argv. Typically the setter is called +// by ParseCommandLineFlags. Also can get the ProgramUsage string, +// set by SetUsageMessage. +// -------------------------------------------------------------------- + +// These values are not protected by a Mutex because they are normally +// set only once during program startup. 
+static const char* argv0 = "UNKNOWN"; // just the program name +static const char* cmdline = ""; // the entire command-line +static vector<string> argvs; +static uint32 argv_sum = 0; +static const char* program_usage = NULL; + +void SetArgv(int argc, const char** argv) { + static bool called_set_argv = false; + if (called_set_argv) // we already have an argv for you + return; + + called_set_argv = true; + + assert(argc > 0); // every program has at least a progname + argv0 = strdup(argv[0]); // small memory leak, but fn only called once + assert(argv0); + + string cmdline_string; // easier than doing strcats + for (int i = 0; i < argc; i++) { + if (i != 0) { + cmdline_string += " "; + } + cmdline_string += argv[i]; + argvs.push_back(argv[i]); + } + cmdline = strdup(cmdline_string.c_str()); // another small memory leak + assert(cmdline); + + // Compute a simple sum of all the chars in argv + for (const char* c = cmdline; *c; c++) + argv_sum += *c; +} + +const vector<string>& GetArgvs() { return argvs; } +const char* GetArgv() { return cmdline; } +const char* GetArgv0() { return argv0; } +uint32 GetArgvSum() { return argv_sum; } +const char* ProgramInvocationName() { // like the GNU libc fn + return GetArgv0(); +} +const char* ProgramInvocationShortName() { // like the GNU libc fn + const char* slash = strrchr(argv0, '/'); +#ifdef OS_WINDOWS + if (!slash) slash = strrchr(argv0, '\\'); +#endif + return slash ? 
slash + 1 : argv0; +} + +void SetUsageMessage(const string& usage) { + if (program_usage != NULL) { + fprintf(stderr, "ERROR: SetUsageMessage() called twice\n"); + exit(1); + } + program_usage = strdup(usage.c_str()); // small memory leak +} + +const char* ProgramUsage() { + if (program_usage) { + return program_usage; + } + return "Warning: SetUsageMessage() never called"; +} + +// -------------------------------------------------------------------- +// GetCommandLineOption() +// GetCommandLineFlagInfo() +// GetCommandLineFlagInfoOrDie() +// SetCommandLineOption() +// SetCommandLineOptionWithMode() +// The programmatic way to set a flag's value, using a string +// for its name rather than the variable itself (that is, +// SetCommandLineOption("foo", x) rather than FLAGS_foo = x). +// There's also a bit more flexibility here due to the various +// set-modes, but typically these are used when you only have +// that flag's name as a string, perhaps at runtime. +// All of these work on the default, global registry. +// For GetCommandLineOption, return false if no such flag +// is known, true otherwise. We clear "value" if a suitable +// flag is found. 
+// -------------------------------------------------------------------- + + +bool GetCommandLineOption(const char* name, string* value) { + if (NULL == name) + return false; + assert(value); + + FlagRegistry* const registry = FlagRegistry::GlobalRegistry(); + FlagRegistryLock frl(registry); + CommandLineFlag* flag = registry->FindFlagLocked(name); + if (flag == NULL) { + return false; + } else { + *value = flag->current_value(); + return true; + } +} + +bool GetCommandLineFlagInfo(const char* name, CommandLineFlagInfo* OUTPUT) { + if (NULL == name) return false; + FlagRegistry* const registry = FlagRegistry::GlobalRegistry(); + FlagRegistryLock frl(registry); + CommandLineFlag* flag = registry->FindFlagLocked(name); + if (flag == NULL) { + return false; + } else { + assert(OUTPUT); + flag->FillCommandLineFlagInfo(OUTPUT); + return true; + } +} + +CommandLineFlagInfo GetCommandLineFlagInfoOrDie(const char* name) { + CommandLineFlagInfo info; + if (!GetCommandLineFlagInfo(name, &info)) { + fprintf(stderr, "FATAL ERROR: flag name '%s' doesn't exit", name); + commandlineflags_exitfunc(1); // almost certainly exit() + } + return info; +} + +string SetCommandLineOptionWithMode(const char* name, const char* value, + FlagSettingMode set_mode) { + string result; + FlagRegistry* const registry = FlagRegistry::GlobalRegistry(); + FlagRegistryLock frl(registry); + CommandLineFlag* flag = registry->FindFlagLocked(name); + if (flag) { + CommandLineFlagParser parser(registry); + result = parser.ProcessSingleOptionLocked(flag, value, set_mode); + if (!result.empty()) { // in the error case, we've already logged + // You could consider logging this change, if you wanted to know it: + //fprintf(stderr, "%sFLAGS_%s\n", + // (set_mode == SET_FLAGS_DEFAULT ? 
"default value of " : ""), + // result); + } + } + // The API of this function is that we return empty string on error + return result; +} + +string SetCommandLineOption(const char* name, const char* value) { + return SetCommandLineOptionWithMode(name, value, SET_FLAGS_VALUE); +} + +// -------------------------------------------------------------------- +// FlagSaver +// FlagSaverImpl +// This class stores the states of all flags at construct time, +// and restores all flags to that state at destruct time. +// Its major implementation challenge is that it never modifies +// pointers in the 'main' registry, so global FLAG_* vars always +// point to the right place. +// -------------------------------------------------------------------- + +class FlagSaverImpl { + public: + // Constructs an empty FlagSaverImpl object. + explicit FlagSaverImpl(FlagRegistry* main_registry) + : main_registry_(main_registry) { } + ~FlagSaverImpl() { + // reclaim memory from each of our CommandLineFlags + vector<CommandLineFlag*>::const_iterator it; + for (it = backup_registry_.begin(); it != backup_registry_.end(); ++it) + delete *it; + } + + // Saves the flag states from the flag registry into this object. + // It's an error to call this more than once. + // Must be called when the registry mutex is not held. + void SaveFromRegistry() { + FlagRegistryLock frl(main_registry_); + assert(backup_registry_.empty()); // call only once! 
+ for (FlagRegistry::FlagConstIterator it = main_registry_->flags_.begin(); + it != main_registry_->flags_.end(); + ++it) { + const CommandLineFlag* main = it->second; + // Sets up all the const variables in backup correctly + CommandLineFlag* backup = new CommandLineFlag( + main->name(), main->help(), main->filename(), + main->current_->New(), main->defvalue_->New()); + // Sets up all the non-const variables in backup correctly + backup->CopyFrom(*main); + backup_registry_.push_back(backup); // add it to a convenient list + } + } + + // Restores the saved flag states into the flag registry. We + // assume no flags were added or deleted from the registry since + // the SaveFromRegistry; if they were, that's trouble! Must be + // called when the registry mutex is not held. + void RestoreToRegistry() { + FlagRegistryLock frl(main_registry_); + vector<CommandLineFlag*>::const_iterator it; + for (it = backup_registry_.begin(); it != backup_registry_.end(); ++it) { + CommandLineFlag* main = main_registry_->FindFlagLocked((*it)->name()); + if (main != NULL) { // if NULL, flag got deleted from registry(!) + main->CopyFrom(**it); + } + } + } + + private: + FlagRegistry* const main_registry_; + vector<CommandLineFlag*> backup_registry_; + + FlagSaverImpl(const FlagSaverImpl&); // no copying! + void operator=(const FlagSaverImpl&); +}; + +FlagSaver::FlagSaver() + : impl_(new FlagSaverImpl(FlagRegistry::GlobalRegistry())) { + impl_->SaveFromRegistry(); +} + +FlagSaver::~FlagSaver() { + impl_->RestoreToRegistry(); + delete impl_; +} + + +// -------------------------------------------------------------------- +// CommandlineFlagsIntoString() +// ReadFlagsFromString() +// AppendFlagsIntoFile() +// ReadFromFlagsFile() +// These are mostly-deprecated routines that stick the +// commandline flags into a file/string and read them back +// out again. 
I can see a use for CommandlineFlagsIntoString, +// for creating a flagfile, but the rest don't seem that useful +// -- some, I think, are a poor-man's attempt at FlagSaver -- +// and are included only until we can delete them from callers. +// Note they don't save --flagfile flags (though they do save +// the result of having called the flagfile, of course). +// -------------------------------------------------------------------- + +static string TheseCommandlineFlagsIntoString( + const vector<CommandLineFlagInfo>& flags) { + vector<CommandLineFlagInfo>::const_iterator i; + + size_t retval_space = 0; + for (i = flags.begin(); i != flags.end(); ++i) { + // An (over)estimate of how much space it will take to print this flag + retval_space += i->name.length() + i->current_value.length() + 5; + } + + string retval; + retval.reserve(retval_space); + for (i = flags.begin(); i != flags.end(); ++i) { + retval += "--"; + retval += i->name; + retval += "="; + retval += i->current_value; + retval += "\n"; + } + return retval; +} + +string CommandlineFlagsIntoString() { + vector<CommandLineFlagInfo> sorted_flags; + GetAllFlags(&sorted_flags); + return TheseCommandlineFlagsIntoString(sorted_flags); +} + +bool ReadFlagsFromString(const string& flagfilecontents, + const char* /*prog_name*/, // TODO(csilvers): nix this + bool errors_are_fatal) { + FlagRegistry* const registry = FlagRegistry::GlobalRegistry(); + FlagSaverImpl saved_states(registry); + saved_states.SaveFromRegistry(); + + CommandLineFlagParser parser(registry); + registry->Lock(); + parser.ProcessOptionsFromStringLocked(flagfilecontents, SET_FLAGS_VALUE); + registry->Unlock(); + // Should we handle --help and such when reading flags from a string? Sure. + HandleCommandLineHelpFlags(); + if (parser.ReportErrors()) { + // Error. Restore all global flags to their previous values. 
+ if (errors_are_fatal) + commandlineflags_exitfunc(1); // almost certainly exit() + saved_states.RestoreToRegistry(); + return false; + } + return true; +} + +// TODO(csilvers): nix prog_name in favor of ProgramInvocationShortName() +bool AppendFlagsIntoFile(const string& filename, const char *prog_name) { + FILE *fp = fopen(filename.c_str(), "a"); + if (!fp) { + return false; + } + + if (prog_name) + fprintf(fp, "%s\n", prog_name); + + vector<CommandLineFlagInfo> flags; + GetAllFlags(&flags); + // But we don't want --flagfile, which leads to weird recursion issues + vector<CommandLineFlagInfo>::iterator i; + for (i = flags.begin(); i != flags.end(); ++i) { + if (strcmp(i->name.c_str(), "flagfile") == 0) { + flags.erase(i); + break; + } + } + fprintf(fp, "%s", TheseCommandlineFlagsIntoString(flags).c_str()); + + fclose(fp); + return true; +} + +bool ReadFromFlagsFile(const string& filename, const char* prog_name, + bool errors_are_fatal) { + return ReadFlagsFromString(ReadFileIntoString(filename.c_str()), + prog_name, errors_are_fatal); +} + + +// -------------------------------------------------------------------- +// BoolFromEnv() +// Int32FromEnv() +// Int64FromEnv() +// Uint64FromEnv() +// DoubleFromEnv() +// StringFromEnv() +// Reads the value from the environment and returns it. +// We use an FlagValue to make the parsing easy. 
+// Example usage: +// DEFINE_bool(myflag, BoolFromEnv("MYFLAG_DEFAULT", false), "whatever"); +// -------------------------------------------------------------------- + +bool BoolFromEnv(const char *v, bool dflt) { + return GetFromEnv(v, "bool", dflt); +} +int32 Int32FromEnv(const char *v, int32 dflt) { + return GetFromEnv(v, "int32", dflt); +} +int64 Int64FromEnv(const char *v, int64 dflt) { + return GetFromEnv(v, "int64", dflt); +} +uint64 Uint64FromEnv(const char *v, uint64 dflt) { + return GetFromEnv(v, "uint64", dflt); +} +double DoubleFromEnv(const char *v, double dflt) { + return GetFromEnv(v, "double", dflt); +} +const char *StringFromEnv(const char *varname, const char *dflt) { + const char* const val = getenv(varname); + return val ? val : dflt; +} + + +// -------------------------------------------------------------------- +// RegisterFlagValidator() +// RegisterFlagValidator() is the function that clients use to +// 'decorate' a flag with a validation function. Once this is +// done, every time the flag is set (including when the flag +// is parsed from argv), the validator-function is called. +// These functions return true if the validator was added +// successfully, or false if not: the flag already has a validator, +// (only one allowed per flag), the 1st arg isn't a flag, etc. +// This function is not thread-safe. 
+// -------------------------------------------------------------------- + +bool RegisterFlagValidator(const bool* flag, + bool (*validate_fn)(const char*, bool)) { + return AddFlagValidator(flag, reinterpret_cast<ValidateFnProto>(validate_fn)); +} +bool RegisterFlagValidator(const int32* flag, + bool (*validate_fn)(const char*, int32)) { + return AddFlagValidator(flag, reinterpret_cast<ValidateFnProto>(validate_fn)); +} +bool RegisterFlagValidator(const int64* flag, + bool (*validate_fn)(const char*, int64)) { + return AddFlagValidator(flag, reinterpret_cast<ValidateFnProto>(validate_fn)); +} +bool RegisterFlagValidator(const uint64* flag, + bool (*validate_fn)(const char*, uint64)) { + return AddFlagValidator(flag, reinterpret_cast<ValidateFnProto>(validate_fn)); +} +bool RegisterFlagValidator(const double* flag, + bool (*validate_fn)(const char*, double)) { + return AddFlagValidator(flag, reinterpret_cast<ValidateFnProto>(validate_fn)); +} +bool RegisterFlagValidator(const string* flag, + bool (*validate_fn)(const char*, const string&)) { + return AddFlagValidator(flag, reinterpret_cast<ValidateFnProto>(validate_fn)); +} + + +// -------------------------------------------------------------------- +// ParseCommandLineFlags() +// ParseCommandLineNonHelpFlags() +// HandleCommandLineHelpFlags() +// This is the main function called from main(), to actually +// parse the commandline. It modifies argc and argv as described +// at the top of gflags.h. You can also divide this +// function into two parts, if you want to do work between +// the parsing of the flags and the printing of any help output. 
+// -------------------------------------------------------------------- + +static uint32 ParseCommandLineFlagsInternal(int* argc, char*** argv, + bool remove_flags, bool do_report) { + SetArgv(*argc, const_cast<const char**>(*argv)); // save it for later + + FlagRegistry* const registry = FlagRegistry::GlobalRegistry(); + CommandLineFlagParser parser(registry); + + // When we parse the commandline flags, we'll handle --flagfile, + // --tryfromenv, etc. as we see them (since flag-evaluation order + // may be important). But sometimes apps set FLAGS_tryfromenv/etc. + // manually before calling ParseCommandLineFlags. We want to evaluate + // those too, as if they were the first flags on the commandline. + registry->Lock(); + parser.ProcessFlagfileLocked(FLAGS_flagfile, SET_FLAGS_VALUE); + // Last arg here indicates whether flag-not-found is a fatal error or not + parser.ProcessFromenvLocked(FLAGS_fromenv, SET_FLAGS_VALUE, true); + parser.ProcessFromenvLocked(FLAGS_tryfromenv, SET_FLAGS_VALUE, false); + registry->Unlock(); + + // Now get the flags specified on the commandline + const int r = parser.ParseNewCommandLineFlags(argc, argv, remove_flags); + + if (do_report) + HandleCommandLineHelpFlags(); // may cause us to exit on --help, etc. 
+ + // See if any of the unset flags fail their validation checks + parser.ValidateAllFlags(); + + if (parser.ReportErrors()) // may cause us to exit on illegal flags + commandlineflags_exitfunc(1); // almost certainly exit() + return r; +} + +uint32 ParseCommandLineFlags(int* argc, char*** argv, bool remove_flags) { + return ParseCommandLineFlagsInternal(argc, argv, remove_flags, true); +} + +uint32 ParseCommandLineNonHelpFlags(int* argc, char*** argv, + bool remove_flags) { + return ParseCommandLineFlagsInternal(argc, argv, remove_flags, false); +} + +// -------------------------------------------------------------------- +// AllowCommandLineReparsing() +// ReparseCommandLineNonHelpFlags() +// This is most useful for shared libraries. The idea is if +// a flag is defined in a shared library that is dlopen'ed +// sometime after main(), you can ParseCommandLineFlags before +// the dlopen, then ReparseCommandLineNonHelpFlags() after the +// dlopen, to get the new flags. But you have to explicitly +// Allow() it; otherwise, you get the normal default behavior +// of unrecognized flags calling a fatal error. +// TODO(csilvers): this isn't used. Just delete it? 
+// -------------------------------------------------------------------- + +void AllowCommandLineReparsing() { + allow_command_line_reparsing = true; +} + +uint32 ReparseCommandLineNonHelpFlags() { + // We make a copy of argc and argv to pass in + const vector<string>& argvs = GetArgvs(); + int tmp_argc = static_cast<int>(argvs.size()); + char** tmp_argv = new char* [tmp_argc + 1]; + for (int i = 0; i < tmp_argc; ++i) + tmp_argv[i] = strdup(argvs[i].c_str()); // TODO(csilvers): don't dup + + const int retval = ParseCommandLineNonHelpFlags(&tmp_argc, &tmp_argv, false); + + for (int i = 0; i < tmp_argc; ++i) + free(tmp_argv[i]); + delete[] tmp_argv; + + return retval; +} + +} // namespace google diff --git a/src/gflags/gflags.h b/src/gflags/gflags.h new file mode 100644 index 0000000..3fef830 --- /dev/null +++ b/src/gflags/gflags.h @@ -0,0 +1,519 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Ray Sidney +// Revamped and reorganized by Craig Silverstein +// +// This is the file that should be included by any file which declares +// or defines a command line flag or wants to parse command line flags +// or print a program usage message (which will include information about +// flags). Executive summary, in the form of an example foo.cc file: +// +// #include "foo.h" // foo.h has a line "DECLARE_int32(start);" +// +// DEFINE_int32(end, 1000, "The last record to read"); +// DECLARE_bool(verbose); // some other file has a DEFINE_bool(verbose, ...) +// +// void MyFunc() { +// if (FLAGS_verbose) printf("Records %d-%d\n", FLAGS_start, FLAGS_end); +// } +// +// Then, at the command-line: +// ./foo --noverbose --start=5 --end=100 +// +// For more details, see +// doc/gflags.html +// +// --- A note about thread-safety: +// +// We describe many functions in this routine as being thread-hostile, +// thread-compatible, or thread-safe. Here are the meanings we use: +// +// thread-safe: it is safe for multiple threads to call this routine +// (or, when referring to a class, methods of this class) +// concurrently. +// thread-hostile: it is not safe for multiple threads to call this +// routine (or methods of this class) concurrently. In gflags, +// most thread-hostile routines are intended to be called early in, +// or even before, main() -- that is, before threads are spawned. 
+// thread-compatible: it is safe for multiple threads to read from +// this variable (when applied to variables), or to call const +// methods of this class (when applied to classes), as long as no +// other thread is writing to the variable or calling non-const +// methods of this class. + +#ifndef GOOGLE_GFLAGS_H_ +#define GOOGLE_GFLAGS_H_ + +#include "config.h" +#include <string> +#include <vector> + +// We care a lot about number of bits things take up. Unfortunately, +// systems define their bit-specific ints in a lot of different ways. +// We use our own way, and have a typedef to get there. +#if defined(HAVE_STDINT_H) +#include <stdint.h> // the normal place uint16_t is defined +#elif defined(HAVE_SYSTYPES_H) +#include <sys/types.h> // the normal place u_int16_t is defined +#elif defined(HAVE_INTTYPES_H) +#include <inttypes.h> // a third place for uint16_t or u_int16_t +#endif + +namespace google { + +#if defined(HAVE_UINT16_T) // the C99 format +typedef int32_t int32; +typedef uint32_t uint32; +typedef int64_t int64; +typedef uint64_t uint64; +#elif defined(HAVE_U_INT16_T) // the BSD format +typedef int32_t int32; +typedef u_int32_t uint32; +typedef int64_t int64; +typedef u_int64_t uint64; +#elif defined(HAVE___INT16) // the windows (vc++) format +typedef __int32 int32; +typedef unsigned __int32 uint32; +typedef __int64 int64; +typedef unsigned __int64 uint64; +#else +#error Do not know how to define a 32-bit integer quantity on your system +#endif + +// -------------------------------------------------------------------- +// To actually define a flag in a file, use DEFINE_bool, +// DEFINE_string, etc. at the bottom of this file. You may also find +// it useful to register a validator with the flag. This ensures that +// when the flag is parsed from the commandline, or is later set via +// SetCommandLineOption, we call the validation function. The +// validation function should return true if the flag value is valid, +// and false otherwise. 
+// +// This function is safe to call at global construct time (as in the +// example below). +// +// Example use: +// static bool ValidatePort(const char* flagname, int32 value) { +// if (value > 0 && value < 32768) // value is ok +// return true; +// printf("Invalid value for --%s: %d\n", flagname, (int)value); +// return false; +// } +// DEFINE_int32(port, 0, "What port to listen on"); +// static bool dummy = RegisterFlagValidator(&FLAGS_port, &ValidatePort); + +// Returns true if successfully registered, false if not (because the +// first argument doesn't point to a command-line flag, or because a +// validator is already registered for this flag). +bool RegisterFlagValidator(const bool* flag, + bool (*validate_fn)(const char*, bool)); +bool RegisterFlagValidator(const int32* flag, + bool (*validate_fn)(const char*, int32)); +bool RegisterFlagValidator(const int64* flag, + bool (*validate_fn)(const char*, int64)); +bool RegisterFlagValidator(const uint64* flag, + bool (*validate_fn)(const char*, uint64)); +bool RegisterFlagValidator(const double* flag, + bool (*validate_fn)(const char*, double)); +bool RegisterFlagValidator(const std::string* flag, + bool (*validate_fn)(const char*, const std::string&)); + + +// -------------------------------------------------------------------- +// These methods are the best way to get access to info about the +// list of commandline flags. Note that these routines are pretty slow. +// GetAllFlags: mostly-complete info about the list, sorted by file. +// ShowUsageWithFlags: pretty-prints the list to stdout (what --help does) +// ShowUsageWithFlagsRestrict: limit to filenames with restrict as a substr +// +// In addition to accessing flags, you can also access argv[0] (the program +// name) and argv (the entire commandline), which we sock away a copy of. +// These variables are static, so you should only set them once. 
+ +struct CommandLineFlagInfo { + std::string name; // the name of the flag + std::string type; // the type of the flag: int32, etc + std::string description; // the "help text" associated with the flag + std::string current_value; // the current value, as a string + std::string default_value; // the default value, as a string + std::string filename; // 'cleaned' version of filename holding the flag + bool has_validator_fn; // true if RegisterFlagValidator called on flag + bool is_default; // true if the flag has default value +}; + +extern void GetAllFlags(std::vector<CommandLineFlagInfo>* OUTPUT); +// These two are actually defined in commandlineflags_reporting.cc. +extern void ShowUsageWithFlags(const char *argv0); // what --help does +extern void ShowUsageWithFlagsRestrict(const char *argv0, const char *restrict); + +// Create a descriptive string for a flag. +// Goes to some trouble to make pretty line breaks. +extern std::string DescribeOneFlag(const CommandLineFlagInfo& flag); + +// Thread-hostile; meant to be called before any threads are spawned. +extern void SetArgv(int argc, const char** argv); +// The following functions are thread-safe as long as SetArgv() is +// only called before any threads start. +extern const std::vector<std::string>& GetArgvs(); // all of argv as a vector +extern const char* GetArgv(); // all of argv as a string +extern const char* GetArgv0(); // only argv0 +extern uint32 GetArgvSum(); // simple checksum of argv +extern const char* ProgramInvocationName(); // argv0, or "UNKNOWN" if not set +extern const char* ProgramInvocationShortName(); // basename(argv0) +// ProgramUsage() is thread-safe as long as SetUsageMessage() is only +// called before any threads start. 
+extern const char* ProgramUsage(); // string set by SetUsageMessage() + + +// -------------------------------------------------------------------- +// Normally you access commandline flags by just saying "if (FLAGS_foo)" +// or whatever, and set them by calling "FLAGS_foo = bar" (or, more +// commonly, via the DEFINE_foo macro). But if you need a bit more +// control, we have programmatic ways to get/set the flags as well. +// These programmatic ways to access flags are thread-safe, but direct +// access is only thread-compatible. + +// Return true iff the flagname was found. +// OUTPUT is set to the flag's value, or unchanged if we return false. +extern bool GetCommandLineOption(const char* name, std::string* OUTPUT); + +// Return true iff the flagname was found. OUTPUT is set to the flag's +// CommandLineFlagInfo or unchanged if we return false. +extern bool GetCommandLineFlagInfo(const char* name, + CommandLineFlagInfo* OUTPUT); + +// Return the CommandLineFlagInfo of the flagname. exit() if name not found. +// Example usage, to check if a flag's value is currently the default value: +// if (GetCommandLineFlagInfoOrDie("foo").is_default) ... +extern CommandLineFlagInfo GetCommandLineFlagInfoOrDie(const char* name); + +enum FlagSettingMode { + // update the flag's value (can call this multiple times). + SET_FLAGS_VALUE, + // update the flag's value, but *only if* it has not yet been updated + // with SET_FLAGS_VALUE, SET_FLAG_IF_DEFAULT, or "FLAGS_xxx = nondef". + SET_FLAG_IF_DEFAULT, + // set the flag's default value to this. If the flag has not been updated + // yet (via SET_FLAGS_VALUE, SET_FLAG_IF_DEFAULT, or "FLAGS_xxx = nondef") + // change the flag's current value to the new default value as well. + SET_FLAGS_DEFAULT +}; + +// Set a particular flag ("command line option"). Returns a string +// describing the new value that the option has been set to. 
 The +// return value API is not well-specified, so basically just depend on +// it to be empty if the setting failed for some reason -- the name is +// not a valid flag name, or the value is not a valid value -- and +// non-empty else. + +// SetCommandLineOption uses set_mode == SET_FLAGS_VALUE (the common case) +extern std::string SetCommandLineOption(const char* name, const char* value); +extern std::string SetCommandLineOptionWithMode(const char* name, const char* value, + FlagSettingMode set_mode); + + +// -------------------------------------------------------------------- +// Saves the states (value, default value, whether the user has set +// the flag, registered validators, etc) of all flags, and restores +// them when the FlagSaver is destroyed. This is very useful in +// tests, say, when you want to let your tests change the flags, but +// make sure that they get reverted to the original states when your +// test is complete. +// +// Example usage: +// void TestFoo() { +// FlagSaver s1; +// FLAGS_foo = false; +// FLAGS_bar = "some value"; +// +// // test happens here. You can return at any time +// // without worrying about restoring the FLAG values. +// } +// +// Note: This class is marked with __attribute__((unused)) because all the +// work is done in the constructor and destructor, so in the standard +// usage example above, the compiler would complain that it's an +// unused variable. +// +// This class is thread-safe. + +class FlagSaver { + public: + FlagSaver(); + ~FlagSaver(); + + private: + class FlagSaverImpl* impl_; // we use pimpl here to keep API steady + + FlagSaver(const FlagSaver&); // no copying! + void operator=(const FlagSaver&); +#if HAVE___ATTRIBUTE__ +} __attribute__ ((unused)); +#else // !HAVE___ATTRIBUTE__ +}; +#endif // HAVE___ATTRIBUTE__ + +// -------------------------------------------------------------------- +// Some deprecated or hopefully-soon-to-be-deprecated functions. + +// This is often used for logging. 
TODO(csilvers): figure out a better way +extern std::string CommandlineFlagsIntoString(); +// Usually where this is used, a FlagSaver should be used instead. +extern bool ReadFlagsFromString(const std::string& flagfilecontents, + const char* prog_name, + bool errors_are_fatal); // uses SET_FLAGS_VALUE + +// These let you manually implement --flagfile functionality. +// DEPRECATED. +extern bool AppendFlagsIntoFile(const std::string& filename, const char* prog_name); +extern bool SaveCommandFlags(); // actually defined in google.cc ! +extern bool ReadFromFlagsFile(const std::string& filename, const char* prog_name, + bool errors_are_fatal); // uses SET_FLAGS_VALUE + + +// -------------------------------------------------------------------- +// Useful routines for initializing flags from the environment. +// In each case, if 'varname' does not exist in the environment +// return defval. If 'varname' does exist but is not valid +// (e.g., not a number for an int32 flag), abort with an error. +// Otherwise, return the value. NOTE: for booleans, for true use +// 't' or 'T' or 'true' or '1', for false 'f' or 'F' or 'false' or '0'. + +extern bool BoolFromEnv(const char *varname, bool defval); +extern int32 Int32FromEnv(const char *varname, int32 defval); +extern int64 Int64FromEnv(const char *varname, int64 defval); +extern uint64 Uint64FromEnv(const char *varname, uint64 defval); +extern double DoubleFromEnv(const char *varname, double defval); +extern const char *StringFromEnv(const char *varname, const char *defval); + + +// -------------------------------------------------------------------- +// The next two functions parse commandlineflags from main(): + +// Set the "usage" message for this program. For example: +// string usage("This program does nothing. Sample usage:\n"); +// usage += argv[0] + " <uselessarg1> <uselessarg2>"; +// SetUsageMessage(usage); +// Do not include commandline flags in the usage: we do that for you! 
+// Thread-hostile; meant to be called before any threads are spawned. +extern void SetUsageMessage(const std::string& usage); + +// Looks for flags in argv and parses them. Rearranges argv to put +// flags first, or removes them entirely if remove_flags is true. +// If a flag is defined more than once in the command line or flag +// file, the last definition is used. +// See top-of-file for more details on this function. +#ifndef SWIG // In swig, use ParseCommandLineFlagsScript() instead. +extern uint32 ParseCommandLineFlags(int *argc, char*** argv, + bool remove_flags); +#endif + + +// Calls to ParseCommandLineNonHelpFlags and then to +// HandleCommandLineHelpFlags can be used instead of a call to +// ParseCommandLineFlags during initialization, in order to allow for +// changing default values for some FLAGS (via +// e.g. SetCommandLineOptionWithMode calls) between the time of +// command line parsing and the time of dumping help information for +// the flags as a result of command line parsing. +// If a flag is defined more than once in the command line or flag +// file, the last definition is used. +extern uint32 ParseCommandLineNonHelpFlags(int *argc, char*** argv, + bool remove_flags); +// This is actually defined in commandlineflags_reporting.cc. +// This function is misnamed (it also handles --version, etc.), but +// it's too late to change that now. :-( +extern void HandleCommandLineHelpFlags(); // in commandlineflags_reporting.cc + +// Allow command line reparsing. Disables the error normally +// generated when an unknown flag is found, since it may be found in a +// later parse. Thread-hostile; meant to be called before any threads +// are spawned. +extern void AllowCommandLineReparsing(); + +// Reparse the flags that have not yet been recognized. +// Only flags registered since the last parse will be recognized. 
+// Any flag value must be provided as part of the argument using "=", +// not as a separate command line argument that follows the flag argument. +// Intended for handling flags from dynamically loaded libraries, +// since their flags are not registered until they are loaded. +extern uint32 ReparseCommandLineNonHelpFlags(); + + +// -------------------------------------------------------------------- +// Now come the command line flag declaration/definition macros that +// will actually be used. They're kind of hairy. A major reason +// for this is initialization: we want people to be able to access +// variables in global constructors and have that not crash, even if +// their global constructor runs before the global constructor here. +// (Obviously, we can't guarantee the flags will have the correct +// default value in that case, but at least accessing them is safe.) +// The only way to do that is have flags point to a static buffer. +// So we make one, using a union to ensure proper alignment, and +// then use placement-new to actually set up the flag with the +// correct default value. In the same vein, we have to worry about +// flag access in global destructors, so FlagRegisterer has to be +// careful never to destroy the flag-values it constructs. +// +// Note that when we define a flag variable FLAGS_<name>, we also +// preemptively define a junk variable, FLAGS_no<name>. This is to +// cause a link-time error if someone tries to define 2 flags with +// names like "logging" and "nologging". We do this because a bool +// flag FLAG can be set from the command line to true with a "-FLAG" +// argument, and to false with a "-noFLAG" argument, and so this can +// potentially avert confusion. +// +// We also put flags into their own namespace. It is purposefully +// named in an opaque way that people should have trouble typing +// directly. 
The idea is that DEFINE puts the flag in the weird +// namespace, and DECLARE imports the flag from there into the current +// namespace. The net result is to force people to use DECLARE to get +// access to a flag, rather than saying "extern bool FLAGS_whatever;" +// or some such instead. We want this so we can put extra +// functionality (like sanity-checking) in DECLARE if we want, and +// make sure it is picked up everywhere. +// +// We also put the type of the variable in the namespace, so that +// people can't DECLARE_int32 something that they DEFINE_bool'd +// elsewhere. + +class FlagRegisterer { + public: + FlagRegisterer(const char* name, const char* type, + const char* help, const char* filename, + void* current_storage, void* defvalue_storage); +}; + +#ifndef SWIG // In swig, ignore the main flag declarations + +// If your application #defines STRIP_FLAG_HELP to a non-zero value +// before #including this file, we remove the help message from the +// binary file. This can reduce the size of the resulting binary +// somewhat, and may also be useful for security reasons. + +extern const char kStrippedFlagHelp[]; + +#if defined(STRIP_FLAG_HELP) && STRIP_FLAG_HELP > 0 +// Need this construct to avoid the 'defined but not used' warning. +#define MAYBE_STRIPPED_HELP(txt) (false ? (txt) : kStrippedFlagHelp) +#else +#define MAYBE_STRIPPED_HELP(txt) txt +#endif + +// Each command-line flag has two variables associated with it: one +// with the current value, and one with the default value. However, +// we have a third variable, which is where value is assigned; it's a +// constant. This guarantees that FLAG_##value is initialized at +// static initialization time (e.g. before program-start) rather than +// than global construction time (which is after program-start but +// before main), at least when 'value' is a compile-time constant. We +// use a small trick for the "default value" variable, and call it +// FLAGS_no<name>. 
This serves the second purpose of assuring a +// compile error if someone tries to define a flag named no<name> +// which is illegal (--foo and --nofoo both affect the "foo" flag). +#define DEFINE_VARIABLE(type, shorttype, name, value, help) \ + namespace fL##shorttype { \ + static const type FLAGS_nono##name = value; \ + type FLAGS_##name = FLAGS_nono##name; \ + type FLAGS_no##name = FLAGS_nono##name; \ + static ::google::FlagRegisterer o_##name( \ + #name, #type, MAYBE_STRIPPED_HELP(help), __FILE__, \ + &FLAGS_##name, &FLAGS_no##name); \ + } \ + using fL##shorttype::FLAGS_##name + +#define DECLARE_VARIABLE(type, shorttype, name) \ + namespace fL##shorttype { \ + extern type FLAGS_##name; \ + } \ + using fL##shorttype::FLAGS_##name + +// For boolean flags, we want to do the extra check that the passed-in +// value is actually a bool, and not a string or something that can be +// coerced to a bool. These declarations (no definition needed!) will +// help us do that, and never evaluate from, which is important. +// We'll use 'sizeof(IsBool(val))' to distinguish. +namespace fLB { +template<typename From> double IsBoolFlag(const From& from); +bool IsBoolFlag(bool from); +} +extern bool FlagsTypeWarn(const char *name); + +#define DECLARE_bool(name) DECLARE_VARIABLE(bool,B, name) +// We have extra code here to make sure 'val' is actually a boolean. +#define DEFINE_bool(name,val,txt) namespace fLB { \ + const bool FLAGS_nonono##name = \ + (sizeof(::google::fLB::IsBoolFlag(val)) \ + == sizeof(double)) \ + ? 
::google::FlagsTypeWarn(#name) : true; \ + } \ + DEFINE_VARIABLE(bool,B, name, val, txt) +#define DECLARE_int32(name) DECLARE_VARIABLE(::google::int32,I, name) +#define DEFINE_int32(name,val,txt) DEFINE_VARIABLE(::google::int32,I, name, val, txt) + +#define DECLARE_int64(name) DECLARE_VARIABLE(::google::int64,I64, name) +#define DEFINE_int64(name,val,txt) DEFINE_VARIABLE(::google::int64,I64, name, val, txt) + +#define DECLARE_uint64(name) DECLARE_VARIABLE(::google::uint64,U64, name) +#define DEFINE_uint64(name,val,txt) DEFINE_VARIABLE(::google::uint64,U64, name, val, txt) + +#define DECLARE_double(name) DECLARE_VARIABLE(double,D, name) +#define DEFINE_double(name,val,txt) DEFINE_VARIABLE(double,D, name, val, txt) + +// Strings are trickier, because they're not a POD, so we can't +// construct them at static-initialization time (instead they get +// constructed at global-constructor time, which is much later). To +// try to avoid crashes in that case, we use a char buffer to store +// the string, which we can static-initialize, and then placement-new +// into it later. It's not perfect, but the best we can do. +#define DECLARE_string(name) namespace fLS { extern string& FLAGS_##name; } \ + using fLS::FLAGS_##name + +// We need to define a var named FLAGS_no##name so people don't define +// --string and --nostring. And we need a temporary place to put val +// so we don't have to evaluate it twice. Two great needs that go +// great together! 
+#define DEFINE_string(name, val, txt) \ + namespace fLS { \ + static union { void* align; char s[sizeof(std::string)]; } s_##name[2]; \ + const string* const FLAGS_no##name = new (s_##name[0].s) std::string(val); \ + static ::google::FlagRegisterer o_##name( \ + #name, "string", MAYBE_STRIPPED_HELP(txt), __FILE__, \ + s_##name[0].s, new (s_##name[1].s) std::string(*FLAGS_no##name)); \ + std::string& FLAGS_##name = *(reinterpret_cast<std::string*>(s_##name[0].s)); \ + } \ + using fLS::FLAGS_##name + +#endif // SWIG + +} // namespace google + +#endif // GOOGLE_GFLAGS_H_ diff --git a/src/gflags_reporting.cc b/src/gflags_reporting.cc new file mode 100644 index 0000000..622496d --- /dev/null +++ b/src/gflags_reporting.cc @@ -0,0 +1,417 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Ray Sidney +// Revamped and reorganized by Craig Silverstein +// +// This file contains code for handling the 'reporting' flags. These +// are flags that, when present, cause the program to report some +// information and then exit. --help and --version are the canonical +// reporting flags, but we also have flags like --helpxml, etc. +// +// There's only one function that's meant to be called externally: +// HandleCommandLineHelpFlags(). (Well, actually, ShowUsageWithFlags(), +// ShowUsageWithFlagsRestrict(), and DescribeOneFlag() can be called +// externally too, but there's little need for it.) These are all +// declared in the main commandlineflags.h header file. +// +// HandleCommandLineHelpFlags() will check what 'reporting' flags have +// been defined, if any -- the "help" part of the function name is a +// bit misleading -- and do the relevant reporting. It should be +// called after all flag-values have been assigned, that is, after +// parsing the command-line. + +#include "config.h" +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <assert.h> +#include <string> +#include <vector> +#include "gflags/gflags.h" + +#ifndef PATH_SEPARATOR +#define PATH_SEPARATOR '/' +#endif + +using std::string; +using std::vector; + +// The 'reporting' flags. They all call exit(). 
+DEFINE_bool(help, false, + "show help on all flags [tip: all flags can have two dashes]"); +DEFINE_bool(helpfull, false, + "show help on all flags -- same as -help"); +DEFINE_bool(helpshort, false, + "show help on only the main module for this program"); +DEFINE_string(helpon, "", + "show help on the modules named by this flag value"); +DEFINE_string(helpmatch, "", + "show help on modules whose name contains the specified substr"); +DEFINE_bool(helppackage, false, + "show help on all modules in the main package"); +DEFINE_bool(helpxml, false, + "produce an xml version of help"); +DEFINE_bool(version, false, + "show version and build info and exit"); + +namespace google { + +// -------------------------------------------------------------------- +// DescribeOneFlag() +// DescribeOneFlagInXML() +// Routines that pretty-print info about a flag. These use +// a CommandLineFlagInfo, which is the way the commandlineflags +// API exposes static info about a flag. +// -------------------------------------------------------------------- + +static const int kLineLength = 80; + +static void AddString(const string& s, + string* final_string, int* chars_in_line) { + const int slen = static_cast<int>(s.length()); + if (*chars_in_line + 1 + slen >= kLineLength) { // < 80 chars/line + *final_string += "\n "; + *chars_in_line = 6; + } else { + *final_string += " "; + *chars_in_line += 1; + } + *final_string += s; + *chars_in_line += slen; +} + +// Create a descriptive string for a flag. +// Goes to some trouble to make pretty line breaks. +string DescribeOneFlag(const CommandLineFlagInfo& flag) { + string main_part = (string(" -") + flag.name + + " (" + flag.description + ')'); + const char* c_string = main_part.c_str(); + int chars_left = static_cast<int>(main_part.length()); + string final_string = ""; + int chars_in_line = 0; // how many chars in current line so far? + while (1) { + assert(chars_left == strlen(c_string)); // Unless there's a \0 in there? 
+ const char* newline = strchr(c_string, '\n'); + if (newline == NULL && chars_in_line+chars_left < kLineLength) { + // The whole remainder of the string fits on this line + final_string += c_string; + chars_in_line += chars_left; + break; + } + if (newline != NULL && newline - c_string < kLineLength - chars_in_line) { + int n = static_cast<int>(newline - c_string); + final_string.append(c_string, n); + chars_left -= n + 1; + c_string += n + 1; + } else { + // Find the last whitespace on this 80-char line + int whitespace = kLineLength-chars_in_line-1; // < 80 chars/line + while ( whitespace > 0 && !isspace(c_string[whitespace]) ) { + --whitespace; + } + if (whitespace <= 0) { + // Couldn't find any whitespace to make a line break. Just dump the + // rest out! + final_string += c_string; + chars_in_line = kLineLength; // next part gets its own line for sure! + break; + } + final_string += string(c_string, whitespace); + chars_in_line += whitespace; + while (isspace(c_string[whitespace])) ++whitespace; + c_string += whitespace; + chars_left -= whitespace; + } + if (*c_string == '\0') + break; + final_string += "\n "; + chars_in_line = 6; + } + + // Append data type + AddString(string("type: ") + flag.type, &final_string, &chars_in_line); + // Append the effective default value (i.e., the value that the flag + // will have after the command line is parsed if the flag is not + // specified on the command line), which may be different from the + // stored default value. This would happen if the value of the flag + // was modified before the command line was parsed. (Unless the + // value was modified using SetCommandLineOptionWithMode() with mode + // SET_FLAGS_DEFAULT.) + // Note that we are assuming this code is being executed because a help + // request was just parsed from the command line, in which case the + // printed value is indeed the effective default, as long as no value + // for the flag was parsed from the command line before "--help". 
+ if (strcmp(flag.type.c_str(), "string") == 0) { // add quotes for strings + AddString(string("default: \"") + flag.current_value + string("\""), + &final_string, &chars_in_line); + } else { + AddString(string("default: ") + flag.current_value, + &final_string, &chars_in_line); + } + + final_string += '\n'; + return final_string; +} + +// Simple routine to xml-escape a string: escape & and < only. +static string XMLText(const string& txt) { + string ans = txt; + for (string::size_type pos = 0; (pos = ans.find("&", pos)) != string::npos; ) + ans.replace(pos++, 1, "&"); + for (string::size_type pos = 0; (pos = ans.find("<", pos)) != string::npos; ) + ans.replace(pos++, 1, "<"); + return ans; +} + +static string DescribeOneFlagInXML(const CommandLineFlagInfo& flag) { + // The file and flagname could have been attributes, but default + // and meaning need to avoid attribute normalization. This way it + // can be parsed by simple programs, in addition to xml parsers. + return (string("<flag>") + + "<file>" + XMLText(flag.filename) + "</file>" + + "<name>" + XMLText(flag.name) + "</name>" + + "<meaning>" + XMLText(flag.description) + "</meaning>" + + "<default>" + XMLText(flag.default_value) + "</default>" + + "<type>" + XMLText(flag.type) + "</type>" + + string("</flag>")); +} + +// -------------------------------------------------------------------- +// ShowUsageWithFlags() +// ShowUsageWithFlagsRestrict() +// ShowXMLOfFlags() +// These routines variously expose the registry's list of flag +// values. ShowUsage*() prints the flag-value information +// to stdout in a user-readable format (that's what --help uses). +// The Restrict() version limits what flags are shown. +// ShowXMLOfFlags() prints the flag-value information to stdout +// in a machine-readable format. In all cases, the flags are +// sorted: first by filename they are defined in, then by flagname. 
+// -------------------------------------------------------------------- + +static const char* Basename(const char* filename) { + const char* sep = strrchr(filename, PATH_SEPARATOR); + return sep ? sep + 1 : filename; +} + +static string Dirname(const string& filename) { + string::size_type sep = filename.rfind(PATH_SEPARATOR); + return filename.substr(0, (sep == string::npos) ? 0 : sep); +} + +// Test whether a filename contains at least one of the substrings. +static bool FileMatchesSubstring(const string& filename, + const vector<string>& substrings) { + for (vector<string>::const_iterator target = substrings.begin(); + target != substrings.end(); + ++target) { + if (strstr(filename.c_str(), target->c_str()) != NULL) { + return true; + } + } + return false; +} + +// Show help for every filename which matches any of the target substrings. +// If substrings is empty, shows help for every file. If a flag's help message +// has been stripped (e.g. by adding '#define STRIP_FLAG_HELP 1' before +// including gflags/gflags.h), then this flag will not be displayed by +// '--help' and its variants. +static void ShowUsageWithFlagsMatching(const char *argv0, + const vector<string> &substrings) { + fprintf(stdout, "%s: %s\n", Basename(argv0), ProgramUsage()); + + vector<CommandLineFlagInfo> flags; + GetAllFlags(&flags); // flags are sorted by filename, then flagname + + string last_filename; // so we know when we're at a new file + bool first_directory = true; // controls blank lines between dirs + bool found_match = false; // stays false iff no dir matches restrict + for (vector<CommandLineFlagInfo>::const_iterator flag = flags.begin(); + flag != flags.end(); + ++flag) { + if (substrings.empty() || + FileMatchesSubstring(flag->filename, substrings)) { + // If the flag has been stripped, pretend that it doesn't exist. + if (flag->description == kStrippedFlagHelp) continue; + found_match = true; // this flag passed the match! 
+ if (flag->filename != last_filename) { // new file + if (Dirname(flag->filename) != Dirname(last_filename)) { // new dir! + if (!first_directory) + fprintf(stdout, "\n\n"); // put blank lines between directories + first_directory = false; + } + fprintf(stdout, "\n Flags from %s:\n", flag->filename.c_str()); + last_filename = flag->filename; + } + // Now print this flag + fprintf(stdout, "%s", DescribeOneFlag(*flag).c_str()); + } + } + if (!found_match && !substrings.empty()) { + fprintf(stdout, "\n No modules matched: use -help\n"); + } +} + +void ShowUsageWithFlagsRestrict(const char *argv0, const char *restrict) { + vector<string> substrings; + if (restrict != NULL && *restrict != '\0') { + substrings.push_back(restrict); + } + ShowUsageWithFlagsMatching(argv0, substrings); +} + +void ShowUsageWithFlags(const char *argv0) { + ShowUsageWithFlagsRestrict(argv0, ""); +} + +// Convert the help, program, and usage to xml. +static void ShowXMLOfFlags(const char *prog_name) { + vector<CommandLineFlagInfo> flags; + GetAllFlags(&flags); // flags are sorted: by filename, then flagname + + // XML. There is no corresponding schema yet + fprintf(stdout, "<?xml version=\"1.0\"?>\n"); + // The document + fprintf(stdout, "<AllFlags>\n"); + // the program name and usage + fprintf(stdout, "<program>%s</program>\n", + XMLText(Basename(prog_name)).c_str()); + fprintf(stdout, "<usage>%s</usage>\n", + XMLText(ProgramUsage()).c_str()); + // All the flags + for (vector<CommandLineFlagInfo>::const_iterator flag = flags.begin(); + flag != flags.end(); + ++flag) { + if (flag->description != kStrippedFlagHelp) + fprintf(stdout, "%s\n", DescribeOneFlagInXML(*flag).c_str()); + } + // The end of the document + fprintf(stdout, "</AllFlags>\n"); +} + +// -------------------------------------------------------------------- +// ShowVersion() +// Called upon --version. Prints build-related info. 
+// -------------------------------------------------------------------- + +static void ShowVersion() { + fprintf(stdout, "%s\n", ProgramInvocationShortName()); + // TODO: add other stuff, like a timestamp, who built it, what + // target they built, etc. + +# if !defined(NDEBUG) + fprintf(stdout, "Debug build (NDEBUG not #defined)\n"); +# endif +} + +// -------------------------------------------------------------------- +// HandleCommandLineHelpFlags() +// Checks all the 'reporting' commandline flags to see if any +// have been set. If so, handles them appropriately. Note +// that all of them, by definition, cause the program to exit +// if they trigger. +// -------------------------------------------------------------------- + +void HandleCommandLineHelpFlags() { + const char* progname = ProgramInvocationShortName(); + extern void (*commandlineflags_exitfunc)(int); // in gflags.cc + + if (FLAGS_helpshort) { + // show only flags related to this binary: + // E.g. for fileutil.cc, want flags containing ... "/fileutil." cc + vector<string> substrings; + substrings.push_back(string("/") + progname + "."); + substrings.push_back(string("/") + progname + "-main."); + substrings.push_back(string("/") + progname + "_main."); + ShowUsageWithFlagsMatching(progname, substrings); + commandlineflags_exitfunc(1); // almost certainly exit() + + } else if (FLAGS_help || FLAGS_helpfull) { + // show all options + ShowUsageWithFlagsRestrict(progname, ""); // empty restrict + commandlineflags_exitfunc(1); + + } else if (!FLAGS_helpon.empty()) { + string restrict = "/" + FLAGS_helpon + "."; + ShowUsageWithFlagsRestrict(progname, restrict.c_str()); + commandlineflags_exitfunc(1); + + } else if (!FLAGS_helpmatch.empty()) { + ShowUsageWithFlagsRestrict(progname, FLAGS_helpmatch.c_str()); + commandlineflags_exitfunc(1); + + } else if (FLAGS_helppackage) { + // Shows help for all files in the same directory as main(). 
We + // don't want to resort to looking at dirname(progname), because + // the user can pick progname, and it may not relate to the file + // where main() resides. So instead, we search the flags for a + // filename like "/progname.cc", and take the dirname of that. + vector<CommandLineFlagInfo> flags; + GetAllFlags(&flags); + vector<string> substrings; + substrings.push_back(string("/") + progname + "."); + substrings.push_back(string("/") + progname + "-main."); + substrings.push_back(string("/") + progname + "_main."); + string last_package; + for (vector<CommandLineFlagInfo>::const_iterator flag = flags.begin(); + flag != flags.end(); + ++flag) { + if (!FileMatchesSubstring(flag->filename, substrings)) + continue; + const string package = Dirname(flag->filename) + "/"; + if (package != last_package) { + ShowUsageWithFlagsRestrict(progname, package.c_str()); + if (!last_package.empty()) { // means this isn't our first pkg + fprintf(stderr, "WARNING: Multiple packages contain a file=%s\n", + progname); + } + last_package = package; + } + } + if (last_package.empty()) { // never found a package to print + fprintf(stderr, "WARNING: Unable to find a package for file=%s\n", + progname); + } + commandlineflags_exitfunc(1); + + } else if (FLAGS_helpxml) { + ShowXMLOfFlags(progname); + commandlineflags_exitfunc(1); + + } else if (FLAGS_version) { + ShowVersion(); + // Unlike help, we may be asking for version in a script, so return 0 + commandlineflags_exitfunc(0); + } +} + +} // namespace google diff --git a/src/google/output_string.h b/src/google/output_string.h new file mode 100644 index 0000000..8554e5e --- /dev/null +++ b/src/google/output_string.h @@ -0,0 +1,109 @@ +// Copyright 2008 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_VCDIFF_OUTPUT_STRING_H_ +#define OPEN_VCDIFF_OUTPUT_STRING_H_ + +#include <cstddef> // size_t + +namespace open_vcdiff { + +// This interface allows clients of VCDiff[Streaming]Encoder and +// VCDiff[Streaming]Decoder to use different string types to receive the output +// of those interfaces. +// +// Only the following operations can be performed on an output string, and their +// semantics must be identical to the std::string methods of the same names: +// append() +// clear() +// push_back() +// size() +// +// The versions of these methods that take a std::string argument are not +// supported by OutputStringInterface. +// +// There is one additional operation that can be performed on an output string: +// ReserveAdditionalBytes(). This asks the underlying output type to reserve +// enough capacity for the number of additional bytes requested in addition to +// existing content. The decoder knows the total expected output size in +// advance, so one large ReserveAdditionalBytes() operation precedes many small +// append() operations. For output types that gain no advantage from knowing in +// advance how many bytes will be appended, ReserveAdditionalBytes() can be +// defined to do nothing. 
+class OutputStringInterface { + public: + virtual ~OutputStringInterface() { } + + virtual OutputStringInterface& append(const char* s, size_t n) = 0; + + virtual void clear() = 0; + + virtual void push_back(char c) = 0; + + virtual void ReserveAdditionalBytes(size_t res_arg) = 0; + + virtual size_t size() const = 0; +}; + +// This template can be used to wrap any class that supports the operations +// needed by OutputStringInterface, including std::string. A class that has +// different names or syntax for these operations will need specialized +// definitions of OutputString methods -- see output_string_types.h for some +// examples of how to do this. +template<class StringClass> +class OutputString : public OutputStringInterface { + public: + explicit OutputString(StringClass* impl) : impl_(impl) { } + + virtual ~OutputString() { } + + virtual OutputString& append(const char* s, size_t n) { + impl_->append(s, n); + return *this; + } + + virtual void clear() { + impl_->clear(); + } + + virtual void push_back(char c) { + impl_->push_back(c); + } + + virtual void ReserveAdditionalBytes(size_t res_arg) { + impl_->reserve(impl_->size() + res_arg); + } + + virtual size_t size() const { + return impl_->size(); + } + + protected: + StringClass* impl_; + + private: + // Making these private avoids implicit copy constructor & assignment operator + OutputString(const OutputString&); + void operator=(const OutputString&); +}; + +// Don't allow the OutputString template to be based upon a pointer to +// OutputStringInterface. Enforce this restriction by defining this class to +// lack any functions expected of an OutputString. +template<> class OutputString<OutputStringInterface> { }; + +} // namespace open_vcdiff + +#endif // OPEN_VCDIFF_OUTPUT_STRING_H_ diff --git a/src/google/vcdecoder.h b/src/google/vcdecoder.h new file mode 100644 index 0000000..d5aa741 --- /dev/null +++ b/src/google/vcdecoder.h @@ -0,0 +1,189 @@ +// Copyright 2008 Google Inc. 
+// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_VCDIFF_VCDECODER_H_ +#define OPEN_VCDIFF_VCDECODER_H_ + +#include <cstddef> // size_t +#include <string> +#include "google/output_string.h" + +namespace open_vcdiff { + +using std::string; + +class VCDiffStreamingDecoderImpl; + +// A streaming decoder class. Takes a dictionary (source) file and a delta +// file, and produces the original target file. It is intended to process +// the partial contents of the delta file as they arrive, in "chunks". +// As soon as a chunk of bytes is received from a file read or from a network +// transmission, it can be passed to DecodeChunk(), which will then output +// as much of the target file as it can. +// +// The client should use this class as follows: +// VCDiffStreamingDecoder v; +// v.StartDecoding(dictionary_ptr, dictionary_size); +// while (any data left) { +// if (!v.DecodeChunk(data, len, &output_string)) { +// handle error; +// break; +// } +// process(output_string); // might have no new data, though +// } +// if (!v.FinishDecoding()) { ... handle error ... } +// +// I.e., the allowed pattern of calls is +// StartDecoding DecodeChunk* FinishDecoding +// +// NOTE: It is not necessary to call FinishDecoding if DecodeChunk +// returns false. When DecodeChunk returns false to signal an +// error, it resets its state and is ready for a new StartDecoding. +// If FinishDecoding is called, it will also return false. 
+// +class VCDiffStreamingDecoder { + public: + VCDiffStreamingDecoder(); + ~VCDiffStreamingDecoder(); + + // Resets the dictionary contents to "dictionary_ptr[0,dictionary_size-1]" + // and sets up the data structures for decoding. Note that the dictionary + // contents are not copied, and the client is responsible for ensuring that + // dictionary_ptr is valid until FinishDecoding is called. + // + void StartDecoding(const char* dictionary_ptr, size_t dictionary_size); + + // Accepts "data[0,len-1]" as additional data received in the + // compressed stream. If any chunks of data can be fully decoded, + // they are appended to output_string. + // + // Returns true on success, and false if the data was malformed + // or if there was an error in decoding it (e.g. out of memory, etc.). + // + // Note: we *append*, so the old contents of output_string stick around. + // This convention differs from the non-streaming Encode/Decode + // interfaces in VCDiffDecoder. + // + // output_string is guaranteed to be resized no more than once for each + // window in the VCDIFF delta file. This rule is irrespective + // of the number of calls to DecodeChunk(). + // + template<class OutputType> + bool DecodeChunk(const char* data, size_t len, OutputType* output) { + OutputString<OutputType> output_string(output); + return DecodeChunkToInterface(data, len, &output_string); + } + + bool DecodeChunkToInterface(const char* data, size_t len, + OutputStringInterface* output_string); + + // Finishes decoding after all data has been received. Returns true + // if decoding of the entire stream was successful. FinishDecoding() + // must be called for the current target before StartDecoding() can be + // called for a different target. + // + bool FinishDecoding(); + + // The decoder can create a version of the output target string with XML tags + // added to indicate where each section of the decoded text came from. 
This + // can assist in debugging the decoder and/or determining the effectiveness of + // a particular dictionary. The following XML tags will be added. Despite + // the formatting of this example, newlines will not be added between tags. + // <dmatch>This text matched with the dictionary</dmatch> + // <bmatch>This text matched earlier target output</bmatch> + // <literal>This text found no match</literal> + // + // Calling EnableAnnotatedOutput() will enable this feature. The interface + // GetAnnotatedOutput() can be used to retrieve the annotated text. It is + // recommended to use this feature only when the target data consists of HTML + // or other human-readable text. + + // Enables the annotated output feature. After this method is called, new + // target windows added to output_string by DecodeChunk() will also be added + // to the annotated output, and can be retrieved using GetAnnotatedOutput(). + // If annotated output is already enabled, this function has no effect. + void EnableAnnotatedOutput(); + + // Disables the annotated output feature. After calling this method, + // GetAnnotatedOutput() will produce an empty string until + // EnableAnnotatedOutput() is called again. + void DisableAnnotatedOutput(); + + // Replaces annotated_output with a copy of the annotated output string. + // Annotated output collection begins when EnableAnnotatedOutput() is called. + // The annotated output will be cleared each time StartDecoding() is called, + // but not when FinishDecoding() is called. + template<class OutputType> + void GetAnnotatedOutput(OutputType* annotated_output) { + OutputString<OutputType> output_string(annotated_output); + GetAnnotatedOutputToInterface(&output_string); + } + + void GetAnnotatedOutputToInterface(OutputStringInterface* annotated_output); + + private: + VCDiffStreamingDecoderImpl* const impl_; + + // Make the copy constructor and assignment operator private + // so that they don't inadvertently get used. 
+ explicit VCDiffStreamingDecoder(const VCDiffStreamingDecoder&); + void operator=(const VCDiffStreamingDecoder&); +}; + +// A simpler (non-streaming) interface to the VCDIFF decoder that can be used +// if the entire delta file is available. +// +class VCDiffDecoder { + public: + VCDiffDecoder() { } + ~VCDiffDecoder() { } + + /***** Simple interface *****/ + + // Replaces old contents of "*target" with the result of decoding + // the bytes found in "encoding." + // + // Returns true if "encoding" was a well-formed sequence of + // instructions, and returns false if not. + // + template<class OutputType> + bool Decode(const char* dictionary_ptr, + size_t dictionary_size, + const string& encoding, + OutputType* target) { + OutputString<OutputType> output_string(target); + return DecodeToInterface(dictionary_ptr, + dictionary_size, + encoding, + &output_string); + } + + private: + bool DecodeToInterface(const char* dictionary_ptr, + size_t dictionary_size, + const string& encoding, + OutputStringInterface* target); + + VCDiffStreamingDecoder decoder_; + + // Make the copy constructor and assignment operator private + // so that they don't inadvertently get used. + explicit VCDiffDecoder(const VCDiffDecoder&); + void operator=(const VCDiffDecoder&); +}; + +}; // namespace open_vcdiff + +#endif // OPEN_VCDIFF_VCDECODER_H_ diff --git a/src/google/vcencoder.h b/src/google/vcencoder.h new file mode 100644 index 0000000..6faff76 --- /dev/null +++ b/src/google/vcencoder.h @@ -0,0 +1,287 @@ +// Copyright 2007 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef OPEN_VCDIFF_VCENCODER_H_
#define OPEN_VCDIFF_VCENCODER_H_

#include <cstddef>  // size_t
#include <vector>
#include "google/output_string.h"

namespace open_vcdiff {

class VCDiffEngine;
class VCDiffStreamingEncoderImpl;

// These flags are passed to the constructor of VCDiffStreamingEncoder
// to determine whether certain open-vcdiff format extensions
// (which are not part of the RFC 3284 draft standard for VCDIFF)
// are employed.
//
// Because these extensions are not part of the VCDIFF standard, if
// any of these flags except VCD_STANDARD_FORMAT is specified, then the caller
// must be certain that the receiver of the data will be using open-vcdiff
// to decode the delta file, or at least that the receiver can interpret
// these extensions.  The encoder will use an 'S' as the fourth character
// in the delta file to indicate that non-standard extensions are being used.
//
// The non-zero values are distinct bits (0x01, 0x02), so they can be combined
// with bitwise OR and stored in a VCDiffFormatExtensionFlags value.
//
enum VCDiffFormatExtensionFlagValues {
  // No extensions: the encoded format will conform to the RFC
  // draft standard for VCDIFF.
  VCD_STANDARD_FORMAT = 0x00,
  // If this flag is specified, then the encoder writes each delta file
  // window by interleaving instructions and sizes with their corresponding
  // addresses and data, rather than placing these elements
  // into three separate sections.  This facilitates providing partially
  // decoded results when only a portion of a delta file window is received
  // (e.g. when HTTP over TCP is used as the transmission protocol.)
  VCD_FORMAT_INTERLEAVED = 0x01,
  // If this flag is specified, then an Adler32 checksum
  // of the target window data is included in the delta window.
  VCD_FORMAT_CHECKSUM = 0x02
};

// Holds a combination of VCDiffFormatExtensionFlagValues.  A plain int is
// used rather than the enum type so that OR-ed flag values can be stored.
typedef int VCDiffFormatExtensionFlags;

// A HashedDictionary must be constructed from the dictionary data
// in order to use VCDiffStreamingEncoder.  If the same dictionary will
// be used to perform several encoding operations, then the caller should
// create the HashedDictionary once and cache it for reuse.  This object
// is thread-safe: the same const HashedDictionary can be used
// by several threads simultaneously, each with its own VCDiffStreamingEncoder.
//
// dictionary_contents is copied into the HashedDictionary, so the
// caller may free that string, if desired, after the constructor returns.
//
// NOTE(review): this class declares a destructor and holds a raw pointer but,
// unlike the encoder classes below, does not disable copying.  Whether a copy
// is safe depends on what the destructor does with engine_ -- confirm in the
// implementation file.
//
class HashedDictionary {
 public:
  HashedDictionary(const char* dictionary_contents,
                   size_t dictionary_size);
  ~HashedDictionary();

  // Init() must be called before using the HashedDictionary as an argument
  // to the VCDiffStreamingEncoder, or for any other purpose except
  // destruction.  It returns true if initialization succeeded, or false
  // if an error occurred, in which case the caller should destroy the object
  // without using it.
  bool Init();

  // Read-only accessor for the engine pointer stored in this object.
  const VCDiffEngine* engine() const { return engine_; }

 private:
  // Returned by engine().  How it is created and released is defined in the
  // implementation file, which is not visible from this header.
  const VCDiffEngine* engine_;
};

// The standard streaming interface to the VCDIFF (RFC 3284) encoder.
// "Streaming" in this context means that, even though the entire set of
// input data to be encoded may not be available at once, the encoder
// can produce partial output based on what is available.  Of course,
// the caller should try to maximize the sizes of the data chunks passed
// to the encoder.
class VCDiffStreamingEncoder {
 public:
  // The HashedDictionary object passed to the constructor must remain valid,
  // without being deleted, for the lifetime of the VCDiffStreamingEncoder
  // object.
  //
  // format_extensions allows certain open-vcdiff extensions to the VCDIFF
  // format to be included in the encoded output.  These extensions are not
  // part of the RFC 3284 draft standard, so specifying any extension flags
  // will make the output compatible only with open-vcdiff, or with other
  // VCDIFF implementations that accept these extensions.  See above for an
  // explanation of each possible flag value.
  //
  // *** look_for_target_matches:
  // The VCDIFF format allows COPY instruction addresses to reference data from
  // the source (dictionary), or from previously encoded target data.
  //
  // If look_for_target_matches is false, then the encoder will only
  // produce COPY instructions that reference source data from the dictionary,
  // never from previously encoded target data.  This will speed up the encoding
  // process, but the encoded data will not be as compact.
  //
  // If this value is true, then the encoder will produce COPY instructions
  // that reference either source data or target data.  A COPY instruction from
  // the previously encoded target data may even extend into the range of the
  // data being produced by that same COPY instruction; for example, if the
  // previously encoded target data is "LA", then a single COPY instruction of
  // length 10 can produce the additional target data "LALALALALA".
  //
  // There is a third type of COPY instruction that starts within
  // the source data and extends from the end of the source data
  // into the beginning of the target data.  This VCDIFF encoder will never
  // produce a COPY instruction of this third type (regardless of the value of
  // look_for_target_matches) because the cost of checking for matches
  // across the source-target boundary would not justify its benefits.
  //
  VCDiffStreamingEncoder(const HashedDictionary* dictionary,
                         VCDiffFormatExtensionFlags format_extensions,
                         bool look_for_target_matches);
  ~VCDiffStreamingEncoder();

  // The client should use these routines as follows:
  //    HashedDictionary hd(dictionary, dictionary_size);
  //    if (!hd.Init()) {
  //      HandleError();
  //      return;
  //    }
  //    string output_string;
  //    // The constructor takes a pointer to the dictionary, a set of format
  //    // extension flags, and the look_for_target_matches setting.
  //    VCDiffStreamingEncoder v(&hd, VCD_STANDARD_FORMAT, false);
  //    if (!v.StartEncoding(&output_string)) {
  //      HandleError();
  //      return;  // No need to call FinishEncoding()
  //    }
  //    Process(output_string.data(), output_string.size());
  //    output_string.clear();
  //    while (get data_buf) {
  //      if (!v.EncodeChunk(data_buf, data_len, &output_string)) {
  //        HandleError();
  //        return;  // No need to call FinishEncoding()
  //      }
  //      // The encoding is appended to output_string at each call,
  //      // so clear output_string once its contents have been processed.
  //      Process(output_string.data(), output_string.size());
  //      output_string.clear();
  //    }
  //    if (!v.FinishEncoding(&output_string)) {
  //      HandleError();
  //      return;
  //    }
  //    Process(output_string.data(), output_string.size());
  //    output_string.clear();
  //
  // I.e., the allowed pattern of calls is
  //    StartEncoding EncodeChunk* FinishEncoding
  //
  // The size of the encoded output depends on the sizes of the chunks
  // passed in (i.e. the chunking boundary affects compression).
  // However the decoded output is independent of chunk boundaries.

  // Sets up the data structures for encoding.
  // Writes a VCDIFF delta file header (as defined in RFC section 4.1)
  // to *output_string.
  //
  // Note: we *append*, so the old contents of *output_string stick around.
  // This convention differs from the non-streaming Encode/Decode
  // interfaces in VCDiffEncoder.
  //
  // If an error occurs, this function returns false; otherwise it returns true.
  // If this function returns false, the caller does not need to call
  // FinishEncoding or to do any cleanup except destroying the
  // VCDiffStreamingEncoder object.
  //
  // The template wraps "output" in an OutputString<OutputType> and forwards
  // to StartEncodingToInterface().
  template<class OutputType>
  bool StartEncoding(OutputType* output) {
    OutputString<OutputType> output_string(output);
    return StartEncodingToInterface(&output_string);
  }

  bool StartEncodingToInterface(OutputStringInterface* output_string);

  // Appends compressed encoding for "data" (one complete VCDIFF delta window)
  // to *output_string.
  // If an error occurs (for example, if StartEncoding was not called
  // earlier or StartEncoding returned false), this function returns false;
  // otherwise it returns true.  The caller does not need to call FinishEncoding
  // or do any cleanup except destroying the VCDiffStreamingEncoder
  // if this function returns false.
  //
  // The template wraps "output" in an OutputString<OutputType> and forwards
  // to EncodeChunkToInterface().
  template<class OutputType>
  bool EncodeChunk(const char* data, size_t len, OutputType* output) {
    OutputString<OutputType> output_string(output);
    return EncodeChunkToInterface(data, len, &output_string);
  }

  bool EncodeChunkToInterface(const char* data, size_t len,
                              OutputStringInterface* output_string);

  // Finishes encoding and appends any leftover encoded data to *output_string.
  // If an error occurs (for example, if StartEncoding was not called
  // earlier or StartEncoding returned false), this function returns false;
  // otherwise it returns true.  The caller does not need to
  // do any cleanup except destroying the VCDiffStreamingEncoder
  // if this function returns false.
  //
  // The template wraps "output" in an OutputString<OutputType> and forwards
  // to FinishEncodingToInterface().
  template<class OutputType>
  bool FinishEncoding(OutputType* output) {
    OutputString<OutputType> output_string(output);
    return FinishEncodingToInterface(&output_string);
  }

  bool FinishEncodingToInterface(OutputStringInterface* output_string);

  // Replaces the contents of match_counts with a vector of integers,
  // one for each possible match length.  The value of match_counts[n]
  // is equal to the number of matches of length n found so far
  // for this VCDiffStreamingEncoder object.
  void GetMatchCounts(std::vector<int>* match_counts) const;

 private:
  // Pointer to the implementation object; the pointer itself is const, so it
  // cannot be reseated after construction.
  VCDiffStreamingEncoderImpl* const impl_;

  // Make the copy constructor and assignment operator private
  // so that they don't inadvertently get used.
  VCDiffStreamingEncoder(const VCDiffStreamingEncoder&);  // NOLINT
  void operator=(const VCDiffStreamingEncoder&);
};

// A simpler (non-streaming) interface to the VCDIFF encoder that can be used
// if the entire target data string is available.
//
class VCDiffEncoder {
 public:
  // Builds the embedded HashedDictionary from the given dictionary bytes.
  // The streaming encoder pointer starts out NULL and the format flags
  // default to VCD_STANDARD_FORMAT.
  VCDiffEncoder(const char* dictionary_contents, size_t dictionary_size)
      : dictionary_(dictionary_contents, dictionary_size),
        encoder_(NULL),
        flags_(VCD_STANDARD_FORMAT) { }

  // Releases encoder_ (deleting a NULL pointer is a no-op).
  ~VCDiffEncoder() {
    delete encoder_;
  }

  // By default, VCDiffEncoder uses standard VCDIFF format.  This function
  // can be used before calling Encode(), to specify that interleaved format
  // and/or checksum format should be used.
  void SetFormatFlags(VCDiffFormatExtensionFlags flags) { flags_ = flags; }

  // Replaces old contents of output_string with the encoded form of
  // target_data.
  //
  // The template wraps "output" in an OutputString<OutputType> and forwards
  // to EncodeToInterface(); the bool result is whatever that call returns.
  template<class OutputType>
  bool Encode(const char* target_data,
              size_t target_len,
              OutputType* output) {
    OutputString<OutputType> output_string(output);
    return EncodeToInterface(target_data, target_len, &output_string);
  }

 private:
  // Always look for matches in both source and target.  This default value
  // can be changed in this code if desired.
  static const bool look_for_target_matches_ = true;

  // Non-template worker behind Encode().
  bool EncodeToInterface(const char* target_data,
                         size_t target_len,
                         OutputStringInterface* output_string);

  // Dictionary built from the constructor arguments.
  HashedDictionary dictionary_;
  // Deleted in the destructor; NULL until assigned.
  // NOTE(review): presumably created on demand by EncodeToInterface() --
  // confirm in the implementation file.
  VCDiffStreamingEncoder* encoder_;
  // Format extension flags set through SetFormatFlags().
  VCDiffFormatExtensionFlags flags_;

  // Make the copy constructor and assignment operator private
  // so that they don't inadvertently get used.
  VCDiffEncoder(const VCDiffEncoder&);  // NOLINT
  void operator=(const VCDiffEncoder&);
};

}  // namespace open_vcdiff

#endif  // OPEN_VCDIFF_VCENCODER_H_
diff --git a/src/gtest/README b/src/gtest/README
new file mode 100644
index 0000000..e684823
--- /dev/null
+++ b/src/gtest/README
@@ -0,0 +1,157 @@
+*** NOTE: The files in the open-vcdiff/src/gtest directory are only a subset of
+*** the full Google Test package. If you want to use Google Test with a
+*** project other than open-vcdiff, please do not use this bundled copy.
+*** Instead, please download the latest version of Google Test from:
+*** http://code.google.com/p/googletest/
+
+Google C++ Testing Framework
+============================
+http://code.google.com/p/googletest/
+
+Overview
+--------
+Google's framework for writing C++ tests on a variety of platforms (Linux, Mac
+OS X, Windows, Windows CE, and Symbian). Based on the xUnit architecture.
+Supports automatic test discovery, a rich set of assertions, user-defined
+assertions, death tests, fatal and non-fatal failures, various options for
+running the tests, and XML test report generation.
+
+Please see the project page above for more information as well as mailing lists
+for questions, discussions, and development. There is also an IRC channel on
+OFTC (irc.oftc.net) #gtest available. Please join us!
+
+Requirements
+------------
+Google Test is designed to have fairly minimal requirements to build and use
+with your projects, but there are some.
Currently, the only Operating System +(OS) on which Google Test is known to build properly is Linux, but we are +actively working on Windows and Mac support as well. The source code itself is +already portable across many other platforms, but we are still developing +robust build systems for each. + +### Linux Requirements ### +These are the base requirements to build and use Google Test from a source +package (as described below): + * GNU-compatible Make or "gmake" + * POSIX-standard shell + * POSIX(-2) Regular Expressions (regex.h) + * A C++98 standards compliant compiler + +Furthermore, if you are building Google Test from a VCS Checkout (also +described below), there are further requirements: + * Automake version 1.9 or newer + * Autoconf version 2.59 or newer + * Libtool / Libtoolize + * Python version 2.4 or newer + +### Windows Requirements ### + * Microsoft Visual Studio 7.1 or newer + +### Cygwin Requirements ### + * Cygwin 1.5.25-14 or newer + +### Mac OS X Requirements ### + * Mac OS X 10.4 Tiger or newer + +Getting the Source +------------------ +There are two primary ways of getting Google Test's source code: you can +download a source release in your preferred archive format, or directly check +out the source from a Version Control System (VCS, we use Google Code's +Subversion hosting). The VCS checkout requires a few extra steps and some extra +software packages on your system, but lets you track development, and make +patches to contribute much more easily, so we highly encourage it. + +### VCS Checkout: ### +The first step is to select whether you want to check out the main line of +development on Google Test, or one of the released branches. The former will be +much more active and have the latest features, but the latter provides much +more stability and predictability. 
Choose whichever fits your needs best, and +proceed with the following Subversion commands: + + $ svn checkout http://googletest.googlecode.com/svn/trunk/ gtest-svn + +or for a release version X.Y.*'s branch: + + $ svn checkout http://googletest.googlecode.com/svn/branches/release-X.Y/ gtest-X.Y-svn + +Next you will need to prepare the GNU Autotools build system, if you +are using Linux, Mac OS X, or Cygwin. Enter the target directory of +the checkout command you used ('gtest-svn' or 'gtest-X.Y-svn' above) +and proceed with the following commands: + + $ aclocal-1.9 # Where "1.9" must match the following automake command. + $ libtoolize -c # Use "glibtoolize -c" instead on Mac OS X. + $ autoheader + $ automake-1.9 -ac # See Automake version requirements above. + $ autoconf + +While this is a bit complicated, it will most often be automatically re-run by +your "make" invocations, so in practice you shouldn't need to worry too much. +Once you have completed these steps, you are ready to build the library. + +### Source Package: ### +Google Test is also released in source packages which can be downloaded from +its Google Code download page[1]. Several different archive formats are +provided, but the only difference is the tools used to manipulate them, and the +size of the resulting file. Download whichever you are most comfortable with. + + [1] Google Test Downloads: http://code.google.com/p/googletest/downloads/list + +Once downloaded expand the archive using whichever tools you prefer for that +type. This will always result in a new directory with the name "gtest-X.Y.Z" +which contains all of the source code. Here are some examples in Linux: + + $ tar -xvzf gtest-X.Y.Z.tar.gz + $ tar -xvjf gtest-X.Y.Z.tar.bz2 + $ unzip gtest-X.Y.Z.zip + +Building the Source +------------------- + +### Linux, Mac OS X, and Cygwin ### +There are two primary options for building the source at this point: build it +inside the source code tree, or in a separate directory. 
We recommend building +in a separate directory as that tends to produce both more consistent results +and be easier to clean up should anything go wrong, but both patterns are +supported. The only hard restriction is that while the build directory can be +a subdirectory of the source directory, the opposite is not possible and will +result in errors. Once you have selected where you wish to build Google Test, +create the directory if necessary, and enter it. The following steps apply for +either approach by simply substituting the shell variable SRCDIR with "." for +building inside the source directory, and the relative path to the source +directory otherwise. + + $ ${SRCDIR}/configure # Standard GNU configure script, --help for more info + $ make # Standard makefile following GNU conventions + $ make check # Builds and runs all tests - all should pass + +Other programs will only be able to use Google Test's functionality if you +install it in a location which they can access; on Linux this is typically +under '/usr/local'. The following command will install all of the Google Test +libraries, public headers, and utilities necessary for other programs and +libraries to leverage it: + + $ sudo make install # Not necessary, but allows use by other programs + +TODO(chandlerc@google.com): This section needs to be expanded when the +'gtest-config' script is finished and Autoconf macros are provided (or not +provided) in order to properly reflect the process for other programs to +locate, include, and link against Google Test. + +Finally, should you need to remove Google Test from your system after having +installed it, run the following command, and it will back out its changes. +However, note carefully that you must run this command on the *same* Google +Test build that you ran the install from, or the results are not predictable.
+If you install Google Test on your system, and are working from a VCS checkout, +make sure you run this *before* updating your checkout of the source in order +to uninstall the same version which you installed. + + $ sudo make uninstall # Must be run against the exact same build as "install" + +### Windows ### +Open the gtest.sln file in the msvc/ folder using Visual Studio, and +you are ready to build Google Test the same way you build any Visual +Studio project. + +Happy testing! diff --git a/src/gtest/gtest-death-test.cc b/src/gtest/gtest-death-test.cc new file mode 100644 index 0000000..919fb53 --- /dev/null +++ b/src/gtest/gtest-death-test.cc @@ -0,0 +1,751 @@ +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) +// +// This file implements death tests. + +#include <gtest/gtest-death-test.h> +#include <gtest/internal/gtest-port.h> + +#include <errno.h> +#include <limits.h> +#include <stdarg.h> + +#include <gtest/gtest-message.h> +#include <gtest/internal/gtest-string.h> + +// Indicates that this translation unit is part of Google Test's +// implementation. It must come before gtest-internal-inl.h is +// included, or there will be a compiler error. This trick is to +// prevent a user from accidentally including gtest-internal-inl.h in +// his code. +#define GTEST_IMPLEMENTATION +#include "src/gtest-internal-inl.h" +#undef GTEST_IMPLEMENTATION + +namespace testing { + +// Constants. + +// The default death test style. 
static const char kDefaultDeathTestStyle[] = "fast";

// Defines the death_test_style flag.  Its default is produced by
// internal::StringFromGTestEnv("death_test_style", kDefaultDeathTestStyle);
// NOTE(review): presumably that helper reads an environment override and
// falls back to the second argument -- confirm where it is defined.
GTEST_DEFINE_string(
    death_test_style,
    internal::StringFromGTestEnv("death_test_style", kDefaultDeathTestStyle),
    "Indicates how to run a death test in a forked child process: "
    "\"threadsafe\" (child process re-executes the test binary "
    "from the beginning, running only the specific death test) or "
    "\"fast\" (child process runs the death test immediately "
    "after forking).");

namespace internal {
// Defines the internal_run_death_test flag, whose default is the empty
// string.  The help text below describes the colon-separated format.
GTEST_DEFINE_string(
    internal_run_death_test, "",
    "Indicates the file, line number, temporal index of "
    "the single death test to run, and a file descriptor to "
    "which a success code may be sent, all separated by "
    "colons. This flag is specified if and only if the current "
    "process is a sub-process launched for running a thread-safe "
    "death test. FOR INTERNAL USE ONLY.");
}  // namespace internal

#ifdef GTEST_HAS_DEATH_TEST

// ExitedWithCode constructor.  Stores the exit code that operator() will
// compare against.
ExitedWithCode::ExitedWithCode(int exit_code) : exit_code_(exit_code) {
}

// ExitedWithCode function-call operator.  Returns true iff exit_status (in
// the format produced by wait(2)) describes a normal exit whose exit code
// equals the one given to the constructor.
bool ExitedWithCode::operator()(int exit_status) const {
  return WIFEXITED(exit_status) && WEXITSTATUS(exit_status) == exit_code_;
}

// KilledBySignal constructor.  Stores the signal number that operator() will
// compare against.
KilledBySignal::KilledBySignal(int signum) : signum_(signum) {
}

// KilledBySignal function-call operator.  Returns true iff exit_status (in
// the format produced by wait(2)) describes termination by the signal given
// to the constructor.
bool KilledBySignal::operator()(int exit_status) const {
  return WIFSIGNALED(exit_status) && WTERMSIG(exit_status) == signum_;
}

namespace internal {

// Utilities needed for death tests.

// Generates a textual description of a given exit code, in the format
// specified by wait(2).
+static String ExitSummary(int exit_code) { + Message m; + if (WIFEXITED(exit_code)) { + m << "Exited with exit status " << WEXITSTATUS(exit_code); + } else if (WIFSIGNALED(exit_code)) { + m << "Terminated by signal " << WTERMSIG(exit_code); + } +#ifdef WCOREDUMP + if (WCOREDUMP(exit_code)) { + m << " (core dumped)"; + } +#endif + return m.GetString(); +} + +// Returns true if exit_status describes a process that was terminated +// by a signal, or exited normally with a nonzero exit code. +bool ExitedUnsuccessfully(int exit_status) { + return !ExitedWithCode(0)(exit_status); +} + +// Generates a textual failure message when a death test finds more than +// one thread running, or cannot determine the number of threads, prior +// to executing the given statement. It is the responsibility of the +// caller not to pass a thread_count of 1. +static String DeathTestThreadWarning(size_t thread_count) { + Message msg; + msg << "Death tests use fork(), which is unsafe particularly" + << " in a threaded context. For this test, " << GTEST_NAME << " "; + if (thread_count == 0) + msg << "couldn't detect the number of threads."; + else + msg << "detected " << thread_count << " threads."; + return msg.GetString(); +} + +// Static string containing a description of the outcome of the +// last death test. +static String last_death_test_message; + +// Flag characters for reporting a death test that did not die. +static const char kDeathTestLived = 'L'; +static const char kDeathTestReturned = 'R'; +static const char kDeathTestInternalError = 'I'; + +// An enumeration describing all of the possible ways that a death test +// can conclude. DIED means that the process died while executing the +// test code; LIVED means that process lived beyond the end of the test +// code; and RETURNED means that the test statement attempted a "return," +// which is not allowed. IN_PROGRESS means the test has not yet +// concluded. 
+enum DeathTestOutcome { IN_PROGRESS, DIED, LIVED, RETURNED }; + +// Routine for aborting the program which is safe to call from an +// exec-style death test child process, in which case the the error +// message is propagated back to the parent process. Otherwise, the +// message is simply printed to stderr. In either case, the program +// then exits with status 1. +void DeathTestAbort(const char* format, ...) { + // This function may be called from a threadsafe-style death test + // child process, which operates on a very small stack. Use the + // heap for any additional non-miniscule memory requirements. + const InternalRunDeathTestFlag* const flag = + GetUnitTestImpl()->internal_run_death_test_flag(); + va_list args; + va_start(args, format); + + if (flag != NULL) { + FILE* parent = fdopen(flag->status_fd, "w"); + fputc(kDeathTestInternalError, parent); + vfprintf(parent, format, args); + fclose(parent); + va_end(args); + _exit(1); + } else { + vfprintf(stderr, format, args); + va_end(args); + abort(); + } +} + +// A replacement for CHECK that calls DeathTestAbort if the assertion +// fails. +#define GTEST_DEATH_TEST_CHECK(expression) \ + do { \ + if (!(expression)) { \ + DeathTestAbort("CHECK failed: File %s, line %d: %s", \ + __FILE__, __LINE__, #expression); \ + } \ + } while (0) + +// This macro is similar to GTEST_DEATH_TEST_CHECK, but it is meant for +// evaluating any system call that fulfills two conditions: it must return +// -1 on failure, and set errno to EINTR when it is interrupted and +// should be tried again. The macro expands to a loop that repeatedly +// evaluates the expression as long as it evaluates to -1 and sets +// errno to EINTR. If the expression evaluates to -1 but errno is +// something other than EINTR, DeathTestAbort is called. 
#define GTEST_DEATH_TEST_CHECK_SYSCALL(expression) \
  do { \
    int retval; \
    do { \
      retval = (expression); \
    } while (retval == -1 && errno == EINTR); \
    if (retval == -1) { \
      DeathTestAbort("CHECK failed: File %s, line %d: %s != -1", \
                     __FILE__, __LINE__, #expression); \
    } \
  } while (0)

// Death test constructor.  Calls DeathTestAbort if there is no current
// test, i.e. if a death test is used outside of a TEST or TEST_F construct.
// NOTE(review): an earlier version of this comment said the constructor
// increments the running death test count for the current test; nothing in
// this body does so.
DeathTest::DeathTest() {
  TestInfo* const info = GetUnitTestImpl()->current_test_info();
  if (info == NULL) {
    DeathTestAbort("Cannot run a death test outside of a TEST or "
                   "TEST_F construct");
  }
}

// Creates and returns a death test by dispatching to the current
// death test factory.  All arguments are forwarded unchanged; the bool
// result is whatever the factory's Create() returns.
bool DeathTest::Create(const char* statement, const RE* regex,
                       const char* file, int line, DeathTest** test) {
  return GetUnitTestImpl()->death_test_factory()->Create(
      statement, regex, file, line, test);
}

// Returns the message describing the outcome of the last death test, as
// stored in last_death_test_message.  The returned pointer refers to that
// static string's buffer.
const char* DeathTest::LastMessage() {
  return last_death_test_message.c_str();
}

// ForkingDeathTest provides implementations for most of the abstract
// methods of the DeathTest interface.  Only the AssumeRole method is
// left undefined.
class ForkingDeathTest : public DeathTest {
 public:
  ForkingDeathTest(const char* statement, const RE* regex);

  // All of these virtual functions are inherited from DeathTest.
  virtual int Wait();
  virtual bool Passed(bool status_ok);
  virtual void Abort(AbortReason reason);

 protected:
  // Setters for the private state below; they are protected, so only
  // derived classes can call them.
  void set_forked(bool forked) { forked_ = forked; }
  void set_child_pid(pid_t child_pid) { child_pid_ = child_pid; }
  void set_read_fd(int fd) { read_fd_ = fd; }
  void set_write_fd(int fd) { write_fd_ = fd; }

 private:
  // The textual content of the code this object is testing.
  const char* const statement_;
  // The regular expression which test output must match.
  const RE* const regex_;
  // True if the death test successfully forked.
  bool forked_;
  // PID of child process during death test; 0 in the child process itself.
  pid_t child_pid_;
  // File descriptors for communicating the death test's status byte.
  int read_fd_;   // Always -1 in the child process.
  int write_fd_;  // Always -1 in the parent process.
  // The exit status of the child process.
  int status_;
  // How the death test concluded.
  DeathTestOutcome outcome_;
};

// Constructs a ForkingDeathTest.  The statement and regex pointers are
// stored as given (not copied); every other member starts in its "not yet
// forked" state: forked_ false, pid/descriptors/status -1, outcome
// IN_PROGRESS.
ForkingDeathTest::ForkingDeathTest(const char* statement, const RE* regex)
    : DeathTest(),
      statement_(statement),
      regex_(regex),
      forked_(false),
      child_pid_(-1),
      read_fd_(-1),
      write_fd_(-1),
      status_(-1),
      outcome_(IN_PROGRESS) {
}

// Reads an internal failure message from a file descriptor, then calls
// LOG(FATAL) with that message.  Called from a death test parent process
// to read a failure message from the death test child process.
static void FailFromInternalError(int fd) {
  Message error;
  char buffer[256];
  ssize_t num_read;

  // Read in chunks of at most 255 bytes so that there is always room for
  // the terminating NUL.  The outer loop restarts the read when it was
  // interrupted by a signal (EINTR).
  do {
    while ((num_read = read(fd, buffer, 255)) > 0) {
      buffer[num_read] = '\0';
      error << buffer;
    }
  } while (num_read == -1 && errno == EINTR);

  // TODO(smcafee): Maybe just FAIL the test instead?
  if (num_read == 0) {
    // End of file: the whole message was received.
    GTEST_LOG(FATAL, error);
  } else {
    // The read itself failed with an error other than EINTR.
    GTEST_LOG(FATAL,
              Message() << "Error while reading death test internal: "
              << strerror(errno) << " [" << errno << "]");
  }
}

// Waits for the child in a death test to exit, returning its exit
// status, or 0 if no child process exists.  As a side effect, sets the
// outcome data member.
int ForkingDeathTest::Wait() {
  if (!forked_)
    return 0;

  // The read() here blocks until data is available (signifying the
  // failure of the death test) or until the pipe is closed (signifying
  // its success), so it's okay to call this in the parent before
  // the child process has exited.
  char flag;
  ssize_t bytes_read;

  // Retry the one-byte read if it is interrupted by a signal.
  do {
    bytes_read = read(read_fd_, &flag, 1);
  } while (bytes_read == -1 && errno == EINTR);

  if (bytes_read == 0) {
    // The pipe was closed without a status byte being written.
    outcome_ = DIED;
  } else if (bytes_read == 1) {
    // The child wrote a status byte; decode it.
    switch (flag) {
      case kDeathTestReturned:
        outcome_ = RETURNED;
        break;
      case kDeathTestLived:
        outcome_ = LIVED;
        break;
      case kDeathTestInternalError:
        FailFromInternalError(read_fd_);  // Does not return.
        break;
      default:
        GTEST_LOG(FATAL,
                  Message() << "Death test child process reported unexpected "
                  << "status byte (" << static_cast<unsigned int>(flag)
                  << ")");
    }
  } else {
    GTEST_LOG(FATAL,
              Message() << "Read from death test child process failed: "
              << strerror(errno));
  }

  // Close our end of the pipe first, then collect the child's exit status.
  GTEST_DEATH_TEST_CHECK_SYSCALL(close(read_fd_));
  GTEST_DEATH_TEST_CHECK_SYSCALL(waitpid(child_pid_, &status_, 0));
  return status_;
}

// Assesses the success or failure of a death test, using both private
// members which have previously been set, and one argument:
//
// Private data members:
//   outcome:  an enumeration describing how the death test
//             concluded: DIED, LIVED, or RETURNED.  The death test fails
//             in the latter two cases
//   status:   the exit status of the child process, in the format
//             specified by wait(2)
//   regex:    a regular expression object to be applied to
//             the test's captured standard error output; the death test
//             fails if it does not match
//
// Argument:
//   status_ok: true if exit_status is acceptable in the context of
//              this particular death test, which fails if it is false
//
// Returns true iff all of the above conditions are met.  Otherwise, the
// first failing condition, in the order given above, is the one that is
// reported.  Also sets the static variable last_death_test_message.
+bool ForkingDeathTest::Passed(bool status_ok) { + if (!forked_) + return false; + +#if GTEST_HAS_GLOBAL_STRING + const ::string error_message = GetCapturedStderr(); +#else + const ::std::string error_message = GetCapturedStderr(); +#endif // GTEST_HAS_GLOBAL_STRING + + bool success = false; + Message buffer; + + buffer << "Death test: " << statement_ << "\n"; + switch (outcome_) { + case LIVED: + buffer << " Result: failed to die.\n" + << " Error msg: " << error_message; + break; + case RETURNED: + buffer << " Result: illegal return in test statement.\n" + << " Error msg: " << error_message; + break; + case DIED: + if (status_ok) { + if (RE::PartialMatch(error_message, *regex_)) { + success = true; + } else { + buffer << " Result: died but not with expected error.\n" + << " Expected: " << regex_->pattern() << "\n" + << "Actual msg: " << error_message; + } + } else { + buffer << " Result: died but not with expected exit code:\n" + << " " << ExitSummary(status_) << "\n"; + } + break; + default: + GTEST_LOG(FATAL, + "DeathTest::Passed somehow called before conclusion of test"); + } + + last_death_test_message = buffer.GetString(); + return success; +} + +// Signals that the death test code which should have exited, didn't. +// Should be called only in a death test child process. +// Writes a status byte to the child's status file desriptor, then +// calls _exit(1). +void ForkingDeathTest::Abort(AbortReason reason) { + // The parent process considers the death test to be a failure if + // it finds any data in our pipe. So, here we write a single flag byte + // to the pipe, then exit. + const char flag = + reason == TEST_DID_NOT_DIE ? kDeathTestLived : kDeathTestReturned; + GTEST_DEATH_TEST_CHECK_SYSCALL(write(write_fd_, &flag, 1)); + GTEST_DEATH_TEST_CHECK_SYSCALL(close(write_fd_)); + _exit(1); // Exits w/o any normal exit hooks (we were supposed to crash) +} + +// A concrete death test class that forks, then immediately runs the test +// in the child process. 
class NoExecDeathTest : public ForkingDeathTest {
 public:
  NoExecDeathTest(const char* statement, const RE* regex) :
      ForkingDeathTest(statement, regex) { }
  // Forks; returns EXECUTE_TEST in the child and OVERSEE_TEST in the parent.
  virtual TestRole AssumeRole();
};

// The AssumeRole process for a fork-and-run death test.  It implements a
// straightforward fork, with a simple pipe to transmit the status byte.
DeathTest::TestRole NoExecDeathTest::AssumeRole() {
  // Forking with more than one thread running only produces a warning
  // here; the death test still proceeds.
  const size_t thread_count = GetThreadCount();
  if (thread_count != 1) {
    GTEST_LOG(WARNING, DeathTestThreadWarning(thread_count));
  }

  // pipe_fd[0] is the read end (kept by the parent), pipe_fd[1] the write
  // end (kept by the child).
  int pipe_fd[2];
  GTEST_DEATH_TEST_CHECK(pipe(pipe_fd) != -1);

  last_death_test_message = "";
  CaptureStderr();
  // When we fork the process below, the log file buffers are copied, but the
  // file descriptors are shared.  We flush all log files here so that closing
  // the file descriptors in the child process doesn't throw off the
  // synchronization between descriptors and buffers in the parent process.
  // This is as close to the fork as possible to avoid a race condition in case
  // there are multiple threads running before the death test, and another
  // thread writes to the log file.
  FlushInfoLog();

  const pid_t child_pid = fork();
  GTEST_DEATH_TEST_CHECK(child_pid != -1);
  set_child_pid(child_pid);
  if (child_pid == 0) {
    // Child: drop the read end and remember where to report status.
    GTEST_DEATH_TEST_CHECK_SYSCALL(close(pipe_fd[0]));
    set_write_fd(pipe_fd[1]);
    // Redirects all logging to stderr in the child process to prevent
    // concurrent writes to the log files.  We capture stderr in the parent
    // process and append the child process' output to a log.
    LogToStderr();
    return EXECUTE_TEST;
  } else {
    // Parent: drop the write end so that read() sees end-of-file once the
    // child's copy is closed.
    GTEST_DEATH_TEST_CHECK_SYSCALL(close(pipe_fd[1]));
    set_read_fd(pipe_fd[0]);
    set_forked(true);
    return OVERSEE_TEST;
  }
}

// A concrete death test class that forks and re-executes the main
// program from the beginning, with command-line flags set that cause
// only this specific death test to be run.
+class ExecDeathTest : public ForkingDeathTest { + public: + ExecDeathTest(const char* statement, const RE* regex, + const char* file, int line) : + ForkingDeathTest(statement, regex), file_(file), line_(line) { } + virtual TestRole AssumeRole(); + private: + // The name of the file in which the death test is located. + const char* const file_; + // The line number on which the death test is located. + const int line_; +}; + +// Utility class for accumulating command-line arguments. +class Arguments { + public: + Arguments() { + args_.push_back(NULL); + } + ~Arguments() { + for (std::vector<char*>::iterator i = args_.begin(); + i + 1 != args_.end(); + ++i) { + free(*i); + } + } + void AddArgument(const char* argument) { + args_.insert(args_.end() - 1, strdup(argument)); + } + + template <typename Str> + void AddArguments(const ::std::vector<Str>& arguments) { + for (typename ::std::vector<Str>::const_iterator i = arguments.begin(); + i != arguments.end(); + ++i) { + args_.insert(args_.end() - 1, strdup(i->c_str())); + } + } + char* const* Argv() { + return &args_[0]; + } + private: + std::vector<char*> args_; +}; + +// A struct that encompasses the arguments to the child process of a +// threadsafe-style death test process. +struct ExecDeathTestArgs { + char* const* argv; // Command-line arguments for the child's call to exec + int close_fd; // File descriptor to close; the read end of a pipe +}; + +// The main function for a threadsafe-style death test child process. +static int ExecDeathTestChildMain(void* child_arg) { + ExecDeathTestArgs* const args = static_cast<ExecDeathTestArgs*>(child_arg); + GTEST_DEATH_TEST_CHECK_SYSCALL(close(args->close_fd)); + execve(args->argv[0], args->argv, environ); + DeathTestAbort("execve failed: %s", strerror(errno)); + return EXIT_FAILURE; +} + +// Two utility routines that together determine the direction the stack +// grows. 
// This could be accomplished more elegantly by a single recursive
// function, but we want to guard against the unlikely possibility of
// a smart compiler optimizing the recursion away.

// Returns true if a local variable of this function lives at a lower
// address than ptr.
static bool StackLowerThanAddress(const void* ptr) {
  int dummy;
  return &dummy < ptr;
}

// Returns true if the callee's local (one frame deeper) sits below this
// function's local, i.e. the stack grows towards lower addresses.
static bool StackGrowsDown() {
  int dummy;
  return StackLowerThanAddress(&dummy);
}

// A threadsafe implementation of fork(2) for threadsafe-style death tests
// that uses clone(2).  It dies with an error message if anything goes
// wrong.
//
// argv is passed to the child's exec call; close_fd is closed in the child
// before exec.  Returns the PID of the child.
static pid_t ExecDeathTestFork(char* const* argv, int close_fd) {
  static const bool stack_grows_down = StackGrowsDown();
  // The child runs on a freshly mapped, one-page stack.
  const size_t stack_size = getpagesize();
  void* const stack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE,
                           MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
  GTEST_DEATH_TEST_CHECK(stack != MAP_FAILED);
  // clone() wants the initial stack pointer: the high end of the mapping
  // when the stack grows down, the low end otherwise.
  void* const stack_top =
      static_cast<char*>(stack) + (stack_grows_down ? stack_size : 0);
  ExecDeathTestArgs args = { argv, close_fd };
  const pid_t child_pid = clone(&ExecDeathTestChildMain, stack_top,
                                SIGCHLD, &args);
  GTEST_DEATH_TEST_CHECK(child_pid != -1);
  // NOTE(review): unmapping right after clone() presumably relies on the
  // child having its own copy of the address space (no CLONE_VM is passed)
  // -- confirm.
  GTEST_DEATH_TEST_CHECK(munmap(stack, stack_size) != -1);
  return child_pid;
}

// The AssumeRole process for a fork-and-exec death test.  It re-executes the
// main program from the beginning, setting the --gtest_filter
// and --gtest_internal_run_death_test flags to cause only the current
// death test to be re-run.
DeathTest::TestRole ExecDeathTest::AssumeRole() {
  const UnitTestImpl* const impl = GetUnitTestImpl();
  const InternalRunDeathTestFlag* const flag =
      impl->internal_run_death_test_flag();
  const TestInfo* const info = impl->current_test_info();
  const int death_test_index = info->result()->death_test_count();

  // A non-NULL flag means this process is the re-executed child: report
  // status on the descriptor named by the flag and run the test statement.
  if (flag != NULL) {
    set_write_fd(flag->status_fd);
    return EXECUTE_TEST;
  }

  int pipe_fd[2];
  GTEST_DEATH_TEST_CHECK(pipe(pipe_fd) != -1);
  // Clear the close-on-exec flag on the write end of the pipe, lest
  // it be closed when the child process does an exec:
  GTEST_DEATH_TEST_CHECK(fcntl(pipe_fd[1], F_SETFD, 0) != -1);

  // Restrict the child to the current test ...
  const String filter_flag =
      String::Format("--%s%s=%s.%s",
                     GTEST_FLAG_PREFIX, kFilterFlag,
                     info->test_case_name(), info->name());
  // ... and tell it which death test to run, encoded as
  // file:line:index:write_fd.
  const String internal_flag =
      String::Format("--%s%s=%s:%d:%d:%d",
                     GTEST_FLAG_PREFIX, kInternalRunDeathTestFlag, file_, line_,
                     death_test_index, pipe_fd[1]);
  // The child's command line is the original arguments followed by the
  // three extra flags.
  Arguments args;
  args.AddArguments(GetArgvs());
  args.AddArgument("--logtostderr");
  args.AddArgument(filter_flag.c_str());
  args.AddArgument(internal_flag.c_str());

  last_death_test_message = "";

  CaptureStderr();
  // See the comment in NoExecDeathTest::AssumeRole for why the next line
  // is necessary.
  FlushInfoLog();

  // The child closes the read end (pipe_fd[0]); the parent closes the
  // write end and keeps the read end for Wait().
  const pid_t child_pid = ExecDeathTestFork(args.Argv(), pipe_fd[0]);
  GTEST_DEATH_TEST_CHECK_SYSCALL(close(pipe_fd[1]));
  set_child_pid(child_pid);
  set_read_fd(pipe_fd[0]);
  set_forked(true);
  return OVERSEE_TEST;
}

// Creates a concrete DeathTest-derived class that depends on the
// --gtest_death_test_style flag, and sets the pointer pointed to
// by the "test" argument to its address.  If the test should be
// skipped, sets that pointer to NULL.  Returns true, unless the
// flag is set to an invalid value.
+bool DefaultDeathTestFactory::Create(const char* statement, const RE* regex, + const char* file, int line, + DeathTest** test) { + UnitTestImpl* const impl = GetUnitTestImpl(); + const InternalRunDeathTestFlag* const flag = + impl->internal_run_death_test_flag(); + const int death_test_index = impl->current_test_info() + ->increment_death_test_count(); + + if (flag != NULL) { + if (death_test_index > flag->index) { + last_death_test_message = String::Format( + "Death test count (%d) somehow exceeded expected maximum (%d)", + death_test_index, flag->index); + return false; + } + + if (!(flag->file == file && flag->line == line && + flag->index == death_test_index)) { + *test = NULL; + return true; + } + } + + if (GTEST_FLAG(death_test_style) == "threadsafe") { + *test = new ExecDeathTest(statement, regex, file, line); + } else if (GTEST_FLAG(death_test_style) == "fast") { + *test = new NoExecDeathTest(statement, regex); + } else { + last_death_test_message = String::Format( + "Unknown death test style \"%s\" encountered", + GTEST_FLAG(death_test_style).c_str()); + return false; + } + + return true; +} + +// Splits a given string on a given delimiter, populating a given +// vector with the fields. GTEST_HAS_DEATH_TEST implies that we have +// ::std::string, so we can use it here. +static void SplitString(const ::std::string& str, char delimiter, + ::std::vector< ::std::string>* dest) { + ::std::vector< ::std::string> parsed; + ::std::string::size_type pos = 0; + while (true) { + const ::std::string::size_type colon = str.find(delimiter, pos); + if (colon == ::std::string::npos) { + parsed.push_back(str.substr(pos)); + break; + } else { + parsed.push_back(str.substr(pos, colon - pos)); + pos = colon + 1; + } + } + dest->swap(parsed); +} + +// Attempts to parse a string into a positive integer. Returns true +// if that is possible. GTEST_HAS_DEATH_TEST implies that we have +// ::std::string, so we can use it here. 
+static bool ParsePositiveInt(const ::std::string& str, int* number) { + // Fail fast if the given string does not begin with a digit; + // this bypasses strtol's "optional leading whitespace and plus + // or minus sign" semantics, which are undesirable here. + if (str.empty() || !isdigit(str[0])) { + return false; + } + char* endptr; + const long parsed = strtol(str.c_str(), &endptr, 10); // NOLINT + if (*endptr == '\0' && parsed <= INT_MAX) { + *number = static_cast<int>(parsed); + return true; + } else { + return false; + } +} + +// Returns a newly created InternalRunDeathTestFlag object with fields +// initialized from the GTEST_FLAG(internal_run_death_test) flag if +// the flag is specified; otherwise returns NULL. +InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag() { + if (GTEST_FLAG(internal_run_death_test) == "") return NULL; + + InternalRunDeathTestFlag* const internal_run_death_test_flag = + new InternalRunDeathTestFlag; + // GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we + // can use it here. + ::std::vector< ::std::string> fields; + SplitString(GTEST_FLAG(internal_run_death_test).c_str(), ':', &fields); + if (fields.size() != 4 + || !ParsePositiveInt(fields[1], &internal_run_death_test_flag->line) + || !ParsePositiveInt(fields[2], &internal_run_death_test_flag->index) + || !ParsePositiveInt(fields[3], + &internal_run_death_test_flag->status_fd)) { + DeathTestAbort("Bad --gtest_internal_run_death_test flag: %s", + GTEST_FLAG(internal_run_death_test).c_str()); + } + internal_run_death_test_flag->file = fields[0].c_str(); + return internal_run_death_test_flag; +} + +} // namespace internal + +#endif // GTEST_HAS_DEATH_TEST + +} // namespace testing diff --git a/src/gtest/gtest-death-test.h b/src/gtest/gtest-death-test.h new file mode 100644 index 0000000..cbd41fe --- /dev/null +++ b/src/gtest/gtest-death-test.h @@ -0,0 +1,205 @@ +// Copyright 2005, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) +// +// The Google C++ Testing Framework (Google Test) +// +// This header file defines the public API for death tests. It is +// #included by gtest.h so a user doesn't need to include this +// directly. + +#ifndef GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_ +#define GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_ + +#include <gtest/internal/gtest-death-test-internal.h> + +namespace testing { + +// This flag controls the style of death tests. 
// Valid values are "threadsafe",
// meaning that the death test child process will re-execute the test binary
// from the start, running only a single death test, or "fast",
// meaning that the child process will execute the test logic immediately
// after forking.
GTEST_DECLARE_string(death_test_style);

#ifdef GTEST_HAS_DEATH_TEST

// The following macros are useful for writing death tests.

// Here's what happens when an ASSERT_DEATH* or EXPECT_DEATH* is
// executed:
//
//   1. The assertion fails immediately if there is more than one
//   active thread.  This is because it's safe to fork() only when
//   there is a single thread.
//   NOTE(review): the "fast" implementation in gtest-death-test.cc only
//   logs a warning when the thread count is not 1 and then proceeds --
//   confirm which behavior is intended.
//
//   2. The parent process forks a sub-process and runs the death test
//   in it; the sub-process exits with code 0 at the end of the death
//   test, if it hasn't exited already.
//   NOTE(review): ForkingDeathTest::Abort() in gtest-death-test.cc ends
//   the child with _exit(1) -- confirm the exit code stated here.
//
//   3. The parent process waits for the sub-process to terminate.
//
//   4. The parent process checks the exit code and error message of
//   the sub-process.
//
// Note:
//
// It's not safe to call exit() if the current process is forked from
// a multi-threaded process, so people usually call _exit() instead in
// such a case.  However, we are not concerned with this as we run
// death tests only when there is a single thread.  Since exit() has
// cleaner semantics (it also calls functions registered with atexit()
// and on_exit()), this macro calls exit() instead of _exit() to
// terminate the child process.
//
// Examples:
//
//   ASSERT_DEATH(server.SendMessage(56, "Hello"), "Invalid port number");
//   for (int i = 0; i < 5; i++) {
//     EXPECT_DEATH(server.ProcessRequest(i),
//                  "Invalid request .* in ProcessRequest()")
//         << "Failed to die on request " << i;
//   }
//
//   ASSERT_EXIT(server.ExitNow(), ::testing::ExitedWithCode(0), "Exiting");
//
//   bool KilledBySIGHUP(int exit_code) {
//     return WIFSIGNALED(exit_code) && WTERMSIG(exit_code) == SIGHUP;
//   }
//
//   ASSERT_EXIT(client.HangUpServer(), KilledBySIGHUP, "Hanging up!");

// Asserts that a given statement causes the program to exit, with an
// integer exit status that satisfies predicate, and emitting error output
// that matches regex.
#define ASSERT_EXIT(statement, predicate, regex) \
  GTEST_DEATH_TEST(statement, predicate, regex, GTEST_FATAL_FAILURE)

// Like ASSERT_EXIT, but continues on to successive tests in the
// test case, if any:
#define EXPECT_EXIT(statement, predicate, regex) \
  GTEST_DEATH_TEST(statement, predicate, regex, GTEST_NONFATAL_FAILURE)

// Asserts that a given statement causes the program to exit, either by
// explicitly exiting with a nonzero exit code or being killed by a
// signal, and emitting error output that matches regex.
#define ASSERT_DEATH(statement, regex) \
  ASSERT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex)

// Like ASSERT_DEATH, but continues on to successive tests in the
// test case, if any:
#define EXPECT_DEATH(statement, regex) \
  EXPECT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex)

// Two predicate classes that can be used in {ASSERT,EXPECT}_EXIT*:

// Tests that an exit code describes a normal exit with a given exit code.
class ExitedWithCode {
 public:
  // exit_code is the exit code the predicate should accept.
  explicit ExitedWithCode(int exit_code);
  // exit_status is the status of the finished child process.
  bool operator()(int exit_status) const;
 private:
  const int exit_code_;
};

// Tests that an exit code describes an exit due to termination by a
// given signal.
class KilledBySignal {
 public:
  // signum is the signal number the predicate should accept.
  explicit KilledBySignal(int signum);
  // exit_status is the status of the finished child process.
  bool operator()(int exit_status) const;
 private:
  const int signum_;
};

// EXPECT_DEBUG_DEATH asserts that the given statements die in debug mode.
// The death testing framework causes this to have interesting semantics,
// since the side effects of the call are only visible in opt mode, and not
// in debug mode.
//
// In practice, this can be used to test functions that utilize the
// LOG(DFATAL) macro using the following style:
//
// int DieInDebugOr12(int* sideeffect) {
//   if (sideeffect) {
//     *sideeffect = 12;
//   }
//   LOG(DFATAL) << "death";
//   return 12;
// }
//
// TEST(TestCase, TestDieOr12WorksInDgbAndOpt) {
//   int sideeffect = 0;
//   // Only asserts in dbg.
//   EXPECT_DEBUG_DEATH(DieInDebugOr12(&sideeffect), "death");
//
// #ifdef NDEBUG
//   // opt-mode has sideeffect visible.
//   EXPECT_EQ(12, sideeffect);
// #else
//   // dbg-mode no visible sideeffect.
//   EXPECT_EQ(0, sideeffect);
// #endif
// }
//
// This will assert that DieInDebugOr12() crashes in debug
// mode, usually due to a DCHECK or LOG(DFATAL), but returns the
// appropriate fallback value (12 in this case) in opt mode.  If you
// need to test that a function has appropriate side effects in opt
// mode, include assertions against the side effects.  A general
// pattern for this is:
//
// EXPECT_DEBUG_DEATH({
//   // Side-effects here will have an effect after this statement in
//   // opt mode, but none in debug mode.
//   EXPECT_EQ(12, DieInDebugOr12(&sideeffect));
// }, "death");
//
#ifdef NDEBUG

// With NDEBUG defined the statement is simply executed once in the
// current process; regex is not used.
#define EXPECT_DEBUG_DEATH(statement, regex) \
  do { statement; } while (false)

#define ASSERT_DEBUG_DEATH(statement, regex) \
  do { statement; } while (false)

#else

// Without NDEBUG these are plain aliases for the regular death test
// assertions.
#define EXPECT_DEBUG_DEATH(statement, regex) \
  EXPECT_DEATH(statement, regex)

#define ASSERT_DEBUG_DEATH(statement, regex) \
  ASSERT_DEATH(statement, regex)

#endif  // NDEBUG for EXPECT_DEBUG_DEATH
#endif  // GTEST_HAS_DEATH_TEST
}  // namespace testing

#endif  // GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
diff --git a/src/gtest/gtest-filepath.cc b/src/gtest/gtest-filepath.cc
new file mode 100644
index 0000000..2fba96e
--- /dev/null
+++ b/src/gtest/gtest-filepath.cc
@@ -0,0 +1,208 @@
// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: keith.ray@gmail.com (Keith Ray) + +#include <gtest/internal/gtest-filepath.h> +#include <gtest/internal/gtest-port.h> + +#ifdef _WIN32 +#include <direct.h> +#include <io.h> +#endif // _WIN32 + +#include <sys/stat.h> + +#include <gtest/internal/gtest-string.h> + +namespace testing { +namespace internal { + +#ifdef GTEST_OS_WINDOWS +const char kPathSeparator = '\\'; +const char kPathSeparatorString[] = "\\"; +const char kCurrentDirectoryString[] = ".\\"; +#else +const char kPathSeparator = '/'; +const char kPathSeparatorString[] = "/"; +const char kCurrentDirectoryString[] = "./"; +#endif // GTEST_OS_WINDOWS + +// Returns a copy of the FilePath with the case-insensitive extension removed. +// Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns +// FilePath("dir/file"). If a case-insensitive extension is not +// found, returns a copy of the original FilePath. +FilePath FilePath::RemoveExtension(const char* extension) const { + String dot_extension(String::Format(".%s", extension)); + if (pathname_.EndsWithCaseInsensitive(dot_extension.c_str())) { + return FilePath(String(pathname_.c_str(), pathname_.GetLength() - 4)); + } + return *this; +} + +// Returns a copy of the FilePath with the directory part removed. +// Example: FilePath("path/to/file").RemoveDirectoryName() returns +// FilePath("file"). If there is no directory part ("just_a_file"), it returns +// the FilePath unmodified. 
If there is no file part ("just_a_dir/") it +// returns an empty FilePath (""). +// On Windows platform, '\' is the path separator, otherwise it is '/'. +FilePath FilePath::RemoveDirectoryName() const { + const char* const last_sep = strrchr(c_str(), kPathSeparator); + return last_sep ? FilePath(String(last_sep + 1)) : *this; +} + +// RemoveFileName returns the directory path with the filename removed. +// Example: FilePath("path/to/file").RemoveFileName() returns "path/to/". +// If the FilePath is "a_file" or "/a_file", RemoveFileName returns +// FilePath("./") or, on Windows, FilePath(".\\"). If the filepath does +// not have a file, like "just/a/dir/", it returns the FilePath unmodified. +// On Windows platform, '\' is the path separator, otherwise it is '/'. +FilePath FilePath::RemoveFileName() const { + const char* const last_sep = strrchr(c_str(), kPathSeparator); + return FilePath(last_sep ? String(c_str(), last_sep + 1 - c_str()) + : String(kCurrentDirectoryString)); +} + +// Helper functions for naming files in a directory for xml output. + +// Given directory = "dir", base_name = "test", number = 0, +// extension = "xml", returns "dir/test.xml". If number is greater +// than zero (e.g., 12), returns "dir/test_12.xml". +// On Windows platform, uses \ as the separator rather than /. +FilePath FilePath::MakeFileName(const FilePath& directory, + const FilePath& base_name, + int number, + const char* extension) { + FilePath dir(directory.RemoveTrailingPathSeparator()); + if (number == 0) { + return FilePath(String::Format("%s%c%s.%s", dir.c_str(), kPathSeparator, + base_name.c_str(), extension)); + } + return FilePath(String::Format("%s%c%s_%d.%s", dir.c_str(), kPathSeparator, + base_name.c_str(), number, extension)); +} + +// Returns true if pathname describes something findable in the file-system, +// either a file, directory, or whatever. 
+bool FilePath::FileOrDirectoryExists() const { +#ifdef GTEST_OS_WINDOWS + struct _stat file_stat = {}; + return _stat(pathname_.c_str(), &file_stat) == 0; +#else + struct stat file_stat = {}; + return stat(pathname_.c_str(), &file_stat) == 0; +#endif // GTEST_OS_WINDOWS +} + +// Returns true if pathname describes a directory in the file-system +// that exists. +bool FilePath::DirectoryExists() const { + bool result = false; +#ifdef _WIN32 + FilePath removed_sep(this->RemoveTrailingPathSeparator()); + struct _stat file_stat = {}; + result = _stat(removed_sep.c_str(), &file_stat) == 0 && + (_S_IFDIR & file_stat.st_mode) != 0; +#else + struct stat file_stat = {}; + result = stat(pathname_.c_str(), &file_stat) == 0 && + S_ISDIR(file_stat.st_mode); +#endif // _WIN32 + return result; +} + +// Returns a pathname for a file that does not currently exist. The pathname +// will be directory/base_name.extension or +// directory/base_name_<number>.extension if directory/base_name.extension +// already exists. The number will be incremented until a pathname is found +// that does not already exist. +// Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'. +// There could be a race condition if two or more processes are calling this +// function at the same time -- they could both pick the same filename. +FilePath FilePath::GenerateUniqueFileName(const FilePath& directory, + const FilePath& base_name, + const char* extension) { + FilePath full_pathname; + int number = 0; + do { + full_pathname.Set(MakeFileName(directory, base_name, number++, extension)); + } while (full_pathname.FileOrDirectoryExists()); + return full_pathname; +} + +// Returns true if FilePath ends with a path separator, which indicates that +// it is intended to represent a directory. Returns false otherwise. +// This does NOT check that a directory (or file) actually exists. 
+bool FilePath::IsDirectory() const { + return pathname_.EndsWith(kPathSeparatorString); +} + +// Create directories so that path exists. Returns true if successful or if +// the directories already exist; returns false if unable to create directories +// for any reason. +bool FilePath::CreateDirectoriesRecursively() const { + if (!this->IsDirectory()) { + return false; + } + + if (pathname_.GetLength() == 0 || this->DirectoryExists()) { + return true; + } + + const FilePath parent(this->RemoveTrailingPathSeparator().RemoveFileName()); + return parent.CreateDirectoriesRecursively() && this->CreateFolder(); +} + +// Create the directory so that path exists. Returns true if successful or +// if the directory already exists; returns false if unable to create the +// directory for any reason, including if the parent directory does not +// exist. Not named "CreateDirectory" because that's a macro on Windows. +bool FilePath::CreateFolder() const { +#ifdef _WIN32 + int result = _mkdir(pathname_.c_str()); +#else + int result = mkdir(pathname_.c_str(), 0777); +#endif // _WIN32 + if (result == -1) { + return this->DirectoryExists(); // An error is OK if the directory exists. + } + return true; // No error. +} + +// If input name has a trailing separator character, remove it and return the +// name, otherwise return the name string unmodified. +// On Windows platform, uses \ as the separator, other platforms use /. +FilePath FilePath::RemoveTrailingPathSeparator() const { + return pathname_.EndsWith(kPathSeparatorString) + ? FilePath(String(pathname_.c_str(), pathname_.GetLength() - 1)) + : *this; +} + +} // namespace internal +} // namespace testing diff --git a/src/gtest/gtest-message.h b/src/gtest/gtest-message.h new file mode 100644 index 0000000..b1d646f --- /dev/null +++ b/src/gtest/gtest-message.h @@ -0,0 +1,224 @@ +// Copyright 2005, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) +// +// The Google C++ Testing Framework (Google Test) +// +// This header file defines the Message class. +// +// IMPORTANT NOTE: Due to limitation of the C++ language, we have to +// leave some internal implementation details in this header file. +// They are clearly marked by comments like this: +// +// // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. 
//
// Such code is NOT meant to be used by a user directly, and is subject
// to CHANGE WITHOUT NOTICE. Therefore DO NOT DEPEND ON IT in a user
// program!

#ifndef GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
#define GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_

#include <gtest/internal/gtest-string.h>
#include <gtest/internal/gtest-internal.h>

namespace testing {

// The Message class works like an ostream repeater.
//
// Typical usage:
//
//   1. You stream a bunch of values to a Message object.
//      It will remember the text in a StrStream.
//   2. Then you stream the Message object to an ostream.
//      This causes the text in the Message to be streamed
//      to the ostream.
//
// For example:
//
//   testing::Message foo;
//   foo << 1 << " != " << 2;
//   std::cout << foo;
//
// will print "1 != 2".
//
// Message is not intended to be inherited from. In particular, its
// destructor is not virtual.
//
// Note that StrStream behaves differently in gcc and in MSVC. You
// can stream a NULL char pointer to it in the former, but not in the
// latter (it causes an access violation if you do). The Message
// class hides this difference by treating a NULL char pointer as
// "(null)".
class Message {
 private:
  // The type of basic IO manipulators (endl, ends, and flush) for
  // narrow streams.
  typedef std::ostream& (*BasicNarrowIoManip)(std::ostream&);

 public:
  // Constructs an empty Message.
  // We allocate the StrStream separately because otherwise each use of
  // ASSERT/EXPECT in a procedure adds over 200 bytes to the procedure's
  // stack frame leading to huge stack frames in some cases; gcc does not reuse
  // the stack space.
  Message() : ss_(new internal::StrStream) {}

  // Copy constructor. Allocates a fresh StrStream and seeds it with the
  // text of msg, so the two objects never share a stream.
  Message(const Message& msg) : ss_(new internal::StrStream) {  // NOLINT
    *ss_ << msg.GetString();
  }

  // Constructs a Message from a C-string.
  // NOTE(review): str is written to the StrStream directly, not through
  // the NULL-checking pointer overload below - confirm callers never pass
  // NULL here.
  explicit Message(const char* str) : ss_(new internal::StrStream) {
    *ss_ << str;
  }

  // Releases the StrStream allocated by the constructor.
  ~Message() { delete ss_; }
#ifdef __SYMBIAN32__
  // Streams a value (either a pointer or not) to this object.
  // Dispatches on internal::is_pointer<T> to one of the StreamHelper()
  // overloads declared in the private section.
  template <typename T>
  inline Message& operator <<(const T& value) {
    StreamHelper(typename internal::is_pointer<T>::type(), value);
    return *this;
  }
#else
  // Streams a non-pointer value to this object.
  template <typename T>
  inline Message& operator <<(const T& val) {
    ::GTestStreamToHelper(ss_, val);
    return *this;
  }

  // Streams a pointer value to this object.
  //
  // This function is an overload of the previous one. When you
  // stream a pointer to a Message, this definition will be used as it
  // is more specialized. (The C++ Standard, section
  // [temp.func.order].) If you stream a non-pointer, then the
  // previous definition will be used.
  //
  // The reason for this overload is that streaming a NULL pointer to
  // ostream is undefined behavior. Depending on the compiler, you
  // may get "0", "(nil)", "(null)", or an access violation. To
  // ensure consistent result across compilers, we always treat NULL
  // as "(null)".
  template <typename T>
  inline Message& operator <<(T* const& pointer) {  // NOLINT
    if (pointer == NULL) {
      *ss_ << "(null)";
    } else {
      ::GTestStreamToHelper(ss_, pointer);
    }
    return *this;
  }
#endif  // __SYMBIAN32__

  // Since the basic IO manipulators are overloaded for both narrow
  // and wide streams, we have to provide this specialized definition
  // of operator <<, even though its body is the same as the
  // templatized version above. Without this definition, streaming
  // endl or other basic IO manipulators to Message will confuse the
  // compiler.
  Message& operator <<(BasicNarrowIoManip val) {
    *ss_ << val;
    return *this;
  }

  // Instead of 1/0, we want to see true/false for bool values.
  Message& operator <<(bool b) {
    return *this << (b ? "true" : "false");
  }

  // These two overloads allow streaming a wide C string to a Message
  // using the UTF-8 encoding.
  Message& operator <<(const wchar_t* wide_c_str) {
    return *this << internal::String::ShowWideCString(wide_c_str);
  }
  Message& operator <<(wchar_t* wide_c_str) {
    return *this << internal::String::ShowWideCString(wide_c_str);
  }

#if GTEST_HAS_STD_WSTRING
  // Converts the given wide string to a narrow string using the UTF-8
  // encoding, and streams the result to this Message object.
  Message& operator <<(const ::std::wstring& wstr);
#endif  // GTEST_HAS_STD_WSTRING

#if GTEST_HAS_GLOBAL_WSTRING
  // Converts the given wide string to a narrow string using the UTF-8
  // encoding, and streams the result to this Message object.
  Message& operator <<(const ::wstring& wstr);
#endif  // GTEST_HAS_GLOBAL_WSTRING

  // Gets the text streamed to this object so far as a String.
  // Each '\0' character in the buffer is replaced with "\\0".
  //
  // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
  internal::String GetString() const {
    return internal::StrStreamToString(ss_);
  }

 private:
#ifdef __SYMBIAN32__
  // These are needed as the Nokia Symbian Compiler cannot decide between
  // const T& and const T* in a function template. The Nokia compiler _can_
  // decide between class template specializations for T and T*, so a
  // tr1::type_traits-like is_pointer works, and we can overload on that.
  //
  // Pointer case: NULL is rendered as "(null)", matching the non-Symbian
  // pointer overload of operator <<.
  template <typename T>
  inline void StreamHelper(internal::true_type dummy, T* pointer) {
    if (pointer == NULL) {
      *ss_ << "(null)";
    } else {
      ::GTestStreamToHelper(ss_, pointer);
    }
  }
  // Non-pointer case: the value is forwarded unchanged.
  template <typename T>
  inline void StreamHelper(internal::false_type dummy, const T& value) {
    ::GTestStreamToHelper(ss_, value);
  }
#endif  // __SYMBIAN32__

  // We'll hold the text streamed to this object here.
  // Allocated in every constructor and deleted in the destructor.
  internal::StrStream* const ss_;

  // We declare (but don't implement) this to prevent the compiler
  // from implementing the assignment operator.
  void operator=(const Message&);
};

// Streams a Message to an ostream by writing the text it has collected
// so far (see Message::GetString()).
inline std::ostream& operator <<(std::ostream& os, const Message& sb) {
  return os << sb.GetString();
}

}  // namespace testing

#endif  // GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
diff --git a/src/gtest/gtest-port.cc b/src/gtest/gtest-port.cc new file mode 100644 index 0000000..2a4d37a --- /dev/null +++ b/src/gtest/gtest-port.cc @@ -0,0 +1,292 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +#include <gtest/internal/gtest-port.h> + +#include <limits.h> +#ifdef GTEST_HAS_DEATH_TEST +#include <regex.h> +#endif // GTEST_HAS_DEATH_TEST +#include <stdlib.h> +#include <stdio.h> + +#include <gtest/gtest-spi.h> +#include <gtest/gtest-message.h> +#include <gtest/internal/gtest-string.h> + +namespace testing { +namespace internal { + +#ifdef GTEST_HAS_DEATH_TEST + +// Implements RE. Currently only needed for death tests. + +RE::~RE() { + regfree(®ex_); + free(const_cast<char*>(pattern_)); +} + +// Returns true iff str contains regular expression re. +bool RE::PartialMatch(const char* str, const RE& re) { + if (!re.is_valid_) return false; + + regmatch_t match; + return regexec(&re.regex_, str, 1, &match, 0) == 0; +} + +// Initializes an RE from its string representation. +void RE::Init(const char* regex) { + pattern_ = strdup(regex); + is_valid_ = regcomp(®ex_, regex, REG_EXTENDED) == 0; + EXPECT_TRUE(is_valid_) + << "Regular expression \"" << regex + << "\" is not a valid POSIX Extended regular expression."; +} + +#endif // GTEST_HAS_DEATH_TEST + +// Logs a message at the given severity level. +void GTestLog(GTestLogSeverity severity, const char* file, + int line, const char* msg) { + const char* const marker = + severity == GTEST_INFO ? "[ INFO ]" : + severity == GTEST_WARNING ? "[WARNING]" : + severity == GTEST_ERROR ? 
"[ ERROR ]" : "[ FATAL ]"; + fprintf(stderr, "\n%s %s:%d: %s\n", marker, file, line, msg); + if (severity == GTEST_FATAL) { + abort(); + } +} + +#ifdef GTEST_HAS_DEATH_TEST + +// Defines the stderr capturer. + +class CapturedStderr { + public: + // The ctor redirects stderr to a temporary file. + CapturedStderr() { + uncaptured_fd_ = dup(STDERR_FILENO); + + char name_template[] = "captured_stderr.XXXXXX"; + const int captured_fd = mkstemp(name_template); + filename_ = name_template; + fflush(NULL); + dup2(captured_fd, STDERR_FILENO); + close(captured_fd); + } + + ~CapturedStderr() { + remove(filename_.c_str()); + } + + // Stops redirecting stderr. + void StopCapture() { + // Restores the original stream. + fflush(NULL); + dup2(uncaptured_fd_, STDERR_FILENO); + close(uncaptured_fd_); + uncaptured_fd_ = -1; + } + + // Returns the name of the temporary file holding the stderr output. + // GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we + // can use it here. + ::std::string filename() const { return filename_; } + + private: + int uncaptured_fd_; + ::std::string filename_; +}; + +static CapturedStderr* g_captured_stderr = NULL; + +// Returns the size (in bytes) of a file. +static size_t GetFileSize(FILE * file) { + fseek(file, 0, SEEK_END); + return static_cast<size_t>(ftell(file)); +} + +// Reads the entire content of a file as a string. +// GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we can +// use it here. +static ::std::string ReadEntireFile(FILE * file) { + const size_t file_size = GetFileSize(file); + char* const buffer = new char[file_size]; + + size_t bytes_last_read = 0; // # of bytes read in the last fread() + size_t bytes_read = 0; // # of bytes read so far + + fseek(file, 0, SEEK_SET); + + // Keeps reading the file until we cannot read further or the + // pre-determined file size is reached. 
+ do { + bytes_last_read = fread(buffer+bytes_read, 1, file_size-bytes_read, file); + bytes_read += bytes_last_read; + } while (bytes_last_read > 0 && bytes_read < file_size); + + const ::std::string content(buffer, buffer+bytes_read); + delete[] buffer; + + return content; +} + +// Starts capturing stderr. +void CaptureStderr() { + if (g_captured_stderr != NULL) { + GTEST_LOG(FATAL, "Only one stderr capturer can exist at one time."); + } + g_captured_stderr = new CapturedStderr; +} + +// Stops capturing stderr and returns the captured string. +// GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we can +// use it here. +::std::string GetCapturedStderr() { + g_captured_stderr->StopCapture(); + FILE* const file = fopen(g_captured_stderr->filename().c_str(), "r"); + const ::std::string content = ReadEntireFile(file); + fclose(file); + + delete g_captured_stderr; + g_captured_stderr = NULL; + + return content; +} + +// A copy of all command line arguments. Set by InitGoogleTest(). +::std::vector<String> g_argvs; + +// Returns the command line as a vector of strings. +const ::std::vector<String>& GetArgvs() { return g_argvs; } + +#endif // GTEST_HAS_DEATH_TEST + +// Returns the name of the environment variable corresponding to the +// given flag. For example, FlagToEnvVar("foo") will return +// "GTEST_FOO" in the open-source version. +static String FlagToEnvVar(const char* flag) { + const String full_flag = (Message() << GTEST_FLAG_PREFIX << flag).GetString(); + + Message env_var; + for (int i = 0; i != full_flag.GetLength(); i++) { + env_var << static_cast<char>(toupper(full_flag.c_str()[i])); + } + + return env_var.GetString(); +} + +// Reads and returns the Boolean environment variable corresponding to +// the given flag; if it's not set, returns default_value. +// +// The value is considered true iff it's not "0". 
+bool BoolFromGTestEnv(const char* flag, bool default_value) { + const String env_var = FlagToEnvVar(flag); + const char* const string_value = GetEnv(env_var.c_str()); + return string_value == NULL ? + default_value : strcmp(string_value, "0") != 0; +} + +// Parses 'str' for a 32-bit signed integer. If successful, writes +// the result to *value and returns true; otherwise leaves *value +// unchanged and returns false. +bool ParseInt32(const Message& src_text, const char* str, Int32* value) { + // Parses the environment variable as a decimal integer. + char* end = NULL; + const long long_value = strtol(str, &end, 10); // NOLINT + + // Has strtol() consumed all characters in the string? + if (*end != '\0') { + // No - an invalid character was encountered. + Message msg; + msg << "WARNING: " << src_text + << " is expected to be a 32-bit integer, but actually" + << " has value \"" << str << "\".\n"; + printf("%s", msg.GetString().c_str()); + fflush(stdout); + return false; + } + + // Is the parsed value in the range of an Int32? + const Int32 result = static_cast<Int32>(long_value); + if (long_value == LONG_MAX || long_value == LONG_MIN || + // The parsed value overflows as a long. (strtol() returns + // LONG_MAX or LONG_MIN when the input overflows.) + result != long_value + // The parsed value overflows as an Int32. + ) { + Message msg; + msg << "WARNING: " << src_text + << " is expected to be a 32-bit integer, but actually" + << " has value " << str << ", which overflows.\n"; + printf("%s", msg.GetString().c_str()); + fflush(stdout); + return false; + } + + *value = result; + return true; +} + +// Reads and returns a 32-bit integer stored in the environment +// variable corresponding to the given flag; if it isn't set or +// doesn't represent a valid 32-bit integer, returns default_value. 
+Int32 Int32FromGTestEnv(const char* flag, Int32 default_value) { + const String env_var = FlagToEnvVar(flag); + const char* const string_value = GetEnv(env_var.c_str()); + if (string_value == NULL) { + // The environment variable is not set. + return default_value; + } + + Int32 result = default_value; + if (!ParseInt32(Message() << "Environment variable " << env_var, + string_value, &result)) { + printf("The default value %s is used.\n", + (Message() << default_value).GetString().c_str()); + fflush(stdout); + return default_value; + } + + return result; +} + +// Reads and returns the string environment variable corresponding to +// the given flag; if it's not set, returns default_value. +const char* StringFromGTestEnv(const char* flag, const char* default_value) { + const String env_var = FlagToEnvVar(flag); + const char* const value = GetEnv(env_var.c_str()); + return value == NULL ? default_value : value; +} + +} // namespace internal +} // namespace testing diff --git a/src/gtest/gtest-spi.h b/src/gtest/gtest-spi.h new file mode 100644 index 0000000..75d0dcf --- /dev/null +++ b/src/gtest/gtest-spi.h @@ -0,0 +1,247 @@ +// Copyright 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
//
// Utilities for testing Google Test itself and code that uses Google Test
// (e.g. frameworks built on top of Google Test).

#ifndef GTEST_INCLUDE_GTEST_GTEST_SPI_H_
#define GTEST_INCLUDE_GTEST_GTEST_SPI_H_

#include <gtest/gtest.h>

namespace testing {

// A copyable object representing the result of a test part (i.e. an
// assertion or an explicit FAIL(), ADD_FAILURE(), or SUCCESS()).
//
// Don't inherit from TestPartResult as its destructor is not virtual.
class TestPartResult {
 public:
  // C'tor. TestPartResult does NOT have a default constructor.
  // Always use this constructor (with parameters) to create a
  // TestPartResult object.
  //
  // file_name and message are stored in internal::String members.
  // NOTE(review): presumably String copies the characters, so the
  // caller's buffers need not outlive this object - confirm in
  // gtest-string.h.
  TestPartResult(TestPartResultType type,
                 const char* file_name,
                 int line_number,
                 const char* message)
      : type_(type),
        file_name_(file_name),
        line_number_(line_number),
        message_(message) {
  }

  // Gets the outcome of the test part.
  TestPartResultType type() const { return type_; }

  // Gets the name of the source file where the test part took place, or
  // NULL if it's unknown.
  const char* file_name() const { return file_name_.c_str(); }

  // Gets the line in the source file where the test part took place,
  // or -1 if it's unknown.
  int line_number() const { return line_number_; }

  // Gets the message associated with the test part.
  const char* message() const { return message_.c_str(); }

  // Returns true iff the test part passed.
  bool passed() const { return type_ == TPRT_SUCCESS; }

  // Returns true iff the test part failed (fatally or non-fatally).
  bool failed() const { return type_ != TPRT_SUCCESS; }

  // Returns true iff the test part non-fatally failed.
  bool nonfatally_failed() const { return type_ == TPRT_NONFATAL_FAILURE; }

  // Returns true iff the test part fatally failed.
  bool fatally_failed() const { return type_ == TPRT_FATAL_FAILURE; }
 private:
  // The outcome of the test part.
  TestPartResultType type_;

  // The name of the source file where the test part took place, or
  // NULL if the source file is unknown.
  internal::String file_name_;
  // The line in the source file where the test part took place, or -1
  // if the line number is unknown.
  int line_number_;
  internal::String message_;  // The test failure message.
};

// Prints a TestPartResult object.
std::ostream& operator<<(std::ostream& os, const TestPartResult& result);

// An array of TestPartResult objects.
//
// We define this class as we cannot use STL containers when compiling
// Google Test with MSVC 7.1 and exceptions disabled.
//
// Don't inherit from TestPartResultArray as its destructor is not
// virtual.
class TestPartResultArray {
 public:
  TestPartResultArray();
  ~TestPartResultArray();

  // Appends the given TestPartResult to the array.
  void Append(const TestPartResult& result);

  // Returns the TestPartResult at the given index (0-based).
  const TestPartResult& GetTestPartResult(int index) const;

  // Returns the number of TestPartResult objects in the array.
  int size() const;
 private:
  // Internally we use a list to simulate the array. Yes, this means
  // that random access is O(N) in time, but it's OK for its purpose.
  internal::List<TestPartResult>* const list_;

  GTEST_DISALLOW_COPY_AND_ASSIGN(TestPartResultArray);
};

// This interface knows how to report a test part result.
class TestPartResultReporterInterface {
 public:
  // Virtual so that implementations can be destroyed through this
  // interface.
  virtual ~TestPartResultReporterInterface() {}

  // Reports the given test part result; implemented by each reporter.
  virtual void ReportTestPartResult(const TestPartResult& result) = 0;
};

// This helper class can be used to mock out Google Test failure reporting
// so that we can test Google Test or code that builds on Google Test.
//
// An object of this class appends a TestPartResult object to the
// TestPartResultArray object given in the constructor whenever a
// Google Test failure is reported.
class ScopedFakeTestPartResultReporter
    : public TestPartResultReporterInterface {
 public:
  // The c'tor sets this object as the test part result reporter used
  // by Google Test. The 'result' parameter specifies where to report the
  // results.
  explicit ScopedFakeTestPartResultReporter(TestPartResultArray* result);

  // The d'tor restores the previous test part result reporter.
  virtual ~ScopedFakeTestPartResultReporter();

  // Appends the TestPartResult object to the TestPartResultArray
  // received in the constructor.
  //
  // This method is from the TestPartResultReporterInterface
  // interface.
  virtual void ReportTestPartResult(const TestPartResult& result);
 private:
  // The reporter that was active before this object was constructed
  // (per the d'tor comment above, it is restored on destruction).
  TestPartResultReporterInterface* const old_reporter_;
  // Where reported results are appended.
  TestPartResultArray* const result_;

  GTEST_DISALLOW_COPY_AND_ASSIGN(ScopedFakeTestPartResultReporter);
};

namespace internal {

// A helper class for implementing EXPECT_FATAL_FAILURE() and
// EXPECT_NONFATAL_FAILURE(). Its destructor verifies that the given
// TestPartResultArray contains exactly one failure that has the given
// type and contains the given substring. If that's not the case, a
// non-fatal failure will be generated.
+class SingleFailureChecker { + public: + // The constructor remembers the arguments. + SingleFailureChecker(const TestPartResultArray* results, + TestPartResultType type, + const char* substr); + ~SingleFailureChecker(); + private: + const TestPartResultArray* const results_; + const TestPartResultType type_; + const String substr_; + + GTEST_DISALLOW_COPY_AND_ASSIGN(SingleFailureChecker); +}; + +} // namespace internal + +} // namespace testing + +// A macro for testing Google Test assertions or code that's expected to +// generate Google Test fatal failures. It verifies that the given +// statement will cause exactly one fatal Google Test failure with 'substr' +// being part of the failure message. +// +// Implementation note: The verification is done in the destructor of +// SingleFailureChecker, to make sure that it's done even when +// 'statement' throws an exception. +// +// Known restrictions: +// - 'statement' cannot reference local non-static variables or +// non-static members of the current object. +// - 'statement' cannot return a value. +// - You cannot stream a failure message to this macro. +#define EXPECT_FATAL_FAILURE(statement, substr) do {\ + class GTestExpectFatalFailureHelper {\ + public:\ + static void Execute() { statement; }\ + };\ + ::testing::TestPartResultArray gtest_failures;\ + ::testing::internal::SingleFailureChecker gtest_checker(\ + >est_failures, ::testing::TPRT_FATAL_FAILURE, (substr));\ + {\ + ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\ + >est_failures);\ + GTestExpectFatalFailureHelper::Execute();\ + }\ + } while (false) + +// A macro for testing Google Test assertions or code that's expected to +// generate Google Test non-fatal failures. It asserts that the given +// statement will cause exactly one non-fatal Google Test failure with +// 'substr' being part of the failure message. +// +// 'statement' is allowed to reference local variables and members of +// the current object. 
+// +// Implementation note: The verification is done in the destructor of +// SingleFailureChecker, to make sure that it's done even when +// 'statement' throws an exception or aborts the function. +// +// Known restrictions: +// - You cannot stream a failure message to this macro. +#define EXPECT_NONFATAL_FAILURE(statement, substr) do {\ + ::testing::TestPartResultArray gtest_failures;\ + ::testing::internal::SingleFailureChecker gtest_checker(\ + >est_failures, ::testing::TPRT_NONFATAL_FAILURE, (substr));\ + {\ + ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\ + >est_failures);\ + statement;\ + }\ + } while (false) + +#endif // GTEST_INCLUDE_GTEST_GTEST_SPI_H_ diff --git a/src/gtest/gtest.cc b/src/gtest/gtest.cc new file mode 100644 index 0000000..235ec5a --- /dev/null +++ b/src/gtest/gtest.cc @@ -0,0 +1,3545 @@ +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) +// +// The Google C++ Testing Framework (Google Test) + +#include <gtest/gtest.h> +#include <gtest/gtest-spi.h> + +#include <ctype.h> +#include <math.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#ifdef GTEST_OS_LINUX + +// TODO(kenton@google.com): Use autoconf to detect availability of +// gettimeofday(). +#define GTEST_HAS_GETTIMEOFDAY + +#include <fcntl.h> +#include <limits.h> +#include <sched.h> +// Declares vsnprintf(). This header is not available on Windows. +#include <strings.h> +#include <sys/mman.h> +#include <sys/time.h> +#include <unistd.h> +#include <string> +#include <vector> + +#elif defined(_WIN32_WCE) // We are on Windows CE. + +#include <windows.h> // NOLINT + +#elif defined(_WIN32) // We are on Windows proper. + +#include <io.h> // NOLINT +#include <sys/timeb.h> // NOLINT +#include <sys/types.h> // NOLINT +#include <sys/stat.h> // NOLINT + +#if defined(__MINGW__) || defined(__MINGW32__) +// MinGW has gettimeofday() but not _ftime64(). +// TODO(kenton@google.com): Use autoconf to detect availability of +// gettimeofday(). +// TODO(kenton@google.com): There are other ways to get the time on +// Windows, like GetTickCount() or GetSystemTimeAsFileTime(). MinGW +// supports these. consider using them instead. 
#define GTEST_HAS_GETTIMEOFDAY
#include <sys/time.h>  // NOLINT
#endif

// cpplint thinks that the header is already included, so we want to
// silence it.
#include <windows.h>  // NOLINT

#else

// Assume other platforms have gettimeofday().
// TODO(kenton@google.com): Use autoconf to detect availability of
// gettimeofday().
#define GTEST_HAS_GETTIMEOFDAY

// cpplint thinks that the header is already included, so we want to
// silence it.
#include <sys/time.h>  // NOLINT
#include <unistd.h>  // NOLINT

#endif

// Indicates that this translation unit is part of Google Test's
// implementation. It must come before gtest-internal-inl.h is
// included, or there will be a compiler error. This trick is to
// prevent a user from accidentally including gtest-internal-inl.h in
// his code.
#define GTEST_IMPLEMENTATION
#include "src/gtest-internal-inl.h"
#undef GTEST_IMPLEMENTATION

// On Windows, map the POSIX names used in this file onto their
// underscore-prefixed equivalents.
#ifdef GTEST_OS_WINDOWS
#define fileno _fileno
#define isatty _isatty
#define vsnprintf _vsnprintf
#endif  // GTEST_OS_WINDOWS

namespace testing {

// Constants.

// A test that matches this pattern is disabled and not run.
static const char kDisableTestPattern[] = "DISABLED_*";

// A test filter that matches everything.
static const char kUniversalFilter[] = "*";

// The default output file for XML output.
static const char kDefaultOutputFile[] = "test_detail.xml";

// Flag definitions. Where a default is given as a *FromGTestEnv() call,
// it is read from the environment variable corresponding to the flag
// name, with the second argument used when that variable is not set.

GTEST_DEFINE_bool(
    break_on_failure,
    internal::BoolFromGTestEnv("break_on_failure", false),
    "True iff a failed assertion should be a debugger break-point.");

GTEST_DEFINE_bool(
    catch_exceptions,
    internal::BoolFromGTestEnv("catch_exceptions", false),
    "True iff " GTEST_NAME
    " should catch exceptions and treat them as test failures.");

GTEST_DEFINE_string(
    color,
    internal::StringFromGTestEnv("color", "auto"),
    "Whether to use colors in the output. Valid values: yes, no, "
    "and auto. 'auto' means to use colors if the output is "
    "being sent to a terminal and the TERM environment variable "
    "is set to xterm or xterm-color.");

GTEST_DEFINE_string(
    filter,
    internal::StringFromGTestEnv("filter", kUniversalFilter),
    "A colon-separated list of glob (not regex) patterns "
    "for filtering the tests to run, optionally followed by a "
    "'-' and a : separated list of negative patterns (tests to "
    "exclude). A test is run if it matches one of the positive "
    "patterns and does not match any of the negative patterns.");

// Unlike the flags above, this default is a literal rather than a value
// read from the environment.
GTEST_DEFINE_bool(list_tests, false,
                  "List all tests without running them.");

GTEST_DEFINE_string(
    output,
    internal::StringFromGTestEnv("output", ""),
    "A format (currently must be \"xml\"), optionally followed "
    "by a colon and an output file name or directory. A directory "
    "is indicated by a trailing pathname separator. "
    "Examples: \"xml:filename.xml\", \"xml::directoryname/\". "
    "If a directory is specified, output files will be created "
    "within that directory, with file-names based on the test "
    "executable's name and, if necessary, made unique by adding "
    "digits.");

GTEST_DEFINE_int32(
    repeat,
    internal::Int32FromGTestEnv("repeat", 1),
    "How many times to repeat each test. Specify a negative number "
    "for repeating forever. Useful for shaking out flaky tests.");

GTEST_DEFINE_int32(
    stack_trace_depth,
    internal::Int32FromGTestEnv("stack_trace_depth", kMaxStackTraceDepth),
    "The maximum number of stack frames to print when an "
    "assertion fails. The valid range is 0 through 100, inclusive.");

// This default is also a literal rather than a value read from the
// environment.
GTEST_DEFINE_bool(
    show_internal_stack_frames, false,
    "True iff " GTEST_NAME " should include internal stack frames when "
    "printing test failure stack traces.");

namespace internal {

// GTestIsInitialized() returns true iff the user has initialized
// Google Test. Useful for catching the user mistake of not initializing
// Google Test before calling RUN_ALL_TESTS().
+ +// A user must call testing::InitGoogleTest() to initialize Google +// Test. g_parse_gtest_flags_called is set to true iff +// InitGoogleTest() has been called. We don't protect this variable +// under a mutex as it is only accessed in the main thread. +static bool g_parse_gtest_flags_called = false; +static bool GTestIsInitialized() { return g_parse_gtest_flags_called; } + +// Iterates over a list of TestCases, keeping a running sum of the +// results of calling a given int-returning method on each. +// Returns the sum. +static int SumOverTestCaseList(const internal::List<TestCase*>& case_list, + int (TestCase::*method)() const) { + int sum = 0; + for (const internal::ListNode<TestCase*>* node = case_list.Head(); + node != NULL; + node = node->next()) { + sum += (node->element()->*method)(); + } + return sum; +} + +// Returns true iff the test case passed. +static bool TestCasePassed(const TestCase* test_case) { + return test_case->should_run() && test_case->Passed(); +} + +// Returns true iff the test case failed. +static bool TestCaseFailed(const TestCase* test_case) { + return test_case->should_run() && test_case->Failed(); +} + +// Returns true iff test_case contains at least one test that should +// run. +static bool ShouldRunTestCase(const TestCase* test_case) { + return test_case->should_run(); +} + +#ifdef _WIN32_WCE +// Windows CE has no C library. The abort() function is used in +// several places in Google Test. This implementation provides a reasonable +// imitation of standard behaviour. +static void abort() { + DebugBreak(); + TerminateProcess(GetCurrentProcess(), 1); +} +#endif // _WIN32_WCE + +// AssertHelper constructor. +AssertHelper::AssertHelper(TestPartResultType type, const char* file, + int line, const char* message) + : type_(type), file_(file), line_(line), message_(message) { +} + +// Message assignment, for assertion streaming support. 
+void AssertHelper::operator=(const Message& message) const { + UnitTest::GetInstance()-> + AddTestPartResult(type_, file_, line_, + AppendUserMessage(message_, message), + UnitTest::GetInstance()->impl() + ->CurrentOsStackTraceExceptTop(1) + // Skips the stack frame for this function itself. + ); // NOLINT +} + +// Application pathname gotten in InitGoogleTest. +String g_executable_path; + +// Returns the current application's name, removing directory path if that +// is present. +FilePath GetCurrentExecutableName() { + FilePath result; + +#if defined(_WIN32_WCE) || defined(_WIN32) + result.Set(FilePath(g_executable_path).RemoveExtension("exe")); +#else + result.Set(FilePath(g_executable_path)); +#endif // _WIN32_WCE || _WIN32 + + return result.RemoveDirectoryName(); +} + +// Functions for processing the gtest_output flag. + +// Returns the output format, or "" for normal printed output. +String UnitTestOptions::GetOutputFormat() { + const char* const gtest_output_flag = GTEST_FLAG(output).c_str(); + if (gtest_output_flag == NULL) return String(""); + + const char* const colon = strchr(gtest_output_flag, ':'); + return (colon == NULL) ? + String(gtest_output_flag) : + String(gtest_output_flag, colon - gtest_output_flag); +} + +// Returns the name of the requested output file, or the default if none +// was explicitly specified. 
+String UnitTestOptions::GetOutputFile() { + const char* const gtest_output_flag = GTEST_FLAG(output).c_str(); + if (gtest_output_flag == NULL) + return String(""); + + const char* const colon = strchr(gtest_output_flag, ':'); + if (colon == NULL) + return String(kDefaultOutputFile); + + internal::FilePath output_name(colon + 1); + if (!output_name.IsDirectory()) + return output_name.ToString(); + + internal::FilePath result(internal::FilePath::GenerateUniqueFileName( + output_name, internal::GetCurrentExecutableName(), + GetOutputFormat().c_str())); + return result.ToString(); +} + +// Returns true iff the wildcard pattern matches the string. The +// first ':' or '\0' character in pattern marks the end of it. +// +// This recursive algorithm isn't very efficient, but is clear and +// works well enough for matching test names, which are short. +bool UnitTestOptions::PatternMatchesString(const char *pattern, + const char *str) { + switch (*pattern) { + case '\0': + case ':': // Either ':' or '\0' marks the end of the pattern. + return *str == '\0'; + case '?': // Matches any single character. + return *str != '\0' && PatternMatchesString(pattern + 1, str + 1); + case '*': // Matches any string (possibly empty) of characters. + return (*str != '\0' && PatternMatchesString(pattern, str + 1)) || + PatternMatchesString(pattern + 1, str); + default: // Non-special character. Matches itself. + return *pattern == *str && + PatternMatchesString(pattern + 1, str + 1); + } +} + +bool UnitTestOptions::MatchesFilter(const String& name, const char* filter) { + const char *cur_pattern = filter; + while (true) { + if (PatternMatchesString(cur_pattern, name.c_str())) { + return true; + } + + // Finds the next pattern in the filter. + cur_pattern = strchr(cur_pattern, ':'); + + // Returns if no more pattern can be found. + if (cur_pattern == NULL) { + return false; + } + + // Skips the pattern separater (the ':' character). 
+ cur_pattern++; + } +} + +// TODO(keithray): move String function implementations to gtest-string.cc. + +// Returns true iff the user-specified filter matches the test case +// name and the test name. +bool UnitTestOptions::FilterMatchesTest(const String &test_case_name, + const String &test_name) { + const String& full_name = String::Format("%s.%s", + test_case_name.c_str(), + test_name.c_str()); + + // Split --gtest_filter at '-', if there is one, to separate into + // positive filter and negative filter portions + const char* const p = GTEST_FLAG(filter).c_str(); + const char* const dash = strchr(p, '-'); + String positive; + String negative; + if (dash == NULL) { + positive = GTEST_FLAG(filter).c_str(); // Whole string is a positive filter + negative = String(""); + } else { + positive.Set(p, dash - p); // Everything up to the dash + negative = String(dash+1); // Everything after the dash + if (positive.empty()) { + // Treat '-test1' as the same as '*-test1' + positive = kUniversalFilter; + } + } + + // A filter is a colon-separated list of patterns. It matches a + // test if any pattern in it matches the test. + return (MatchesFilter(full_name, positive.c_str()) && + !MatchesFilter(full_name, negative.c_str())); +} + +#ifdef GTEST_OS_WINDOWS +// Returns EXCEPTION_EXECUTE_HANDLER if Google Test should handle the +// given SEH exception, or EXCEPTION_CONTINUE_SEARCH otherwise. +// This function is useful as an __except condition. +int UnitTestOptions::GTestShouldProcessSEH(DWORD exception_code) { + // Google Test should handle an exception if: + // 1. the user wants it to, AND + // 2. this is not a breakpoint exception. + return (GTEST_FLAG(catch_exceptions) && + exception_code != EXCEPTION_BREAKPOINT) ? 
+ EXCEPTION_EXECUTE_HANDLER : + EXCEPTION_CONTINUE_SEARCH; +} +#endif // GTEST_OS_WINDOWS + +} // namespace internal + +// The interface for printing the result of a UnitTest +class UnitTestEventListenerInterface { + public: + // The d'tor is pure virtual as this is an abstract class. + virtual ~UnitTestEventListenerInterface() = 0; + + // Called before the unit test starts. + virtual void OnUnitTestStart(const UnitTest*) {} + + // Called after the unit test ends. + virtual void OnUnitTestEnd(const UnitTest*) {} + + // Called before the test case starts. + virtual void OnTestCaseStart(const TestCase*) {} + + // Called after the test case ends. + virtual void OnTestCaseEnd(const TestCase*) {} + + // Called before the global set-up starts. + virtual void OnGlobalSetUpStart(const UnitTest*) {} + + // Called after the global set-up ends. + virtual void OnGlobalSetUpEnd(const UnitTest*) {} + + // Called before the global tear-down starts. + virtual void OnGlobalTearDownStart(const UnitTest*) {} + + // Called after the global tear-down ends. + virtual void OnGlobalTearDownEnd(const UnitTest*) {} + + // Called before the test starts. + virtual void OnTestStart(const TestInfo*) {} + + // Called after the test ends. + virtual void OnTestEnd(const TestInfo*) {} + + // Called after an assertion. + virtual void OnNewTestPartResult(const TestPartResult*) {} +}; + +// Constructs an empty TestPartResultArray. +TestPartResultArray::TestPartResultArray() + : list_(new internal::List<TestPartResult>) { +} + +// Destructs a TestPartResultArray. +TestPartResultArray::~TestPartResultArray() { + delete list_; +} + +// Appends a TestPartResult to the array. +void TestPartResultArray::Append(const TestPartResult& result) { + list_->PushBack(result); +} + +// Returns the TestPartResult at the given index (0-based). 
+const TestPartResult& TestPartResultArray::GetTestPartResult(int index) const { + if (index < 0 || index >= size()) { + printf("\nInvalid index (%d) into TestPartResultArray.\n", index); + abort(); + } + + const internal::ListNode<TestPartResult>* p = list_->Head(); + for (int i = 0; i < index; i++) { + p = p->next(); + } + + return p->element(); +} + +// Returns the number of TestPartResult objects in the array. +int TestPartResultArray::size() const { + return list_->size(); +} + +// The c'tor sets this object as the test part result reporter used by +// Google Test. The 'result' parameter specifies where to report the +// results. +ScopedFakeTestPartResultReporter::ScopedFakeTestPartResultReporter( + TestPartResultArray* result) + : old_reporter_(UnitTest::GetInstance()->impl()-> + test_part_result_reporter()), + result_(result) { + internal::UnitTestImpl* const impl = UnitTest::GetInstance()->impl(); + impl->set_test_part_result_reporter(this); +} + +// The d'tor restores the test part result reporter used by Google Test +// before. +ScopedFakeTestPartResultReporter::~ScopedFakeTestPartResultReporter() { + UnitTest::GetInstance()->impl()-> + set_test_part_result_reporter(old_reporter_); +} + +// Increments the test part result count and remembers the result. +// This method is from the TestPartResultReporterInterface interface. +void ScopedFakeTestPartResultReporter::ReportTestPartResult( + const TestPartResult& result) { + result_->Append(result); +} + +namespace internal { + +// This predicate-formatter checks that 'results' contains a test part +// failure of the given type and that the failure message contains the +// given substring. +AssertionResult HasOneFailure(const char* /* results_expr */, + const char* /* type_expr */, + const char* /* substr_expr */, + const TestPartResultArray& results, + TestPartResultType type, + const char* substr) { + const String expected( + type == TPRT_FATAL_FAILURE ? 
"1 fatal failure" : + "1 non-fatal failure"); + Message msg; + if (results.size() != 1) { + msg << "Expected: " << expected << "\n" + << " Actual: " << results.size() << " failures"; + for (int i = 0; i < results.size(); i++) { + msg << "\n" << results.GetTestPartResult(i); + } + return AssertionFailure(msg); + } + + const TestPartResult& r = results.GetTestPartResult(0); + if (r.type() != type) { + msg << "Expected: " << expected << "\n" + << " Actual:\n" + << r; + return AssertionFailure(msg); + } + + if (strstr(r.message(), substr) == NULL) { + msg << "Expected: " << expected << " containing \"" + << substr << "\"\n" + << " Actual:\n" + << r; + return AssertionFailure(msg); + } + + return AssertionSuccess(); +} + +// The constructor of SingleFailureChecker remembers where to look up +// test part results, what type of failure we expect, and what +// substring the failure message should contain. +SingleFailureChecker:: SingleFailureChecker( + const TestPartResultArray* results, + TestPartResultType type, + const char* substr) + : results_(results), + type_(type), + substr_(substr) {} + +// The destructor of SingleFailureChecker verifies that the given +// TestPartResultArray contains exactly one failure that has the given +// type and contains the given substring. If that's not the case, a +// non-fatal failure will be generated. +SingleFailureChecker::~SingleFailureChecker() { + EXPECT_PRED_FORMAT3(HasOneFailure, *results_, type_, substr_.c_str()); +} + +// Reports a test part result. +void UnitTestImpl::ReportTestPartResult(const TestPartResult& result) { + current_test_result()->AddTestPartResult(result); + result_printer()->OnNewTestPartResult(&result); +} + +// Returns the current test part result reporter. +TestPartResultReporterInterface* UnitTestImpl::test_part_result_reporter() { + return test_part_result_reporter_; +} + +// Sets the current test part result reporter. 
+void UnitTestImpl::set_test_part_result_reporter( + TestPartResultReporterInterface* reporter) { + test_part_result_reporter_ = reporter; +} + +// Gets the number of successful test cases. +int UnitTestImpl::successful_test_case_count() const { + return test_cases_.CountIf(TestCasePassed); +} + +// Gets the number of failed test cases. +int UnitTestImpl::failed_test_case_count() const { + return test_cases_.CountIf(TestCaseFailed); +} + +// Gets the number of all test cases. +int UnitTestImpl::total_test_case_count() const { + return test_cases_.size(); +} + +// Gets the number of all test cases that contain at least one test +// that should run. +int UnitTestImpl::test_case_to_run_count() const { + return test_cases_.CountIf(ShouldRunTestCase); +} + +// Gets the number of successful tests. +int UnitTestImpl::successful_test_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::successful_test_count); +} + +// Gets the number of failed tests. +int UnitTestImpl::failed_test_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::failed_test_count); +} + +// Gets the number of disabled tests. +int UnitTestImpl::disabled_test_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::disabled_test_count); +} + +// Gets the number of all tests. +int UnitTestImpl::total_test_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::total_test_count); +} + +// Gets the number of tests that should run. +int UnitTestImpl::test_to_run_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::test_to_run_count); +} + +// Returns the current OS stack trace as a String. +// +// The maximum number of stack frames to be included is specified by +// the gtest_stack_trace_depth flag. The skip_count parameter +// specifies the number of top frames to be skipped, which doesn't +// count against the number of frames to be included. 
+// +// For example, if Foo() calls Bar(), which in turn calls +// CurrentOsStackTraceExceptTop(1), Foo() will be included in the +// trace but Bar() and CurrentOsStackTraceExceptTop() won't. +String UnitTestImpl::CurrentOsStackTraceExceptTop(int skip_count) { + (void)skip_count; + return String(""); +} + +static TimeInMillis GetTimeInMillis() { +#ifdef _WIN32_WCE // We are on Windows CE + // Difference between 1970-01-01 and 1601-01-01 in miliseconds. + // http://analogous.blogspot.com/2005/04/epoch.html + const TimeInMillis kJavaEpochToWinFileTimeDelta = 11644473600000UL; + const DWORD kTenthMicrosInMilliSecond = 10000; + + SYSTEMTIME now_systime; + FILETIME now_filetime; + ULARGE_INTEGER now_int64; + // TODO(kenton@google.com): Shouldn't this just use + // GetSystemTimeAsFileTime()? + GetSystemTime(&now_systime); + if (SystemTimeToFileTime(&now_systime, &now_filetime)) { + now_int64.LowPart = now_filetime.dwLowDateTime; + now_int64.HighPart = now_filetime.dwHighDateTime; + now_int64.QuadPart = (now_int64.QuadPart / kTenthMicrosInMilliSecond) - + kJavaEpochToWinFileTimeDelta; + return now_int64.QuadPart; + } + return 0; +#elif defined(_WIN32) && !defined(GTEST_HAS_GETTIMEOFDAY) + __timeb64 now; +#ifdef _MSC_VER + // MSVC 8 deprecates _ftime64(), so we want to suppress warning 4996 + // (deprecated function) there. + // TODO(kenton@google.com): Use GetTickCount()? Or use + // SystemTimeToFileTime() +#pragma warning(push) // Saves the current warning state. +#pragma warning(disable:4996) // Temporarily disables warning 4996. + _ftime64(&now); +#pragma warning(pop) // Restores the warning state. +#else + _ftime64(&now); +#endif // _MSC_VER + return static_cast<TimeInMillis>(now.time) * 1000 + now.millitm; +#elif defined(GTEST_HAS_GETTIMEOFDAY) + struct timeval now; + gettimeofday(&now, NULL); + return static_cast<TimeInMillis>(now.tv_sec) * 1000 + now.tv_usec / 1000; +#else +#error "Don't know how to get the current time on your system." 
+#endif +} + +// Utilities + +// class String + +// Returns the input enclosed in double quotes if it's not NULL; +// otherwise returns "(null)". For example, "\"Hello\"" is returned +// for input "Hello". +// +// This is useful for printing a C string in the syntax of a literal. +// +// Known issue: escape sequences are not handled yet. +String String::ShowCStringQuoted(const char* c_str) { + return c_str ? String::Format("\"%s\"", c_str) : String("(null)"); +} + +// Copies at most length characters from str into a newly-allocated +// piece of memory of size length+1. The memory is allocated with new[]. +// A terminating null byte is written to the memory, and a pointer to it +// is returned. If str is NULL, NULL is returned. +static char* CloneString(const char* str, size_t length) { + if (str == NULL) { + return NULL; + } else { + char* const clone = new char[length + 1]; + // MSVC 8 deprecates strncpy(), so we want to suppress warning + // 4996 (deprecated function) there. +#ifdef GTEST_OS_WINDOWS // We are on Windows. +#pragma warning(push) // Saves the current warning state. +#pragma warning(disable:4996) // Temporarily disables warning 4996. + strncpy(clone, str, length); +#pragma warning(pop) // Restores the warning state. +#else // We are on Linux or Mac OS. + strncpy(clone, str, length); +#endif // GTEST_OS_WINDOWS + clone[length] = '\0'; + return clone; + } +} + +// Clones a 0-terminated C string, allocating memory using new. The +// caller is responsible for deleting[] the return value. Returns the +// cloned string, or NULL if the input is NULL. +const char * String::CloneCString(const char* c_str) { + return (c_str == NULL) ? + NULL : CloneString(c_str, strlen(c_str)); +} + +// Compares two C strings. Returns true iff they have the same content. +// +// Unlike strcmp(), this function can handle NULL argument(s). A NULL +// C string is considered different to any non-NULL C string, +// including the empty string. 
+bool String::CStringEquals(const char * lhs, const char * rhs) { + if ( lhs == NULL ) return rhs == NULL; + + if ( rhs == NULL ) return false; + + return strcmp(lhs, rhs) == 0; +} + +#if GTEST_HAS_STD_WSTRING || GTEST_HAS_GLOBAL_WSTRING + +// Converts an array of wide chars to a narrow string using the UTF-8 +// encoding, and streams the result to the given Message object. +static void StreamWideCharsToMessage(const wchar_t* wstr, size_t len, + Message* msg) { + for (size_t i = 0; i != len; i++) { + // TODO(wan): consider allowing a testing::String object to + // contain '\0'. This will make it behave more like std::string, + // and will allow ToUtf8String() to return the correct encoding + // for '\0' s.t. we can get rid of the conditional here (and in + // several other places). + if (wstr[i]) { + *msg << internal::ToUtf8String(wstr[i]); + } else { + *msg << '\0'; + } + } +} + +#endif // GTEST_HAS_STD_WSTRING || GTEST_HAS_GLOBAL_WSTRING + +} // namespace internal + +#if GTEST_HAS_STD_WSTRING +// Converts the given wide string to a narrow string using the UTF-8 +// encoding, and streams the result to this Message object. +Message& Message::operator <<(const ::std::wstring& wstr) { + internal::StreamWideCharsToMessage(wstr.c_str(), wstr.length(), this); + return *this; +} +#endif // GTEST_HAS_STD_WSTRING + +#if GTEST_HAS_GLOBAL_WSTRING +// Converts the given wide string to a narrow string using the UTF-8 +// encoding, and streams the result to this Message object. +Message& Message::operator <<(const ::wstring& wstr) { + internal::StreamWideCharsToMessage(wstr.c_str(), wstr.length(), this); + return *this; +} +#endif // GTEST_HAS_GLOBAL_WSTRING + +namespace internal { + +// Formats a value to be used in a failure message. + +// For a char value, we print it as a C++ char literal and as an +// unsigned integer (both in decimal and in hexadecimal). 
+String FormatForFailureMessage(char ch) { + const unsigned int ch_as_uint = ch; + // A String object cannot contain '\0', so we print "\\0" when ch is + // '\0'. + return String::Format("'%s' (%u, 0x%X)", + ch ? String::Format("%c", ch).c_str() : "\\0", + ch_as_uint, ch_as_uint); +} + +// For a wchar_t value, we print it as a C++ wchar_t literal and as an +// unsigned integer (both in decimal and in hexidecimal). +String FormatForFailureMessage(wchar_t wchar) { + // The C++ standard doesn't specify the exact size of the wchar_t + // type. It just says that it shall have the same size as another + // integral type, called its underlying type. + // + // Therefore, in order to print a wchar_t value in the numeric form, + // we first convert it to the largest integral type (UInt64) and + // then print the converted value. + // + // We use streaming to print the value as "%llu" doesn't work + // correctly with MSVC 7.1. + const UInt64 wchar_as_uint64 = wchar; + Message msg; + // A String object cannot contain '\0', so we print "\\0" when wchar is + // L'\0'. + msg << "L'" << (wchar ? ToUtf8String(wchar).c_str() : "\\0") << "' (" + << wchar_as_uint64 << ", 0x" << ::std::setbase(16) + << wchar_as_uint64 << ")"; + return msg.GetString(); +} + +} // namespace internal + +// AssertionResult constructor. +AssertionResult::AssertionResult(const internal::String& failure_message) + : failure_message_(failure_message) { +} + + +// Makes a successful assertion result. +AssertionResult AssertionSuccess() { + return AssertionResult(); +} + + +// Makes a failed assertion result with the given failure message. +AssertionResult AssertionFailure(const Message& message) { + return AssertionResult(message.GetString()); +} + +namespace internal { + +// Constructs and returns the message for an equality assertion +// (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure. +// +// The first four parameters are the expressions used in the assertion +// and their values, as strings. 
For example, for ASSERT_EQ(foo, bar) +// where foo is 5 and bar is 6, we have: +// +// expected_expression: "foo" +// actual_expression: "bar" +// expected_value: "5" +// actual_value: "6" +// +// The ignoring_case parameter is true iff the assertion is a +// *_STRCASEEQ*. When it's true, the string " (ignoring case)" will +// be inserted into the message. +AssertionResult EqFailure(const char* expected_expression, + const char* actual_expression, + const String& expected_value, + const String& actual_value, + bool ignoring_case) { + Message msg; + msg << "Value of: " << actual_expression; + if (actual_value != actual_expression) { + msg << "\n Actual: " << actual_value; + } + + msg << "\nExpected: " << expected_expression; + if (ignoring_case) { + msg << " (ignoring case)"; + } + if (expected_value != expected_expression) { + msg << "\nWhich is: " << expected_value; + } + + return AssertionFailure(msg); +} + + +// Helper function for implementing ASSERT_NEAR. +AssertionResult DoubleNearPredFormat(const char* expr1, + const char* expr2, + const char* abs_error_expr, + double val1, + double val2, + double abs_error) { + const double diff = fabs(val1 - val2); + if (diff <= abs_error) return AssertionSuccess(); + + // TODO(wan): do not print the value of an expression if it's + // already a literal. + Message msg; + msg << "The difference between " << expr1 << " and " << expr2 + << " is " << diff << ", which exceeds " << abs_error_expr << ", where\n" + << expr1 << " evaluates to " << val1 << ",\n" + << expr2 << " evaluates to " << val2 << ", and\n" + << abs_error_expr << " evaluates to " << abs_error << "."; + return AssertionFailure(msg); +} + + +// Helper template for implementing FloatLE() and DoubleLE(). 
+template <typename RawType> +AssertionResult FloatingPointLE(const char* expr1, + const char* expr2, + RawType val1, + RawType val2) { + // Returns success if val1 is less than val2, + if (val1 < val2) { + return AssertionSuccess(); + } + + // or if val1 is almost equal to val2. + const FloatingPoint<RawType> lhs(val1), rhs(val2); + if (lhs.AlmostEquals(rhs)) { + return AssertionSuccess(); + } + + // Note that the above two checks will both fail if either val1 or + // val2 is NaN, as the IEEE floating-point standard requires that + // any predicate involving a NaN must return false. + + StrStream val1_ss; + val1_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2) + << val1; + + StrStream val2_ss; + val2_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2) + << val2; + + Message msg; + msg << "Expected: (" << expr1 << ") <= (" << expr2 << ")\n" + << " Actual: " << StrStreamToString(&val1_ss) << " vs " + << StrStreamToString(&val2_ss); + + return AssertionFailure(msg); +} + +} // namespace internal + +// Asserts that val1 is less than, or almost equal to, val2. Fails +// otherwise. In particular, it fails if either val1 or val2 is NaN. +AssertionResult FloatLE(const char* expr1, const char* expr2, + float val1, float val2) { + return internal::FloatingPointLE<float>(expr1, expr2, val1, val2); +} + +// Asserts that val1 is less than, or almost equal to, val2. Fails +// otherwise. In particular, it fails if either val1 or val2 is NaN. +AssertionResult DoubleLE(const char* expr1, const char* expr2, + double val1, double val2) { + return internal::FloatingPointLE<double>(expr1, expr2, val1, val2); +} + +namespace internal { + +// The helper function for {ASSERT|EXPECT}_EQ with int or enum +// arguments. 
+AssertionResult CmpHelperEQ(const char* expected_expression, + const char* actual_expression, + BiggestInt expected, + BiggestInt actual) { + if (expected == actual) { + return AssertionSuccess(); + } + + return EqFailure(expected_expression, + actual_expression, + FormatForComparisonFailureMessage(expected, actual), + FormatForComparisonFailureMessage(actual, expected), + false); +} + +// A macro for implementing the helper functions needed to implement +// ASSERT_?? and EXPECT_?? with integer or enum arguments. It is here +// just to avoid copy-and-paste of similar code. +#define GTEST_IMPL_CMP_HELPER(op_name, op)\ +AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \ + BiggestInt val1, BiggestInt val2) {\ + if (val1 op val2) {\ + return AssertionSuccess();\ + } else {\ + Message msg;\ + msg << "Expected: (" << expr1 << ") " #op " (" << expr2\ + << "), actual: " << FormatForComparisonFailureMessage(val1, val2)\ + << " vs " << FormatForComparisonFailureMessage(val2, val1);\ + return AssertionFailure(msg);\ + }\ +} + +// Implements the helper function for {ASSERT|EXPECT}_NE with int or +// enum arguments. +GTEST_IMPL_CMP_HELPER(NE, !=) +// Implements the helper function for {ASSERT|EXPECT}_LE with int or +// enum arguments. +GTEST_IMPL_CMP_HELPER(LE, <=) +// Implements the helper function for {ASSERT|EXPECT}_LT with int or +// enum arguments. +GTEST_IMPL_CMP_HELPER(LT, < ) +// Implements the helper function for {ASSERT|EXPECT}_GE with int or +// enum arguments. +GTEST_IMPL_CMP_HELPER(GE, >=) +// Implements the helper function for {ASSERT|EXPECT}_GT with int or +// enum arguments. +GTEST_IMPL_CMP_HELPER(GT, > ) + +#undef GTEST_IMPL_CMP_HELPER + +// The helper function for {ASSERT|EXPECT}_STREQ. 
+AssertionResult CmpHelperSTREQ(const char* expected_expression, + const char* actual_expression, + const char* expected, + const char* actual) { + if (String::CStringEquals(expected, actual)) { + return AssertionSuccess(); + } + + return EqFailure(expected_expression, + actual_expression, + String::ShowCStringQuoted(expected), + String::ShowCStringQuoted(actual), + false); +} + +// The helper function for {ASSERT|EXPECT}_STRCASEEQ. +AssertionResult CmpHelperSTRCASEEQ(const char* expected_expression, + const char* actual_expression, + const char* expected, + const char* actual) { + if (String::CaseInsensitiveCStringEquals(expected, actual)) { + return AssertionSuccess(); + } + + return EqFailure(expected_expression, + actual_expression, + String::ShowCStringQuoted(expected), + String::ShowCStringQuoted(actual), + true); +} + +// The helper function for {ASSERT|EXPECT}_STRNE. +AssertionResult CmpHelperSTRNE(const char* s1_expression, + const char* s2_expression, + const char* s1, + const char* s2) { + if (!String::CStringEquals(s1, s2)) { + return AssertionSuccess(); + } else { + Message msg; + msg << "Expected: (" << s1_expression << ") != (" + << s2_expression << "), actual: \"" + << s1 << "\" vs \"" << s2 << "\""; + return AssertionFailure(msg); + } +} + +// The helper function for {ASSERT|EXPECT}_STRCASENE. +AssertionResult CmpHelperSTRCASENE(const char* s1_expression, + const char* s2_expression, + const char* s1, + const char* s2) { + if (!String::CaseInsensitiveCStringEquals(s1, s2)) { + return AssertionSuccess(); + } else { + Message msg; + msg << "Expected: (" << s1_expression << ") != (" + << s2_expression << ") (ignoring case), actual: \"" + << s1 << "\" vs \"" << s2 << "\""; + return AssertionFailure(msg); + } +} + +} // namespace internal + +namespace { + +// Helper functions for implementing IsSubString() and IsNotSubstring(). + +// This group of overloaded functions return true iff needle is a +// substring of haystack. 
NULL is considered a substring of itself +// only. + +bool IsSubstringPred(const char* needle, const char* haystack) { + if (needle == NULL || haystack == NULL) + return needle == haystack; + + return strstr(haystack, needle) != NULL; +} + +bool IsSubstringPred(const wchar_t* needle, const wchar_t* haystack) { + if (needle == NULL || haystack == NULL) + return needle == haystack; + + return wcsstr(haystack, needle) != NULL; +} + +// StringType here can be either ::std::string or ::std::wstring. +template <typename StringType> +bool IsSubstringPred(const StringType& needle, + const StringType& haystack) { + return haystack.find(needle) != StringType::npos; +} + +// This function implements either IsSubstring() or IsNotSubstring(), +// depending on the value of the expected_to_be_substring parameter. +// StringType here can be const char*, const wchar_t*, ::std::string, +// or ::std::wstring. +template <typename StringType> +AssertionResult IsSubstringImpl( + bool expected_to_be_substring, + const char* needle_expr, const char* haystack_expr, + const StringType& needle, const StringType& haystack) { + if (IsSubstringPred(needle, haystack) == expected_to_be_substring) + return AssertionSuccess(); + + const bool is_wide_string = sizeof(needle[0]) > 1; + const char* const begin_string_quote = is_wide_string ? "L\"" : "\""; + return AssertionFailure( + Message() + << "Value of: " << needle_expr << "\n" + << " Actual: " << begin_string_quote << needle << "\"\n" + << "Expected: " << (expected_to_be_substring ? "" : "not ") + << "a substring of " << haystack_expr << "\n" + << "Which is: " << begin_string_quote << haystack << "\""); +} + +} // namespace + +// IsSubstring() and IsNotSubstring() check whether needle is a +// substring of haystack (NULL is considered a substring of itself +// only), and return an appropriate error message when they fail. 
+ +AssertionResult IsSubstring( + const char* needle_expr, const char* haystack_expr, + const char* needle, const char* haystack) { + return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack); +} + +AssertionResult IsSubstring( + const char* needle_expr, const char* haystack_expr, + const wchar_t* needle, const wchar_t* haystack) { + return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack); +} + +AssertionResult IsNotSubstring( + const char* needle_expr, const char* haystack_expr, + const char* needle, const char* haystack) { + return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack); +} + +AssertionResult IsNotSubstring( + const char* needle_expr, const char* haystack_expr, + const wchar_t* needle, const wchar_t* haystack) { + return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack); +} + +#if GTEST_HAS_STD_STRING +AssertionResult IsSubstring( + const char* needle_expr, const char* haystack_expr, + const ::std::string& needle, const ::std::string& haystack) { + return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack); +} + +AssertionResult IsNotSubstring( + const char* needle_expr, const char* haystack_expr, + const ::std::string& needle, const ::std::string& haystack) { + return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack); +} +#endif // GTEST_HAS_STD_STRING + +#if GTEST_HAS_STD_WSTRING +AssertionResult IsSubstring( + const char* needle_expr, const char* haystack_expr, + const ::std::wstring& needle, const ::std::wstring& haystack) { + return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack); +} + +AssertionResult IsNotSubstring( + const char* needle_expr, const char* haystack_expr, + const ::std::wstring& needle, const ::std::wstring& haystack) { + return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack); +} +#endif // GTEST_HAS_STD_WSTRING + +namespace internal { + +#ifdef GTEST_OS_WINDOWS + +namespace { + +// 
Helper function for IsHRESULT{SuccessFailure} predicates +AssertionResult HRESULTFailureHelper(const char* expr, + const char* expected, + long hr) { // NOLINT +#ifdef _WIN32_WCE + // Windows CE doesn't support FormatMessage. + const char error_text[] = ""; +#else + // Looks up the human-readable system message for the HRESULT code + // and since we're not passing any params to FormatMessage, we don't + // want inserts expanded. + const DWORD kFlags = FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS; + const DWORD kBufSize = 4096; // String::Format can't exceed this length. + // Gets the system's human readable message string for this HRESULT. + char error_text[kBufSize] = { '\0' }; + DWORD message_length = ::FormatMessageA(kFlags, + 0, // no source, we're asking system + hr, // the error + 0, // no line width restrictions + error_text, // output buffer + kBufSize, // buf size + NULL); // no arguments for inserts + // Trims tailing white space (FormatMessage leaves a trailing cr-lf) + for (; message_length && isspace(error_text[message_length - 1]); + --message_length) { + error_text[message_length - 1] = '\0'; + } +#endif // _WIN32_WCE + + const String error_hex(String::Format("0x%08X ", hr)); + Message msg; + msg << "Expected: " << expr << " " << expected << ".\n" + << " Actual: " << error_hex << error_text << "\n"; + + return ::testing::AssertionFailure(msg); +} + +} // namespace + +AssertionResult IsHRESULTSuccess(const char* expr, long hr) { // NOLINT + if (SUCCEEDED(hr)) { + return AssertionSuccess(); + } + return HRESULTFailureHelper(expr, "succeeds", hr); +} + +AssertionResult IsHRESULTFailure(const char* expr, long hr) { // NOLINT + if (FAILED(hr)) { + return AssertionSuccess(); + } + return HRESULTFailureHelper(expr, "fails", hr); +} + +#endif // GTEST_OS_WINDOWS + +// Utility functions for encoding Unicode text (wide strings) in +// UTF-8. 
+ +// A Unicode code-point can have upto 21 bits, and is encoded in UTF-8 +// like this: +// +// Code-point length Encoding +// 0 - 7 bits 0xxxxxxx +// 8 - 11 bits 110xxxxx 10xxxxxx +// 12 - 16 bits 1110xxxx 10xxxxxx 10xxxxxx +// 17 - 21 bits 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + +// The maximum code-point a one-byte UTF-8 sequence can represent. +const UInt32 kMaxCodePoint1 = (static_cast<UInt32>(1) << 7) - 1; + +// The maximum code-point a two-byte UTF-8 sequence can represent. +const UInt32 kMaxCodePoint2 = (static_cast<UInt32>(1) << (5 + 6)) - 1; + +// The maximum code-point a three-byte UTF-8 sequence can represent. +const UInt32 kMaxCodePoint3 = (static_cast<UInt32>(1) << (4 + 2*6)) - 1; + +// The maximum code-point a four-byte UTF-8 sequence can represent. +const UInt32 kMaxCodePoint4 = (static_cast<UInt32>(1) << (3 + 3*6)) - 1; + +// Chops off the n lowest bits from a bit pattern. Returns the n +// lowest bits. As a side effect, the original bit pattern will be +// shifted to the right by n bits. +inline UInt32 ChopLowBits(UInt32* bits, int n) { + const UInt32 low_bits = *bits & ((static_cast<UInt32>(1) << n) - 1); + *bits >>= n; + return low_bits; +} + +// Converts a Unicode code-point to its UTF-8 encoding. +String ToUtf8String(wchar_t wchar) { + char str[5] = {}; // Initializes str to all '\0' characters. 
+ + UInt32 code = static_cast<UInt32>(wchar); + if (code <= kMaxCodePoint1) { + str[0] = static_cast<char>(code); // 0xxxxxxx + } else if (code <= kMaxCodePoint2) { + str[1] = static_cast<char>(0x80 | ChopLowBits(&code, 6)); // 10xxxxxx + str[0] = static_cast<char>(0xC0 | code); // 110xxxxx + } else if (code <= kMaxCodePoint3) { + str[2] = static_cast<char>(0x80 | ChopLowBits(&code, 6)); // 10xxxxxx + str[1] = static_cast<char>(0x80 | ChopLowBits(&code, 6)); // 10xxxxxx + str[0] = static_cast<char>(0xE0 | code); // 1110xxxx + } else if (code <= kMaxCodePoint4) { + str[3] = static_cast<char>(0x80 | ChopLowBits(&code, 6)); // 10xxxxxx + str[2] = static_cast<char>(0x80 | ChopLowBits(&code, 6)); // 10xxxxxx + str[1] = static_cast<char>(0x80 | ChopLowBits(&code, 6)); // 10xxxxxx + str[0] = static_cast<char>(0xF0 | code); // 11110xxx + } else { + return String::Format("(Invalid Unicode 0x%llX)", + static_cast<UInt64>(wchar)); + } + + return String(str); +} + +// Converts a wide C string to a String using the UTF-8 encoding. +// NULL will be converted to "(null)". +String String::ShowWideCString(const wchar_t * wide_c_str) { + if (wide_c_str == NULL) return String("(null)"); + + StrStream ss; + while (*wide_c_str) { + ss << internal::ToUtf8String(*wide_c_str++); + } + + return internal::StrStreamToString(&ss); +} + +// Similar to ShowWideCString(), except that this function encloses +// the converted string in double quotes. +String String::ShowWideCStringQuoted(const wchar_t* wide_c_str) { + if (wide_c_str == NULL) return String("(null)"); + + return String::Format("L\"%s\"", + String::ShowWideCString(wide_c_str).c_str()); +} + +// Compares two wide C strings. Returns true iff they have the same +// content. +// +// Unlike wcscmp(), this function can handle NULL argument(s). A NULL +// C string is considered different to any non-NULL C string, +// including the empty string. 
+bool String::WideCStringEquals(const wchar_t * lhs, const wchar_t * rhs) { + if (lhs == NULL) return rhs == NULL; + + if (rhs == NULL) return false; + + return wcscmp(lhs, rhs) == 0; +} + +// Helper function for *_STREQ on wide strings. +AssertionResult CmpHelperSTREQ(const char* expected_expression, + const char* actual_expression, + const wchar_t* expected, + const wchar_t* actual) { + if (String::WideCStringEquals(expected, actual)) { + return AssertionSuccess(); + } + + return EqFailure(expected_expression, + actual_expression, + String::ShowWideCStringQuoted(expected), + String::ShowWideCStringQuoted(actual), + false); +} + +// Helper function for *_STRNE on wide strings. +AssertionResult CmpHelperSTRNE(const char* s1_expression, + const char* s2_expression, + const wchar_t* s1, + const wchar_t* s2) { + if (!String::WideCStringEquals(s1, s2)) { + return AssertionSuccess(); + } + + Message msg; + msg << "Expected: (" << s1_expression << ") != (" + << s2_expression << "), actual: " + << String::ShowWideCStringQuoted(s1) + << " vs " << String::ShowWideCStringQuoted(s2); + return AssertionFailure(msg); +} + +// Compares two C strings, ignoring case. Returns true iff they have +// the same content. +// +// Unlike strcasecmp(), this function can handle NULL argument(s). A +// NULL C string is considered different to any non-NULL C string, +// including the empty string. +bool String::CaseInsensitiveCStringEquals(const char * lhs, const char * rhs) { + if ( lhs == NULL ) return rhs == NULL; + + if ( rhs == NULL ) return false; + +#ifdef GTEST_OS_WINDOWS + return _stricmp(lhs, rhs) == 0; +#else // GTEST_OS_WINDOWS + return strcasecmp(lhs, rhs) == 0; +#endif // GTEST_OS_WINDOWS +} + +// Constructs a String by copying a given number of chars from a +// buffer. E.g. String("hello", 3) will create the string "hel". 
+String::String(const char * buffer, size_t len) { + char * const temp = new char[ len + 1 ]; + memcpy(temp, buffer, len); + temp[ len ] = '\0'; + c_str_ = temp; +} + +// Compares this with another String. +// Returns < 0 if this is less than rhs, 0 if this is equal to rhs, or > 0 +// if this is greater than rhs. +int String::Compare(const String & rhs) const { + if ( c_str_ == NULL ) { + return rhs.c_str_ == NULL ? 0 : -1; // NULL < anything except NULL + } + + return rhs.c_str_ == NULL ? 1 : strcmp(c_str_, rhs.c_str_); +} + +// Returns true iff this String ends with the given suffix. *Any* +// String is considered to end with a NULL or empty suffix. +bool String::EndsWith(const char* suffix) const { + if (suffix == NULL || CStringEquals(suffix, "")) return true; + + if (c_str_ == NULL) return false; + + const size_t this_len = strlen(c_str_); + const size_t suffix_len = strlen(suffix); + return (this_len >= suffix_len) && + CStringEquals(c_str_ + this_len - suffix_len, suffix); +} + +// Returns true iff this String ends with the given suffix, ignoring case. +// Any String is considered to end with a NULL or empty suffix. +bool String::EndsWithCaseInsensitive(const char* suffix) const { + if (suffix == NULL || CStringEquals(suffix, "")) return true; + + if (c_str_ == NULL) return false; + + const size_t this_len = strlen(c_str_); + const size_t suffix_len = strlen(suffix); + return (this_len >= suffix_len) && + CaseInsensitiveCStringEquals(c_str_ + this_len - suffix_len, suffix); +} + +// Sets the 0-terminated C string this String object represents. The +// old string in this object is deleted, and this object will own a +// clone of the input string. This function copies only up to length +// bytes (plus a terminating null byte), or until the first null byte, +// whichever comes first. +// +// This function works even when the c_str parameter has the same +// value as that of the c_str_ field. 
+void String::Set(const char * c_str, size_t length) { + // Makes sure this works when c_str == c_str_ + const char* const temp = CloneString(c_str, length); + delete[] c_str_; + c_str_ = temp; +} + +// Assigns a C string to this object. Self-assignment works. +const String& String::operator=(const char* c_str) { + // Makes sure this works when c_str == c_str_ + if (c_str != c_str_) { + delete[] c_str_; + c_str_ = CloneCString(c_str); + } + return *this; +} + +// Formats a list of arguments to a String, using the same format +// spec string as for printf. +// +// We do not use the StringPrintf class as it is not universally +// available. +// +// The result is limited to 4096 characters (including the tailing 0). +// If 4096 characters are not enough to format the input, +// "<buffer exceeded>" is returned. +String String::Format(const char * format, ...) { + va_list args; + va_start(args, format); + + char buffer[4096]; + // MSVC 8 deprecates vsnprintf(), so we want to suppress warning + // 4996 (deprecated function) there. +#ifdef GTEST_OS_WINDOWS // We are on Windows. +#pragma warning(push) // Saves the current warning state. +#pragma warning(disable:4996) // Temporarily disables warning 4996. + const int size = + vsnprintf(buffer, sizeof(buffer)/sizeof(buffer[0]) - 1, format, args); +#pragma warning(pop) // Restores the warning state. +#else // We are on Linux or Mac OS. + const int size = + vsnprintf(buffer, sizeof(buffer)/sizeof(buffer[0]) - 1, format, args); +#endif // GTEST_OS_WINDOWS + va_end(args); + + return String(size >= 0 ? buffer : "<buffer exceeded>"); +} + +// Converts the buffer in a StrStream to a String, converting NUL +// bytes to "\\0" along the way. 
+String StrStreamToString(StrStream* ss) { +#if GTEST_HAS_STD_STRING + const ::std::string& str = ss->str(); + const char* const start = str.c_str(); + const char* const end = start + str.length(); +#else + const char* const start = ss->str(); + const char* const end = start + ss->pcount(); +#endif // GTEST_HAS_STD_STRING + + // We need to use a helper StrStream to do this transformation + // because String doesn't support push_back(). + StrStream helper; + for (const char* ch = start; ch != end; ++ch) { + if (*ch == '\0') { + helper << "\\0"; // Replaces NUL with "\\0"; + } else { + helper.put(*ch); + } + } + +#if GTEST_HAS_STD_STRING + return String(helper.str().c_str()); +#else + const String str(helper.str(), helper.pcount()); + helper.freeze(false); + ss->freeze(false); + return str; +#endif // GTEST_HAS_STD_STRING +} + +// Appends the user-supplied message to the Google-Test-generated message. +String AppendUserMessage(const String& gtest_msg, + const Message& user_msg) { + // Appends the user message if it's non-empty. + const String user_msg_string = user_msg.GetString(); + if (user_msg_string.empty()) { + return gtest_msg; + } + + Message msg; + msg << gtest_msg << "\n" << user_msg_string; + + return msg.GetString(); +} + +} // namespace internal + +// Prints a TestPartResult object. +std::ostream& operator<<(std::ostream& os, const TestPartResult& result) { + return os << result.file_name() << ":" + << result.line_number() << ": " + << (result.type() == TPRT_SUCCESS ? "Success" : + result.type() == TPRT_FATAL_FAILURE ? "Fatal failure" : + "Non-fatal failure") << ":\n" + << result.message() << std::endl; +} + +namespace internal { +// class TestResult + +// Creates an empty TestResult. +TestResult::TestResult() + : death_test_count_(0), + elapsed_time_(0) { +} + +// D'tor. +TestResult::~TestResult() { +} + +// Adds a test part result to the list. 
+void TestResult::AddTestPartResult(const TestPartResult& test_part_result) { + test_part_results_.PushBack(test_part_result); +} + +// Adds a test property to the list. If a property with the same key as the +// supplied property is already represented, the value of this test_property +// replaces the old value for that key. +void TestResult::RecordProperty(const TestProperty& test_property) { + if (!ValidateTestProperty(test_property)) { + return; + } + MutexLock lock(&test_properites_mutex_); + ListNode<TestProperty>* const node_with_matching_key = + test_properties_.FindIf(TestPropertyKeyIs(test_property.key())); + if (node_with_matching_key == NULL) { + test_properties_.PushBack(test_property); + return; + } + TestProperty& property_with_matching_key = node_with_matching_key->element(); + property_with_matching_key.SetValue(test_property.value()); +} + +// Adds a failure if the key is a reserved attribute of Google Test testcase tags. +// Returns true if the property is valid. +bool TestResult::ValidateTestProperty(const TestProperty& test_property) { + String key(test_property.key()); + if (key == "name" || key == "status" || key == "time" || key == "classname") { + ADD_FAILURE() + << "Reserved key used in RecordProperty(): " + << key + << " ('name', 'status', 'time', and 'classname' are reserved by " + << GTEST_NAME << ")"; + return false; + } + return true; +} + +// Clears the object. +void TestResult::Clear() { + test_part_results_.Clear(); + test_properties_.Clear(); + death_test_count_ = 0; + elapsed_time_ = 0; +} + +// Returns true iff the test part passed. +static bool TestPartPassed(const TestPartResult & result) { + return result.passed(); +} + +// Gets the number of successful test parts. +int TestResult::successful_part_count() const { + return test_part_results_.CountIf(TestPartPassed); +} + +// Returns true iff the test part failed. 
+static bool TestPartFailed(const TestPartResult & result) { + return result.failed(); +} + +// Gets the number of failed test parts. +int TestResult::failed_part_count() const { + return test_part_results_.CountIf(TestPartFailed); +} + +// Returns true iff the test part fatally failed. +static bool TestPartFatallyFailed(const TestPartResult & result) { + return result.fatally_failed(); +} + +// Returns true iff the test fatally failed. +bool TestResult::HasFatalFailure() const { + return test_part_results_.CountIf(TestPartFatallyFailed) > 0; +} + +// Gets the number of all test parts. This is the sum of the number +// of successful test parts and the number of failed test parts. +int TestResult::total_part_count() const { + return test_part_results_.size(); +} + +} // namespace internal + +// class Test + +// Creates a Test object. + +// The c'tor saves the values of all Google Test flags. +Test::Test() + : gtest_flag_saver_(new internal::GTestFlagSaver) { +} + +// The d'tor restores the values of all Google Test flags. +Test::~Test() { + delete gtest_flag_saver_; +} + +// Sets up the test fixture. +// +// A sub-class may override this. +void Test::SetUp() { +} + +// Tears down the test fixture. +// +// A sub-class may override this. +void Test::TearDown() { +} + +// Allows user supplied key value pairs to be recorded for later output. +void Test::RecordProperty(const char* key, const char* value) { + UnitTest::GetInstance()->RecordPropertyForCurrentTest(key, value); +} + +// Allows user supplied key value pairs to be recorded for later output. +void Test::RecordProperty(const char* key, int value) { + Message value_message; + value_message << value; + RecordProperty(key, value_message.GetString().c_str()); +} + +#ifdef GTEST_OS_WINDOWS +// We are on Windows. + +// Adds an "exception thrown" fatal failure to the current test. 
// exception_code is the structured-exception code to report (printed
// in hexadecimal); location is a short description of where it was
// caught, e.g. "SetUp()".
static void AddExceptionThrownFailure(DWORD exception_code,
                                      const char* location) {
  Message message;
  message << "Exception thrown with code 0x" << std::setbase(16) <<
    exception_code << std::setbase(10) << " in " << location << ".";

  UnitTest* const unit_test = UnitTest::GetInstance();
  unit_test->AddTestPartResult(
      TPRT_FATAL_FAILURE,
      static_cast<const char *>(NULL),
           // We have no info about the source file where the exception
           // occurred.
      -1,  // We have no info on which line caused the exception.
      message.GetString(),
      internal::String(""));
}

#endif  // GTEST_OS_WINDOWS

// Google Test requires all tests in the same test case to use the same test
// fixture class.  This function checks if the current test has the
// same fixture class as the first test in the current test case.  If
// yes, it returns true; otherwise it generates a Google Test failure and
// returns false.
bool Test::HasSameFixtureClass() {
  internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
  const TestCase* const test_case = impl->current_test_case();

  // Info about the first test in the current test case.
  const internal::TestInfoImpl* const first_test_info =
      test_case->test_info_list().Head()->element()->impl();
  const internal::TypeId first_fixture_id = first_test_info->fixture_class_id();
  const char* const first_test_name = first_test_info->name();

  // Info about the current test.
  const internal::TestInfoImpl* const this_test_info =
      impl->current_test_info()->impl();
  const internal::TypeId this_fixture_id = this_test_info->fixture_class_id();
  const char* const this_test_name = this_test_info->name();

  if (this_fixture_id != first_fixture_id) {
    // A test whose fixture ID is that of the Test base class itself is
    // treated as having been defined with TEST rather than TEST_F.

    // Is the first test defined using TEST?
    const bool first_is_TEST = first_fixture_id == internal::GetTypeId<Test>();
    // Is this test defined using TEST?
    const bool this_is_TEST = this_fixture_id == internal::GetTypeId<Test>();

    if (first_is_TEST || this_is_TEST) {
      // The user mixed TEST and TEST_F in this test case - we'll tell
      // him/her how to fix it.

      // Gets the name of the TEST and the name of the TEST_F.  Note
      // that first_is_TEST and this_is_TEST cannot both be true, as
      // the fixture IDs are different for the two tests.
      const char* const TEST_name =
          first_is_TEST ? first_test_name : this_test_name;
      const char* const TEST_F_name =
          first_is_TEST ? this_test_name : first_test_name;

      ADD_FAILURE()
          << "All tests in the same test case must use the same test fixture\n"
          << "class, so mixing TEST_F and TEST in the same test case is\n"
          << "illegal. In test case " << this_test_info->test_case_name()
          << ",\n"
          << "test " << TEST_F_name << " is defined using TEST_F but\n"
          << "test " << TEST_name << " is defined using TEST. You probably\n"
          << "want to change the TEST to TEST_F or move it to another test\n"
          << "case.";
    } else {
      // The user defined two fixture classes with the same name in
      // two namespaces - we'll tell him/her how to fix it.
      ADD_FAILURE()
          << "All tests in the same test case must use the same test fixture\n"
          << "class. However, in test case "
          << this_test_info->test_case_name() << ",\n"
          << "you defined test " << first_test_name
          << " and test " << this_test_name << "\n"
          << "using two different test fixture classes. This can happen if\n"
          << "the two classes are from different namespaces or translation\n"
          << "units and have the same name. You should probably rename one\n"
          << "of the classes to put the tests into different test cases.";
    }
    return false;
  }

  return true;
}

// Runs the test and updates the test result.
//
// The sequence is SetUp(), then TestBody() (skipped if a fatal failure
// has been recorded by then), then TearDown() (always).  Nothing is
// run at all if the fixture class check fails.  Before each of the
// three calls the OS stack trace getter is told that control is
// leaving Google Test.
void Test::Run() {
  if (!HasSameFixtureClass()) return;

  internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
#ifdef GTEST_OS_WINDOWS
  // We are on Windows.
  // Each user-code call is wrapped in its own SEH block so that a
  // structured exception is reported as a fatal failure naming the
  // phase in which it occurred.
  impl->os_stack_trace_getter()->UponLeavingGTest();
  __try {
    SetUp();
  } __except(internal::UnitTestOptions::GTestShouldProcessSEH(
      GetExceptionCode())) {
    AddExceptionThrownFailure(GetExceptionCode(), "SetUp()");
  }

  // We will run the test only if SetUp() had no fatal failure.
  if (!HasFatalFailure()) {
    impl->os_stack_trace_getter()->UponLeavingGTest();
    __try {
      TestBody();
    } __except(internal::UnitTestOptions::GTestShouldProcessSEH(
        GetExceptionCode())) {
      AddExceptionThrownFailure(GetExceptionCode(), "the test body");
    }
  }

  // However, we want to clean up as much as possible.  Hence we will
  // always call TearDown(), even if SetUp() or the test body has
  // failed.
  impl->os_stack_trace_getter()->UponLeavingGTest();
  __try {
    TearDown();
  } __except(internal::UnitTestOptions::GTestShouldProcessSEH(
      GetExceptionCode())) {
    AddExceptionThrownFailure(GetExceptionCode(), "TearDown()");
  }

#else  // We are on Linux or Mac - exceptions are disabled.
  impl->os_stack_trace_getter()->UponLeavingGTest();
  SetUp();

  // We will run the test only if SetUp() was successful.
  if (!HasFatalFailure()) {
    impl->os_stack_trace_getter()->UponLeavingGTest();
    TestBody();
  }

  // However, we want to clean up as much as possible.  Hence we will
  // always call TearDown(), even if SetUp() or the test body has
  // failed.
  impl->os_stack_trace_getter()->UponLeavingGTest();
  TearDown();
#endif  // GTEST_OS_WINDOWS
}


// Returns true iff the current test has a fatal failure.
bool Test::HasFatalFailure() {
  return internal::GetUnitTestImpl()->current_test_result()->HasFatalFailure();
}

// class TestInfo

// Constructs a TestInfo object.  All arguments are forwarded, together
// with a pointer to this object, to a newly allocated TestInfoImpl,
// which the destructor deletes.
TestInfo::TestInfo(const char* test_case_name,
                   const char* name,
                   internal::TypeId fixture_class_id,
                   TestMaker maker) {
  impl_ = new internal::TestInfoImpl(this, test_case_name, name,
                                     fixture_class_id, maker);
}

// Destructs a TestInfo object.
TestInfo::~TestInfo() {
  delete impl_;
}

// Creates a TestInfo object and registers it with the UnitTest
// singleton; returns the created object.
//
// Arguments:
//
//   test_case_name:   name of the test case
//   name:             name of the test
//   fixture_class_id: ID of the test fixture class
//   set_up_tc:        pointer to the function that sets up the test case
//   tear_down_tc:     pointer to the function that tears down the test case
//   maker:            pointer to the function that creates a test object
TestInfo* TestInfo::MakeAndRegisterInstance(
    const char* test_case_name,
    const char* name,
    internal::TypeId fixture_class_id,
    Test::SetUpTestCaseFunc set_up_tc,
    Test::TearDownTestCaseFunc tear_down_tc,
    TestMaker maker) {
  TestInfo* const test_info =
      new TestInfo(test_case_name, name, fixture_class_id, maker);
  internal::GetUnitTestImpl()->AddTestInfo(set_up_tc, tear_down_tc, test_info);
  return test_info;
}

// Returns the test case name.
const char* TestInfo::test_case_name() const {
  return impl_->test_case_name();
}

// Returns the test name.
const char* TestInfo::name() const {
  return impl_->name();
}

// Returns true if this test should run.
bool TestInfo::should_run() const { return impl_->should_run(); }

// Returns the result of the test.
const internal::TestResult* TestInfo::result() const { return impl_->result(); }

// Increments the number of death tests encountered in this test so
// far.
int TestInfo::increment_death_test_count() {
  return impl_->result()->increment_death_test_count();
}

namespace {

// A predicate that checks the test name of a TestInfo against a known
// value.
//
// This is used for implementation of the TestCase class only.  We put
// it in the anonymous namespace to prevent polluting the outer
// namespace.
//
// TestNameIs is copyable.
class TestNameIs {
 public:
  // Constructor.
  //
  // TestNameIs has NO default constructor.
  explicit TestNameIs(const char* name)
      : name_(name) {}

  // Returns true iff the test name of test_info matches name_.
  // A NULL test_info never matches.
  bool operator()(const TestInfo * test_info) const {
    return test_info && internal::String(test_info->name()).Compare(name_) == 0;
  }

 private:
  // The test name to compare against.
  internal::String name_;
};

}  // namespace

// Finds and returns a TestInfo with the given name.  If one doesn't
// exist, returns NULL.
TestInfo * TestCase::GetTestInfo(const char* test_name) {
  // Can we find a TestInfo with the given name?
  internal::ListNode<TestInfo *> * const node = test_info_list_->FindIf(
      TestNameIs(test_name));

  // Returns the TestInfo found.
  return node ? node->element() : NULL;
}

namespace internal {

// Creates the test object, runs it, records its result, and then
// deletes it.  Does nothing if the test is not supposed to run.
void TestInfoImpl::Run() {
  if (!should_run_) return;

  // Tells UnitTest where to store test result.
  UnitTestImpl* const impl = internal::GetUnitTestImpl();
  impl->set_current_test_info(parent_);

  // Notifies the unit test event listener that a test is about to
  // start.
  UnitTestEventListenerInterface* const result_printer =
    impl->result_printer();
  result_printer->OnTestStart(parent_);

  const TimeInMillis start = GetTimeInMillis();

  impl->os_stack_trace_getter()->UponLeavingGTest();
#ifdef GTEST_OS_WINDOWS
  // We are on Windows.
  Test* test = NULL;

  __try {
    // Creates the test object.
    test = (*maker_)();
  } __except(internal::UnitTestOptions::GTestShouldProcessSEH(
      GetExceptionCode())) {
    AddExceptionThrownFailure(GetExceptionCode(),
                              "the test fixture's constructor");
    // NOTE(review): this early return skips the elapsed-time update,
    // the OnTestEnd() notification and the reset of the current test
    // info performed at the end of this function - confirm that this
    // is intended.
    return;
  }
#else  // We are on Linux or Mac OS - exceptions are disabled.

  // TODO(wan): If test->Run() throws, test won't be deleted.  This is
  // not a problem now as we don't use exceptions.  If we were to
  // enable exceptions, we should revise the following to be
  // exception-safe.

  // Creates the test object.
  Test* test = (*maker_)();
#endif  // GTEST_OS_WINDOWS

  // Runs the test only if the constructor of the test fixture didn't
  // generate a fatal failure.
  if (!Test::HasFatalFailure()) {
    test->Run();
  }

  // Deletes the test object.
  impl->os_stack_trace_getter()->UponLeavingGTest();
  delete test;
  test = NULL;

  // The recorded time covers constructing, running and deleting the
  // test object.
  result_.set_elapsed_time(GetTimeInMillis() - start);

  // Notifies the unit test event listener that a test has just finished.
  result_printer->OnTestEnd(parent_);

  // Tells UnitTest to stop associating assertion results to this
  // test.
  impl->set_current_test_info(NULL);
}

}  // namespace internal

// class TestCase

// Gets the number of successful tests in this test case.
int TestCase::successful_test_count() const {
  return test_info_list_->CountIf(TestPassed);
}

// Gets the number of failed tests in this test case.
int TestCase::failed_test_count() const {
  return test_info_list_->CountIf(TestFailed);
}

// Gets the number of disabled tests in this test case.
int TestCase::disabled_test_count() const {
  return test_info_list_->CountIf(TestDisabled);
}

// Get the number of tests in this test case that should run.
int TestCase::test_to_run_count() const {
  return test_info_list_->CountIf(ShouldRunTest);
}

// Gets the number of all tests.
int TestCase::total_test_count() const {
  return test_info_list_->size();
}

// Creates a TestCase with the given name.
//
// Arguments:
//
//   name:         name of the test case
//   set_up_tc:    pointer to the function that sets up the test case
//   tear_down_tc: pointer to the function that tears down the test case
TestCase::TestCase(const char* name,
                   Test::SetUpTestCaseFunc set_up_tc,
                   Test::TearDownTestCaseFunc tear_down_tc)
    : name_(name),
      set_up_tc_(set_up_tc),
      tear_down_tc_(tear_down_tc),
      should_run_(false),
      elapsed_time_(0) {
  test_info_list_ = new internal::List<TestInfo *>;
}

// Destructor of TestCase.
TestCase::~TestCase() {
  // Deletes every Test in the collection.
  test_info_list_->ForEach(internal::Delete<TestInfo>);

  // Then deletes the Test collection.
  delete test_info_list_;
  test_info_list_ = NULL;
}

// Adds a test to this test case.  Will delete the test upon
// destruction of the TestCase object.
void TestCase::AddTestInfo(TestInfo * test_info) {
  test_info_list_->PushBack(test_info);
}

// Runs every test in this TestCase.  Does nothing if the test case is
// not supposed to run.
void TestCase::Run() {
  if (!should_run_) return;

  internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
  impl->set_current_test_case(this);

  UnitTestEventListenerInterface * const result_printer =
      impl->result_printer();

  result_printer->OnTestCaseStart(this);
  impl->os_stack_trace_getter()->UponLeavingGTest();
  set_up_tc_();

  // Only the time spent running the tests is recorded; the test case
  // set-up and tear-down calls fall outside the measured interval.
  const internal::TimeInMillis start = internal::GetTimeInMillis();
  test_info_list_->ForEach(internal::TestInfoImpl::RunTest);
  elapsed_time_ = internal::GetTimeInMillis() - start;

  impl->os_stack_trace_getter()->UponLeavingGTest();
  tear_down_tc_();
  result_printer->OnTestCaseEnd(this);
  impl->set_current_test_case(NULL);
}

// Clears the results of all tests in this test case.
void TestCase::ClearResult() {
  test_info_list_->ForEach(internal::TestInfoImpl::ClearTestResult);
}


// class UnitTestEventListenerInterface

// The virtual d'tor.
UnitTestEventListenerInterface::~UnitTestEventListenerInterface() {
}

// A result printer that never prints anything.  Used in the child process
// of an exec-style death test to avoid needless output clutter.
class NullUnitTestResultPrinter : public UnitTestEventListenerInterface {};

// Formats a countable noun.  Depending on its quantity, either the
// singular form or the plural form is used. e.g.
//
// FormatCountableNoun(1, "formula", "formulae") returns "1 formula".
// FormatCountableNoun(5, "book", "books") returns "5 books".
+static internal::String FormatCountableNoun(int count, + const char * singular_form, + const char * plural_form) { + return internal::String::Format("%d %s", count, + count == 1 ? singular_form : plural_form); +} + +// Formats the count of tests. +static internal::String FormatTestCount(int test_count) { + return FormatCountableNoun(test_count, "test", "tests"); +} + +// Formats the count of test cases. +static internal::String FormatTestCaseCount(int test_case_count) { + return FormatCountableNoun(test_case_count, "test case", "test cases"); +} + +// Converts a TestPartResultType enum to human-friendly string +// representation. Both TPRT_NONFATAL_FAILURE and TPRT_FATAL_FAILURE +// are translated to "Failure", as the user usually doesn't care about +// the difference between the two when viewing the test result. +static const char * TestPartResultTypeToString(TestPartResultType type) { + switch (type) { + case TPRT_SUCCESS: + return "Success"; + + case TPRT_NONFATAL_FAILURE: + case TPRT_FATAL_FAILURE: + return "Failure"; + } + + return "Unknown result type"; +} + +// Prints a TestPartResult. +static void PrintTestPartResult( + const TestPartResult & test_part_result) { + const char * const file_name = test_part_result.file_name(); + + printf("%s", file_name == NULL ? "unknown file" : file_name); + if (test_part_result.line_number() >= 0) { + printf(":%d", test_part_result.line_number()); + } + printf(": %s\n", TestPartResultTypeToString(test_part_result.type())); + printf("%s\n", test_part_result.message()); + fflush(stdout); +} + +// class PrettyUnitTestResultPrinter + +namespace internal { + +enum GTestColor { + COLOR_RED, + COLOR_GREEN, + COLOR_YELLOW +}; + +#ifdef _WIN32 + +// Returns the character attribute for the given color. 
+WORD GetColorAttribute(GTestColor color) { + switch (color) { + case COLOR_RED: return FOREGROUND_RED; + case COLOR_GREEN: return FOREGROUND_GREEN; + case COLOR_YELLOW: return FOREGROUND_RED | FOREGROUND_GREEN; + } + return 0; +} + +#else + +// Returns the ANSI color code for the given color. +const char* GetAnsiColorCode(GTestColor color) { + switch (color) { + case COLOR_RED: return "1"; + case COLOR_GREEN: return "2"; + case COLOR_YELLOW: return "3"; + }; + return NULL; +} + +#endif // _WIN32 + +// Returns true iff Google Test should use colors in the output. +bool ShouldUseColor(bool stdout_is_tty) { + const char* const gtest_color = GTEST_FLAG(color).c_str(); + + if (String::CaseInsensitiveCStringEquals(gtest_color, "auto")) { +#ifdef _WIN32 + // On Windows the TERM variable is usually not set, but the + // console there does support colors. + return stdout_is_tty; +#else + // On non-Windows platforms, we rely on the TERM variable. + const char* const term = GetEnv("TERM"); + const bool term_supports_color = + String::CStringEquals(term, "xterm") || + String::CStringEquals(term, "xterm-color") || + String::CStringEquals(term, "cygwin"); + return stdout_is_tty && term_supports_color; +#endif // _WIN32 + } + + return String::CaseInsensitiveCStringEquals(gtest_color, "yes") || + String::CaseInsensitiveCStringEquals(gtest_color, "true") || + String::CaseInsensitiveCStringEquals(gtest_color, "t") || + String::CStringEquals(gtest_color, "1"); + // We take "yes", "true", "t", and "1" as meaning "yes". If the + // value is neither one of these nor "auto", we treat it as "no" to + // be conservative. +} + +// Helpers for printing colored strings to stdout. Note that on Windows, we +// cannot simply emit special characters and have the terminal change colors. +// This routine must actually emit the characters rather than return a string +// that would be colored when printed, as can be done on Linux. +void ColoredPrintf(GTestColor color, const char* fmt, ...) 
{ + va_list args; + va_start(args, fmt); + + static const bool use_color = ShouldUseColor(isatty(fileno(stdout)) != 0); + // The '!= 0' comparison is necessary to satisfy MSVC 7.1. + + if (!use_color) { + vprintf(fmt, args); + va_end(args); + return; + } + +#ifdef _WIN32 + const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE); + + // Gets the current text color. + CONSOLE_SCREEN_BUFFER_INFO buffer_info; + GetConsoleScreenBufferInfo(stdout_handle, &buffer_info); + const WORD old_color_attrs = buffer_info.wAttributes; + + SetConsoleTextAttribute(stdout_handle, + GetColorAttribute(color) | FOREGROUND_INTENSITY); + vprintf(fmt, args); + + // Restores the text color. + SetConsoleTextAttribute(stdout_handle, old_color_attrs); +#else + printf("\033[0;3%sm", GetAnsiColorCode(color)); + vprintf(fmt, args); + printf("\033[m"); // Resets the terminal to default. +#endif // _WIN32 + va_end(args); +} + +} // namespace internal + +using internal::ColoredPrintf; +using internal::COLOR_RED; +using internal::COLOR_GREEN; +using internal::COLOR_YELLOW; + +// This class implements the UnitTestEventListenerInterface interface. +// +// Class PrettyUnitTestResultPrinter is copyable. +class PrettyUnitTestResultPrinter : public UnitTestEventListenerInterface { + public: + PrettyUnitTestResultPrinter() {} + static void PrintTestName(const char * test_case, const char * test) { + printf("%s.%s", test_case, test); + } + + // The following methods override what's in the + // UnitTestEventListenerInterface class. 
+ virtual void OnUnitTestStart(const UnitTest * unit_test); + virtual void OnGlobalSetUpStart(const UnitTest*); + virtual void OnTestCaseStart(const TestCase * test_case); + virtual void OnTestStart(const TestInfo * test_info); + virtual void OnNewTestPartResult(const TestPartResult * result); + virtual void OnTestEnd(const TestInfo * test_info); + virtual void OnGlobalTearDownStart(const UnitTest*); + virtual void OnUnitTestEnd(const UnitTest * unit_test); + + private: + internal::String test_case_name_; +}; + +// Called before the unit test starts. +void PrettyUnitTestResultPrinter::OnUnitTestStart( + const UnitTest * unit_test) { + const char * const filter = GTEST_FLAG(filter).c_str(); + + // Prints the filter if it's not *. This reminds the user that some + // tests may be skipped. + if (!internal::String::CStringEquals(filter, kUniversalFilter)) { + ColoredPrintf(COLOR_YELLOW, + "Note: %s filter = %s\n", GTEST_NAME, filter); + } + + const internal::UnitTestImpl* const impl = unit_test->impl(); + ColoredPrintf(COLOR_GREEN, "[==========] "); + printf("Running %s from %s.\n", + FormatTestCount(impl->test_to_run_count()).c_str(), + FormatTestCaseCount(impl->test_case_to_run_count()).c_str()); + fflush(stdout); +} + +void PrettyUnitTestResultPrinter::OnGlobalSetUpStart(const UnitTest*) { + ColoredPrintf(COLOR_GREEN, "[----------] "); + printf("Global test environment set-up.\n"); + fflush(stdout); +} + +void PrettyUnitTestResultPrinter::OnTestCaseStart( + const TestCase * test_case) { + test_case_name_ = test_case->name(); + const internal::String counts = + FormatCountableNoun(test_case->test_to_run_count(), "test", "tests"); + ColoredPrintf(COLOR_GREEN, "[----------] "); + printf("%s from %s\n", counts.c_str(), test_case_name_.c_str()); + fflush(stdout); +} + +void PrettyUnitTestResultPrinter::OnTestStart(const TestInfo * test_info) { + ColoredPrintf(COLOR_GREEN, "[ RUN ] "); + PrintTestName(test_case_name_.c_str(), test_info->name()); + printf("\n"); + 
fflush(stdout); +} + +void PrettyUnitTestResultPrinter::OnTestEnd(const TestInfo * test_info) { + if (test_info->result()->Passed()) { + ColoredPrintf(COLOR_GREEN, "[ OK ] "); + } else { + ColoredPrintf(COLOR_RED, "[ FAILED ] "); + } + PrintTestName(test_case_name_.c_str(), test_info->name()); + printf("\n"); + fflush(stdout); +} + +// Called after an assertion failure. +void PrettyUnitTestResultPrinter::OnNewTestPartResult( + const TestPartResult * result) { + // If the test part succeeded, we don't need to do anything. + if (result->type() == TPRT_SUCCESS) + return; + + // Print failure message from the assertion (e.g. expected this and got that). + PrintTestPartResult(*result); + fflush(stdout); +} + +void PrettyUnitTestResultPrinter::OnGlobalTearDownStart(const UnitTest*) { + ColoredPrintf(COLOR_GREEN, "[----------] "); + printf("Global test environment tear-down\n"); + fflush(stdout); +} + +namespace internal { + +// Internal helper for printing the list of failed tests. +static void PrintFailedTestsPretty(const UnitTestImpl* impl) { + const int failed_test_count = impl->failed_test_count(); + if (failed_test_count == 0) { + return; + } + + for (const internal::ListNode<TestCase*>* node = impl->test_cases()->Head(); + node != NULL; node = node->next()) { + const TestCase* const tc = node->element(); + if (!tc->should_run() || (tc->failed_test_count() == 0)) { + continue; + } + for (const internal::ListNode<TestInfo*>* tinode = + tc->test_info_list().Head(); + tinode != NULL; tinode = tinode->next()) { + const TestInfo* const ti = tinode->element(); + if (!tc->ShouldRunTest(ti) || tc->TestPassed(ti)) { + continue; + } + ColoredPrintf(COLOR_RED, "[ FAILED ] "); + printf("%s.%s\n", ti->test_case_name(), ti->name()); + } + } +} + +} // namespace internal + +void PrettyUnitTestResultPrinter::OnUnitTestEnd( + const UnitTest * unit_test) { + const internal::UnitTestImpl* const impl = unit_test->impl(); + + ColoredPrintf(COLOR_GREEN, "[==========] "); + printf("%s 
from %s ran.\n", + FormatTestCount(impl->test_to_run_count()).c_str(), + FormatTestCaseCount(impl->test_case_to_run_count()).c_str()); + ColoredPrintf(COLOR_GREEN, "[ PASSED ] "); + printf("%s.\n", FormatTestCount(impl->successful_test_count()).c_str()); + + int num_failures = impl->failed_test_count(); + if (!impl->Passed()) { + const int failed_test_count = impl->failed_test_count(); + ColoredPrintf(COLOR_RED, "[ FAILED ] "); + printf("%s, listed below:\n", FormatTestCount(failed_test_count).c_str()); + internal::PrintFailedTestsPretty(impl); + printf("\n%2d FAILED %s\n", num_failures, + num_failures == 1 ? "TEST" : "TESTS"); + } + + int num_disabled = impl->disabled_test_count(); + if (num_disabled) { + if (!num_failures) { + printf("\n"); // Add a spacer if no FAILURE banner is displayed. + } + ColoredPrintf(COLOR_YELLOW, + " YOU HAVE %d DISABLED %s\n\n", + num_disabled, + num_disabled == 1 ? "TEST" : "TESTS"); + } + // Ensure that Google Test output is printed before, e.g., heapchecker output. + fflush(stdout); +} + +// End PrettyUnitTestResultPrinter + +// class UnitTestEventsRepeater +// +// This class forwards events to other event listeners. 
+class UnitTestEventsRepeater : public UnitTestEventListenerInterface { + public: + typedef internal::List<UnitTestEventListenerInterface *> Listeners; + typedef internal::ListNode<UnitTestEventListenerInterface *> ListenersNode; + UnitTestEventsRepeater() {} + virtual ~UnitTestEventsRepeater(); + void AddListener(UnitTestEventListenerInterface *listener); + + virtual void OnUnitTestStart(const UnitTest* unit_test); + virtual void OnUnitTestEnd(const UnitTest* unit_test); + virtual void OnGlobalSetUpStart(const UnitTest* unit_test); + virtual void OnGlobalSetUpEnd(const UnitTest* unit_test); + virtual void OnGlobalTearDownStart(const UnitTest* unit_test); + virtual void OnGlobalTearDownEnd(const UnitTest* unit_test); + virtual void OnTestCaseStart(const TestCase* test_case); + virtual void OnTestCaseEnd(const TestCase* test_case); + virtual void OnTestStart(const TestInfo* test_info); + virtual void OnTestEnd(const TestInfo* test_info); + virtual void OnNewTestPartResult(const TestPartResult* result); + + private: + Listeners listeners_; + + GTEST_DISALLOW_COPY_AND_ASSIGN(UnitTestEventsRepeater); +}; + +UnitTestEventsRepeater::~UnitTestEventsRepeater() { + for (ListenersNode* listener = listeners_.Head(); + listener != NULL; + listener = listener->next()) { + delete listener->element(); + } +} + +void UnitTestEventsRepeater::AddListener( + UnitTestEventListenerInterface *listener) { + listeners_.PushBack(listener); +} + +// Since the methods are identical, use a macro to reduce boilerplate. +// This defines a member that repeats the call to all listeners. 
+#define GTEST_REPEATER_METHOD(Name, Type) \ +void UnitTestEventsRepeater::Name(const Type* parameter) { \ + for (ListenersNode* listener = listeners_.Head(); \ + listener != NULL; \ + listener = listener->next()) { \ + listener->element()->Name(parameter); \ + } \ +} + +GTEST_REPEATER_METHOD(OnUnitTestStart, UnitTest) +GTEST_REPEATER_METHOD(OnUnitTestEnd, UnitTest) +GTEST_REPEATER_METHOD(OnGlobalSetUpStart, UnitTest) +GTEST_REPEATER_METHOD(OnGlobalSetUpEnd, UnitTest) +GTEST_REPEATER_METHOD(OnGlobalTearDownStart, UnitTest) +GTEST_REPEATER_METHOD(OnGlobalTearDownEnd, UnitTest) +GTEST_REPEATER_METHOD(OnTestCaseStart, TestCase) +GTEST_REPEATER_METHOD(OnTestCaseEnd, TestCase) +GTEST_REPEATER_METHOD(OnTestStart, TestInfo) +GTEST_REPEATER_METHOD(OnTestEnd, TestInfo) +GTEST_REPEATER_METHOD(OnNewTestPartResult, TestPartResult) + +#undef GTEST_REPEATER_METHOD + +// End PrettyUnitTestResultPrinter + +// This class generates an XML output file. +class XmlUnitTestResultPrinter : public UnitTestEventListenerInterface { + public: + explicit XmlUnitTestResultPrinter(const char* output_file); + + virtual void OnUnitTestEnd(const UnitTest* unit_test); + + private: + // Is c a whitespace character that is normalized to a space character + // when it appears in an XML attribute value? + static bool IsNormalizableWhitespace(char c) { + return c == 0x9 || c == 0xA || c == 0xD; + } + + // May c appear in a well-formed XML document? + static bool IsValidXmlCharacter(char c) { + return IsNormalizableWhitespace(c) || c >= 0x20; + } + + // Returns an XML-escaped copy of the input string str. If + // is_attribute is true, the text is meant to appear as an attribute + // value, and normalizable whitespace is preserved by replacing it + // with character references. + static internal::String EscapeXml(const char* str, + bool is_attribute); + + // Convenience wrapper around EscapeXml when str is an attribute value. 
+ static internal::String EscapeXmlAttribute(const char* str) { + return EscapeXml(str, true); + } + + // Convenience wrapper around EscapeXml when str is not an attribute value. + static internal::String EscapeXmlText(const char* str) { + return EscapeXml(str, false); + } + + // Prints an XML representation of a TestInfo object. + static void PrintXmlTestInfo(FILE* out, + const char* test_case_name, + const TestInfo* test_info); + + // Prints an XML representation of a TestCase object + static void PrintXmlTestCase(FILE* out, const TestCase* test_case); + + // Prints an XML summary of unit_test to output stream out. + static void PrintXmlUnitTest(FILE* out, const UnitTest* unit_test); + + // Produces a string representing the test properties in a result as space + // delimited XML attributes based on the property key="value" pairs. + // When the String is not empty, it includes a space at the beginning, + // to delimit this attribute from prior attributes. + static internal::String TestPropertiesAsXmlAttributes( + const internal::TestResult* result); + + // The output file. + const internal::String output_file_; + + GTEST_DISALLOW_COPY_AND_ASSIGN(XmlUnitTestResultPrinter); +}; + +// Creates a new XmlUnitTestResultPrinter. +XmlUnitTestResultPrinter::XmlUnitTestResultPrinter(const char* output_file) + : output_file_(output_file) { + if (output_file_.c_str() == NULL || output_file_.empty()) { + fprintf(stderr, "XML output file may not be null\n"); + fflush(stderr); + exit(EXIT_FAILURE); + } +} + +// Called after the unit test ends. +void XmlUnitTestResultPrinter::OnUnitTestEnd(const UnitTest* unit_test) { + FILE* xmlout = NULL; + internal::FilePath output_file(output_file_); + internal::FilePath output_dir(output_file.RemoveFileName()); + + if (output_dir.CreateDirectoriesRecursively()) { + // MSVC 8 deprecates fopen(), so we want to suppress warning 4996 + // (deprecated function) there. +#ifdef GTEST_OS_WINDOWS + // We are on Windows. 
+#pragma warning(push) // Saves the current warning state. +#pragma warning(disable:4996) // Temporarily disables warning 4996. + xmlout = fopen(output_file_.c_str(), "w"); +#pragma warning(pop) // Restores the warning state. +#else // We are on Linux or Mac OS. + xmlout = fopen(output_file_.c_str(), "w"); +#endif // GTEST_OS_WINDOWS + } + if (xmlout == NULL) { + // TODO(wan): report the reason of the failure. + // + // We don't do it for now as: + // + // 1. There is no urgent need for it. + // 2. It's a bit involved to make the errno variable thread-safe on + // all three operating systems (Linux, Windows, and Mac OS). + // 3. To interpret the meaning of errno in a thread-safe way, + // we need the strerror_r() function, which is not available on + // Windows. + fprintf(stderr, + "Unable to open file \"%s\"\n", + output_file_.c_str()); + fflush(stderr); + exit(EXIT_FAILURE); + } + PrintXmlUnitTest(xmlout, unit_test); + fclose(xmlout); +} + +// Returns an XML-escaped copy of the input string str. If is_attribute +// is true, the text is meant to appear as an attribute value, and +// normalizable whitespace is preserved by replacing it with character +// references. +// +// Invalid XML characters in str, if any, are stripped from the output. +// It is expected that most, if not all, of the text processed by this +// module will consist of ordinary English text. +// If this module is ever modified to produce version 1.1 XML output, +// most invalid characters can be retained using character references. +// TODO(wan): It might be nice to have a minimally invasive, human-readable +// escaping scheme for invalid characters, rather than dropping them. 
+internal::String XmlUnitTestResultPrinter::EscapeXml(const char* str, + bool is_attribute) { + Message m; + + if (str != NULL) { + for (const char* src = str; *src; ++src) { + switch (*src) { + case '<': + m << "<"; + break; + case '>': + m << ">"; + break; + case '&': + m << "&"; + break; + case '\'': + if (is_attribute) + m << "'"; + else + m << '\''; + break; + case '"': + if (is_attribute) + m << """; + else + m << '"'; + break; + default: + if (IsValidXmlCharacter(*src)) { + if (is_attribute && IsNormalizableWhitespace(*src)) + m << internal::String::Format("&#x%02X;", unsigned(*src)); + else + m << *src; + } + break; + } + } + } + + return m.GetString(); +} + + +// The following routines generate an XML representation of a UnitTest +// object. +// +// This is how Google Test concepts map to the DTD: +// +// <testsuite name="AllTests"> <-- corresponds to a UnitTest object +// <testsuite name="testcase-name"> <-- corresponds to a TestCase object +// <testcase name="test-name"> <-- corresponds to a TestInfo object +// <failure message="..." /> +// <failure message="..." /> <-- individual assertion failures +// <failure message="..." /> +// </testcase> +// </testsuite> +// </testsuite> + +// Prints an XML representation of a TestInfo object. +// TODO(wan): There is also value in printing properties with the plain printer. +void XmlUnitTestResultPrinter::PrintXmlTestInfo(FILE* out, + const char* test_case_name, + const TestInfo* test_info) { + const internal::TestResult * const result = test_info->result(); + const internal::List<TestPartResult> &results = result->test_part_results(); + fprintf(out, + " <testcase name=\"%s\" status=\"%s\" time=\"%s\" " + "classname=\"%s\"%s", + EscapeXmlAttribute(test_info->name()).c_str(), + test_info->should_run() ? 
"run" : "notrun", + internal::StreamableToString(result->elapsed_time()).c_str(), + EscapeXmlAttribute(test_case_name).c_str(), + TestPropertiesAsXmlAttributes(result).c_str()); + + int failures = 0; + for (const internal::ListNode<TestPartResult>* part_node = results.Head(); + part_node != NULL; + part_node = part_node->next()) { + const TestPartResult& part = part_node->element(); + if (part.failed()) { + const internal::String message = + internal::String::Format("%s:%d\n%s", part.file_name(), + part.line_number(), part.message()); + if (++failures == 1) + fprintf(out, ">\n"); + fprintf(out, + " <failure message=\"%s\" type=\"\"/>\n", + EscapeXmlAttribute(message.c_str()).c_str()); + } + } + + if (failures == 0) + fprintf(out, " />\n"); + else + fprintf(out, " </testcase>\n"); +} + +// Prints an XML representation of a TestCase object +void XmlUnitTestResultPrinter::PrintXmlTestCase(FILE* out, + const TestCase* test_case) { + fprintf(out, + " <testsuite name=\"%s\" tests=\"%d\" failures=\"%d\" " + "disabled=\"%d\" ", + EscapeXmlAttribute(test_case->name()).c_str(), + test_case->total_test_count(), + test_case->failed_test_count(), + test_case->disabled_test_count()); + fprintf(out, + "errors=\"0\" time=\"%s\">\n", + internal::StreamableToString(test_case->elapsed_time()).c_str()); + for (const internal::ListNode<TestInfo*>* info_node = + test_case->test_info_list().Head(); + info_node != NULL; + info_node = info_node->next()) { + PrintXmlTestInfo(out, test_case->name(), info_node->element()); + } + fprintf(out, " </testsuite>\n"); +} + +// Prints an XML summary of unit_test to output stream out. 
+void XmlUnitTestResultPrinter::PrintXmlUnitTest(FILE* out, + const UnitTest* unit_test) { + const internal::UnitTestImpl* const impl = unit_test->impl(); + fprintf(out, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); + fprintf(out, + "<testsuite tests=\"%d\" failures=\"%d\" disabled=\"%d\" " + "errors=\"0\" time=\"%s\" ", + impl->total_test_count(), + impl->failed_test_count(), + impl->disabled_test_count(), + internal::StreamableToString(impl->elapsed_time()).c_str()); + fprintf(out, "name=\"AllTests\">\n"); + for (const internal::ListNode<TestCase*>* case_node = + impl->test_cases()->Head(); + case_node != NULL; + case_node = case_node->next()) { + PrintXmlTestCase(out, case_node->element()); + } + fprintf(out, "</testsuite>\n"); +} + +// Produces a string representing the test properties in a result as space +// delimited XML attributes based on the property key="value" pairs. +internal::String XmlUnitTestResultPrinter::TestPropertiesAsXmlAttributes( + const internal::TestResult* result) { + using internal::TestProperty; + Message attributes; + const internal::List<TestProperty>& properties = result->test_properties(); + for (const internal::ListNode<TestProperty>* property_node = + properties.Head(); + property_node != NULL; + property_node = property_node->next()) { + const TestProperty& property = property_node->element(); + attributes << " " << property.key() << "=" + << "\"" << EscapeXmlAttribute(property.value()) << "\""; + } + return attributes.GetString(); +} + +// End XmlUnitTestResultPrinter + +namespace internal { + +// Class ScopedTrace + +// Pushes the given source file location and message onto a per-thread +// trace stack maintained by Google Test. +// L < UnitTest::mutex_ +ScopedTrace::ScopedTrace(const char* file, int line, const Message& message) { + TraceInfo trace; + trace.file = file; + trace.line = line; + trace.message = message.GetString(); + + UnitTest::GetInstance()->PushGTestTrace(trace); +} + +// Pops the info pushed by the c'tor. 
+// L < UnitTest::mutex_ +ScopedTrace::~ScopedTrace() { + UnitTest::GetInstance()->PopGTestTrace(); +} + + +// class OsStackTraceGetter + +// Returns the current OS stack trace as a String. Parameters: +// +// max_depth - the maximum number of stack frames to be included +// in the trace. +// skip_count - the number of top frames to be skipped; doesn't count +// against max_depth. +// +// L < mutex_ +// We use "L < mutex_" to denote that the function may acquire mutex_. +String OsStackTraceGetter::CurrentStackTrace(int, int) { + return String(""); +} + +// L < mutex_ +void OsStackTraceGetter::UponLeavingGTest() { +} + +const char* const +OsStackTraceGetter::kElidedFramesMarker = + "... " GTEST_NAME " internal frames ..."; + +} // namespace internal + +// class UnitTest + +// Gets the singleton UnitTest object. The first time this method is +// called, a UnitTest object is constructed and returned. Consecutive +// calls will return the same object. +// +// We don't protect this under mutex_ as a user is not supposed to +// call this before main() starts, from which point on the return +// value will never change. +UnitTest * UnitTest::GetInstance() { + // When compiled with MSVC 7.1 in optimized mode, destroying the + // UnitTest object upon exiting the program messes up the exit code, + // causing successful tests to appear failed. We have to use a + // different implementation in this case to bypass the compiler bug. + // This implementation makes the compiler happy, at the cost of + // leaking the UnitTest object. +#if _MSC_VER == 1310 && !defined(_DEBUG) // MSVC 7.1 and optimized build. + static UnitTest* const instance = new UnitTest; + return instance; +#else + static UnitTest instance; + return &instance; +#endif // _MSC_VER==1310 && !defined(_DEBUG) +} + +// Registers and returns a global test environment. When a test +// program is run, all global test environments will be set-up in the +// order they were registered. 
After all tests in the program have +// finished, all global test environments will be torn-down in the +// *reverse* order they were registered. +// +// The UnitTest object takes ownership of the given environment. +// +// We don't protect this under mutex_, as we only support calling it +// from the main thread. +Environment* UnitTest::AddEnvironment(Environment* env) { + if (env == NULL) { + return NULL; + } + + impl_->environments()->PushBack(env); + impl_->environments_in_reverse_order()->PushFront(env); + return env; +} + +// Adds a TestPartResult to the current TestResult object. All Google Test +// assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc) eventually call +// this to report their results. The user code should use the +// assertion macros instead of calling this directly. +// L < mutex_ +void UnitTest::AddTestPartResult(TestPartResultType result_type, + const char* file_name, + int line_number, + const internal::String& message, + const internal::String& os_stack_trace) { + Message msg; + msg << message; + + internal::MutexLock lock(&mutex_); + if (impl_->gtest_trace_stack()->size() > 0) { + msg << "\n" << GTEST_NAME << " trace:"; + + for (internal::ListNode<internal::TraceInfo>* node = + impl_->gtest_trace_stack()->Head(); + node != NULL; + node = node->next()) { + const internal::TraceInfo& trace = node->element(); + msg << "\n" << trace.file << ":" << trace.line << ": " << trace.message; + } + } + + if (os_stack_trace.c_str() != NULL && !os_stack_trace.empty()) { + msg << "\nStack trace:\n" << os_stack_trace; + } + + const TestPartResult result = + TestPartResult(result_type, file_name, line_number, + msg.GetString().c_str()); + impl_->test_part_result_reporter()->ReportTestPartResult(result); + + // If this is a failure and the user wants the debugger to break on + // failures ... + if (result_type != TPRT_SUCCESS && GTEST_FLAG(break_on_failure)) { + // ... then we generate a seg fault. 
+ *static_cast<int*>(NULL) = 1; + } +} + +// Creates and adds a property to the current TestResult. If a property matching +// the supplied value already exists, updates its value instead. +void UnitTest::RecordPropertyForCurrentTest(const char* key, + const char* value) { + const internal::TestProperty test_property(key, value); + impl_->current_test_result()->RecordProperty(test_property); +} + +// Runs all tests in this UnitTest object and prints the result. +// Returns 0 if successful, or 1 otherwise. +// +// We don't protect this under mutex_, as we only support calling it +// from the main thread. +int UnitTest::Run() { +#ifdef GTEST_OS_WINDOWS + +#if !defined(_WIN32_WCE) + // SetErrorMode doesn't exist on CE. + if (GTEST_FLAG(catch_exceptions)) { + // The user wants Google Test to catch exceptions thrown by the tests. + + // This lets fatal errors be handled by us, instead of causing pop-ups. + SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOALIGNMENTFAULTEXCEPT | + SEM_NOGPFAULTERRORBOX | SEM_NOOPENFILEERRORBOX); + } +#endif // _WIN32_WCE + + __try { + return impl_->RunAllTests(); + } __except(internal::UnitTestOptions::GTestShouldProcessSEH( + GetExceptionCode())) { + printf("Exception thrown with code 0x%x.\nFAIL\n", GetExceptionCode()); + fflush(stdout); + return 1; + } + +#else + // We are on Linux or Mac OS. There is no exception of any kind. + + return impl_->RunAllTests(); +#endif // GTEST_OS_WINDOWS +} + +// Returns the TestCase object for the test that's currently running, +// or NULL if no test is running. +// L < mutex_ +const TestCase* UnitTest::current_test_case() const { + internal::MutexLock lock(&mutex_); + return impl_->current_test_case(); +} + +// Returns the TestInfo object for the test that's currently running, +// or NULL if no test is running. +// L < mutex_ +const TestInfo* UnitTest::current_test_info() const { + internal::MutexLock lock(&mutex_); + return impl_->current_test_info(); +} + +// Creates an empty UnitTest. 
+UnitTest::UnitTest() { + impl_ = new internal::UnitTestImpl(this); +} + +// Destructor of UnitTest. +UnitTest::~UnitTest() { + delete impl_; +} + +// Pushes a trace defined by SCOPED_TRACE() on to the per-thread +// Google Test trace stack. +// L < mutex_ +void UnitTest::PushGTestTrace(const internal::TraceInfo& trace) { + internal::MutexLock lock(&mutex_); + impl_->gtest_trace_stack()->PushFront(trace); +} + +// Pops a trace from the per-thread Google Test trace stack. +// L < mutex_ +void UnitTest::PopGTestTrace() { + internal::MutexLock lock(&mutex_); + impl_->gtest_trace_stack()->PopFront(NULL); +} + +namespace internal { + +UnitTestImpl::UnitTestImpl(UnitTest* parent) + : parent_(parent), + test_cases_(), + last_death_test_case_(NULL), + current_test_case_(NULL), + current_test_info_(NULL), + ad_hoc_test_result_(), + result_printer_(NULL), + os_stack_trace_getter_(NULL), +#ifdef GTEST_HAS_DEATH_TEST + elapsed_time_(0), + internal_run_death_test_flag_(NULL), + death_test_factory_(new DefaultDeathTestFactory) { +#else + elapsed_time_(0) { +#endif // GTEST_HAS_DEATH_TEST + // We do the assignment here instead of in the initializer list, as + // doing that latter causes MSVC to issue a warning about using + // 'this' in initializers. + test_part_result_reporter_ = this; +} + +UnitTestImpl::~UnitTestImpl() { + // Deletes every TestCase. + test_cases_.ForEach(internal::Delete<TestCase>); + + // Deletes every Environment. + environments_.ForEach(internal::Delete<Environment>); + + // Deletes the current test result printer. + delete result_printer_; + + delete os_stack_trace_getter_; +} + +// A predicate that checks the name of a TestCase against a known +// value. +// +// This is used for implementation of the UnitTest class only. We put +// it in the anonymous namespace to prevent polluting the outer +// namespace. +// +// TestCaseNameIs is copyable. +class TestCaseNameIs { + public: + // Constructor. 
+ explicit TestCaseNameIs(const String& name) + : name_(name) {} + + // Returns true iff the name of test_case matches name_. + bool operator()(const TestCase* test_case) const { + return test_case != NULL && strcmp(test_case->name(), name_.c_str()) == 0; + } + + private: + String name_; +}; + +// Finds and returns a TestCase with the given name. If one doesn't +// exist, creates one and returns it. +// +// Arguments: +// +// test_case_name: name of the test case +// set_up_tc: pointer to the function that sets up the test case +// tear_down_tc: pointer to the function that tears down the test case +TestCase* UnitTestImpl::GetTestCase(const char* test_case_name, + Test::SetUpTestCaseFunc set_up_tc, + Test::TearDownTestCaseFunc tear_down_tc) { + // Can we find a TestCase with the given name? + internal::ListNode<TestCase*>* node = test_cases_.FindIf( + TestCaseNameIs(test_case_name)); + + if (node == NULL) { + // No. Let's create one. + TestCase* const test_case = + new TestCase(test_case_name, set_up_tc, tear_down_tc); + + // Is this a death test case? + if (String(test_case_name).EndsWith("DeathTest")) { + // Yes. Inserts the test case after the last death test case + // defined so far. + node = test_cases_.InsertAfter(last_death_test_case_, test_case); + last_death_test_case_ = node; + } else { + // No. Appends to the end of the list. + test_cases_.PushBack(test_case); + node = test_cases_.Last(); + } + } + + // Returns the TestCase found. + return node->element(); +} + +// Helpers for setting up / tearing down the given environment. They +// are for use in the List::ForEach() method. +static void SetUpEnvironment(Environment* env) { env->SetUp(); } +static void TearDownEnvironment(Environment* env) { env->TearDown(); } + +// Runs all tests in this UnitTest object, prints the result, and +// returns 0 if all tests are successful, or 1 otherwise. 
// If any exception is thrown during a test on Windows, this test is
// considered to be failed, but the rest of the tests will still be
// run. (We disable exceptions on Linux and Mac OS X, so the issue
// doesn't apply there.)
int UnitTestImpl::RunAllTests() {
  // Makes sure InitGoogleTest() was called.
  if (!GTestIsInitialized()) {
    printf("%s",
           "\nThis test program did NOT call ::testing::InitGoogleTest "
           "before calling RUN_ALL_TESTS(). Please fix it.\n");
    return 1;
  }

  // Lists all the tests and exits if the --gtest_list_tests
  // flag was specified.
  if (GTEST_FLAG(list_tests)) {
    ListAllTests();
    return 0;
  }

  // True iff we are in a subprocess for running a thread-safe-style
  // death test.
  bool in_subprocess_for_death_test = false;

#ifdef GTEST_HAS_DEATH_TEST
  internal_run_death_test_flag_.reset(ParseInternalRunDeathTestFlag());
  in_subprocess_for_death_test = (internal_run_death_test_flag_.get() != NULL);
#endif  // GTEST_HAS_DEATH_TEST

  // Obtained only after the death-test flag above has been reset,
  // because result_printer() consults that flag when it has to create
  // a printer.
  UnitTestEventListenerInterface * const printer = result_printer();

  // Compares the full test names with the filter to decide which
  // tests to run.
  const bool has_tests_to_run = FilterTests() > 0;
  // True iff at least one test has failed.
  bool failed = false;

  // How many times to repeat the tests? We don't want to repeat them
  // when we are inside the subprocess of a death test.
  const int repeat = in_subprocess_for_death_test ? 1 : GTEST_FLAG(repeat);
  // Repeats forever if the repeat count is negative.
  const bool forever = repeat < 0;
  // A repeat count of 0 skips the loop body entirely, so the function
  // then returns 0 without running anything.
  for (int i = 0; forever || i != repeat; i++) {
    if (repeat != 1) {
      printf("\nRepeating all tests (iteration %d) . . .\n\n", i + 1);
    }

    // Tells the unit test event listener that the tests are about to
    // start.
    printer->OnUnitTestStart(parent_);

    const TimeInMillis start = GetTimeInMillis();

    // Runs each test case if there is at least one test to run.
    if (has_tests_to_run) {
      // Sets up all environments beforehand.
      printer->OnGlobalSetUpStart(parent_);
      environments_.ForEach(SetUpEnvironment);
      printer->OnGlobalSetUpEnd(parent_);

      // Runs the tests only if there was no fatal failure during global
      // set-up.
      if (!Test::HasFatalFailure()) {
        test_cases_.ForEach(TestCase::RunTestCase);
      }

      // Tears down all environments in reverse order afterwards.
      printer->OnGlobalTearDownStart(parent_);
      environments_in_reverse_order_.ForEach(TearDownEnvironment);
      printer->OnGlobalTearDownEnd(parent_);
    }

    elapsed_time_ = GetTimeInMillis() - start;

    // Tells the unit test event listener that the tests have just
    // finished.
    printer->OnUnitTestEnd(parent_);

    // Gets the result and clears it. The failure is latched in
    // 'failed' so that a later passing iteration cannot hide it.
    if (!Passed()) {
      failed = true;
    }
    ClearResult();
  }

  // Returns 0 if all tests passed, or 1 otherwise.
  return failed ? 1 : 0;
}

// Compares the name of each test with the user-specified filter to
// decide whether the test should be run, then records the result in
// each TestCase and TestInfo object.
// Returns the number of tests that should run.
int UnitTestImpl::FilterTests() {
  int num_runnable_tests = 0;
  for (const internal::ListNode<TestCase *> *test_case_node =
       test_cases_.Head();
       test_case_node != NULL;
       test_case_node = test_case_node->next()) {
    TestCase * const test_case = test_case_node->element();
    const String &test_case_name = test_case->name();
    // Starts from "do not run"; the flag is turned on below as soon as
    // one test of this test case is selected.
    test_case->set_should_run(false);

    for (const internal::ListNode<TestInfo *> *test_info_node =
           test_case->test_info_list().Head();
         test_info_node != NULL;
         test_info_node = test_info_node->next()) {
      TestInfo * const test_info = test_info_node->element();
      const String test_name(test_info->name());
      // A test is disabled if test case name or test name matches
      // kDisableTestPattern.
      const bool is_disabled =
          internal::UnitTestOptions::PatternMatchesString(kDisableTestPattern,
                                                          test_case_name.c_str()) ||
          internal::UnitTestOptions::PatternMatchesString(kDisableTestPattern,
                                                          test_name.c_str());
      test_info->impl()->set_is_disabled(is_disabled);

      // A disabled test never runs, whatever the filter says.
      const bool should_run = !is_disabled &&
          internal::UnitTestOptions::FilterMatchesTest(test_case_name,
                                                       test_name);
      test_info->impl()->set_should_run(should_run);
      test_case->set_should_run(test_case->should_run() || should_run);
      if (should_run) {
        num_runnable_tests++;
      }
    }
  }
  return num_runnable_tests;
}

// Lists all tests by name.
void UnitTestImpl::ListAllTests() {
  for (const internal::ListNode<TestCase*>* test_case_node = test_cases_.Head();
       test_case_node != NULL;
       test_case_node = test_case_node->next()) {
    const TestCase* const test_case = test_case_node->element();

    // Prints the test case name followed by an indented list of its
    // test names.
    printf("%s.\n", test_case->name());

    for (const internal::ListNode<TestInfo*>* test_info_node =
           test_case->test_info_list().Head();
         test_info_node != NULL;
         test_info_node = test_info_node->next()) {
      const TestInfo* const test_info = test_info_node->element();

      printf(" %s\n", test_info->name());
    }
  }
  // Flushes so the listing is written out before the caller returns.
  fflush(stdout);
}

// Sets the unit test result printer.
//
// Does nothing if the input and the current printer object are the
// same; otherwise, deletes the old printer object and makes the
// input the current printer.
void UnitTestImpl::set_result_printer(
    UnitTestEventListenerInterface* result_printer) {
  // The identity check also prevents deleting the very object that is
  // about to be installed.
  if (result_printer_ != result_printer) {
    delete result_printer_;
    result_printer_ = result_printer;
  }
}

// Returns the current unit test result printer if it is not NULL;
// otherwise, creates an appropriate result printer, makes it the
// current printer, and returns it.
+UnitTestEventListenerInterface* UnitTestImpl::result_printer() { + if (result_printer_ != NULL) { + return result_printer_; + } + +#ifdef GTEST_HAS_DEATH_TEST + if (internal_run_death_test_flag_.get() != NULL) { + result_printer_ = new NullUnitTestResultPrinter; + return result_printer_; + } +#endif // GTEST_HAS_DEATH_TEST + + UnitTestEventsRepeater *repeater = new UnitTestEventsRepeater; + const String& output_format = internal::UnitTestOptions::GetOutputFormat(); + if (output_format == "xml") { + repeater->AddListener(new XmlUnitTestResultPrinter( + internal::UnitTestOptions::GetOutputFile().c_str())); + } else if (output_format != "") { + printf("WARNING: unrecognized output format \"%s\" ignored.\n", + output_format.c_str()); + fflush(stdout); + } + repeater->AddListener(new PrettyUnitTestResultPrinter); + result_printer_ = repeater; + return result_printer_; +} + +// Sets the OS stack trace getter. +// +// Does nothing if the input and the current OS stack trace getter are +// the same; otherwise, deletes the old getter and makes the input the +// current getter. +void UnitTestImpl::set_os_stack_trace_getter( + OsStackTraceGetterInterface* getter) { + if (os_stack_trace_getter_ != getter) { + delete os_stack_trace_getter_; + os_stack_trace_getter_ = getter; + } +} + +// Returns the current OS stack trace getter if it is not NULL; +// otherwise, creates an OsStackTraceGetter, makes it the current +// getter, and returns it. +OsStackTraceGetterInterface* UnitTestImpl::os_stack_trace_getter() { + if (os_stack_trace_getter_ == NULL) { + os_stack_trace_getter_ = new OsStackTraceGetter; + } + + return os_stack_trace_getter_; +} + +// Returns the TestResult for the test that's currently running, or +// the TestResult for the ad hoc test if no test is running. +internal::TestResult* UnitTestImpl::current_test_result() { + return current_test_info_ ? + current_test_info_->impl()->result() : &ad_hoc_test_result_; +} + +// TestInfoImpl constructor. 
+TestInfoImpl::TestInfoImpl(TestInfo* parent, + const char* test_case_name, + const char* name, + TypeId fixture_class_id, + TestMaker maker) : + parent_(parent), + test_case_name_(String(test_case_name)), + name_(String(name)), + fixture_class_id_(fixture_class_id), + should_run_(false), + is_disabled_(false), + maker_(maker) { +} + +// TestInfoImpl destructor. +TestInfoImpl::~TestInfoImpl() { +} + +} // namespace internal + +namespace internal { + +// Parses a string as a command line flag. The string should have +// the format "--flag=value". When def_optional is true, the "=value" +// part can be omitted. +// +// Returns the value of the flag, or NULL if the parsing failed. +const char* ParseFlagValue(const char* str, + const char* flag, + bool def_optional) { + // str and flag must not be NULL. + if (str == NULL || flag == NULL) return NULL; + + // The flag must start with "--" followed by GTEST_FLAG_PREFIX. + const String flag_str = String::Format("--%s%s", GTEST_FLAG_PREFIX, flag); + const size_t flag_len = flag_str.GetLength(); + if (strncmp(str, flag_str.c_str(), flag_len) != 0) return NULL; + + // Skips the flag name. + const char* flag_end = str + flag_len; + + // When def_optional is true, it's OK to not have a "=value" part. + if (def_optional && (flag_end[0] == '\0')) { + return flag_end; + } + + // If def_optional is true and there are more characters after the + // flag name, or if def_optional is false, there must be a '=' after + // the flag name. + if (flag_end[0] != '=') return NULL; + + // Returns the string after "=". + return flag_end + 1; +} + +// Parses a string for a bool flag, in the form of either +// "--flag=value" or "--flag". +// +// In the former case, the value is taken as true as long as it does +// not start with '0', 'f', or 'F'. +// +// In the latter case, the value is taken as true. +// +// On success, stores the value of the flag in *value, and returns +// true. On failure, returns false without changing *value. 
+bool ParseBoolFlag(const char* str, const char* flag, bool* value) { + // Gets the value of the flag as a string. + const char* const value_str = ParseFlagValue(str, flag, true); + + // Aborts if the parsing failed. + if (value_str == NULL) return false; + + // Converts the string value to a bool. + *value = !(*value_str == '0' || *value_str == 'f' || *value_str == 'F'); + return true; +} + +// Parses a string for an Int32 flag, in the form of +// "--flag=value". +// +// On success, stores the value of the flag in *value, and returns +// true. On failure, returns false without changing *value. +bool ParseInt32Flag(const char* str, const char* flag, Int32* value) { + // Gets the value of the flag as a string. + const char* const value_str = ParseFlagValue(str, flag, false); + + // Aborts if the parsing failed. + if (value_str == NULL) return false; + + // Sets *value to the value of the flag. + return ParseInt32(Message() << "The value of flag --" << flag, + value_str, value); +} + +// Parses a string for a string flag, in the form of +// "--flag=value". +// +// On success, stores the value of the flag in *value, and returns +// true. On failure, returns false without changing *value. +bool ParseStringFlag(const char* str, const char* flag, String* value) { + // Gets the value of the flag as a string. + const char* const value_str = ParseFlagValue(str, flag, false); + + // Aborts if the parsing failed. + if (value_str == NULL) return false; + + // Sets *value to the value of the flag. + *value = value_str; + return true; +} + +// The internal implementation of InitGoogleTest(). +// +// The type parameter CharType can be instantiated to either char or +// wchar_t. 
+template <typename CharType> +void InitGoogleTestImpl(int* argc, CharType** argv) { + g_parse_gtest_flags_called = true; + if (*argc <= 0) return; + +#ifdef GTEST_HAS_DEATH_TEST + g_argvs.clear(); + for (int i = 0; i != *argc; i++) { + g_argvs.push_back(StreamableToString(argv[i])); + } +#endif // GTEST_HAS_DEATH_TEST + + for (int i = 1; i != *argc; i++) { + const String arg_string = StreamableToString(argv[i]); + const char* const arg = arg_string.c_str(); + + using internal::ParseBoolFlag; + using internal::ParseInt32Flag; + using internal::ParseStringFlag; + + // Do we see a Google Test flag? + if (ParseBoolFlag(arg, kBreakOnFailureFlag, + >EST_FLAG(break_on_failure)) || + ParseBoolFlag(arg, kCatchExceptionsFlag, + >EST_FLAG(catch_exceptions)) || + ParseStringFlag(arg, kColorFlag, >EST_FLAG(color)) || + ParseStringFlag(arg, kDeathTestStyleFlag, + >EST_FLAG(death_test_style)) || + ParseStringFlag(arg, kFilterFlag, >EST_FLAG(filter)) || + ParseStringFlag(arg, kInternalRunDeathTestFlag, + >EST_FLAG(internal_run_death_test)) || + ParseBoolFlag(arg, kListTestsFlag, >EST_FLAG(list_tests)) || + ParseStringFlag(arg, kOutputFlag, >EST_FLAG(output)) || + ParseInt32Flag(arg, kRepeatFlag, >EST_FLAG(repeat)) + ) { + // Yes. Shift the remainder of the argv list left by one. Note + // that argv has (*argc + 1) elements, the last one always being + // NULL. The following loop moves the trailing NULL element as + // well. + for (int j = i; j != *argc; j++) { + argv[j] = argv[j + 1]; + } + + // Decrements the argument count. + (*argc)--; + + // We also need to decrement the iterator as we just removed + // an element. + i--; + } + } +} + +} // namespace internal + +// Initializes Google Test. This must be called before calling +// RUN_ALL_TESTS(). In particular, it parses a command line for the +// flags that Google Test recognizes. Whenever a Google Test flag is +// seen, it is removed from argv, and *argc is decremented. +// +// No value is returned. 
// Instead, the Google Test flag variables are
// updated.
void InitGoogleTest(int* argc, char** argv) {
  // Stores argv[0] in g_executable_path before the flags are parsed.
  // NOTE(review): argv[0] is read here before InitGoogleTestImpl()
  // checks *argc <= 0.
  internal::g_executable_path = argv[0];
  internal::InitGoogleTestImpl(argc, argv);
}

// This overloaded version can be used in Windows programs compiled in
// UNICODE mode.
#ifdef GTEST_OS_WINDOWS
void InitGoogleTest(int* argc, wchar_t** argv) {
  // g_executable_path uses normal characters rather than wide chars, so call
  // StreamableToString to convert argv[0] to normal characters (utf8 encoding).
  internal::g_executable_path = internal::StreamableToString(argv[0]);
  internal::InitGoogleTestImpl(argc, argv);
}
#endif  // GTEST_OS_WINDOWS

}  // namespace testing
diff --git a/src/gtest/gtest.h b/src/gtest/gtest.h
new file mode 100644
index 0000000..2464f72
--- /dev/null
+++ b/src/gtest/gtest.h
@@ -0,0 +1,1216 @@
// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) +// +// The Google C++ Testing Framework (Google Test) +// +// This header file defines the public API for Google Test. It should be +// included by any test program that uses Google Test. +// +// IMPORTANT NOTE: Due to limitation of the C++ language, we have to +// leave some internal implementation details in this header file. +// They are clearly marked by comments like this: +// +// // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +// +// Such code is NOT meant to be used by a user directly, and is subject +// to CHANGE WITHOUT NOTICE. Therefore DO NOT DEPEND ON IT in a user +// program! +// +// Acknowledgment: Google Test borrowed the idea of automatic test +// registration from Barthelemy Dagenais' (barthelemy@prologique.com) +// easyUnit framework. 
+ +#ifndef GTEST_INCLUDE_GTEST_GTEST_H_ +#define GTEST_INCLUDE_GTEST_GTEST_H_ + +// The following platform macros are used throughout Google Test: +// _WIN32_WCE Windows CE (set in project files) +// __SYMBIAN32__ Symbian (set by Symbian tool chain) +// +// Note that even though _MSC_VER and _WIN32_WCE really indicate a compiler +// and a Win32 implementation, respectively, we use them to indicate the +// combination of compiler - Win 32 API - C library, since the code currently +// only supports: +// Windows proper with Visual C++ and MS C library (_MSC_VER && !_WIN32_WCE) and +// Windows Mobile with Visual C++ and no C library (_WIN32_WCE). + +#include <gtest/internal/gtest-internal.h> +#include <gtest/internal/gtest-string.h> +#include <gtest/gtest-death-test.h> +#include <gtest/gtest-message.h> +#include <gtest/gtest_prod.h> + +// Depending on the platform, different string classes are available. +// On Windows, ::std::string compiles only when exceptions are +// enabled. On Linux, in addition to ::std::string, Google also makes +// use of class ::string, which has the same interface as +// ::std::string, but has a different implementation. +// +// The user can tell us whether ::std::string is available in his +// environment by defining the macro GTEST_HAS_STD_STRING to either 1 +// or 0 on the compiler command line. He can also define +// GTEST_HAS_GLOBAL_STRING to 1 to indicate that ::string is available +// AND is a distinct type to ::std::string, or define it to 0 to +// indicate otherwise. +// +// If the user's ::std::string and ::string are the same class due to +// aliasing, he should define GTEST_HAS_STD_STRING to 1 and +// GTEST_HAS_GLOBAL_STRING to 0. +// +// If the user doesn't define GTEST_HAS_STD_STRING and/or +// GTEST_HAS_GLOBAL_STRING, they are defined heuristically. + +namespace testing { + +// The upper limit for valid stack trace depths. 
const int kMaxStackTraceDepth = 100;

// This flag specifies the maximum number of stack frames to be
// printed in a failure message.
GTEST_DECLARE_int32(stack_trace_depth);

// This flag controls whether Google Test includes Google Test internal
// stack frames in failure stack traces.
GTEST_DECLARE_bool(show_internal_stack_frames);

// The possible outcomes of a test part (i.e. an assertion or an
// explicit SUCCEED(), FAIL(), or ADD_FAILURE()).
enum TestPartResultType {
  TPRT_SUCCESS,           // Succeeded.
  TPRT_NONFATAL_FAILURE,  // Failed but the test can continue.
  TPRT_FATAL_FAILURE      // Failed and the test should be terminated.
};

namespace internal {

// Forward declaration; class Test holds a pointer to a GTestFlagSaver.
class GTestFlagSaver;

// Converts a streamable value to a String. A NULL pointer is
// converted to "(null)". When the input value is a ::string,
// ::std::string, ::wstring, or ::std::wstring object, each NUL
// character in it is replaced with "\\0".
// Declared in gtest-internal.h but defined here, so that it has access
// to the definition of the Message class, required by the ARM
// compiler.
template <typename T>
String StreamableToString(const T& streamable) {
  // Streams the value into a temporary Message and returns its text.
  return (Message() << streamable).GetString();
}

}  // namespace internal

// A class for indicating whether an assertion was successful. When
// the assertion wasn't successful, the AssertionResult object
// remembers a non-empty message that described how it failed.
//
// This class is useful for defining predicate-format functions to be
// used with predicate assertions (ASSERT_PRED_FORMAT*, etc).
//
// The constructor of AssertionResult is private. To create an
// instance of this class, use one of the factory functions
// (AssertionSuccess() and AssertionFailure()).
//
// For example, in order to be able to write:
//
//   // Verifies that Foo() returns an even number.
//   EXPECT_PRED_FORMAT1(IsEven, Foo());
//
// you just need to define:
//
//   testing::AssertionResult IsEven(const char* expr, int n) {
//     if ((n % 2) == 0) return testing::AssertionSuccess();
//
//     Message msg;
//     msg << "Expected: " << expr << " is even\n"
//         << "  Actual: it's " << n;
//     return testing::AssertionFailure(msg);
//   }
//
// If Foo() returns 5, you will see the following message:
//
//   Expected: Foo() is even
//     Actual: it's 5
class AssertionResult {
 public:
  // Declares factory functions for making successful and failed
  // assertion results as friends.
  friend AssertionResult AssertionSuccess();
  friend AssertionResult AssertionFailure(const Message&);

  // Returns true iff the assertion succeeded. Success is encoded as a
  // failure message whose c_str() is NULL.
  // NOTE(review): this presumes a default-constructed internal::String
  // reports a NULL c_str() - confirm against gtest-string.h.
  operator bool() const { return failure_message_.c_str() == NULL; }  // NOLINT

  // Returns the assertion's failure message.
  const char* failure_message() const { return failure_message_.c_str(); }

 private:
  // The default constructor. It is used when the assertion succeeded.
  AssertionResult() {}

  // The constructor used when the assertion failed.
  explicit AssertionResult(const internal::String& failure_message);

  // Stores the assertion's failure message.
  internal::String failure_message_;
};

// Makes a successful assertion result.
AssertionResult AssertionSuccess();

// Makes a failed assertion result with the given failure message.
AssertionResult AssertionFailure(const Message& msg);

// The abstract class that all tests inherit from.
//
// In Google Test, a unit test program contains one or many TestCases, and
// each TestCase contains one or many Tests.
//
// When you define a test using the TEST macro, you don't need to
// explicitly derive from Test - the TEST macro automatically does
// this for you.
//
// The only time you derive from Test is when defining a test fixture
// to be used in a TEST_F.
// For example:
//
//   class FooTest : public testing::Test {
//    protected:
//     virtual void SetUp() { ... }
//     virtual void TearDown() { ... }
//     ...
//   };
//
//   TEST_F(FooTest, Bar) { ... }
//   TEST_F(FooTest, Baz) { ... }
//
// Test is not copyable.
class Test {
 public:
  friend class internal::TestInfoImpl;

  // Defines types for pointers to functions that set up and tear down
  // a test case.
  typedef void (*SetUpTestCaseFunc)();
  typedef void (*TearDownTestCaseFunc)();

  // The d'tor is virtual as we intend to inherit from Test.
  virtual ~Test();

  // Returns true iff the current test has a fatal failure.
  static bool HasFatalFailure();

  // Logs a property for the current test. Only the last value for a given
  // key is remembered.
  // These are public static so they can be called from utility functions
  // that are not members of the test fixture.
  // The arguments are const char* instead of strings, as Google Test is
  // used on platforms where string doesn't compile.
  //
  // Note that a driving consideration for these RecordProperty methods
  // was to produce xml output suited to the Greenspan charting utility,
  // which at present will only chart values that fit in a 32-bit int. It
  // is the user's responsibility to restrict their values to 32-bit ints
  // if they intend them to be used with Greenspan.
  static void RecordProperty(const char* key, const char* value);
  static void RecordProperty(const char* key, int value);

 protected:
  // Creates a Test object.
  Test();

  // Sets up the stuff shared by all tests in this test case.
  //
  // Google Test will call Foo::SetUpTestCase() before running the first
  // test in test case Foo. Hence a sub-class can define its own
  // SetUpTestCase() method to shadow the one defined in the super
  // class.
  static void SetUpTestCase() {}

  // Tears down the stuff shared by all tests in this test case.
  //
  // Google Test will call Foo::TearDownTestCase() after running the last
  // test in test case Foo. Hence a sub-class can define its own
  // TearDownTestCase() method to shadow the one defined in the super
  // class.
  static void TearDownTestCase() {}

  // Sets up the test fixture.
  virtual void SetUp();

  // Tears down the test fixture.
  virtual void TearDown();

 private:
  // Returns true iff the current test has the same fixture class as
  // the first test in the current test case.
  static bool HasSameFixtureClass();

  // Runs the test after the test fixture has been set up.
  //
  // A sub-class must implement this to define the test logic.
  //
  // DO NOT OVERRIDE THIS FUNCTION DIRECTLY IN A USER PROGRAM.
  // Instead, use the TEST or TEST_F macro.
  virtual void TestBody() = 0;

  // Sets up, executes, and tears down the test.
  void Run();

  // Uses a GTestFlagSaver to save and restore all Google Test flags.
  const internal::GTestFlagSaver* const gtest_flag_saver_;

  // Often a user mis-spells SetUp() as Setup() and spends a long time
  // wondering why it is never called by Google Test. The declaration of
  // the following method is solely for catching such an error at
  // compile time:
  //
  // - The return type is deliberately chosen to be not void, so it
  // will be a conflict if a user declares void Setup() in their test
  // fixture.
  //
  // - This method is private, so it will be another compiler error
  // if a user calls it from their test fixture.
  //
  // DO NOT OVERRIDE THIS FUNCTION.
  //
  // If you see an error about overriding the following function or
  // about it being private, you have mis-spelled SetUp() as Setup().
  struct Setup_should_be_spelled_SetUp {};
  virtual Setup_should_be_spelled_SetUp* Setup() { return NULL; }

  // We disallow copying Tests.
  GTEST_DISALLOW_COPY_AND_ASSIGN(Test);
};


// Defines the type of a function pointer that creates a Test object
// when invoked.
typedef Test* (*TestMaker)();


// A TestInfo object stores the following information about a test:
//
//   Test case name
//   Test name
//   Whether the test should be run
//   A function pointer that creates the test object when invoked
//   Test result
//
// The constructor of TestInfo registers itself with the UnitTest
// singleton such that the RUN_ALL_TESTS() macro knows which tests to
// run.
class TestInfo {
 public:
  // Destructs a TestInfo object. This function is not virtual, so
  // don't inherit from TestInfo.
  ~TestInfo();

  // Creates a TestInfo object and registers it with the UnitTest
  // singleton; returns the created object.
  //
  // Arguments:
  //
  //   test_case_name:   name of the test case
  //   name:             name of the test
  //   fixture_class_id: ID of the test fixture class
  //   set_up_tc:        pointer to the function that sets up the test case
  //   tear_down_tc:     pointer to the function that tears down the test case
  //   maker:            pointer to the function that creates a test object
  //
  // This is public only because it's needed by the TEST and TEST_F macros.
  // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
  static TestInfo* MakeAndRegisterInstance(
      const char* test_case_name,
      const char* name,
      internal::TypeId fixture_class_id,
      Test::SetUpTestCaseFunc set_up_tc,
      Test::TearDownTestCaseFunc tear_down_tc,
      TestMaker maker);

  // Returns the test case name.
  const char* test_case_name() const;

  // Returns the test name.
  const char* name() const;

  // Returns true if this test should run.
  //
  // Google Test allows the user to filter the tests by their full names.
  // The full name of a test Bar in test case Foo is defined as
  // "Foo.Bar". Only the tests that match the filter will run.
  //
  // A filter is a colon-separated list of glob (not regex) patterns,
  // optionally followed by a '-' and a colon-separated list of
  // negative patterns (tests to exclude). A test is run if it
  // matches one of the positive patterns and does not match any of
  // the negative patterns.
  //
  // For example, *A*:Foo.* is a filter that matches any string that
  // contains the character 'A' or starts with "Foo.".
  bool should_run() const;

  // Returns the result of the test.
  const internal::TestResult* result() const;
 private:
#ifdef GTEST_HAS_DEATH_TEST
  friend class internal::DefaultDeathTestFactory;
#endif  // GTEST_HAS_DEATH_TEST
  friend class internal::TestInfoImpl;
  friend class internal::UnitTestImpl;
  friend class Test;
  friend class TestCase;

  // Increments the number of death tests encountered in this test so
  // far.
  int increment_death_test_count();

  // Accessors for the implementation object.
  internal::TestInfoImpl* impl() { return impl_; }
  const internal::TestInfoImpl* impl() const { return impl_; }

  // Constructs a TestInfo object. Private: instances are meant to be
  // created through MakeAndRegisterInstance().
  TestInfo(const char* test_case_name, const char* name,
           internal::TypeId fixture_class_id, TestMaker maker);

  // An opaque implementation object.
  internal::TestInfoImpl* impl_;

  GTEST_DISALLOW_COPY_AND_ASSIGN(TestInfo);
};

// An Environment object is capable of setting up and tearing down an
// environment. The user should subclass this to define their own
// environment(s).
//
// An Environment object does the set-up and tear-down in virtual
// methods SetUp() and TearDown() instead of the constructor and the
// destructor, as:
//
// 1. You cannot safely throw from a destructor. This is a problem
// as in some cases Google Test is used where exceptions are enabled, and
// we may want to implement ASSERT_* using exceptions where they are
// available.
// 2. You cannot use ASSERT_* directly in a constructor or
// destructor.
class Environment {
 public:
  // The d'tor is virtual as we need to subclass Environment.
  virtual ~Environment() {}

  // Override this to define how to set up the environment.
  virtual void SetUp() {}

  // Override this to define how to tear down the environment.
  virtual void TearDown() {}
 private:
  // If you see an error about overriding the following function or
  // about it being private, you have mis-spelled SetUp() as Setup().
  struct Setup_should_be_spelled_SetUp {};
  virtual Setup_should_be_spelled_SetUp* Setup() { return NULL; }
};

// A UnitTest consists of a list of TestCases.
//
// This is a singleton class. The only instance of UnitTest is
// created when UnitTest::GetInstance() is first called. This
// instance is never deleted.
//
// UnitTest is not copyable.
//
// This class is thread-safe as long as the methods are called
// according to their specification.
class UnitTest {
 public:
  // Gets the singleton UnitTest object. The first time this method
  // is called, a UnitTest object is constructed and returned.
  // Subsequent calls will return the same object.
  static UnitTest* GetInstance();

  // Registers and returns a global test environment. When a test
  // program is run, all global test environments will be set-up in
  // the order they were registered. After all tests in the program
  // have finished, all global test environments will be torn-down in
  // the *reverse* order they were registered.
  //
  // The UnitTest object takes ownership of the given environment.
  //
  // This method can only be called from the main thread.
  Environment* AddEnvironment(Environment* env);

  // Adds a TestPartResult to the current TestResult object. All
  // Google Test assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc)
  // eventually call this to report their results. The user code
  // should use the assertion macros instead of calling this directly.
  //
  // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
  void AddTestPartResult(TestPartResultType result_type,
                         const char* file_name,
                         int line_number,
                         const internal::String& message,
                         const internal::String& os_stack_trace);

  // Adds a TestProperty to the current TestResult object. If the result already
  // contains a property with the same key, the value will be updated.
  void RecordPropertyForCurrentTest(const char* key, const char* value);

  // Runs all tests in this UnitTest object and prints the result.
  // Returns 0 if successful, or 1 otherwise.
  //
  // This method can only be called from the main thread.
  //
  // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
  int Run() GTEST_MUST_USE_RESULT;

  // Returns the TestCase object for the test that's currently running,
  // or NULL if no test is running.
  const TestCase* current_test_case() const;

  // Returns the TestInfo object for the test that's currently running,
  // or NULL if no test is running.
  const TestInfo* current_test_info() const;

  // Accessors for the implementation object.
  internal::UnitTestImpl* impl() { return impl_; }
  const internal::UnitTestImpl* impl() const { return impl_; }
 private:
  // ScopedTrace is a friend as it needs to modify the per-thread
  // trace stack, which is a private member of UnitTest.
  friend class internal::ScopedTrace;

  // Creates an empty UnitTest.
  UnitTest();

  // D'tor
  virtual ~UnitTest();

  // Pushes a trace defined by SCOPED_TRACE() on to the per-thread
  // Google Test trace stack.
  void PushGTestTrace(const internal::TraceInfo& trace);

  // Pops a trace from the per-thread Google Test trace stack.
  void PopGTestTrace();

  // Protects mutable state in *impl_. This is mutable as some const
  // methods need to lock it too.
  mutable internal::Mutex mutex_;

  // Opaque implementation object. This field is never changed once
  // the object is constructed. We don't mark it as const here, as
  // doing so will cause a warning in the constructor of UnitTest.
  // Mutable state in *impl_ is protected by mutex_.
  internal::UnitTestImpl* impl_;

  // We disallow copying UnitTest.
  GTEST_DISALLOW_COPY_AND_ASSIGN(UnitTest);
};

// A convenient wrapper for adding an environment for the test
// program.
//
// You should call this before RUN_ALL_TESTS() is called, probably in
// main(). If you use gtest_main, you need to call this before main()
// starts for it to take effect. For example, you can define a global
// variable like this:
//
//   testing::Environment* const foo_env =
//       testing::AddGlobalTestEnvironment(new FooEnvironment);
//
// However, we strongly recommend you to write your own main() and
// call AddGlobalTestEnvironment() there, as relying on initialization
// of global variables makes the code harder to read and may cause
// problems when you register multiple environments from different
// translation units and the environments have dependencies among them
// (remember that the compiler doesn't guarantee the order in which
// global variables from different translation units are initialized).
inline Environment* AddGlobalTestEnvironment(Environment* env) {
  // Forwards to the UnitTest singleton and returns what it returns.
  return UnitTest::GetInstance()->AddEnvironment(env);
}

// Initializes Google Test. This must be called before calling
// RUN_ALL_TESTS(). In particular, it parses a command line for the
// flags that Google Test recognizes. Whenever a Google Test flag is
// seen, it is removed from argv, and *argc is decremented.
//
// No value is returned. Instead, the Google Test flag variables are
// updated.
void InitGoogleTest(int* argc, char** argv);

// This overloaded version can be used in Windows programs compiled in
// UNICODE mode.
+#ifdef GTEST_OS_WINDOWS +void InitGoogleTest(int* argc, wchar_t** argv); +#endif // GTEST_OS_WINDOWS + +namespace internal { + +// These overloaded versions handle ::std::string and ::std::wstring. +#if GTEST_HAS_STD_STRING +inline String FormatForFailureMessage(const ::std::string& str) { + return (Message() << '"' << str << '"').GetString(); +} +#endif // GTEST_HAS_STD_STRING + +#if GTEST_HAS_STD_WSTRING +inline String FormatForFailureMessage(const ::std::wstring& wstr) { + return (Message() << "L\"" << wstr << '"').GetString(); +} +#endif // GTEST_HAS_STD_WSTRING + +// These overloaded versions handle ::string and ::wstring. +#if GTEST_HAS_GLOBAL_STRING +inline String FormatForFailureMessage(const ::string& str) { + return (Message() << '"' << str << '"').GetString(); +} +#endif // GTEST_HAS_GLOBAL_STRING + +#if GTEST_HAS_GLOBAL_WSTRING +inline String FormatForFailureMessage(const ::wstring& wstr) { + return (Message() << "L\"" << wstr << '"').GetString(); +} +#endif // GTEST_HAS_GLOBAL_WSTRING + +// Formats a comparison assertion (e.g. ASSERT_EQ, EXPECT_LT, and etc) +// operand to be used in a failure message. The type (but not value) +// of the other operand may affect the format. This allows us to +// print a char* as a raw pointer when it is compared against another +// char*, and print it as a C string when it is compared against an +// std::string object, for example. +// +// The default implementation ignores the type of the other operand. +// Some specialized versions are used to handle formatting wide or +// narrow C strings. +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +template <typename T1, typename T2> +String FormatForComparisonFailureMessage(const T1& value, + const T2& /* other_operand */) { + return FormatForFailureMessage(value); +} + +// The helper function for {ASSERT|EXPECT}_EQ. 
+template <typename T1, typename T2> +AssertionResult CmpHelperEQ(const char* expected_expression, + const char* actual_expression, + const T1& expected, + const T2& actual) { + if (expected == actual) { + return AssertionSuccess(); + } + + return EqFailure(expected_expression, + actual_expression, + FormatForComparisonFailureMessage(expected, actual), + FormatForComparisonFailureMessage(actual, expected), + false); +} + +// With this overloaded version, we allow anonymous enums to be used +// in {ASSERT|EXPECT}_EQ when compiled with gcc 4, as anonymous enums +// can be implicitly cast to BiggestInt. +AssertionResult CmpHelperEQ(const char* expected_expression, + const char* actual_expression, + BiggestInt expected, + BiggestInt actual); + +// The helper class for {ASSERT|EXPECT}_EQ. The template argument +// lhs_is_null_literal is true iff the first argument to ASSERT_EQ() +// is a null pointer literal. The following default implementation is +// for lhs_is_null_literal being false. +template <bool lhs_is_null_literal> +class EqHelper { + public: + // This templatized version is for the general case. + template <typename T1, typename T2> + static AssertionResult Compare(const char* expected_expression, + const char* actual_expression, + const T1& expected, + const T2& actual) { + return CmpHelperEQ(expected_expression, actual_expression, expected, + actual); + } + + // With this overloaded version, we allow anonymous enums to be used + // in {ASSERT|EXPECT}_EQ when compiled with gcc 4, as anonymous + // enums can be implicitly cast to BiggestInt. + // + // Even though its body looks the same as the above version, we + // cannot merge the two, as it will make anonymous enums unhappy. 
+ static AssertionResult Compare(const char* expected_expression, + const char* actual_expression, + BiggestInt expected, + BiggestInt actual) { + return CmpHelperEQ(expected_expression, actual_expression, expected, + actual); + } +}; + +// This specialization is used when the first argument to ASSERT_EQ() +// is a null pointer literal. +template <> +class EqHelper<true> { + public: + // We define two overloaded versions of Compare(). The first + // version will be picked when the second argument to ASSERT_EQ() is + // NOT a pointer, e.g. ASSERT_EQ(0, AnIntFunction()) or + // EXPECT_EQ(false, a_bool). + template <typename T1, typename T2> + static AssertionResult Compare(const char* expected_expression, + const char* actual_expression, + const T1& expected, + const T2& actual) { + return CmpHelperEQ(expected_expression, actual_expression, expected, + actual); + } + + // This version will be picked when the second argument to + // ASSERT_EQ() is a pointer, e.g. ASSERT_EQ(NULL, a_pointer). + template <typename T1, typename T2> + static AssertionResult Compare(const char* expected_expression, + const char* actual_expression, + const T1& expected, + T2* actual) { + // We already know that 'expected' is a null pointer. + return CmpHelperEQ(expected_expression, actual_expression, + static_cast<T2*>(NULL), actual); + } +}; + +// A macro for implementing the helper functions needed to implement +// ASSERT_?? and EXPECT_??. It is here just to avoid copy-and-paste +// of similar code. +// +// For each templatized helper function, we also define an overloaded +// version for BiggestInt in order to reduce code bloat and allow +// anonymous enums to be used with {ASSERT|EXPECT}_?? when compiled +// with gcc 4. +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. 
+#define GTEST_IMPL_CMP_HELPER(op_name, op)\ +template <typename T1, typename T2>\ +AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \ + const T1& val1, const T2& val2) {\ + if (val1 op val2) {\ + return AssertionSuccess();\ + } else {\ + Message msg;\ + msg << "Expected: (" << expr1 << ") " #op " (" << expr2\ + << "), actual: " << FormatForComparisonFailureMessage(val1, val2)\ + << " vs " << FormatForComparisonFailureMessage(val2, val1);\ + return AssertionFailure(msg);\ + }\ +}\ +AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \ + BiggestInt val1, BiggestInt val2); + +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. + +// Implements the helper function for {ASSERT|EXPECT}_NE +GTEST_IMPL_CMP_HELPER(NE, !=) +// Implements the helper function for {ASSERT|EXPECT}_LE +GTEST_IMPL_CMP_HELPER(LE, <=) +// Implements the helper function for {ASSERT|EXPECT}_LT +GTEST_IMPL_CMP_HELPER(LT, < ) +// Implements the helper function for {ASSERT|EXPECT}_GE +GTEST_IMPL_CMP_HELPER(GE, >=) +// Implements the helper function for {ASSERT|EXPECT}_GT +GTEST_IMPL_CMP_HELPER(GT, > ) + +#undef GTEST_IMPL_CMP_HELPER + +// The helper function for {ASSERT|EXPECT}_STREQ. +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +AssertionResult CmpHelperSTREQ(const char* expected_expression, + const char* actual_expression, + const char* expected, + const char* actual); + +// The helper function for {ASSERT|EXPECT}_STRCASEEQ. +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +AssertionResult CmpHelperSTRCASEEQ(const char* expected_expression, + const char* actual_expression, + const char* expected, + const char* actual); + +// The helper function for {ASSERT|EXPECT}_STRNE. +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +AssertionResult CmpHelperSTRNE(const char* s1_expression, + const char* s2_expression, + const char* s1, + const char* s2); + +// The helper function for {ASSERT|EXPECT}_STRCASENE. 
+// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +AssertionResult CmpHelperSTRCASENE(const char* s1_expression, + const char* s2_expression, + const char* s1, + const char* s2); + + +// Helper function for *_STREQ on wide strings. +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +AssertionResult CmpHelperSTREQ(const char* expected_expression, + const char* actual_expression, + const wchar_t* expected, + const wchar_t* actual); + +// Helper function for *_STRNE on wide strings. +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +AssertionResult CmpHelperSTRNE(const char* s1_expression, + const char* s2_expression, + const wchar_t* s1, + const wchar_t* s2); + +} // namespace internal + +// IsSubstring() and IsNotSubstring() are intended to be used as the +// first argument to {EXPECT,ASSERT}_PRED_FORMAT2(), not by +// themselves. They check whether needle is a substring of haystack +// (NULL is considered a substring of itself only), and return an +// appropriate error message when they fail. +// +// The {needle,haystack}_expr arguments are the stringified +// expressions that generated the two real arguments. 
+AssertionResult IsSubstring( + const char* needle_expr, const char* haystack_expr, + const char* needle, const char* haystack); +AssertionResult IsSubstring( + const char* needle_expr, const char* haystack_expr, + const wchar_t* needle, const wchar_t* haystack); +AssertionResult IsNotSubstring( + const char* needle_expr, const char* haystack_expr, + const char* needle, const char* haystack); +AssertionResult IsNotSubstring( + const char* needle_expr, const char* haystack_expr, + const wchar_t* needle, const wchar_t* haystack); +#if GTEST_HAS_STD_STRING +AssertionResult IsSubstring( + const char* needle_expr, const char* haystack_expr, + const ::std::string& needle, const ::std::string& haystack); +AssertionResult IsNotSubstring( + const char* needle_expr, const char* haystack_expr, + const ::std::string& needle, const ::std::string& haystack); +#endif // GTEST_HAS_STD_STRING + +#if GTEST_HAS_STD_WSTRING +AssertionResult IsSubstring( + const char* needle_expr, const char* haystack_expr, + const ::std::wstring& needle, const ::std::wstring& haystack); +AssertionResult IsNotSubstring( + const char* needle_expr, const char* haystack_expr, + const ::std::wstring& needle, const ::std::wstring& haystack); +#endif // GTEST_HAS_STD_WSTRING + +namespace internal { + +// Helper template function for comparing floating-points. +// +// Template parameter: +// +// RawType: the raw floating-point type (either float or double) +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. 
+template <typename RawType> +AssertionResult CmpHelperFloatingPointEQ(const char* expected_expression, + const char* actual_expression, + RawType expected, + RawType actual) { + const FloatingPoint<RawType> lhs(expected), rhs(actual); + + if (lhs.AlmostEquals(rhs)) { + return AssertionSuccess(); + } + + StrStream expected_ss; + expected_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2) + << expected; + + StrStream actual_ss; + actual_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2) + << actual; + + return EqFailure(expected_expression, + actual_expression, + StrStreamToString(&expected_ss), + StrStreamToString(&actual_ss), + false); +} + +// Helper function for implementing ASSERT_NEAR. +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +AssertionResult DoubleNearPredFormat(const char* expr1, + const char* expr2, + const char* abs_error_expr, + double val1, + double val2, + double abs_error); + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// A class that enables one to stream messages to assertion macros +class AssertHelper { + public: + // Constructor. + AssertHelper(TestPartResultType type, const char* file, int line, + const char* message); + // Message assignment is a semantic trick to enable assertion + // streaming; see the GTEST_MESSAGE macro below. + void operator=(const Message& message) const; + private: + TestPartResultType const type_; + const char* const file_; + int const line_; + String const message_; + + GTEST_DISALLOW_COPY_AND_ASSIGN(AssertHelper); +}; + +} // namespace internal + +// Macros for indicating success/failure in test code. + +// ADD_FAILURE unconditionally adds a failure to the current test. +// SUCCEED generates a success - it doesn't automatically make the +// current test successful, as a test is only successful when it has +// no failure. +// +// EXPECT_* verifies that a certain condition is satisfied. If not, +// it behaves like ADD_FAILURE. 
In particular: +// +// EXPECT_TRUE verifies that a Boolean condition is true. +// EXPECT_FALSE verifies that a Boolean condition is false. +// +// FAIL and ASSERT_* are similar to ADD_FAILURE and EXPECT_*, except +// that they will also abort the current function on failure. People +// usually want the fail-fast behavior of FAIL and ASSERT_*, but those +// writing data-driven tests often find themselves using ADD_FAILURE +// and EXPECT_* more. +// +// Examples: +// +// EXPECT_TRUE(server.StatusIsOK()); +// ASSERT_FALSE(server.HasPendingRequest(port)) +// << "There are still pending requests " << "on port " << port; + +// Generates a nonfatal failure with a generic message. +#define ADD_FAILURE() GTEST_NONFATAL_FAILURE("Failed") + +// Generates a fatal failure with a generic message. +#define FAIL() GTEST_FATAL_FAILURE("Failed") + +// Generates a success with a generic message. +#define SUCCEED() GTEST_SUCCESS("Succeeded") + +// Boolean assertions. +#define EXPECT_TRUE(condition) \ + GTEST_TEST_BOOLEAN(condition, #condition, false, true, \ + GTEST_NONFATAL_FAILURE) +#define EXPECT_FALSE(condition) \ + GTEST_TEST_BOOLEAN(!(condition), #condition, true, false, \ + GTEST_NONFATAL_FAILURE) +#define ASSERT_TRUE(condition) \ + GTEST_TEST_BOOLEAN(condition, #condition, false, true, \ + GTEST_FATAL_FAILURE) +#define ASSERT_FALSE(condition) \ + GTEST_TEST_BOOLEAN(!(condition), #condition, true, false, \ + GTEST_FATAL_FAILURE) + +// Includes the auto-generated header that implements a family of +// generic predicate assertion macros. +#include <gtest/gtest_pred_impl.h> + +// Macros for testing equalities and inequalities. 
+// +// * {ASSERT|EXPECT}_EQ(expected, actual): Tests that expected == actual +// * {ASSERT|EXPECT}_NE(v1, v2): Tests that v1 != v2 +// * {ASSERT|EXPECT}_LT(v1, v2): Tests that v1 < v2 +// * {ASSERT|EXPECT}_LE(v1, v2): Tests that v1 <= v2 +// * {ASSERT|EXPECT}_GT(v1, v2): Tests that v1 > v2 +// * {ASSERT|EXPECT}_GE(v1, v2): Tests that v1 >= v2 +// +// When they are not, Google Test prints both the tested expressions and +// their actual values. The values must be compatible built-in types, +// or you will get a compiler error. By "compatible" we mean that the +// values can be compared by the respective operator. +// +// Note: +// +// 1. It is possible to make a user-defined type work with +// {ASSERT|EXPECT}_??(), but that requires overloading the +// comparison operators and is thus discouraged by the Google C++ +// Usage Guide. Therefore, you are advised to use the +// {ASSERT|EXPECT}_TRUE() macro to assert that two objects are +// equal. +// +// 2. The {ASSERT|EXPECT}_??() macros do pointer comparisons on +// pointers (in particular, C strings). Therefore, if you use it +// with two C strings, you are testing how their locations in memory +// are related, not how their content is related. To compare two C +// strings by content, use {ASSERT|EXPECT}_STR*(). +// +// 3. {ASSERT|EXPECT}_EQ(expected, actual) is preferred to +// {ASSERT|EXPECT}_TRUE(expected == actual), as the former tells you +// what the actual value is when it fails, and similarly for the +// other comparisons. +// +// 4. Do not depend on the order in which {ASSERT|EXPECT}_??() +// evaluate their arguments, which is undefined. +// +// 5. These macros evaluate their arguments exactly once. 
//
// Examples:
//
//   EXPECT_NE(5, Foo());
//   EXPECT_EQ(NULL, a_pointer);
//   ASSERT_LT(i, array_size);
//   ASSERT_GT(records.size(), 0) << "There is no record left.";

// Nonfatal comparison assertions.  Each macro forwards its two operands to
// EXPECT_PRED_FORMAT2 together with the matching comparison helper from
// ::testing::internal.  EXPECT_EQ picks its EqHelper specialization from
// GTEST_IS_NULL_LITERAL(expected).
#define EXPECT_EQ(expected, actual) \
  EXPECT_PRED_FORMAT2(::testing::internal:: \
                      EqHelper<GTEST_IS_NULL_LITERAL(expected)>::Compare, \
                      expected, actual)
// Only the equality macros have an expected/actual pair; every other
// comparison, EXPECT_NE included, names its operands val1 and val2, the
// same as ASSERT_NE does.
#define EXPECT_NE(val1, val2) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperNE, val1, val2)
#define EXPECT_LE(val1, val2) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperLE, val1, val2)
#define EXPECT_LT(val1, val2) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperLT, val1, val2)
#define EXPECT_GE(val1, val2) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperGE, val1, val2)
#define EXPECT_GT(val1, val2) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperGT, val1, val2)

// Fatal counterparts: identical forwarding, but through
// ASSERT_PRED_FORMAT2.
#define ASSERT_EQ(expected, actual) \
  ASSERT_PRED_FORMAT2(::testing::internal:: \
                      EqHelper<GTEST_IS_NULL_LITERAL(expected)>::Compare, \
                      expected, actual)
#define ASSERT_NE(val1, val2) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperNE, val1, val2)
#define ASSERT_LE(val1, val2) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperLE, val1, val2)
#define ASSERT_LT(val1, val2) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperLT, val1, val2)
#define ASSERT_GE(val1, val2) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperGE, val1, val2)
#define ASSERT_GT(val1, val2) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperGT, val1, val2)

// C String Comparisons.  All tests treat NULL and any non-NULL string
// as different.  Two NULLs are equal.
//
//    * {ASSERT|EXPECT}_STREQ(s1, s2):     Tests that s1 == s2
//    * {ASSERT|EXPECT}_STRNE(s1, s2):     Tests that s1 != s2
//    * {ASSERT|EXPECT}_STRCASEEQ(s1, s2): Tests that s1 == s2, ignoring case
//    * {ASSERT|EXPECT}_STRCASENE(s1, s2): Tests that s1 != s2, ignoring case
//
// For wide or narrow string objects, you can use the
// {ASSERT|EXPECT}_??() macros.
+// +// Don't depend on the order in which the arguments are evaluated, +// which is undefined. +// +// These macros evaluate their arguments exactly once. + +#define EXPECT_STREQ(expected, actual) \ + EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTREQ, expected, actual) +#define EXPECT_STRNE(s1, s2) \ + EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRNE, s1, s2) +#define EXPECT_STRCASEEQ(expected, actual) \ + EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASEEQ, expected, actual) +#define EXPECT_STRCASENE(s1, s2)\ + EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASENE, s1, s2) + +#define ASSERT_STREQ(expected, actual) \ + ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTREQ, expected, actual) +#define ASSERT_STRNE(s1, s2) \ + ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRNE, s1, s2) +#define ASSERT_STRCASEEQ(expected, actual) \ + ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASEEQ, expected, actual) +#define ASSERT_STRCASENE(s1, s2)\ + ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASENE, s1, s2) + +// Macros for comparing floating-point numbers. +// +// * {ASSERT|EXPECT}_FLOAT_EQ(expected, actual): +// Tests that two float values are almost equal. +// * {ASSERT|EXPECT}_DOUBLE_EQ(expected, actual): +// Tests that two double values are almost equal. +// * {ASSERT|EXPECT}_NEAR(v1, v2, abs_error): +// Tests that v1 and v2 are within the given distance to each other. +// +// Google Test uses ULP-based comparison to automatically pick a default +// error bound that is appropriate for the operands. See the +// FloatingPoint template class in gtest-internal.h if you are +// interested in the implementation details. 
+ +#define EXPECT_FLOAT_EQ(expected, actual)\ + EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<float>, \ + expected, actual) + +#define EXPECT_DOUBLE_EQ(expected, actual)\ + EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<double>, \ + expected, actual) + +#define ASSERT_FLOAT_EQ(expected, actual)\ + ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<float>, \ + expected, actual) + +#define ASSERT_DOUBLE_EQ(expected, actual)\ + ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<double>, \ + expected, actual) + +#define EXPECT_NEAR(val1, val2, abs_error)\ + EXPECT_PRED_FORMAT3(::testing::internal::DoubleNearPredFormat, \ + val1, val2, abs_error) + +#define ASSERT_NEAR(val1, val2, abs_error)\ + ASSERT_PRED_FORMAT3(::testing::internal::DoubleNearPredFormat, \ + val1, val2, abs_error) + +// These predicate format functions work on floating-point values, and +// can be used in {ASSERT|EXPECT}_PRED_FORMAT2*(), e.g. +// +// EXPECT_PRED_FORMAT2(testing::DoubleLE, Foo(), 5.0); + +// Asserts that val1 is less than, or almost equal to, val2. Fails +// otherwise. In particular, it fails if either val1 or val2 is NaN. +AssertionResult FloatLE(const char* expr1, const char* expr2, + float val1, float val2); +AssertionResult DoubleLE(const char* expr1, const char* expr2, + double val1, double val2); + + +#ifdef GTEST_OS_WINDOWS + +// Macros that test for HRESULT failure and success, these are only useful +// on Windows, and rely on Windows SDK macros and APIs to compile. +// +// * {ASSERT|EXPECT}_HRESULT_{SUCCEEDED|FAILED}(expr) +// +// When expr unexpectedly fails or succeeds, Google Test prints the expected result +// and the actual result with both a human-readable string representation of +// the error, if available, as well as the hex result code. 
+#define EXPECT_HRESULT_SUCCEEDED(expr) \ + EXPECT_PRED_FORMAT1(::testing::internal::IsHRESULTSuccess, (expr)) + +#define ASSERT_HRESULT_SUCCEEDED(expr) \ + ASSERT_PRED_FORMAT1(::testing::internal::IsHRESULTSuccess, (expr)) + +#define EXPECT_HRESULT_FAILED(expr) \ + EXPECT_PRED_FORMAT1(::testing::internal::IsHRESULTFailure, (expr)) + +#define ASSERT_HRESULT_FAILED(expr) \ + ASSERT_PRED_FORMAT1(::testing::internal::IsHRESULTFailure, (expr)) + +#endif // GTEST_OS_WINDOWS + + +// Causes a trace (including the source file path, the current line +// number, and the given message) to be included in every test failure +// message generated by code in the current scope. The effect is +// undone when the control leaves the current scope. +// +// The message argument can be anything streamable to std::ostream. +// +// In the implementation, we include the current line number as part +// of the dummy variable name, thus allowing multiple SCOPED_TRACE()s +// to appear in the same block - as long as they are on different +// lines. +#define SCOPED_TRACE(message) \ + ::testing::internal::ScopedTrace GTEST_CONCAT_TOKEN(gtest_trace_, __LINE__)(\ + __FILE__, __LINE__, ::testing::Message() << (message)) + + +// Defines a test. +// +// The first parameter is the name of the test case, and the second +// parameter is the name of the test within the test case. +// +// The convention is to end the test case name with "Test". For +// example, a test case for the Foo class can be named FooTest. +// +// The user should put his test code between braces after using this +// macro. Example: +// +// TEST(FooTest, InitializesCorrectly) { +// Foo foo; +// EXPECT_TRUE(foo.StatusIsOK()); +// } + +#define TEST(test_case_name, test_name)\ + GTEST_TEST(test_case_name, test_name, ::testing::Test) + + +// Defines a test that uses a test fixture. +// +// The first parameter is the name of the test fixture class, which +// also doubles as the test case name. 
The second parameter is the +// name of the test within the test case. +// +// A test fixture class must be declared earlier. The user should put +// his test code between braces after using this macro. Example: +// +// class FooTest : public testing::Test { +// protected: +// virtual void SetUp() { b_.AddElement(3); } +// +// Foo a_; +// Foo b_; +// }; +// +// TEST_F(FooTest, InitializesCorrectly) { +// EXPECT_TRUE(a_.StatusIsOK()); +// } +// +// TEST_F(FooTest, ReturnsElementCountCorrectly) { +// EXPECT_EQ(0, a_.size()); +// EXPECT_EQ(1, b_.size()); +// } + +#define TEST_F(test_fixture, test_name)\ + GTEST_TEST(test_fixture, test_name, test_fixture) + +// Use this macro in main() to run all tests. It returns 0 if all +// tests are successful, or 1 otherwise. +// +// RUN_ALL_TESTS() should be invoked after the command line has been +// parsed by InitGoogleTest(). + +#define RUN_ALL_TESTS()\ + (::testing::UnitTest::GetInstance()->Run()) + +} // namespace testing + +#endif // GTEST_INCLUDE_GTEST_GTEST_H_ diff --git a/src/gtest/gtest_main.cc b/src/gtest/gtest_main.cc new file mode 100644 index 0000000..d20c02f --- /dev/null +++ b/src/gtest/gtest_main.cc @@ -0,0 +1,39 @@ +// Copyright 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <iostream> + +#include <gtest/gtest.h> + +int main(int argc, char **argv) { + std::cout << "Running main() from gtest_main.cc\n"; + + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/gtest/gtest_pred_impl.h b/src/gtest/gtest_pred_impl.h new file mode 100644 index 0000000..984f793 --- /dev/null +++ b/src/gtest/gtest_pred_impl.h @@ -0,0 +1,368 @@ +// Copyright 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This file is AUTOMATICALLY GENERATED on 06/22/2008 by command +// 'gen_gtest_pred_impl.py 5'. DO NOT EDIT BY HAND! +// +// Implements a family of generic predicate assertion macros. + +#ifndef GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_ +#define GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_ + +// Makes sure this header is not included before gtest.h. +#ifndef GTEST_INCLUDE_GTEST_GTEST_H_ +#error Do not include gtest_pred_impl.h directly. Include gtest.h instead. +#endif // GTEST_INCLUDE_GTEST_GTEST_H_ + +// This header implements a family of generic predicate assertion +// macros: +// +// ASSERT_PRED_FORMAT1(pred_format, v1) +// ASSERT_PRED_FORMAT2(pred_format, v1, v2) +// ... +// +// where pred_format is a function or functor that takes n (in the +// case of ASSERT_PRED_FORMATn) values and their source expression +// text, and returns a testing::AssertionResult. See the definition +// of ASSERT_EQ in gtest.h for an example. +// +// If you don't care about formatting, you can use the more +// restrictive version: +// +// ASSERT_PRED1(pred, v1) +// ASSERT_PRED2(pred, v1, v2) +// ... 
+// +// where pred is an n-ary function or functor that returns bool, +// and the values v1, v2, ..., must support the << operator for +// streaming to std::ostream. +// +// We also define the EXPECT_* variations. +// +// For now we only support predicates whose arity is at most 5. +// Please email googletestframework@googlegroups.com if you need +// support for higher arities. + +// GTEST_ASSERT is the basic statement to which all of the assertions +// in this file reduce. Don't use this in your code. + +#define GTEST_ASSERT(expression, on_failure) \ + GTEST_AMBIGUOUS_ELSE_BLOCKER \ + if (const ::testing::AssertionResult gtest_ar = (expression)) \ + ; \ + else \ + on_failure(gtest_ar.failure_message()) + + +// Helper function for implementing {EXPECT|ASSERT}_PRED1. Don't use +// this in your code. +template <typename Pred, + typename T1> +AssertionResult AssertPred1Helper(const char* pred_text, + const char* e1, + Pred pred, + const T1& v1) { + if (pred(v1)) return AssertionSuccess(); + + Message msg; + msg << pred_text << "(" + << e1 << ") evaluates to false, where" + << "\n" << e1 << " evaluates to " << v1; + return AssertionFailure(msg); +} + +// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT1. +// Don't use this in your code. +#define GTEST_PRED_FORMAT1(pred_format, v1, on_failure)\ + GTEST_ASSERT(pred_format(#v1, v1),\ + on_failure) + +// Internal macro for implementing {EXPECT|ASSERT}_PRED1. Don't use +// this in your code. +#define GTEST_PRED1(pred, v1, on_failure)\ + GTEST_ASSERT(::testing::AssertPred1Helper(#pred, \ + #v1, \ + pred, \ + v1), on_failure) + +// Unary predicate assertion macros. 
+#define EXPECT_PRED_FORMAT1(pred_format, v1) \ + GTEST_PRED_FORMAT1(pred_format, v1, GTEST_NONFATAL_FAILURE) +#define EXPECT_PRED1(pred, v1) \ + GTEST_PRED1(pred, v1, GTEST_NONFATAL_FAILURE) +#define ASSERT_PRED_FORMAT1(pred_format, v1) \ + GTEST_PRED_FORMAT1(pred_format, v1, GTEST_FATAL_FAILURE) +#define ASSERT_PRED1(pred, v1) \ + GTEST_PRED1(pred, v1, GTEST_FATAL_FAILURE) + + + +// Helper function for implementing {EXPECT|ASSERT}_PRED2. Don't use +// this in your code. +template <typename Pred, + typename T1, + typename T2> +AssertionResult AssertPred2Helper(const char* pred_text, + const char* e1, + const char* e2, + Pred pred, + const T1& v1, + const T2& v2) { + if (pred(v1, v2)) return AssertionSuccess(); + + Message msg; + msg << pred_text << "(" + << e1 << ", " + << e2 << ") evaluates to false, where" + << "\n" << e1 << " evaluates to " << v1 + << "\n" << e2 << " evaluates to " << v2; + return AssertionFailure(msg); +} + +// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT2. +// Don't use this in your code. +#define GTEST_PRED_FORMAT2(pred_format, v1, v2, on_failure)\ + GTEST_ASSERT(pred_format(#v1, #v2, v1, v2),\ + on_failure) + +// Internal macro for implementing {EXPECT|ASSERT}_PRED2. Don't use +// this in your code. +#define GTEST_PRED2(pred, v1, v2, on_failure)\ + GTEST_ASSERT(::testing::AssertPred2Helper(#pred, \ + #v1, \ + #v2, \ + pred, \ + v1, \ + v2), on_failure) + +// Binary predicate assertion macros. +#define EXPECT_PRED_FORMAT2(pred_format, v1, v2) \ + GTEST_PRED_FORMAT2(pred_format, v1, v2, GTEST_NONFATAL_FAILURE) +#define EXPECT_PRED2(pred, v1, v2) \ + GTEST_PRED2(pred, v1, v2, GTEST_NONFATAL_FAILURE) +#define ASSERT_PRED_FORMAT2(pred_format, v1, v2) \ + GTEST_PRED_FORMAT2(pred_format, v1, v2, GTEST_FATAL_FAILURE) +#define ASSERT_PRED2(pred, v1, v2) \ + GTEST_PRED2(pred, v1, v2, GTEST_FATAL_FAILURE) + + + +// Helper function for implementing {EXPECT|ASSERT}_PRED3. Don't use +// this in your code. 
+template <typename Pred, + typename T1, + typename T2, + typename T3> +AssertionResult AssertPred3Helper(const char* pred_text, + const char* e1, + const char* e2, + const char* e3, + Pred pred, + const T1& v1, + const T2& v2, + const T3& v3) { + if (pred(v1, v2, v3)) return AssertionSuccess(); + + Message msg; + msg << pred_text << "(" + << e1 << ", " + << e2 << ", " + << e3 << ") evaluates to false, where" + << "\n" << e1 << " evaluates to " << v1 + << "\n" << e2 << " evaluates to " << v2 + << "\n" << e3 << " evaluates to " << v3; + return AssertionFailure(msg); +} + +// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT3. +// Don't use this in your code. +#define GTEST_PRED_FORMAT3(pred_format, v1, v2, v3, on_failure)\ + GTEST_ASSERT(pred_format(#v1, #v2, #v3, v1, v2, v3),\ + on_failure) + +// Internal macro for implementing {EXPECT|ASSERT}_PRED3. Don't use +// this in your code. +#define GTEST_PRED3(pred, v1, v2, v3, on_failure)\ + GTEST_ASSERT(::testing::AssertPred3Helper(#pred, \ + #v1, \ + #v2, \ + #v3, \ + pred, \ + v1, \ + v2, \ + v3), on_failure) + +// Ternary predicate assertion macros. +#define EXPECT_PRED_FORMAT3(pred_format, v1, v2, v3) \ + GTEST_PRED_FORMAT3(pred_format, v1, v2, v3, GTEST_NONFATAL_FAILURE) +#define EXPECT_PRED3(pred, v1, v2, v3) \ + GTEST_PRED3(pred, v1, v2, v3, GTEST_NONFATAL_FAILURE) +#define ASSERT_PRED_FORMAT3(pred_format, v1, v2, v3) \ + GTEST_PRED_FORMAT3(pred_format, v1, v2, v3, GTEST_FATAL_FAILURE) +#define ASSERT_PRED3(pred, v1, v2, v3) \ + GTEST_PRED3(pred, v1, v2, v3, GTEST_FATAL_FAILURE) + + + +// Helper function for implementing {EXPECT|ASSERT}_PRED4. Don't use +// this in your code. 
+template <typename Pred, + typename T1, + typename T2, + typename T3, + typename T4> +AssertionResult AssertPred4Helper(const char* pred_text, + const char* e1, + const char* e2, + const char* e3, + const char* e4, + Pred pred, + const T1& v1, + const T2& v2, + const T3& v3, + const T4& v4) { + if (pred(v1, v2, v3, v4)) return AssertionSuccess(); + + Message msg; + msg << pred_text << "(" + << e1 << ", " + << e2 << ", " + << e3 << ", " + << e4 << ") evaluates to false, where" + << "\n" << e1 << " evaluates to " << v1 + << "\n" << e2 << " evaluates to " << v2 + << "\n" << e3 << " evaluates to " << v3 + << "\n" << e4 << " evaluates to " << v4; + return AssertionFailure(msg); +} + +// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT4. +// Don't use this in your code. +#define GTEST_PRED_FORMAT4(pred_format, v1, v2, v3, v4, on_failure)\ + GTEST_ASSERT(pred_format(#v1, #v2, #v3, #v4, v1, v2, v3, v4),\ + on_failure) + +// Internal macro for implementing {EXPECT|ASSERT}_PRED4. Don't use +// this in your code. +#define GTEST_PRED4(pred, v1, v2, v3, v4, on_failure)\ + GTEST_ASSERT(::testing::AssertPred4Helper(#pred, \ + #v1, \ + #v2, \ + #v3, \ + #v4, \ + pred, \ + v1, \ + v2, \ + v3, \ + v4), on_failure) + +// 4-ary predicate assertion macros. +#define EXPECT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \ + GTEST_PRED_FORMAT4(pred_format, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE) +#define EXPECT_PRED4(pred, v1, v2, v3, v4) \ + GTEST_PRED4(pred, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE) +#define ASSERT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \ + GTEST_PRED_FORMAT4(pred_format, v1, v2, v3, v4, GTEST_FATAL_FAILURE) +#define ASSERT_PRED4(pred, v1, v2, v3, v4) \ + GTEST_PRED4(pred, v1, v2, v3, v4, GTEST_FATAL_FAILURE) + + + +// Helper function for implementing {EXPECT|ASSERT}_PRED5. Don't use +// this in your code. 
+template <typename Pred, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5> +AssertionResult AssertPred5Helper(const char* pred_text, + const char* e1, + const char* e2, + const char* e3, + const char* e4, + const char* e5, + Pred pred, + const T1& v1, + const T2& v2, + const T3& v3, + const T4& v4, + const T5& v5) { + if (pred(v1, v2, v3, v4, v5)) return AssertionSuccess(); + + Message msg; + msg << pred_text << "(" + << e1 << ", " + << e2 << ", " + << e3 << ", " + << e4 << ", " + << e5 << ") evaluates to false, where" + << "\n" << e1 << " evaluates to " << v1 + << "\n" << e2 << " evaluates to " << v2 + << "\n" << e3 << " evaluates to " << v3 + << "\n" << e4 << " evaluates to " << v4 + << "\n" << e5 << " evaluates to " << v5; + return AssertionFailure(msg); +} + +// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT5. +// Don't use this in your code. +#define GTEST_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5, on_failure)\ + GTEST_ASSERT(pred_format(#v1, #v2, #v3, #v4, #v5, v1, v2, v3, v4, v5),\ + on_failure) + +// Internal macro for implementing {EXPECT|ASSERT}_PRED5. Don't use +// this in your code. +#define GTEST_PRED5(pred, v1, v2, v3, v4, v5, on_failure)\ + GTEST_ASSERT(::testing::AssertPred5Helper(#pred, \ + #v1, \ + #v2, \ + #v3, \ + #v4, \ + #v5, \ + pred, \ + v1, \ + v2, \ + v3, \ + v4, \ + v5), on_failure) + +// 5-ary predicate assertion macros. 
// Each macro forwards to the matching internal GTEST_PRED*5 macro; the
// EXPECT_* forms pass GTEST_NONFATAL_FAILURE and the ASSERT_* forms pass
// GTEST_FATAL_FAILURE as the on_failure argument.
#define EXPECT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \
  GTEST_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE)
#define EXPECT_PRED5(pred, v1, v2, v3, v4, v5) \
  GTEST_PRED5(pred, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE)
#define ASSERT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \
  GTEST_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE)
#define ASSERT_PRED5(pred, v1, v2, v3, v4, v5) \
  GTEST_PRED5(pred, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE)



#endif  // GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_

// File boundary in the enclosing diff (kept verbatim):
// diff --git a/src/gtest/gtest_prod.h b/src/gtest/gtest_prod.h
// new file mode 100644
// index 0000000..da80ddc
// --- /dev/null
// +++ b/src/gtest/gtest_prod.h
// @@ -0,0 +1,58 @@

// Copyright 2006, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
//
// Google C++ Testing Framework definitions useful in production code.

#ifndef GTEST_INCLUDE_GTEST_GTEST_PROD_H_
#define GTEST_INCLUDE_GTEST_GTEST_PROD_H_

// When you need to test the private or protected members of a class,
// use the FRIEND_TEST macro to declare your tests as friends of the
// class. For example:
//
// class MyClass {
//  private:
//   void MyMethod();
//   FRIEND_TEST(MyClassTest, MyMethod);
// };
//
// class MyClassTest : public testing::Test {
//   // ...
// };
//
// TEST_F(MyClassTest, MyMethod) {
//   // Can call MyClass::MyMethod() here.
// }
//
// The macro expands to a friend declaration for the class named
// <test_case_name>_<test_name>_Test. The expansion carries no trailing
// semicolon; the use site supplies it, as in the example above.

#define FRIEND_TEST(test_case_name, test_name)\
friend class test_case_name##_##test_name##_Test

#endif  // GTEST_INCLUDE_GTEST_GTEST_PROD_H_

// File boundary in the enclosing diff (kept verbatim):
// diff --git a/src/gtest/internal/gtest-death-test-internal.h b/src/gtest/internal/gtest-death-test-internal.h
// new file mode 100644
// index 0000000..b49c6e4
// --- /dev/null
// +++ b/src/gtest/internal/gtest-death-test-internal.h
// @@ -0,0 +1,201 @@

// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee) +// +// The Google C++ Testing Framework (Google Test) +// +// This header file defines internal utilities needed for implementing +// death tests. They are subject to change without notice. + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_ + +#include <gtest/internal/gtest-internal.h> + +namespace testing { +namespace internal { + +GTEST_DECLARE_string(internal_run_death_test); + +// Names of the flags (needed for parsing Google Test flags). 
const char kDeathTestStyleFlag[] = "death_test_style";
const char kInternalRunDeathTestFlag[] = "internal_run_death_test";

#ifdef GTEST_HAS_DEATH_TEST

// DeathTest is a class that hides much of the complexity of the
// GTEST_DEATH_TEST macro. It is abstract; its static Create method
// returns a concrete class that depends on the prevailing death test
// style, as defined by the --gtest_death_test_style and/or
// --gtest_internal_run_death_test flags.

// In describing the results of death tests, these terms are used with
// the corresponding definitions:
//
// exit status: The integer exit information in the format specified
//              by wait(2)
// exit code:   The integer code passed to exit(3), _exit(2), or
//              returned from main()
class DeathTest {
 public:
  // Create returns false if there was an error determining the
  // appropriate action to take for the current death test; for example,
  // if the gtest_death_test_style flag is set to an invalid value.
  // The LastMessage method will return a more detailed message in that
  // case. Otherwise, the DeathTest pointer pointed to by the "test"
  // argument is set. If the death test should be skipped, the pointer
  // is set to NULL; otherwise, it is set to the address of a new concrete
  // DeathTest object that controls the execution of the current test.
  static bool Create(const char* statement, const RE* regex,
                     const char* file, int line, DeathTest** test);
  DeathTest();
  virtual ~DeathTest() { }

  // A helper class that aborts a death test when it's deleted: its
  // destructor calls Abort(TEST_ENCOUNTERED_RETURN_STATEMENT) on the
  // DeathTest it was constructed with.
  class ReturnSentinel {
   public:
    explicit ReturnSentinel(DeathTest* test) : test_(test) { }
    ~ReturnSentinel() { test_->Abort(TEST_ENCOUNTERED_RETURN_STATEMENT); }
   private:
    // The death test to abort; the pointer is never reseated.
    DeathTest* const test_;
    GTEST_DISALLOW_COPY_AND_ASSIGN(ReturnSentinel);
  } GTEST_ATTRIBUTE_UNUSED;

  // An enumeration of possible roles that may be taken when a death
  // test is encountered. EXECUTE means that the death test logic should
  // be executed immediately. OVERSEE means that the program should prepare
  // the appropriate environment for a child process to execute the death
  // test, then wait for it to complete.
  enum TestRole { OVERSEE_TEST, EXECUTE_TEST };

  // An enumeration of the two reasons that a test might be aborted.
  enum AbortReason { TEST_ENCOUNTERED_RETURN_STATEMENT, TEST_DID_NOT_DIE };

  // Assumes one of the above roles.
  virtual TestRole AssumeRole() = 0;

  // Waits for the death test to finish and returns its status.
  virtual int Wait() = 0;

  // Returns true if the death test passed; that is, the test process
  // exited during the test, its exit status matches a user-supplied
  // predicate, and its stderr output matches a user-supplied regular
  // expression.
  // The user-supplied predicate may be a macro expression rather
  // than a function pointer or functor, or else Wait and Passed could
  // be combined.
  virtual bool Passed(bool exit_status_ok) = 0;

  // Signals that the death test did not die as expected.
  virtual void Abort(AbortReason reason) = 0;

  // Returns a human-readable outcome message regarding the outcome of
  // the last death test.
  static const char* LastMessage();

 private:
  GTEST_DISALLOW_COPY_AND_ASSIGN(DeathTest);
};

// Factory interface for death tests. May be mocked out for testing.
// Create takes the same parameters as DeathTest::Create.
class DeathTestFactory {
 public:
  virtual ~DeathTestFactory() { }
  virtual bool Create(const char* statement, const RE* regex,
                      const char* file, int line, DeathTest** test) = 0;
};

// A concrete DeathTestFactory implementation for normal use.
class DefaultDeathTestFactory : public DeathTestFactory {
 public:
  virtual bool Create(const char* statement, const RE* regex,
                      const char* file, int line, DeathTest** test);
};

// Returns true if exit_status describes a process that was terminated
// by a signal, or exited normally with a nonzero exit code.
+bool ExitedUnsuccessfully(int exit_status); + +// This macro is for implementing ASSERT_DEATH*, EXPECT_DEATH*, +// ASSERT_EXIT*, and EXPECT_EXIT*. +#define GTEST_DEATH_TEST(statement, predicate, regex, fail) \ + GTEST_AMBIGUOUS_ELSE_BLOCKER \ + if (true) { \ + const ::testing::internal::RE& gtest_regex = (regex); \ + ::testing::internal::DeathTest* gtest_dt; \ + if (!::testing::internal::DeathTest::Create(#statement, >est_regex, \ + __FILE__, __LINE__, >est_dt)) { \ + goto GTEST_CONCAT_TOKEN(gtest_label_, __LINE__); \ + } \ + if (gtest_dt != NULL) { \ + ::testing::internal::scoped_ptr< ::testing::internal::DeathTest> \ + gtest_dt_ptr(gtest_dt); \ + switch (gtest_dt->AssumeRole()) { \ + case ::testing::internal::DeathTest::OVERSEE_TEST: \ + if (!gtest_dt->Passed(predicate(gtest_dt->Wait()))) { \ + goto GTEST_CONCAT_TOKEN(gtest_label_, __LINE__); \ + } \ + break; \ + case ::testing::internal::DeathTest::EXECUTE_TEST: { \ + ::testing::internal::DeathTest::ReturnSentinel \ + gtest_sentinel(gtest_dt); \ + { statement; } \ + gtest_dt->Abort(::testing::internal::DeathTest::TEST_DID_NOT_DIE); \ + break; \ + } \ + } \ + } \ + } else \ + GTEST_CONCAT_TOKEN(gtest_label_, __LINE__): \ + fail(::testing::internal::DeathTest::LastMessage()) +// The symbol "fail" here expands to something into which a message +// can be streamed. + +// A struct representing the parsed contents of the +// --gtest_internal_run_death_test flag, as it existed when +// RUN_ALL_TESTS was called. +struct InternalRunDeathTestFlag { + String file; + int line; + int index; + int status_fd; +}; + +// Returns a newly created InternalRunDeathTestFlag object with fields +// initialized from the GTEST_FLAG(internal_run_death_test) flag if +// the flag is specified; otherwise returns NULL. 
// Declaration only; no definition appears in this header.
InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag();

#endif  // GTEST_HAS_DEATH_TEST

}  // namespace internal
}  // namespace testing

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_

// File boundary in the enclosing diff (kept verbatim):
// diff --git a/src/gtest/internal/gtest-filepath.h b/src/gtest/internal/gtest-filepath.h
// new file mode 100644
// index 0000000..308a2c6
// --- /dev/null
// +++ b/src/gtest/internal/gtest-filepath.h
// @@ -0,0 +1,156 @@

// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: keith.ray@gmail.com (Keith Ray)
//
// Google Test filepath utilities
//
// This header file declares classes and functions used internally by
// Google Test. They are subject to change without notice.
//
// This file is #included in testing/base/internal/gtest-internal.h
// Do not include this header file separately!

#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_

#include <gtest/internal/gtest-string.h>

namespace testing {
namespace internal {

// FilePath - a class for file and directory pathname manipulation which
// handles platform-specific conventions (like the pathname separator).
// Used for helper functions for naming files in a directory for xml output.
// Except for Set methods, all methods are const or static, which provides an
// "immutable value object" -- useful for peace of mind.
// A FilePath with a value ending in a path separator ("like/this/") represents
// a directory, otherwise it is assumed to represent a file. In either case,
// it may or may not represent an actual file or directory in the file system.
// Names are NOT checked for syntax correctness -- no checking for illegal
// characters, malformed paths, etc.
//
// Copy construction is public, but operator= is declared private and left
// unimplemented; use Set() to replace the stored pathname.

class FilePath {
 public:
  // The default-constructed FilePath holds the empty pathname "".
  FilePath() : pathname_("") { }
  FilePath(const FilePath& rhs) : pathname_(rhs.pathname_) { }
  explicit FilePath(const char* pathname) : pathname_(pathname) { }
  explicit FilePath(const String& pathname) : pathname_(pathname) { }

  // Replaces the stored pathname with a copy of rhs's pathname.
  void Set(const FilePath& rhs) {
    pathname_ = rhs.pathname_;
  }

  // Accessors for the stored pathname: as a String copy, or as the
  // C string returned by String::c_str().
  String ToString() const { return pathname_; }
  const char* c_str() const { return pathname_.c_str(); }

  // Given directory = "dir", base_name = "test", number = 0,
  // extension = "xml", returns "dir/test.xml". If number is greater
  // than zero (e.g., 12), returns "dir/test_12.xml".
  // On Windows platform, uses \ as the separator rather than /.
  static FilePath MakeFileName(const FilePath& directory,
                               const FilePath& base_name,
                               int number,
                               const char* extension);

  // Returns a pathname for a file that does not currently exist. The pathname
  // will be directory/base_name.extension or
  // directory/base_name_<number>.extension if directory/base_name.extension
  // already exists. The number will be incremented until a pathname is found
  // that does not already exist.
  // Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'.
  // There could be a race condition if two or more processes are calling this
  // function at the same time -- they could both pick the same filename.
  static FilePath GenerateUniqueFileName(const FilePath& directory,
                                         const FilePath& base_name,
                                         const char* extension);

  // If input name has a trailing separator character, removes it and returns
  // the name, otherwise return the name string unmodified.
  // On Windows platform, uses \ as the separator, other platforms use /.
  FilePath RemoveTrailingPathSeparator() const;

  // Returns a copy of the FilePath with the directory part removed.
  // Example: FilePath("path/to/file").RemoveDirectoryName() returns
  // FilePath("file"). If there is no directory part ("just_a_file"), it returns
  // the FilePath unmodified. If there is no file part ("just_a_dir/") it
  // returns an empty FilePath ("").
  // On Windows platform, '\' is the path separator, otherwise it is '/'.
  FilePath RemoveDirectoryName() const;

  // RemoveFileName returns the directory path with the filename removed.
  // Example: FilePath("path/to/file").RemoveFileName() returns "path/to/".
  // If the FilePath is "a_file" or "/a_file", RemoveFileName returns
  // FilePath("./") or, on Windows, FilePath(".\\"). If the filepath does
  // not have a file, like "just/a/dir/", it returns the FilePath unmodified.
  // On Windows platform, '\' is the path separator, otherwise it is '/'.
  FilePath RemoveFileName() const;

  // Returns a copy of the FilePath with the case-insensitive extension removed.
  // Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns
  // FilePath("dir/file"). If a case-insensitive extension is not
  // found, returns a copy of the original FilePath.
  FilePath RemoveExtension(const char* extension) const;

  // Creates directories so that path exists. Returns true if successful or if
  // the directories already exist; returns false if unable to create
  // directories for any reason. Will also return false if the FilePath does
  // not represent a directory (that is, it doesn't end with a path separator).
  bool CreateDirectoriesRecursively() const;

  // Create the directory so that path exists. Returns true if successful or
  // if the directory already exists; returns false if unable to create the
  // directory for any reason, including if the parent directory does not
  // exist. Not named "CreateDirectory" because that's a macro on Windows.
  bool CreateFolder() const;

  // Returns true if FilePath describes something in the file-system,
  // either a file, directory, or whatever, and that something exists.
  bool FileOrDirectoryExists() const;

  // Returns true if pathname describes a directory in the file-system
  // that exists.
  bool DirectoryExists() const;

  // Returns true if FilePath ends with a path separator, which indicates that
  // it is intended to represent a directory. Returns false otherwise.
  // This does NOT check that a directory (or file) actually exists.
  bool IsDirectory() const;

 private:
  // The pathname text exactly as supplied; see the class comment for how a
  // trailing separator is interpreted.
  String pathname_;

  // Don't implement operator= because it is banned by the style guide.
  FilePath& operator=(const FilePath& rhs);
};  // class FilePath

}  // namespace internal
}  // namespace testing

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_

// File boundary in the enclosing diff (kept verbatim):
// diff --git a/src/gtest/internal/gtest-internal.h b/src/gtest/internal/gtest-internal.h
// new file mode 100644
// index 0000000..2eefc7b
// --- /dev/null
// +++ b/src/gtest/internal/gtest-internal.h
// @@ -0,0 +1,546 @@

// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee)
//
// The Google C++ Testing Framework (Google Test)
//
// This header file declares functions and macros used internally by
// Google Test. They are subject to change without notice.

#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_

#include <gtest/internal/gtest-port.h>

#ifdef GTEST_OS_LINUX
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#endif  // GTEST_OS_LINUX

#include <iomanip>  // NOLINT
#include <limits>  // NOLINT

#include <gtest/internal/gtest-string.h>
#include <gtest/internal/gtest-filepath.h>

// Due to C++ preprocessor weirdness, we need double indirection to
// concatenate two tokens when one of them is __LINE__. Writing
//
//   foo ## __LINE__
//
// will result in the token foo__LINE__, instead of foo followed by
// the current line number. For more details, see
// http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.6
//
// GTEST_CONCAT_TOKEN is the entry point; it forwards to
// GTEST_CONCAT_TOKEN_IMPL, which performs the actual ## paste.
#define GTEST_CONCAT_TOKEN(foo, bar) GTEST_CONCAT_TOKEN_IMPL(foo, bar)
#define GTEST_CONCAT_TOKEN_IMPL(foo, bar) foo ## bar

// Google Test defines the testing::Message class to allow construction of
// test messages via the << operator. The idea is that anything
// streamable to std::ostream can be streamed to a testing::Message.
// This allows a user to use his own types in Google Test assertions by
// overloading the << operator.
//
// util/gtl/stl_logging-inl.h overloads << for STL containers. These
// overloads cannot be defined in the std namespace, as that will be
// undefined behavior. Therefore, they are defined in the global
// namespace instead.
//
// C++'s symbol lookup rule (i.e.
// Koenig lookup) says that these
// overloads are visible in either the std namespace or the global
// namespace, but not other namespaces, including the testing
// namespace which Google Test's Message class is in.
//
// To allow STL containers (and other types that have a << operator
// defined in the global namespace) to be used in Google Test assertions,
// testing::Message must access the custom << operator from the global
// namespace. Hence this helper function.
//
// Note: Jeffrey Yasskin suggested an alternative fix by "using
// ::operator<<;" in the definition of Message's operator<<. That fix
// doesn't require a helper function, but unfortunately doesn't
// compile with MSVC.
//
// Streams val into *os. The helper is deliberately defined at global
// scope, before namespace testing is opened.
template <typename T>
inline void GTestStreamToHelper(std::ostream* os, const T& val) {
  *os << val;
}

namespace testing {

// Forward declaration of classes.

class Message;                         // Represents a failure message.
class TestCase;                        // A collection of related tests.
class TestPartResult;                  // Result of a test part.
class TestInfo;                        // Information about a test.
class UnitTest;                        // A collection of test cases.
class UnitTestEventListenerInterface;  // Listens to Google Test events.
class AssertionResult;                 // Result of an assertion.

namespace internal {

struct TraceInfo;                      // Information about a trace point.
class ScopedTrace;                     // Implements scoped trace.
class TestInfoImpl;                    // Opaque implementation of TestInfo
class TestResult;                      // Result of a single Test.
class UnitTestImpl;                    // Opaque implementation of UnitTest

template <typename E> class List;      // A generic list.
template <typename E> class ListNode;  // A node in a generic list.

// A secret type that Google Test users don't know about. It has no
// definition on purpose. Therefore it's impossible to create a
// Secret object, which is what we want.
class Secret;

// Two overloaded helpers for checking at compile time whether an
// expression is a null pointer literal (i.e.
// NULL or any 0-valued
// compile-time integral constant). Their return values have
// different sizes, so we can use sizeof() to test which version is
// picked by the compiler. These helpers have no implementations, as
// we only need their signatures.
//
// Given IsNullLiteralHelper(x), the compiler will pick the first
// version if x can be implicitly converted to Secret*, and pick the
// second version otherwise. Since Secret is a secret and incomplete
// type, the only expression a user can write that has type Secret* is
// a null pointer literal. Therefore, we know that x is a null
// pointer literal if and only if the first version is picked by the
// compiler.
char IsNullLiteralHelper(Secret* p);
// The fallback overload returns a reference to char[2], so its sizeof
// differs from the sizeof(char) == 1 of the overload above.
char (&IsNullLiteralHelper(...))[2];  // NOLINT

// A compile-time bool constant that is true if and only if x is a
// null pointer literal (i.e. NULL or any 0-valued compile-time
// integral constant).
#ifdef __SYMBIAN32__  // Symbian
// Passing non-POD classes through ellipsis (...) crashes the ARM compiler.
// The Nokia Symbian compiler tries to instantiate a copy constructor for
// objects passed through ellipsis (...), failing for uncopyable objects.
// Hence we define this to false (and lose support for NULL detection).
#define GTEST_IS_NULL_LITERAL(x) false
#else  // ! __SYMBIAN32__
#define GTEST_IS_NULL_LITERAL(x) \
    (sizeof(::testing::internal::IsNullLiteralHelper(x)) == 1)
#endif  // __SYMBIAN32__

// Appends the user-supplied message to the Google-Test-generated message.
String AppendUserMessage(const String& gtest_msg,
                         const Message& user_msg);

// A helper class for creating scoped traces in user programs.
class ScopedTrace {
 public:
  // The c'tor pushes the given source file location and message onto
  // a trace stack maintained by Google Test.
  ScopedTrace(const char* file, int line, const Message& message);

  // The d'tor pops the info pushed by the c'tor.
  //
  // Note that the d'tor is not virtual in order to be efficient.
  // Don't inherit from ScopedTrace!
  ~ScopedTrace();

 private:
  GTEST_DISALLOW_COPY_AND_ASSIGN(ScopedTrace);
} GTEST_ATTRIBUTE_UNUSED;  // A ScopedTrace object does its job in its
                           // c'tor and d'tor. Therefore it doesn't
                           // need to be used otherwise.

// Converts a streamable value to a String. A NULL pointer is
// converted to "(null)". When the input value is a ::string,
// ::std::string, ::wstring, or ::std::wstring object, each NUL
// character in it is replaced with "\\0".
// Declared here but defined in gtest.h, so that it has access
// to the definition of the Message class, required by the ARM
// compiler.
template <typename T>
String StreamableToString(const T& streamable);

// Formats a value to be used in a failure message.

#ifdef __SYMBIAN32__

// These are needed as the Nokia Symbian Compiler cannot decide between
// const T& and const T* in a function template. The Nokia compiler _can_
// decide between class template specializations for T and T*, so a
// tr1::type_traits-like is_pointer works, and we can overload on that.

// This overload makes sure that all pointers (including
// those to char or wchar_t) are printed as raw pointers.
// The first parameter is only a dispatch tag; its value is not read.
template <typename T>
inline String FormatValueForFailureMessage(internal::true_type dummy,
                                           T* pointer) {
  return StreamableToString(static_cast<const void*>(pointer));
}

// Non-pointer values are streamed as they are.
template <typename T>
inline String FormatValueForFailureMessage(internal::false_type dummy,
                                           const T& value) {
  return StreamableToString(value);
}

// Picks one of the two overloads above by constructing an object of
// is_pointer<T>::type as the tag argument.
template <typename T>
inline String FormatForFailureMessage(const T& value) {
  return FormatValueForFailureMessage(
      typename internal::is_pointer<T>::type(), value);
}

#else

// Generic case: the value is streamed as it is.
template <typename T>
inline String FormatForFailureMessage(const T& value) {
  return StreamableToString(value);
}

// This overload makes sure that all pointers (including
// those to char or wchar_t) are printed as raw pointers.
template <typename T>
inline String FormatForFailureMessage(T* pointer) {
  return StreamableToString(static_cast<const void*>(pointer));
}

#endif  // __SYMBIAN32__

// These overloaded versions handle narrow and wide characters.
String FormatForFailureMessage(char ch);
String FormatForFailureMessage(wchar_t wchar);

// When this operand is a const char* or char*, and the other operand
// is a ::std::string or ::string, we print this operand as a C string
// rather than a pointer. We do the same for wide strings.

// This internal macro is used to avoid duplicated code.
+#define GTEST_FORMAT_IMPL(operand2_type, operand1_printer)\ +inline String FormatForComparisonFailureMessage(\ + operand2_type::value_type* str, const operand2_type& /*operand2*/) {\ + return operand1_printer(str);\ +}\ +inline String FormatForComparisonFailureMessage(\ + const operand2_type::value_type* str, const operand2_type& /*operand2*/) {\ + return operand1_printer(str);\ +} + +#if GTEST_HAS_STD_STRING +GTEST_FORMAT_IMPL(::std::string, String::ShowCStringQuoted) +#endif // GTEST_HAS_STD_STRING +#if GTEST_HAS_STD_WSTRING +GTEST_FORMAT_IMPL(::std::wstring, String::ShowWideCStringQuoted) +#endif // GTEST_HAS_STD_WSTRING + +#if GTEST_HAS_GLOBAL_STRING +GTEST_FORMAT_IMPL(::string, String::ShowCStringQuoted) +#endif // GTEST_HAS_GLOBAL_STRING +#if GTEST_HAS_GLOBAL_WSTRING +GTEST_FORMAT_IMPL(::wstring, String::ShowWideCStringQuoted) +#endif // GTEST_HAS_GLOBAL_WSTRING + +#undef GTEST_FORMAT_IMPL + +// Constructs and returns the message for an equality assertion +// (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure. +// +// The first four parameters are the expressions used in the assertion +// and their values, as strings. For example, for ASSERT_EQ(foo, bar) +// where foo is 5 and bar is 6, we have: +// +// expected_expression: "foo" +// actual_expression: "bar" +// expected_value: "5" +// actual_value: "6" +// +// The ignoring_case parameter is true iff the assertion is a +// *_STRCASEEQ*. When it's true, the string " (ignoring case)" will +// be inserted into the message. +AssertionResult EqFailure(const char* expected_expression, + const char* actual_expression, + const String& expected_value, + const String& actual_value, + bool ignoring_case); + + +// This template class represents an IEEE floating-point number +// (either single-precision or double-precision, depending on the +// template parameters). +// +// The purpose of this class is to do more sophisticated number +// comparison. 
(Due to round-off error, etc, it's very unlikely that +// two floating-points will be equal exactly. Hence a naive +// comparison by the == operation often doesn't work.) +// +// Format of IEEE floating-point: +// +// The most-significant bit being the leftmost, an IEEE +// floating-point looks like +// +// sign_bit exponent_bits fraction_bits +// +// Here, sign_bit is a single bit that designates the sign of the +// number. +// +// For float, there are 8 exponent bits and 23 fraction bits. +// +// For double, there are 11 exponent bits and 52 fraction bits. +// +// More details can be found at +// http://en.wikipedia.org/wiki/IEEE_floating-point_standard. +// +// Template parameter: +// +// RawType: the raw floating-point type (either float or double) +template <typename RawType> +class FloatingPoint { + public: + // Defines the unsigned integer type that has the same size as the + // floating point number. + typedef typename TypeWithSize<sizeof(RawType)>::UInt Bits; + + // Constants. + + // # of bits in a number. + static const size_t kBitCount = 8*sizeof(RawType); + + // # of fraction bits in a number. + static const size_t kFractionBitCount = + std::numeric_limits<RawType>::digits - 1; + + // # of exponent bits in a number. + static const size_t kExponentBitCount = kBitCount - 1 - kFractionBitCount; + + // The mask for the sign bit. + static const Bits kSignBitMask = static_cast<Bits>(1) << (kBitCount - 1); + + // The mask for the fraction bits. + static const Bits kFractionBitMask = + ~static_cast<Bits>(0) >> (kExponentBitCount + 1); + + // The mask for the exponent bits. + static const Bits kExponentBitMask = ~(kSignBitMask | kFractionBitMask); + + // How many ULP's (Units in the Last Place) we want to tolerate when + // comparing two numbers. The larger the value, the more error we + // allow. A 0 value means that two numbers must be exactly the same + // to be considered equal. 
+ // + // The maximum error of a single floating-point operation is 0.5 + // units in the last place. On Intel CPU's, all floating-point + // calculations are done with 80-bit precision, while double has 64 + // bits. Therefore, 4 should be enough for ordinary use. + // + // See the following article for more details on ULP: + // http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm. + static const size_t kMaxUlps = 4; + + // Constructs a FloatingPoint from a raw floating-point number. + // + // On an Intel CPU, passing a non-normalized NAN (Not a Number) + // around may change its bits, although the new value is guaranteed + // to be also a NAN. Therefore, don't expect this constructor to + // preserve the bits in x when x is a NAN. + explicit FloatingPoint(const RawType& x) : value_(x) {} + + // Static methods + + // Reinterprets a bit pattern as a floating-point number. + // + // This function is needed to test the AlmostEquals() method. + static RawType ReinterpretBits(const Bits bits) { + FloatingPoint fp(0); + fp.bits_ = bits; + return fp.value_; + } + + // Returns the floating-point number that represent positive infinity. + static RawType Infinity() { + return ReinterpretBits(kExponentBitMask); + } + + // Non-static methods + + // Returns the bits that represents this number. + const Bits &bits() const { return bits_; } + + // Returns the exponent bits of this number. + Bits exponent_bits() const { return kExponentBitMask & bits_; } + + // Returns the fraction bits of this number. + Bits fraction_bits() const { return kFractionBitMask & bits_; } + + // Returns the sign bit of this number. + Bits sign_bit() const { return kSignBitMask & bits_; } + + // Returns true iff this is NAN (not a number). + bool is_nan() const { + // It's a NAN if the exponent bits are all ones and the fraction + // bits are not entirely zeros. 
+ return (exponent_bits() == kExponentBitMask) && (fraction_bits() != 0); + } + + // Returns true iff this number is at most kMaxUlps ULP's away from + // rhs. In particular, this function: + // + // - returns false if either number is (or both are) NAN. + // - treats really large numbers as almost equal to infinity. + // - thinks +0.0 and -0.0 are 0 DLP's apart. + bool AlmostEquals(const FloatingPoint& rhs) const { + // The IEEE standard says that any comparison operation involving + // a NAN must return false. + if (is_nan() || rhs.is_nan()) return false; + + return DistanceBetweenSignAndMagnitudeNumbers(bits_, rhs.bits_) <= kMaxUlps; + } + + private: + // Converts an integer from the sign-and-magnitude representation to + // the biased representation. More precisely, let N be 2 to the + // power of (kBitCount - 1), an integer x is represented by the + // unsigned number x + N. + // + // For instance, + // + // -N + 1 (the most negative number representable using + // sign-and-magnitude) is represented by 1; + // 0 is represented by N; and + // N - 1 (the biggest number representable using + // sign-and-magnitude) is represented by 2N - 1. + // + // Read http://en.wikipedia.org/wiki/Signed_number_representations + // for more details on signed number representations. + static Bits SignAndMagnitudeToBiased(const Bits &sam) { + if (kSignBitMask & sam) { + // sam represents a negative number. + return ~sam + 1; + } else { + // sam represents a positive number. + return kSignBitMask | sam; + } + } + + // Given two numbers in the sign-and-magnitude representation, + // returns the distance between them as an unsigned number. + static Bits DistanceBetweenSignAndMagnitudeNumbers(const Bits &sam1, + const Bits &sam2) { + const Bits biased1 = SignAndMagnitudeToBiased(sam1); + const Bits biased2 = SignAndMagnitudeToBiased(sam2); + return (biased1 >= biased2) ? (biased1 - biased2) : (biased2 - biased1); + } + + union { + RawType value_; // The raw floating-point number. 
+ Bits bits_; // The bits that represent the number. + }; +}; + +// Typedefs the instances of the FloatingPoint template class that we +// care to use. +typedef FloatingPoint<float> Float; +typedef FloatingPoint<double> Double; + +// In order to catch the mistake of putting tests that use different +// test fixture classes in the same test case, we need to assign +// unique IDs to fixture classes and compare them. The TypeId type is +// used to hold such IDs. The user should treat TypeId as an opaque +// type: the only operation allowed on TypeId values is to compare +// them for equality using the == operator. +typedef void* TypeId; + +// GetTypeId<T>() returns the ID of type T. Different values will be +// returned for different types. Calling the function twice with the +// same type argument is guaranteed to return the same ID. +template <typename T> +inline TypeId GetTypeId() { + static bool dummy = false; + // The compiler is required to create an instance of the static + // variable dummy for each T used to instantiate the template. + // Therefore, the address of dummy is guaranteed to be unique. + return &dummy; +} + +#ifdef GTEST_OS_WINDOWS + +// Predicate-formatters for implementing the HRESULT checking macros +// {ASSERT|EXPECT}_HRESULT_{SUCCEEDED|FAILED} +// We pass a long instead of HRESULT to avoid causing an +// include dependency for the HRESULT type. 
+AssertionResult IsHRESULTSuccess(const char* expr, long hr); // NOLINT +AssertionResult IsHRESULTFailure(const char* expr, long hr); // NOLINT + +#endif // GTEST_OS_WINDOWS + +} // namespace internal +} // namespace testing + +#define GTEST_MESSAGE(message, result_type) \ + ::testing::internal::AssertHelper(result_type, __FILE__, __LINE__, message) \ + = ::testing::Message() + +#define GTEST_FATAL_FAILURE(message) \ + return GTEST_MESSAGE(message, ::testing::TPRT_FATAL_FAILURE) + +#define GTEST_NONFATAL_FAILURE(message) \ + GTEST_MESSAGE(message, ::testing::TPRT_NONFATAL_FAILURE) + +#define GTEST_SUCCESS(message) \ + GTEST_MESSAGE(message, ::testing::TPRT_SUCCESS) + +#define GTEST_TEST_BOOLEAN(boolexpr, booltext, actual, expected, fail) \ + GTEST_AMBIGUOUS_ELSE_BLOCKER \ + if (boolexpr) \ + ; \ + else \ + fail("Value of: " booltext "\n Actual: " #actual "\nExpected: " #expected) + +// Helper macro for defining tests. +#define GTEST_TEST(test_case_name, test_name, parent_class)\ +class test_case_name##_##test_name##_Test : public parent_class {\ + public:\ + test_case_name##_##test_name##_Test() {}\ + static ::testing::Test* NewTest() {\ + return new test_case_name##_##test_name##_Test;\ + }\ + private:\ + virtual void TestBody();\ + static ::testing::TestInfo* const test_info_;\ + GTEST_DISALLOW_COPY_AND_ASSIGN(test_case_name##_##test_name##_Test);\ +};\ +\ +::testing::TestInfo* const test_case_name##_##test_name##_Test::test_info_ =\ + ::testing::TestInfo::MakeAndRegisterInstance(\ + #test_case_name, \ + #test_name, \ + ::testing::internal::GetTypeId< parent_class >(), \ + parent_class::SetUpTestCase, \ + parent_class::TearDownTestCase, \ + test_case_name##_##test_name##_Test::NewTest);\ +void test_case_name##_##test_name##_Test::TestBody() + + +#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_ diff --git a/src/gtest/internal/gtest-port.h b/src/gtest/internal/gtest-port.h new file mode 100644 index 0000000..0c422cd --- /dev/null +++ 
b/src/gtest/internal/gtest-port.h @@ -0,0 +1,620 @@ +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: wan@google.com (Zhanyong Wan) +// +// Low-level types and utilities for porting Google Test to various +// platforms. They are subject to change without notice. DO NOT USE +// THEM IN USER CODE. 
+ +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_ + +// The user can define the following macros in the build script to +// control Google Test's behavior: +// +// GTEST_HAS_STD_STRING - Define it to 1/0 to indicate that +// std::string does/doesn't work (Google Test can +// be used where std::string is unavailable). +// Leave it undefined to let Google Test define it. +// GTEST_HAS_GLOBAL_STRING - Define it to 1/0 to indicate that ::string +// is/isn't available (some systems define +// ::string, which is different to std::string). +// Leave it undefined to let Google Test define it. +// GTEST_HAS_STD_WSTRING - Define it to 1/0 to indicate that +// std::wstring does/doesn't work (Google Test can +// be used where std::wstring is unavailable). +// Leave it undefined to let Google Test define it. +// GTEST_HAS_GLOBAL_WSTRING - Define it to 1/0 to indicate that ::wstring +// is/isn't available (some systems define +// ::wstring, which is different to std::wstring). +// Leave it undefined to let Google Test define it. + +// This header defines the following utilities: +// +// Macros indicating the name of the Google C++ Testing Framework project: +// GTEST_NAME - a string literal of the project name. +// GTEST_FLAG_PREFIX - a string literal of the prefix all Google +// Test flag names share. +// GTEST_FLAG_PREFIX_UPPER - a string literal of the prefix all Google +// Test flag names share, in upper case. +// +// Macros indicating the current platform: +// GTEST_OS_CYGWIN - defined iff compiled on Cygwin. +// GTEST_OS_LINUX - defined iff compiled on Linux. +// GTEST_OS_MAC - defined iff compiled on Mac OS X. +// GTEST_OS_WINDOWS - defined iff compiled on Windows. +// Note that it is possible that none of the GTEST_OS_ macros are defined. +// +// Macros indicating available Google Test features: +// GTEST_HAS_DEATH_TEST - defined iff death tests are supported. 
+// +// Macros for basic C++ coding: +// GTEST_AMBIGUOUS_ELSE_BLOCKER - for disabling a gcc warning. +// GTEST_ATTRIBUTE_UNUSED - declares that a class' instances don't have to +// be used. +// GTEST_DISALLOW_COPY_AND_ASSIGN() - disables copy ctor and operator=. +// GTEST_MUST_USE_RESULT - declares that a function's result must be used. +// +// Synchronization: +// Mutex, MutexLock, ThreadLocal, GetThreadCount() +// - synchronization primitives. +// +// Template meta programming: +// is_pointer - as in TR1; needed on Symbian only. +// +// Smart pointers: +// scoped_ptr - as in TR2. +// +// Regular expressions: +// RE - a simple regular expression class using the POSIX +// Extended Regular Expression syntax. Not available on +// Windows. +// +// Logging: +// GTEST_LOG() - logs messages at the specified severity level. +// LogToStderr() - directs all log messages to stderr. +// FlushInfoLog() - flushes informational log messages. +// +// Stderr capturing: +// CaptureStderr() - starts capturing stderr. +// GetCapturedStderr() - stops capturing stderr and returns the captured +// string. +// +// Integer types: +// TypeWithSize - maps an integer to a int type. +// Int32, UInt32, Int64, UInt64, TimeInMillis +// - integers of known sizes. +// BiggestInt - the biggest signed integer type. +// +// Command-line utilities: +// GTEST_FLAG() - references a flag. +// GTEST_DECLARE_*() - declares a flag. +// GTEST_DEFINE_*() - defines a flag. +// GetArgvs() - returns the command line as a vector of strings. +// +// Environment variable utilities: +// GetEnv() - gets the value of an environment variable. +// BoolFromGTestEnv() - parses a bool environment variable. +// Int32FromGTestEnv() - parses an Int32 environment variable. +// StringFromGTestEnv() - parses a string environment variable. 
#include <stdlib.h>
#include <stdio.h>

#define GTEST_NAME "Google Test"
#define GTEST_FLAG_PREFIX "gtest_"
#define GTEST_FLAG_PREFIX_UPPER "GTEST_"

// Determines the platform on which Google Test is compiled.
#ifdef __CYGWIN__
#define GTEST_OS_CYGWIN
#elif defined _MSC_VER
// TODO(kenton@google.com): GTEST_OS_WINDOWS is currently used to mean
// both "The OS is Windows" and "The compiler is MSVC".  These
// meanings really should be separated in order to better support
// Windows compilers other than MSVC.
#define GTEST_OS_WINDOWS
#elif defined __APPLE__
#define GTEST_OS_MAC
#elif defined __linux__
#define GTEST_OS_LINUX
#endif  // _MSC_VER

// Determines whether exceptions are enabled.
//
// GTEST_HAS_EXCEPTIONS is non-zero iff exceptions are enabled.  It is
// always defined, while _HAS_EXCEPTIONS is defined only on Windows.
//
// This detection is deliberately kept outside the GTEST_HAS_STD_STRING
// section below: a user who pre-defines GTEST_HAS_STD_STRING must still
// end up with GTEST_HAS_EXCEPTIONS defined.  A value supplied by the
// build script is respected.
#ifndef GTEST_HAS_EXCEPTIONS
#ifdef GTEST_OS_WINDOWS
// Assumes that exceptions are enabled by default.
#ifndef _HAS_EXCEPTIONS
#define _HAS_EXCEPTIONS 1
#endif  // _HAS_EXCEPTIONS
#define GTEST_HAS_EXCEPTIONS _HAS_EXCEPTIONS
#else  // We are on Linux or Mac OS.
#define GTEST_HAS_EXCEPTIONS 0
#endif  // GTEST_OS_WINDOWS
#endif  // GTEST_HAS_EXCEPTIONS

// Determines whether ::std::string and ::string are available.

#ifndef GTEST_HAS_STD_STRING
// The user didn't tell us whether ::std::string is available, so we
// need to figure it out.

#ifdef GTEST_OS_WINDOWS
// On Windows, we can use ::std::string if the compiler version is VS
// 2005 or above, or if exceptions are enabled.
#define GTEST_HAS_STD_STRING ((_MSC_VER >= 1400) || GTEST_HAS_EXCEPTIONS)
#else  // We are on Linux or Mac OS.
#define GTEST_HAS_STD_STRING 1
#endif  // GTEST_OS_WINDOWS

#endif  // GTEST_HAS_STD_STRING

#ifndef GTEST_HAS_GLOBAL_STRING
// The user didn't tell us whether ::string is available, so we need
// to figure it out.

#define GTEST_HAS_GLOBAL_STRING 0

#endif  // GTEST_HAS_GLOBAL_STRING

#ifndef GTEST_HAS_STD_WSTRING
// The user didn't tell us whether ::std::wstring is available, so we need
// to figure it out.
// TODO(wan@google.com): uses autoconf to detect whether ::std::wstring
//   is available.

#ifdef GTEST_OS_CYGWIN
// At least some versions of cygwin don't support ::std::wstring.
#define GTEST_HAS_STD_WSTRING 0
#else
#define GTEST_HAS_STD_WSTRING GTEST_HAS_STD_STRING
#endif  // GTEST_OS_CYGWIN

#endif  // GTEST_HAS_STD_WSTRING

#ifndef GTEST_HAS_GLOBAL_WSTRING
// The user didn't tell us whether ::wstring is available, so we need
// to figure it out.
#define GTEST_HAS_GLOBAL_WSTRING GTEST_HAS_GLOBAL_STRING
#endif  // GTEST_HAS_GLOBAL_WSTRING

#if GTEST_HAS_STD_STRING || GTEST_HAS_GLOBAL_STRING || \
    GTEST_HAS_STD_WSTRING || GTEST_HAS_GLOBAL_WSTRING
#include <string>  // NOLINT
#endif  // GTEST_HAS_STD_STRING || GTEST_HAS_GLOBAL_STRING ||
        // GTEST_HAS_STD_WSTRING || GTEST_HAS_GLOBAL_WSTRING

#if GTEST_HAS_STD_STRING
#include <sstream>  // NOLINT
#else
#include <strstream>  // NOLINT
#endif  // GTEST_HAS_STD_STRING

// Determines whether to support death tests.
#if GTEST_HAS_STD_STRING && defined(GTEST_OS_LINUX)
#define GTEST_HAS_DEATH_TEST
// On some platforms, <regex.h> needs someone to define size_t, and
// won't compile otherwise.  We can #include it here as we already
// included <stdlib.h>, which is guaranteed to define size_t through
// <stddef.h>.
#include <regex.h>
#include <vector>
#include <fcntl.h>
#include <sys/mman.h>
#endif  // GTEST_HAS_STD_STRING && defined(GTEST_OS_LINUX)

// Defines some utility macros.

// The GNU compiler emits a warning if nested "if" statements are followed by
// an "else" statement and braces are not used to explicitly disambiguate the
// "else" binding.  This leads to problems with code like:
//
//   if (gate)
//     ASSERT_*(condition) << "Some message";
//
// The "switch (0) case 0:" idiom is used to suppress this.
+#ifdef __INTEL_COMPILER +#define GTEST_AMBIGUOUS_ELSE_BLOCKER +#else +#define GTEST_AMBIGUOUS_ELSE_BLOCKER switch (0) case 0: // NOLINT +#endif + +// Use this annotation at the end of a struct / class definition to +// prevent the compiler from optimizing away instances that are never +// used. This is useful when all interesting logic happens inside the +// c'tor and / or d'tor. Example: +// +// struct Foo { +// Foo() { ... } +// } GTEST_ATTRIBUTE_UNUSED; +#if defined(GTEST_OS_WINDOWS) || (defined(GTEST_OS_LINUX) && defined(SWIG)) +#define GTEST_ATTRIBUTE_UNUSED +#else +#define GTEST_ATTRIBUTE_UNUSED __attribute__ ((unused)) +#endif // GTEST_OS_WINDOWS || (GTEST_OS_LINUX && SWIG) + +// A macro to disallow the evil copy constructor and operator= functions +// This should be used in the private: declarations for a class. +#define GTEST_DISALLOW_COPY_AND_ASSIGN(type)\ + type(const type &);\ + void operator=(const type &) + +// Tell the compiler to warn about unused return values for functions declared +// with this macro. The macro should be used on function declarations +// following the argument list: +// +// Sprocket* AllocateSprocket() GTEST_MUST_USE_RESULT; +#if defined(__GNUC__) \ + && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) \ + && !defined(COMPILER_ICC) +#define GTEST_MUST_USE_RESULT __attribute__ ((warn_unused_result)) +#else +#define GTEST_MUST_USE_RESULT +#endif // (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 4) + +namespace testing { + +class Message; + +namespace internal { + +class String; + +// std::strstream is deprecated. However, we have to use it on +// Windows as std::stringstream won't compile on Windows when +// exceptions are disabled. We use std::stringstream on other +// platforms to avoid compiler warnings there. +#if GTEST_HAS_STD_STRING +typedef ::std::stringstream StrStream; +#else +typedef ::std::strstream StrStream; +#endif // GTEST_HAS_STD_STRING + +// Defines scoped_ptr. 
+ +// This implementation of scoped_ptr is PARTIAL - it only contains +// enough stuff to satisfy Google Test's need. +template <typename T> +class scoped_ptr { + public: + explicit scoped_ptr(T* p = NULL) : ptr_(p) {} + ~scoped_ptr() { reset(); } + + T& operator*() const { return *ptr_; } + T* operator->() const { return ptr_; } + T* get() const { return ptr_; } + + T* release() { + T* const ptr = ptr_; + ptr_ = NULL; + return ptr; + } + + void reset(T* p = NULL) { + if (p != ptr_) { + if (sizeof(T) > 0) { // Makes sure T is a complete type. + delete ptr_; + } + ptr_ = p; + } + } + private: + T* ptr_; + + GTEST_DISALLOW_COPY_AND_ASSIGN(scoped_ptr); +}; + +#ifdef GTEST_HAS_DEATH_TEST + +// Defines RE. Currently only needed for death tests. + +// A simple C++ wrapper for <regex.h>. It uses the POSIX Enxtended +// Regular Expression syntax. +class RE { + public: + // Constructs an RE from a string. +#if GTEST_HAS_STD_STRING + RE(const ::std::string& regex) { Init(regex.c_str()); } // NOLINT +#endif // GTEST_HAS_STD_STRING + +#if GTEST_HAS_GLOBAL_STRING + RE(const ::string& regex) { Init(regex.c_str()); } // NOLINT +#endif // GTEST_HAS_GLOBAL_STRING + + RE(const char* regex) { Init(regex); } // NOLINT + ~RE(); + + // Returns the string representation of the regex. + const char* pattern() const { return pattern_; } + + // Returns true iff str contains regular expression re. + + // TODO(wan): make PartialMatch() work when str contains NUL + // characters. 
+#if GTEST_HAS_STD_STRING + static bool PartialMatch(const ::std::string& str, const RE& re) { + return PartialMatch(str.c_str(), re); + } +#endif // GTEST_HAS_STD_STRING + +#if GTEST_HAS_GLOBAL_STRING + static bool PartialMatch(const ::string& str, const RE& re) { + return PartialMatch(str.c_str(), re); + } +#endif // GTEST_HAS_GLOBAL_STRING + + static bool PartialMatch(const char* str, const RE& re); + + private: + void Init(const char* regex); + + // We use a const char* instead of a string, as Google Test may be used + // where string is not available. We also do not use Google Test's own + // String type here, in order to simplify dependencies between the + // files. + const char* pattern_; + regex_t regex_; + bool is_valid_; +}; + +#endif // GTEST_HAS_DEATH_TEST + +// Defines logging utilities: +// GTEST_LOG() - logs messages at the specified severity level. +// LogToStderr() - directs all log messages to stderr. +// FlushInfoLog() - flushes informational log messages. + +enum GTestLogSeverity { + GTEST_INFO, + GTEST_WARNING, + GTEST_ERROR, + GTEST_FATAL +}; + +void GTestLog(GTestLogSeverity severity, const char* file, + int line, const char* msg); + +#define GTEST_LOG(severity, msg)\ + ::testing::internal::GTestLog(\ + ::testing::internal::GTEST_##severity, __FILE__, __LINE__, \ + (::testing::Message() << (msg)).GetString().c_str()) + +inline void LogToStderr() {} +inline void FlushInfoLog() { fflush(NULL); } + +// Defines the stderr capturer: +// CaptureStderr - starts capturing stderr. +// GetCapturedStderr - stops capturing stderr and returns the captured string. + +#ifdef GTEST_HAS_DEATH_TEST + +// A copy of all command line arguments. Set by InitGoogleTest(). +extern ::std::vector<String> g_argvs; + +void CaptureStderr(); +// GTEST_HAS_DEATH_TEST implies we have ::std::string. +::std::string GetCapturedStderr(); +const ::std::vector<String>& GetArgvs(); + +#endif // GTEST_HAS_DEATH_TEST + +// Defines synchronization primitives. 
// A dummy implementation of synchronization primitives (mutex, lock,
// and thread-local variable).  Necessary for compiling Google Test where
// mutex is not supported - using Google Test in multiple threads is not
// supported on such platforms.

// Every operation of this mutex is a no-op.
class Mutex {
 public:
  Mutex() {}
  explicit Mutex(int /*unused*/) {}
  void AssertHeld() const {}
  enum { NO_CONSTRUCTOR_NEEDED_FOR_STATIC_MUTEX = 0 };
};

// We cannot call it MutexLock directly as the ctor declaration would
// conflict with a macro named MutexLock, which is defined on some
// platforms.  Hence the typedef trick below.
class GTestMutexLock {
 public:
  explicit GTestMutexLock(Mutex*) {}  // NOLINT
};

typedef GTestMutexLock MutexLock;

// Holds a single plain T; nothing here is actually per-thread.
template <typename T>
class ThreadLocal {
 public:
  // Value-initializes the stored object, so that a ThreadLocal of a
  // built-in type (int, bool, a pointer, ...) starts out as zero
  // instead of holding an indeterminate value.
  ThreadLocal() : value_() {}
  // Starts out holding a copy of value.
  explicit ThreadLocal(const T& value) : value_(value) {}

  T* pointer() { return &value_; }
  const T* pointer() const { return &value_; }
  const T& get() const { return value_; }
  void set(const T& value) { value_ = value; }
 private:
  T value_;
};

// There's no portable way to detect the number of threads, so we just
// return 0 to indicate that we cannot detect it.
inline size_t GetThreadCount() { return 0; }

// Defines tr1::is_pointer (only needed for Symbian).

#ifdef __SYMBIAN32__

// Symbian does not have tr1::type_traits, so we define our own is_pointer
// These are needed as the Nokia Symbian Compiler cannot decide between
// const T& and const T* in a function template.

template <bool bool_value>
struct bool_constant {
  typedef bool_constant<bool_value> type;
  static const bool value = bool_value;
};
template <bool bool_value> const bool bool_constant<bool_value>::value;

typedef bool_constant<false> false_type;
typedef bool_constant<true> true_type;

// Primary template: T is not a pointer.
template <typename T>
struct is_pointer : public false_type {};

// Partial specialization: T* is a pointer.
template <typename T>
struct is_pointer<T*> : public true_type {};

#endif  // __SYMBIAN32__

// Defines BiggestInt as the biggest signed integer type the compiler
// supports.
#ifdef GTEST_OS_WINDOWS
typedef __int64 BiggestInt;
#else
typedef long long BiggestInt;  // NOLINT
#endif  // GTEST_OS_WINDOWS

// The maximum number a BiggestInt can represent.  This definition
// works whether BiggestInt is represented in one's complement or
// two's complement: only the top (sign) bit is left clear.
//
// We cannot rely on numeric_limits in STL, as __int64 and long long
// are not part of standard C++ and numeric_limits doesn't need to be
// defined for them.
const BiggestInt kMaxBiggestInt =
    ~(static_cast<BiggestInt>(1) << (8*sizeof(BiggestInt) - 1));

// This template class serves as a compile-time function from size to
// type.  It maps a size in bytes to a primitive type with that
// size. e.g.
//
//   TypeWithSize<4>::UInt
//
// is typedef-ed to be unsigned int (unsigned integer made up of 4
// bytes).
//
// Such functionality should belong to STL, but I cannot find it
// there.
//
// Google Test uses this class in the implementation of floating-point
// comparison.
//
// The unspecialized template only provides UInt; the specializations
// for sizes 4 and 8 below provide both Int and UInt.
template <size_t size>
class TypeWithSize {
 public:
  // This prevents the user from using TypeWithSize<N> with incorrect
  // values of N.
  typedef void UInt;
};

// The specialization for size 4.
template <>
class TypeWithSize<4> {
 public:
  // unsigned int has size 4 in both gcc and MSVC.
  //
  // As base/basictypes.h doesn't compile on Windows, we cannot use
  // uint32, uint64, etc. here.
  typedef int Int;
  typedef unsigned int UInt;
};

// The specialization for size 8.
template <>
class TypeWithSize<8> {
 public:
#ifdef GTEST_OS_WINDOWS
  typedef __int64 Int;
  typedef unsigned __int64 UInt;
#else
  typedef long long Int;  // NOLINT
  typedef unsigned long long UInt;  // NOLINT
#endif  // GTEST_OS_WINDOWS
};

// Integer types of known sizes.
+typedef TypeWithSize<4>::Int Int32; +typedef TypeWithSize<4>::UInt UInt32; +typedef TypeWithSize<8>::Int Int64; +typedef TypeWithSize<8>::UInt UInt64; +typedef TypeWithSize<8>::Int TimeInMillis; // Represents time in milliseconds. + +// Utilities for command line flags and environment variables. + +// A wrapper for getenv() that works on Linux, Windows, and Mac OS. +inline const char* GetEnv(const char* name) { +#ifdef _WIN32_WCE // We are on Windows CE. + // CE has no environment variables. + return NULL; +#elif defined(GTEST_OS_WINDOWS) // We are on Windows proper. + // MSVC 8 deprecates getenv(), so we want to suppress warning 4996 + // (deprecated function) there. +#pragma warning(push) // Saves the current warning state. +#pragma warning(disable:4996) // Temporarily disables warning 4996. + return getenv(name); +#pragma warning(pop) // Restores the warning state. +#else // We are on Linux or Mac OS. + return getenv(name); +#endif +} + +// Macro for referencing flags. +#define GTEST_FLAG(name) FLAGS_gtest_##name + +// Macros for declaring flags. +#define GTEST_DECLARE_bool(name) extern bool GTEST_FLAG(name) +#define GTEST_DECLARE_int32(name) \ + extern ::testing::internal::Int32 GTEST_FLAG(name) +#define GTEST_DECLARE_string(name) \ + extern ::testing::internal::String GTEST_FLAG(name) + +// Macros for defining flags. +#define GTEST_DEFINE_bool(name, default_val, doc) \ + bool GTEST_FLAG(name) = (default_val) +#define GTEST_DEFINE_int32(name, default_val, doc) \ + ::testing::internal::Int32 GTEST_FLAG(name) = (default_val) +#define GTEST_DEFINE_string(name, default_val, doc) \ + ::testing::internal::String GTEST_FLAG(name) = (default_val) + +// Parses 'str' for a 32-bit signed integer. If successful, writes the result +// to *value and returns true; otherwise leaves *value unchanged and returns +// false. 
+// TODO(chandlerc): Find a better way to refactor flag and environment parsing +// out of both gtest-port.cc and gtest.cc to avoid exporting this utility +// function. +bool ParseInt32(const Message& src_text, const char* str, Int32* value); + +// Parses a bool/Int32/string from the environment variable +// corresponding to the given Google Test flag. +bool BoolFromGTestEnv(const char* flag, bool default_val); +Int32 Int32FromGTestEnv(const char* flag, Int32 default_val); +const char* StringFromGTestEnv(const char* flag, const char* default_val); + +} // namespace internal +} // namespace testing + +#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_ diff --git a/src/gtest/internal/gtest-string.h b/src/gtest/internal/gtest-string.h new file mode 100644 index 0000000..b5a303f --- /dev/null +++ b/src/gtest/internal/gtest-string.h @@ -0,0 +1,268 @@ +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee) +// +// The Google C++ Testing Framework (Google Test) +// +// This header file declares the String class and functions used internally by +// Google Test. They are subject to change without notice. They should not be used +// by code external to Google Test. +// +// This header file is #included by testing/base/internal/gtest-internal.h. +// It should not be #included by other files. + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_ + +#include <string.h> +#include <gtest/internal/gtest-port.h> + +namespace testing { +namespace internal { + +// String - a UTF-8 string class. +// +// We cannot use std::string as Microsoft's STL implementation in +// Visual C++ 7.1 has problems when exception is disabled. There is a +// hack to work around this, but we've seen cases where the hack fails +// to work. +// +// Also, String is different from std::string in that it can represent +// both NULL and the empty string, while std::string cannot represent +// NULL. +// +// NULL and the empty string are considered different. NULL is less +// than anything (including the empty string) except itself. +// +// This class only provides minimum functionality necessary for +// implementing Google Test. We do not intend to implement a full-fledged +// string class here.
+// +// Since the purpose of this class is to provide a substitute for +// std::string on platforms where it cannot be used, we define a copy +// constructor and assignment operators such that we don't need +// conditional compilation in a lot of places. +// +// In order to make the representation efficient, the d'tor of String +// is not virtual. Therefore DO NOT INHERIT FROM String. +class String { + public: + // Static utility methods + + // Returns the input if it's not NULL, otherwise returns "(null)". + // This function serves two purposes: + // + // 1. ShowCString(NULL) has type 'const char *', instead of the + // type of NULL (which is int). + // + // 2. In MSVC, streaming a null char pointer to StrStream generates + // an access violation, so we need to convert NULL to "(null)" + // before streaming it. + static inline const char* ShowCString(const char* c_str) { + return c_str ? c_str : "(null)"; + } + + // Returns the input enclosed in double quotes if it's not NULL; + // otherwise returns "(null)". For example, "\"Hello\"" is returned + // for input "Hello". + // + // This is useful for printing a C string in the syntax of a literal. + // + // Known issue: escape sequences are not handled yet. + static String ShowCStringQuoted(const char* c_str); + + // Clones a 0-terminated C string, allocating memory using new. The + // caller is responsible for deleting the return value using + // delete[]. Returns the cloned string, or NULL if the input is + // NULL. + // + // This is different from strdup() in string.h, which allocates + // memory using malloc(). + static const char* CloneCString(const char* c_str); + + // Compares two C strings. Returns true iff they have the same content. + // + // Unlike strcmp(), this function can handle NULL argument(s). A + // NULL C string is considered different to any non-NULL C string, + // including the empty string. 
+ static bool CStringEquals(const char* lhs, const char* rhs); + + // Converts a wide C string to a String using the UTF-8 encoding. + // NULL will be converted to "(null)". If an error occurred during + // the conversion, "(failed to convert from wide string)" is + // returned. + static String ShowWideCString(const wchar_t* wide_c_str); + + // Similar to ShowWideCString(), except that this function encloses + // the converted string in double quotes. + static String ShowWideCStringQuoted(const wchar_t* wide_c_str); + + // Compares two wide C strings. Returns true iff they have the same + // content. + // + // Unlike wcscmp(), this function can handle NULL argument(s). A + // NULL C string is considered different to any non-NULL C string, + // including the empty string. + static bool WideCStringEquals(const wchar_t* lhs, const wchar_t* rhs); + + // Compares two C strings, ignoring case. Returns true iff they + // have the same content. + // + // Unlike strcasecmp(), this function can handle NULL argument(s). + // A NULL C string is considered different to any non-NULL C string, + // including the empty string. + static bool CaseInsensitiveCStringEquals(const char* lhs, + const char* rhs); + + // Formats a list of arguments to a String, using the same format + // spec string as for printf. + // + // We do not use the StringPrintf class as it is not universally + // available. + // + // The result is limited to 4096 characters (including the trailing + // 0). If 4096 characters are not enough to format the input, + // "<buffer exceeded>" is returned. + static String Format(const char* format, ...); + + // C'tors + + // The default c'tor constructs a NULL string. + String() : c_str_(NULL) {} + + // Constructs a String by cloning a 0-terminated C string. + String(const char* c_str) : c_str_(NULL) { // NOLINT + *this = c_str; + } + + // Constructs a String by copying a given number of chars from a + // buffer. E.g. String("hello", 3) will create the string "hel".
+ String(const char* buffer, size_t len); + + // The copy c'tor creates a new copy of the string. The two + // String objects do not share content. + String(const String& str) : c_str_(NULL) { + *this = str; + } + + // D'tor. String is intended to be a final class, so the d'tor + // doesn't need to be virtual. + ~String() { delete[] c_str_; } + + // Returns true iff this is an empty string (i.e. ""). + bool empty() const { + return (c_str_ != NULL) && (*c_str_ == '\0'); + } + + // Compares this with another String. + // Returns < 0 if this is less than rhs, 0 if this is equal to rhs, or > 0 + // if this is greater than rhs. + int Compare(const String& rhs) const; + + // Returns true iff this String equals the given C string. A NULL + // string and a non-NULL string are considered not equal. + bool operator==(const char* c_str) const { + return CStringEquals(c_str_, c_str); + } + + // Returns true iff this String doesn't equal the given C string. A NULL + // string and a non-NULL string are considered not equal. + bool operator!=(const char* c_str) const { + return !CStringEquals(c_str_, c_str); + } + + // Returns true iff this String ends with the given suffix. *Any* + // String is considered to end with a NULL or empty suffix. + bool EndsWith(const char* suffix) const; + + // Returns true iff this String ends with the given suffix, not considering + // case. Any String is considered to end with a NULL or empty suffix. + bool EndsWithCaseInsensitive(const char* suffix) const; + + // Returns the length of the encapsulated string, or -1 if the + // string is NULL. + int GetLength() const { + return c_str_ ? static_cast<int>(strlen(c_str_)) : -1; + } + + // Gets the 0-terminated C string this String object represents. + // The String object still owns the string. Therefore the caller + // should NOT delete the return value. + const char* c_str() const { return c_str_; } + + // Sets the 0-terminated C string this String object represents. 
+ // The old string in this object is deleted, and this object will + // own a clone of the input string. This function copies only up to + // length bytes (plus a terminating null byte), or until the first + // null byte, whichever comes first. + // + // This function works even when the c_str parameter has the same + // value as that of the c_str_ field. + void Set(const char* c_str, size_t length); + + // Assigns a C string to this object. Self-assignment works. + const String& operator=(const char* c_str); + + // Assigns a String object to this object. Self-assignment works. + const String& operator=(const String &rhs) { + *this = rhs.c_str_; + return *this; + } + + private: + const char* c_str_; +}; + +// Streams a String to an ostream. +inline ::std::ostream& operator <<(::std::ostream& os, const String& str) { + // We call String::ShowCString() to convert NULL to "(null)". + // Otherwise we'll get an access violation on Windows. + return os << String::ShowCString(str.c_str()); +} + +// Gets the content of the StrStream's buffer as a String. Each '\0' +// character in the buffer is replaced with "\\0". +String StrStreamToString(StrStream* stream); + +// Converts a streamable value to a String. A NULL pointer is +// converted to "(null)". When the input value is a ::string, +// ::std::string, ::wstring, or ::std::wstring object, each NUL +// character in it is replaced with "\\0". + +// Declared here but defined in gtest.h, so that it has access +// to the definition of the Message class, required by the ARM +// compiler. +template <typename T> +String StreamableToString(const T& streamable); + +} // namespace internal +} // namespace testing + +#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_ diff --git a/src/gtest/src/gtest-internal-inl.h b/src/gtest/src/gtest-internal-inl.h new file mode 100644 index 0000000..2a7d71c --- /dev/null +++ b/src/gtest/src/gtest-internal-inl.h @@ -0,0 +1,1118 @@ +// Copyright 2005, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Utility functions and classes used by the Google C++ testing framework. +// +// Author: wan@google.com (Zhanyong Wan) +// +// This file contains purely Google Test's internal implementation. Please +// DO NOT #INCLUDE IT IN A USER PROGRAM. + +#ifndef GTEST_SRC_GTEST_INTERNAL_INL_H_ +#define GTEST_SRC_GTEST_INTERNAL_INL_H_ + +// GTEST_IMPLEMENTATION is defined iff the current translation unit is +// part of Google Test's implementation. 
+#ifndef GTEST_IMPLEMENTATION +// A user is trying to include this from his code - just say no. +#error "gtest-internal-inl.h is part of Google Test's internal implementation." +#error "It must not be included except by Google Test itself." +#endif // GTEST_IMPLEMENTATION + +#include <stddef.h> + +#include <gtest/internal/gtest-port.h> + +#ifdef GTEST_OS_WINDOWS +#include <windows.h> // NOLINT +#endif // GTEST_OS_WINDOWS + +#include <ostream> // NOLINT +#include <gtest/gtest.h> +#include <gtest/gtest-spi.h> + +namespace testing { + +// Declares the flags. +// +// We don't want the users to modify these flags in the code, but want +// Google Test's own unit tests to be able to access them. Therefore we +// declare them here as opposed to in gtest.h. +GTEST_DECLARE_bool(break_on_failure); +GTEST_DECLARE_bool(catch_exceptions); +GTEST_DECLARE_string(color); +GTEST_DECLARE_string(filter); +GTEST_DECLARE_bool(list_tests); +GTEST_DECLARE_string(output); +GTEST_DECLARE_int32(repeat); +GTEST_DECLARE_int32(stack_trace_depth); +GTEST_DECLARE_bool(show_internal_stack_frames); + +namespace internal { + +// Names of the flags (needed for parsing Google Test flags). +const char kBreakOnFailureFlag[] = "break_on_failure"; +const char kCatchExceptionsFlag[] = "catch_exceptions"; +const char kFilterFlag[] = "filter"; +const char kListTestsFlag[] = "list_tests"; +const char kOutputFlag[] = "output"; +const char kColorFlag[] = "color"; +const char kRepeatFlag[] = "repeat"; + +// This class saves the values of all Google Test flags in its c'tor, and +// restores them in its d'tor. +class GTestFlagSaver { + public: + // The c'tor. 
+ GTestFlagSaver() { + break_on_failure_ = GTEST_FLAG(break_on_failure); + catch_exceptions_ = GTEST_FLAG(catch_exceptions); + color_ = GTEST_FLAG(color); + death_test_style_ = GTEST_FLAG(death_test_style); + filter_ = GTEST_FLAG(filter); + internal_run_death_test_ = GTEST_FLAG(internal_run_death_test); + list_tests_ = GTEST_FLAG(list_tests); + output_ = GTEST_FLAG(output); + repeat_ = GTEST_FLAG(repeat); + } + + // The d'tor is not virtual. DO NOT INHERIT FROM THIS CLASS. + ~GTestFlagSaver() { + GTEST_FLAG(break_on_failure) = break_on_failure_; + GTEST_FLAG(catch_exceptions) = catch_exceptions_; + GTEST_FLAG(color) = color_; + GTEST_FLAG(death_test_style) = death_test_style_; + GTEST_FLAG(filter) = filter_; + GTEST_FLAG(internal_run_death_test) = internal_run_death_test_; + GTEST_FLAG(list_tests) = list_tests_; + GTEST_FLAG(output) = output_; + GTEST_FLAG(repeat) = repeat_; + } + private: + // Fields for saving the original values of flags. + bool break_on_failure_; + bool catch_exceptions_; + String color_; + String death_test_style_; + String filter_; + String internal_run_death_test_; + bool list_tests_; + String output_; + bool pretty_; + internal::Int32 repeat_; +} GTEST_ATTRIBUTE_UNUSED; + +// Converts a Unicode code-point to its UTF-8 encoding. +String ToUtf8String(wchar_t wchar); + +// Returns the number of active threads, or 0 when there is an error. +size_t GetThreadCount(); + +// List is a simple singly-linked list container. +// +// We cannot use std::list as Microsoft's implementation of STL has +// problems when exception is disabled. There is a hack to work +// around this, but we've seen cases where the hack fails to work. +// +// TODO(wan): switch to std::list when we have a reliable fix for the +// STL problem, e.g. when we upgrade to the next version of Visual +// C++, or (more likely) switch to STLport. +// +// The element type must support copy constructor. + +// Forward declare List +template <typename E> // E is the element type. 
+class List; + +// ListNode is a node in a singly-linked list. It consists of an +// element and a pointer to the next node. The last node in the list +// has a NULL value for its next pointer. +template <typename E> // E is the element type. +class ListNode { + friend class List<E>; + + private: + + E element_; + ListNode * next_; + + // The c'tor is private s.t. only in the ListNode class and in its + // friend class List we can create a ListNode object. + // + // Creates a node with a given element value. The next pointer is + // set to NULL. + // + // ListNode does NOT have a default constructor. Always use this + // constructor (with parameter) to create a ListNode object. + explicit ListNode(const E & element) : element_(element), next_(NULL) {} + + // We disallow copying ListNode + GTEST_DISALLOW_COPY_AND_ASSIGN(ListNode); + + public: + + // Gets the element in this node. + E & element() { return element_; } + const E & element() const { return element_; } + + // Gets the next node in the list. + ListNode * next() { return next_; } + const ListNode * next() const { return next_; } +}; + + +// List is a simple singly-linked list container. +template <typename E> // E is the element type. +class List { + public: + + // Creates an empty list. + List() : head_(NULL), last_(NULL), size_(0) {} + + // D'tor. + virtual ~List(); + + // Clears the list. + void Clear() { + if ( size_ > 0 ) { + // 1. Deletes every node. + ListNode<E> * node = head_; + ListNode<E> * next = node->next(); + for ( ; ; ) { + delete node; + node = next; + if ( node == NULL ) break; + next = node->next(); + } + + // 2. Resets the member variables. + head_ = last_ = NULL; + size_ = 0; + } + } + + // Gets the number of elements. + int size() const { return size_; } + + // Returns true if the list is empty. + bool IsEmpty() const { return size() == 0; } + + // Gets the first element of the list, or NULL if the list is empty. 
+ ListNode<E> * Head() { return head_; } + const ListNode<E> * Head() const { return head_; } + + // Gets the last element of the list, or NULL if the list is empty. + ListNode<E> * Last() { return last_; } + const ListNode<E> * Last() const { return last_; } + + // Adds an element to the end of the list. A copy of the element is + // created using the copy constructor, and then stored in the list. + // Changes made to the element in the list don't affect the source + // object, and vice versa. + void PushBack(const E & element) { + ListNode<E> * new_node = new ListNode<E>(element); + + if ( size_ == 0 ) { + head_ = last_ = new_node; + size_ = 1; + } else { + last_->next_ = new_node; + last_ = new_node; + size_++; + } + } + + // Adds an element to the beginning of this list. + void PushFront(const E& element) { + ListNode<E>* const new_node = new ListNode<E>(element); + + if ( size_ == 0 ) { + head_ = last_ = new_node; + size_ = 1; + } else { + new_node->next_ = head_; + head_ = new_node; + size_++; + } + } + + // Removes an element from the beginning of this list. If the + // result argument is not NULL, the removed element is stored in the + // memory it points to. Otherwise the element is thrown away. + // Returns true iff the list wasn't empty before the operation. + bool PopFront(E* result) { + if (size_ == 0) return false; + + if (result != NULL) { + *result = head_->element_; + } + + ListNode<E>* const old_head = head_; + size_--; + if (size_ == 0) { + head_ = last_ = NULL; + } else { + head_ = head_->next_; + } + delete old_head; + + return true; + } + + // Inserts an element after a given node in the list. It's the + // caller's responsibility to ensure that the given node is in the + // list. If the given node is NULL, inserts the element at the + // front of the list.
+ ListNode<E>* InsertAfter(ListNode<E>* node, const E& element) { + if (node == NULL) { + PushFront(element); + return Head(); + } + + ListNode<E>* const new_node = new ListNode<E>(element); + new_node->next_ = node->next_; + node->next_ = new_node; + size_++; + if (node == last_) { + last_ = new_node; + } + + return new_node; + } + + // Returns the number of elements that satisfy a given predicate. + // The parameter 'predicate' is a Boolean function or functor that + // accepts a 'const E &', where E is the element type. + template <typename P> // P is the type of the predicate function/functor + int CountIf(P predicate) const { + int count = 0; + for ( const ListNode<E> * node = Head(); + node != NULL; + node = node->next() ) { + if ( predicate(node->element()) ) { + count++; + } + } + + return count; + } + + // Applies a function/functor to each element in the list. The + // parameter 'functor' is a function/functor that accepts a 'const + // E &', where E is the element type. This method does not change + // the elements. + template <typename F> // F is the type of the function/functor + void ForEach(F functor) const { + for ( const ListNode<E> * node = Head(); + node != NULL; + node = node->next() ) { + functor(node->element()); + } + } + + // Returns the first node whose element satisfies a given predicate, + // or NULL if none is found. The parameter 'predicate' is a + // function/functor that accepts a 'const E &', where E is the + // element type. This method does not change the elements. + template <typename P> // P is the type of the predicate function/functor. 
+ const ListNode<E> * FindIf(P predicate) const { + for ( const ListNode<E> * node = Head(); + node != NULL; + node = node->next() ) { + if ( predicate(node->element()) ) { + return node; + } + } + + return NULL; + } + + template <typename P> + ListNode<E> * FindIf(P predicate) { + for ( ListNode<E> * node = Head(); + node != NULL; + node = node->next() ) { + if ( predicate(node->element() ) ) { + return node; + } + } + + return NULL; + } + + private: + ListNode<E>* head_; // The first node of the list. + ListNode<E>* last_; // The last node of the list. + int size_; // The number of elements in the list. + + // We disallow copying List. + GTEST_DISALLOW_COPY_AND_ASSIGN(List); +}; + +// The virtual destructor of List. +template <typename E> +List<E>::~List() { + Clear(); +} + +// A function for deleting an object. Handy for being used as a +// functor. +template <typename T> +static void Delete(T * x) { + delete x; +} + +// A copyable object representing a user specified test property which can be +// output as a key/value string pair. +// +// Don't inherit from TestProperty as its destructor is not virtual. +class TestProperty { + public: + // C'tor. TestProperty does NOT have a default constructor. + // Always use this constructor (with parameters) to create a + // TestProperty object. + TestProperty(const char* key, const char* value) : + key_(key), value_(value) { + } + + // Gets the user supplied key. + const char* key() const { + return key_.c_str(); + } + + // Gets the user supplied value. + const char* value() const { + return value_.c_str(); + } + + // Sets a new value, overriding the one supplied in the constructor. + void SetValue(const char* new_value) { + value_ = new_value; + } + + private: + // The key supplied by the user. + String key_; + // The value supplied by the user. + String value_; +}; + +// A predicate that checks the key of a TestProperty against a known key. +// +// TestPropertyKeyIs is copyable. 
+class TestPropertyKeyIs { + public: + // Constructor. + // + // TestPropertyKeyIs has NO default constructor. + explicit TestPropertyKeyIs(const char* key) + : key_(key) {} + + // Returns true iff the test name of test property matches on key_. + bool operator()(const TestProperty& test_property) const { + return String(test_property.key()).Compare(key_) == 0; + } + + private: + String key_; +}; + +// The result of a single Test. This includes a list of +// TestPartResults, a list of TestProperties, a count of how many +// death tests there are in the Test, and how much time it took to run +// the Test. +// +// TestResult is not copyable. +class TestResult { + public: + // Creates an empty TestResult. + TestResult(); + + // D'tor. Do not inherit from TestResult. + ~TestResult(); + + // Gets the list of TestPartResults. + const internal::List<TestPartResult> & test_part_results() const { + return test_part_results_; + } + + // Gets the list of TestProperties. + const internal::List<internal::TestProperty> & test_properties() const { + return test_properties_; + } + + // Gets the number of successful test parts. + int successful_part_count() const; + + // Gets the number of failed test parts. + int failed_part_count() const; + + // Gets the number of all test parts. This is the sum of the number + // of successful test parts and the number of failed test parts. + int total_part_count() const; + + // Returns true iff the test passed (i.e. no test part failed). + bool Passed() const { return !Failed(); } + + // Returns true iff the test failed. + bool Failed() const { return failed_part_count() > 0; } + + // Returns true iff the test fatally failed. + bool HasFatalFailure() const; + + // Returns the elapsed time, in milliseconds. + TimeInMillis elapsed_time() const { return elapsed_time_; } + + // Sets the elapsed time. + void set_elapsed_time(TimeInMillis elapsed) { elapsed_time_ = elapsed; } + + // Adds a test part result to the list. 
+ void AddTestPartResult(const TestPartResult& test_part_result); + + // Adds a test property to the list. The property is validated and may add + // a non-fatal failure if invalid (e.g., if it conflicts with reserved + // key names). If a property is already recorded for the same key, the + // value will be updated, rather than storing multiple values for the same + // key. + void RecordProperty(const internal::TestProperty& test_property); + + // Adds a failure if the key is a reserved attribute of Google Test + // testcase tags. Returns true if the property is valid. + // TODO(russr): Validate attribute names are legal and human readable. + static bool ValidateTestProperty(const internal::TestProperty& test_property); + + // Returns the death test count. + int death_test_count() const { return death_test_count_; } + + // Increments the death test count, returning the new count. + int increment_death_test_count() { return ++death_test_count_; } + + // Clears the object. + void Clear(); + private: + // Protects mutable state of the property list and of owned properties, whose + // values may be updated. + internal::Mutex test_properites_mutex_; + + // The list of TestPartResults + internal::List<TestPartResult> test_part_results_; + // The list of TestProperties + internal::List<internal::TestProperty> test_properties_; + // Running count of death tests. + int death_test_count_; + // The elapsed time, in milliseconds. + TimeInMillis elapsed_time_; + + // We disallow copying TestResult. + GTEST_DISALLOW_COPY_AND_ASSIGN(TestResult); +}; // class TestResult + +class TestInfoImpl { + public: + TestInfoImpl(TestInfo* parent, const char* test_case_name, + const char* name, TypeId fixture_class_id, + TestMaker maker); + ~TestInfoImpl(); + + // Returns true if this test should run. + bool should_run() const { return should_run_; } + + // Sets the should_run member. + void set_should_run(bool should) { should_run_ = should; } + + // Returns true if this test is disabled. 
Disabled tests are not run. + bool is_disabled() const { return is_disabled_; } + + // Sets the is_disabled member. + void set_is_disabled(bool is) { is_disabled_ = is; } + + // Returns the test case name. + const char* test_case_name() const { return test_case_name_.c_str(); } + + // Returns the test name. + const char* name() const { return name_.c_str(); } + + // Returns the ID of the test fixture class. + TypeId fixture_class_id() const { return fixture_class_id_; } + + // Returns the test result. + internal::TestResult* result() { return &result_; } + const internal::TestResult* result() const { return &result_; } + + // Creates the test object, runs it, records its result, and then + // deletes it. + void Run(); + + // Calls the given TestInfo object's Run() method. + static void RunTest(TestInfo * test_info) { + test_info->impl()->Run(); + } + + // Clears the test result. + void ClearResult() { result_.Clear(); } + + // Clears the test result in the given TestInfo object. + static void ClearTestResult(TestInfo * test_info) { + test_info->impl()->ClearResult(); + } + + private: + // These fields are immutable properties of the test. + TestInfo* const parent_; // The owner of this object + const String test_case_name_; // Test case name + const String name_; // Test name + const TypeId fixture_class_id_; // ID of the test fixture class + bool should_run_; // True iff this test should run + bool is_disabled_; // True iff this test is disabled + const TestMaker maker_; // The function that creates the test object + + // This field is mutable and needs to be reset before running the + // test for the second time. + internal::TestResult result_; + + GTEST_DISALLOW_COPY_AND_ASSIGN(TestInfoImpl); +}; + +} // namespace internal + +// A test case, which consists of a list of TestInfos. +// +// TestCase is not copyable. +class TestCase { + public: + // Creates a TestCase with the given name. + // + // TestCase does NOT have a default constructor. 
Always use this + // constructor to create a TestCase object. + // + // Arguments: + // + // name: name of the test case + // set_up_tc: pointer to the function that sets up the test case + // tear_down_tc: pointer to the function that tears down the test case + TestCase(const char* name, + Test::SetUpTestCaseFunc set_up_tc, + Test::TearDownTestCaseFunc tear_down_tc); + + // Destructor of TestCase. + virtual ~TestCase(); + + // Gets the name of the TestCase. + const char* name() const { return name_.c_str(); } + + // Returns true if any test in this test case should run. + bool should_run() const { return should_run_; } + + // Sets the should_run member. + void set_should_run(bool should) { should_run_ = should; } + + // Gets the (mutable) list of TestInfos in this TestCase. + internal::List<TestInfo*>& test_info_list() { return *test_info_list_; } + + // Gets the (immutable) list of TestInfos in this TestCase. + const internal::List<TestInfo *> & test_info_list() const { + return *test_info_list_; + } + + // Gets the number of successful tests in this test case. + int successful_test_count() const; + + // Gets the number of failed tests in this test case. + int failed_test_count() const; + + // Gets the number of disabled tests in this test case. + int disabled_test_count() const; + + // Get the number of tests in this test case that should run. + int test_to_run_count() const; + + // Gets the number of all tests in this test case. + int total_test_count() const; + + // Returns true iff the test case passed. + bool Passed() const { return !Failed(); } + + // Returns true iff the test case failed. + bool Failed() const { return failed_test_count() > 0; } + + // Returns the elapsed time, in milliseconds. + internal::TimeInMillis elapsed_time() const { return elapsed_time_; } + + // Adds a TestInfo to this test case. Will delete the TestInfo upon + // destruction of the TestCase object. 
+ void AddTestInfo(TestInfo * test_info); + + // Finds and returns a TestInfo with the given name. If one doesn't + // exist, returns NULL. + TestInfo* GetTestInfo(const char* test_name); + + // Clears the results of all tests in this test case. + void ClearResult(); + + // Clears the results of all tests in the given test case. + static void ClearTestCaseResult(TestCase* test_case) { + test_case->ClearResult(); + } + + // Runs every test in this TestCase. + void Run(); + + // Runs every test in the given TestCase. + static void RunTestCase(TestCase * test_case) { test_case->Run(); } + + // Returns true iff test passed. + static bool TestPassed(const TestInfo * test_info) { + const internal::TestInfoImpl* const impl = test_info->impl(); + return impl->should_run() && impl->result()->Passed(); + } + + // Returns true iff test failed. + static bool TestFailed(const TestInfo * test_info) { + const internal::TestInfoImpl* const impl = test_info->impl(); + return impl->should_run() && impl->result()->Failed(); + } + + // Returns true iff test is disabled. + static bool TestDisabled(const TestInfo * test_info) { + return test_info->impl()->is_disabled(); + } + + // Returns true if the given test should run. + static bool ShouldRunTest(const TestInfo *test_info) { + return test_info->impl()->should_run(); + } + + private: + // Name of the test case. + internal::String name_; + // List of TestInfos. + internal::List<TestInfo*>* test_info_list_; + // Pointer to the function that sets up the test case. + Test::SetUpTestCaseFunc set_up_tc_; + // Pointer to the function that tears down the test case. + Test::TearDownTestCaseFunc tear_down_tc_; + // True iff any test in this test case should run. + bool should_run_; + // Elapsed time, in milliseconds. + internal::TimeInMillis elapsed_time_; + + // We disallow copying TestCases. + GTEST_DISALLOW_COPY_AND_ASSIGN(TestCase); +}; + +namespace internal { + +// Class UnitTestOptions. 
+// +// This class contains functions for processing options the user +// specifies when running the tests. It has only static members. +// +// In most cases, the user can specify an option using either an +// environment variable or a command line flag. E.g. you can set the +// test filter using either GTEST_FILTER or --gtest_filter. If both +// the variable and the flag are present, the latter overrides the +// former. +class UnitTestOptions { + public: + // Functions for processing the gtest_output flag. + + // Returns the output format, or "" for normal printed output. + static String GetOutputFormat(); + + // Returns the name of the requested output file, or the default if none + // was explicitly specified. + static String GetOutputFile(); + + // Functions for processing the gtest_filter flag. + + // Returns true iff the wildcard pattern matches the string. The + // first ':' or '\0' character in pattern marks the end of it. + // + // This recursive algorithm isn't very efficient, but is clear and + // works well enough for matching test names, which are short. + static bool PatternMatchesString(const char *pattern, const char *str); + + // Returns true iff the user-specified filter matches the test case + // name and the test name. + static bool FilterMatchesTest(const String &test_case_name, + const String &test_name); + +#ifdef GTEST_OS_WINDOWS + // Function for supporting the gtest_catch_exception flag. + + // Returns EXCEPTION_EXECUTE_HANDLER if Google Test should handle the + // given SEH exception, or EXCEPTION_CONTINUE_SEARCH otherwise. + // This function is useful as an __except condition. + static int GTestShouldProcessSEH(DWORD exception_code); +#endif // GTEST_OS_WINDOWS + private: + // Returns true if "name" matches the ':' separated list of glob-style + // filters in "filter". + static bool MatchesFilter(const String& name, const char* filter); +}; + +// Returns the current application's name, removing directory path if that +// is present. 
Used by UnitTestOptions::GetOutputFile. +FilePath GetCurrentExecutableName(); + +// The role interface for getting the OS stack trace as a string. +class OsStackTraceGetterInterface { + public: + OsStackTraceGetterInterface() {} + virtual ~OsStackTraceGetterInterface() {} + + // Returns the current OS stack trace as a String. Parameters: + // + // max_depth - the maximum number of stack frames to be included + // in the trace. + // skip_count - the number of top frames to be skipped; doesn't count + // against max_depth. + virtual String CurrentStackTrace(int max_depth, int skip_count) = 0; + + // UponLeavingGTest() should be called immediately before Google Test calls + // user code. It saves some information about the current stack that + // CurrentStackTrace() will use to find and hide Google Test stack frames. + virtual void UponLeavingGTest() = 0; + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN(OsStackTraceGetterInterface); +}; + +// A working implementation of the OsStackTraceGetterInterface interface. +class OsStackTraceGetter : public OsStackTraceGetterInterface { + public: + OsStackTraceGetter() {} + virtual String CurrentStackTrace(int max_depth, int skip_count); + virtual void UponLeavingGTest(); + + // This string is inserted in place of stack frames that are part of + // Google Test's implementation. + static const char* const kElidedFramesMarker; + + private: + Mutex mutex_; // protects all internal state + + // We save the stack frame below the frame that calls user code. + // We do this because the address of the frame immediately below + // the user code changes between the call to UponLeavingGTest() + // and any calls to CurrentStackTrace() from within the user code. + void* caller_frame_; + + GTEST_DISALLOW_COPY_AND_ASSIGN(OsStackTraceGetter); +}; + +// Information about a Google Test trace point. +struct TraceInfo { + const char* file; + int line; + String message; +}; + +// The private implementation of the UnitTest class. 
We don't protect +// the methods under a mutex, as this class is not accessible by a +// user and the UnitTest class that delegates work to this class does +// proper locking. +class UnitTestImpl : public TestPartResultReporterInterface { + public: + explicit UnitTestImpl(UnitTest* parent); + virtual ~UnitTestImpl(); + + // Reports a test part result. This method is from the + // TestPartResultReporterInterface interface. + virtual void ReportTestPartResult(const TestPartResult& result); + + // Returns the current test part result reporter. + TestPartResultReporterInterface* test_part_result_reporter(); + + // Sets the current test part result reporter. + void set_test_part_result_reporter(TestPartResultReporterInterface* reporter); + + // Gets the number of successful test cases. + int successful_test_case_count() const; + + // Gets the number of failed test cases. + int failed_test_case_count() const; + + // Gets the number of all test cases. + int total_test_case_count() const; + + // Gets the number of all test cases that contain at least one test + // that should run. + int test_case_to_run_count() const; + + // Gets the number of successful tests. + int successful_test_count() const; + + // Gets the number of failed tests. + int failed_test_count() const; + + // Gets the number of disabled tests. + int disabled_test_count() const; + + // Gets the number of all tests. + int total_test_count() const; + + // Gets the number of tests that should run. + int test_to_run_count() const; + + // Gets the elapsed time, in milliseconds. + TimeInMillis elapsed_time() const { return elapsed_time_; } + + // Returns true iff the unit test passed (i.e. all test cases passed). + bool Passed() const { return !Failed(); } + + // Returns true iff the unit test failed (i.e. some test case failed + // or something outside of all tests failed). 
+ bool Failed() const { + return failed_test_case_count() > 0 || ad_hoc_test_result()->Failed(); + } + + // Returns the TestResult for the test that's currently running, or + // the TestResult for the ad hoc test if no test is running. + internal::TestResult* current_test_result(); + + // Returns the TestResult for the ad hoc test. + const internal::TestResult* ad_hoc_test_result() const { + return &ad_hoc_test_result_; + } + + // Sets the unit test result printer. + // + // Does nothing if the input and the current printer object are the + // same; otherwise, deletes the old printer object and makes the + // input the current printer. + void set_result_printer(UnitTestEventListenerInterface * result_printer); + + // Returns the current unit test result printer if it is not NULL; + // otherwise, creates an appropriate result printer, makes it the + // current printer, and returns it. + UnitTestEventListenerInterface* result_printer(); + + // Sets the OS stack trace getter. + // + // Does nothing if the input and the current OS stack trace getter + // are the same; otherwise, deletes the old getter and makes the + // input the current getter. + void set_os_stack_trace_getter(OsStackTraceGetterInterface* getter); + + // Returns the current OS stack trace getter if it is not NULL; + // otherwise, creates an OsStackTraceGetter, makes it the current + // getter, and returns it. + OsStackTraceGetterInterface* os_stack_trace_getter(); + + // Returns the current OS stack trace as a String. + // + // The maximum number of stack frames to be included is specified by + // the gtest_stack_trace_depth flag. The skip_count parameter + // specifies the number of top frames to be skipped, which doesn't + // count against the number of frames to be included. + // + // For example, if Foo() calls Bar(), which in turn calls + // CurrentOsStackTraceExceptTop(1), Foo() will be included in the + // trace but Bar() and CurrentOsStackTraceExceptTop() won't. 
+ String CurrentOsStackTraceExceptTop(int skip_count); + + // Finds and returns a TestCase with the given name. If one doesn't + // exist, creates one and returns it. + // + // Arguments: + // + // test_case_name: name of the test case + // set_up_tc: pointer to the function that sets up the test case + // tear_down_tc: pointer to the function that tears down the test case + TestCase* GetTestCase(const char* test_case_name, + Test::SetUpTestCaseFunc set_up_tc, + Test::TearDownTestCaseFunc tear_down_tc); + + // Adds a TestInfo to the unit test. + // + // Arguments: + // + // set_up_tc: pointer to the function that sets up the test case + // tear_down_tc: pointer to the function that tears down the test case + // test_info: the TestInfo object + void AddTestInfo(Test::SetUpTestCaseFunc set_up_tc, + Test::TearDownTestCaseFunc tear_down_tc, + TestInfo * test_info) { + GetTestCase(test_info->test_case_name(), + set_up_tc, + tear_down_tc)->AddTestInfo(test_info); + } + + // Sets the TestCase object for the test that's currently running. + void set_current_test_case(TestCase* current_test_case) { + current_test_case_ = current_test_case; + } + + // Sets the TestInfo object for the test that's currently running. If + // current_test_info is NULL, the assertion results will be stored in + // ad_hoc_test_result_. + void set_current_test_info(TestInfo* current_test_info) { + current_test_info_ = current_test_info; + } + + // Runs all tests in this UnitTest object, prints the result, and + // returns 0 if all tests are successful, or 1 otherwise. If any + // exception is thrown during a test on Windows, this test is + // considered to be failed, but the rest of the tests will still be + // run. (We disable exceptions on Linux and Mac OS X, so the issue + // doesn't apply there.) + int RunAllTests(); + + // Clears the results of all tests, including the ad hoc test. 
+ void ClearResult() { + test_cases_.ForEach(TestCase::ClearTestCaseResult); + ad_hoc_test_result_.Clear(); + } + + // Matches the full name of each test against the user-specified + // filter to decide whether the test should run, then records the + // result in each TestCase and TestInfo object. + // Returns the number of tests that should run. + int FilterTests(); + + // Lists all the tests by name. + void ListAllTests(); + + const TestCase* current_test_case() const { return current_test_case_; } + TestInfo* current_test_info() { return current_test_info_; } + const TestInfo* current_test_info() const { return current_test_info_; } + + // Returns the list of environments that need to be set-up/torn-down + // before/after the tests are run. + internal::List<Environment*>* environments() { return &environments_; } + internal::List<Environment*>* environments_in_reverse_order() { + return &environments_in_reverse_order_; + } + + internal::List<TestCase*>* test_cases() { return &test_cases_; } + const internal::List<TestCase*>* test_cases() const { return &test_cases_; } + + // Getters for the per-thread Google Test trace stack. + internal::List<TraceInfo>* gtest_trace_stack() { + return gtest_trace_stack_.pointer(); + } + const internal::List<TraceInfo>* gtest_trace_stack() const { + return gtest_trace_stack_.pointer(); + } + +#ifdef GTEST_HAS_DEATH_TEST + // Returns a pointer to the parsed --gtest_internal_run_death_test + // flag, or NULL if that flag was not specified. + // This information is useful only in a death test child process. + const InternalRunDeathTestFlag* internal_run_death_test_flag() const { + return internal_run_death_test_flag_.get(); + } + + // Returns a pointer to the current death test factory. + internal::DeathTestFactory* death_test_factory() { + return death_test_factory_.get(); + } + + friend class ReplaceDeathTestFactory; +#endif // GTEST_HAS_DEATH_TEST + + private: + // The UnitTest object that owns this implementation object. 
+ UnitTest* const parent_; + + // Points to (but doesn't own) the test part result reporter. + TestPartResultReporterInterface* test_part_result_reporter_; + + // The list of environments that need to be set-up/torn-down + // before/after the tests are run. environments_in_reverse_order_ + // simply mirrors environments_ in reverse order. + internal::List<Environment*> environments_; + internal::List<Environment*> environments_in_reverse_order_; + + internal::List<TestCase*> test_cases_; // The list of TestCases. + + // Points to the last death test case registered. Initially NULL. + internal::ListNode<TestCase*>* last_death_test_case_; + + // This points to the TestCase for the currently running test. It + // changes as Google Test goes through one test case after another. + // When no test is running, this is set to NULL and Google Test + // stores assertion results in ad_hoc_test_result_. Initially NULL. + TestCase* current_test_case_; + + // This points to the TestInfo for the currently running test. It + // changes as Google Test goes through one test after another. When + // no test is running, this is set to NULL and Google Test stores + // assertion results in ad_hoc_test_result_. Initially NULL. + TestInfo* current_test_info_; + + // Normally, a user only writes assertions inside a TEST or TEST_F, + // or inside a function called by a TEST or TEST_F. Since Google + // Test keeps track of which test is currently running, it can + // associate such an assertion with the test it belongs to. + // + // If an assertion is encountered when no TEST or TEST_F is running, + // Google Test attributes the assertion result to an imaginary "ad hoc" + // test, and records the result in ad_hoc_test_result_. + internal::TestResult ad_hoc_test_result_; + + // The unit test result printer. Will be deleted when the UnitTest + // object is destructed. By default, a plain text printer is used, + // but the user can set this field to use a custom printer if that + // is desired. 
+ UnitTestEventListenerInterface* result_printer_; + + // The OS stack trace getter. Will be deleted when the UnitTest + // object is destructed. By default, an OsStackTraceGetter is used, + // but the user can set this field to use a custom getter if that is + // desired. + OsStackTraceGetterInterface* os_stack_trace_getter_; + + // How long the test took to run, in milliseconds. + TimeInMillis elapsed_time_; + +#ifdef GTEST_HAS_DEATH_TEST + // The decomposed components of the gtest_internal_run_death_test flag, + // parsed when RUN_ALL_TESTS is called. + internal::scoped_ptr<InternalRunDeathTestFlag> internal_run_death_test_flag_; + internal::scoped_ptr<internal::DeathTestFactory> death_test_factory_; +#endif // GTEST_HAS_DEATH_TEST + + // A per-thread stack of traces created by the SCOPED_TRACE() macro. + internal::ThreadLocal<internal::List<TraceInfo> > gtest_trace_stack_; + + GTEST_DISALLOW_COPY_AND_ASSIGN(UnitTestImpl); +}; // class UnitTestImpl + +// Convenience function for accessing the global UnitTest +// implementation object. +inline UnitTestImpl* GetUnitTestImpl() { + return UnitTest::GetInstance()->impl(); +} + +} // namespace internal +} // namespace testing + +#endif // GTEST_SRC_GTEST_INTERNAL_INL_H_ diff --git a/src/headerparser.cc b/src/headerparser.cc new file mode 100644 index 0000000..f9b14db --- /dev/null +++ b/src/headerparser.cc @@ -0,0 +1,323 @@ +// Copyright 2008 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include <config.h> +#include "headerparser.h" +#include "logging.h" +#include "varint_bigendian.h" +#include "vcdiff_defs.h" + +namespace open_vcdiff { + +// *** Methods for ParseableChunk + +void ParseableChunk::Advance(size_t number_of_bytes) { + if (number_of_bytes > UnparsedSize()) { + LOG(DFATAL) << "Internal error: position advanced by " << number_of_bytes + << " bytes, current unparsed size " << UnparsedSize() + << LOG_ENDL; + position_ = end_; + return; + } + position_ += number_of_bytes; +} + +void ParseableChunk::SetPosition(const char* position) { + if (position < start_) { + LOG(DFATAL) << "Internal error: new data position " << position + << " is beyond start of data " << start_ << LOG_ENDL; + position_ = start_; + return; + } + if (position > end_) { + LOG(DFATAL) << "Internal error: new data position " << position + << " is beyond end of data " << end_ << LOG_ENDL; + position_ = end_; + return; + } + position_ = position; +} + +void ParseableChunk::FinishExcept(size_t number_of_bytes) { + if (number_of_bytes > UnparsedSize()) { + LOG(DFATAL) << "Internal error: specified number of remaining bytes " + << number_of_bytes << " is greater than unparsed data size " + << UnparsedSize() << LOG_ENDL; + Finish(); + return; + } + position_ = end_ - number_of_bytes; +} + +// *** Methods for VCDiffHeaderParser + +VCDiffHeaderParser::VCDiffHeaderParser(const char* header_start, + const char* data_end) + : parseable_chunk_(header_start, data_end - header_start), + return_code_(RESULT_SUCCESS), + delta_encoding_length_(0), + delta_encoding_start_(NULL) { } + +bool VCDiffHeaderParser::ParseByte(unsigned char* value) { + if (RESULT_SUCCESS != return_code_) { + return false; + } + if (parseable_chunk_.Empty()) { + return_code_ = RESULT_END_OF_DATA; + return false; + } + *value = static_cast<unsigned char>(*parseable_chunk_.UnparsedData()); + 
parseable_chunk_.Advance(1); + return true; +} + +bool VCDiffHeaderParser::ParseInt32(const char* variable_description, + int32_t* value) { + if (RESULT_SUCCESS != return_code_) { + return false; + } + int32_t parsed_value = + VarintBE<int32_t>::Parse(parseable_chunk_.End(), + parseable_chunk_.UnparsedDataAddr()); + switch (parsed_value) { + case RESULT_ERROR: + LOG(ERROR) << "Expected " << variable_description + << "; found invalid variable-length integer" << LOG_ENDL; + return_code_ = RESULT_ERROR; + return false; + case RESULT_END_OF_DATA: + return_code_ = RESULT_END_OF_DATA; + return false; + default: + *value = parsed_value; + return true; + } +} + +// When an unsigned 32-bit integer is expected, parse a signed 64-bit value +// instead, then check the value limit. The uint32_t type can't be parsed +// directly because two negative values are given special meanings (RESULT_ERROR +// and RESULT_END_OF_DATA) and could not be expressed in an unsigned format. +bool VCDiffHeaderParser::ParseUInt32(const char* variable_description, + uint32_t* value) { + if (RESULT_SUCCESS != return_code_) { + return false; + } + int64_t parsed_value = + VarintBE<int64_t>::Parse(parseable_chunk_.End(), + parseable_chunk_.UnparsedDataAddr()); + switch (parsed_value) { + case RESULT_ERROR: + LOG(ERROR) << "Expected " << variable_description + << "; found invalid variable-length integer" << LOG_ENDL; + return_code_ = RESULT_ERROR; + return false; + case RESULT_END_OF_DATA: + return_code_ = RESULT_END_OF_DATA; + return false; + default: + if (parsed_value > 0xFFFFFFFF) { + LOG(ERROR) << "Value of " << variable_description << "(" << parsed_value + << ") is too large for unsigned 32-bit integer" << LOG_ENDL; + return_code_ = RESULT_ERROR; + return false; + } + *value = static_cast<uint32_t>(parsed_value); + return true; + } +} + +// A VCDChecksum represents an unsigned 32-bit value returned by adler32(), +// but isn't a uint32_t. 
+bool VCDiffHeaderParser::ParseChecksum(const char* variable_description, + VCDChecksum* value) { + uint32_t parsed_value = 0; + if (!ParseUInt32(variable_description, &parsed_value)) { + return false; + } + *value = static_cast<VCDChecksum>(parsed_value); + return true; +} + +bool VCDiffHeaderParser::ParseSize(const char* variable_description, + size_t* value) { + int32_t parsed_value = 0; + if (!ParseInt32(variable_description, &parsed_value)) { + return false; + } + *value = static_cast<size_t>(parsed_value); + return true; +} + +bool VCDiffHeaderParser::ParseSourceSegmentLengthAndPosition( + size_t from_size, + const char* from_boundary_name, + const char* from_name, + size_t* source_segment_length, + size_t* source_segment_position) { + // Verify the length and position values + if (!ParseSize("source segment length", source_segment_length)) { + return false; + } + // Guard against overflow by checking source length first + if (*source_segment_length > from_size) { + LOG(ERROR) << "Source segment length (" << *source_segment_length + << ") is larger than " << from_name << " (" << from_size + << ")" << LOG_ENDL; + return_code_ = RESULT_ERROR; + return false; + } + if (!ParseSize("source segment position", source_segment_position)) { + return false; + } + if ((*source_segment_position >= from_size) && + (*source_segment_length > 0)) { + LOG(ERROR) << "Source segment position (" << *source_segment_position + << ") is past " << from_boundary_name + << " (" << from_size << ")" << LOG_ENDL; + return_code_ = RESULT_ERROR; + return false; + } + const size_t source_segment_end = *source_segment_position + + *source_segment_length; + if (source_segment_end > from_size) { + LOG(ERROR) << "Source segment end position (" << source_segment_end + << ") is past " << from_boundary_name + << " (" << from_size << ")" << LOG_ENDL; + return_code_ = RESULT_ERROR; + return false; + } + return true; +} + +bool VCDiffHeaderParser::ParseWinIndicatorAndSourceSegment( + size_t 
dictionary_size, + size_t decoded_target_size, + unsigned char* win_indicator, + size_t* source_segment_length, + size_t* source_segment_position) { + if (!ParseByte(win_indicator)) { + return false; + } + unsigned char source_target_flags = + *win_indicator & (VCD_SOURCE | VCD_TARGET); + switch (source_target_flags) { + case VCD_SOURCE: + return ParseSourceSegmentLengthAndPosition(dictionary_size, + "end of dictionary", + "dictionary", + source_segment_length, + source_segment_position); + case VCD_TARGET: + return ParseSourceSegmentLengthAndPosition(decoded_target_size, + "current target position", + "target file", + source_segment_length, + source_segment_position); + case VCD_SOURCE | VCD_TARGET: + LOG(ERROR) << "Win_Indicator must not have both VCD_SOURCE" + " and VCD_TARGET set" << LOG_ENDL; + return_code_ = RESULT_ERROR; + return false; + default: + return true; + } +} + +bool VCDiffHeaderParser::ParseWindowLengths(size_t* target_window_length) { + if (delta_encoding_start_) { + LOG(DFATAL) << "Internal error: VCDiffHeaderParser::ParseWindowLengths " + "was called twice for the same delta window" << LOG_ENDL; + return_code_ = RESULT_ERROR; + return false; + } + if (!ParseSize("length of the delta encoding", &delta_encoding_length_)) { + return false; + } + delta_encoding_start_ = UnparsedData(); + if (!ParseSize("size of the target window", target_window_length)) { + return false; + } + if (*target_window_length > kMaxTargetWindowSize) { + LOG(ERROR) << "Length of target window (" << (*target_window_length) + << ") exceeds limit of " << kMaxTargetWindowSize << " bytes" + << LOG_ENDL; + return_code_ = RESULT_ERROR; + return false; + } + return true; +} + +const char* VCDiffHeaderParser::EndOfDeltaWindow() const { + if (!delta_encoding_start_) { + LOG(DFATAL) << "Internal error: VCDiffHeaderParser::GetDeltaWindowEnd " + "was called before ParseWindowLengths" << LOG_ENDL; + return NULL; + } + return delta_encoding_start_ + delta_encoding_length_; +} + +bool 
VCDiffHeaderParser::ParseDeltaIndicator() { + unsigned char delta_indicator; + if (!ParseByte(&delta_indicator)) { + return false; + } + if (delta_indicator & (VCD_DATACOMP | VCD_INSTCOMP | VCD_ADDRCOMP)) { + LOG(ERROR) << "Secondary compression of delta file sections " + "is not supported" << LOG_ENDL; + return_code_ = RESULT_ERROR; + return false; + } + return true; +} + +bool VCDiffHeaderParser::ParseSectionLengths( + bool has_checksum, + size_t* add_and_run_data_length, + size_t* instructions_and_sizes_length, + size_t* addresses_length, + VCDChecksum* checksum) { + ParseSize("length of data for ADDs and RUNs", add_and_run_data_length); + ParseSize("length of instructions section", instructions_and_sizes_length); + ParseSize("length of addresses for COPYs", addresses_length); + if (has_checksum) { + ParseChecksum("Adler32 checksum value", checksum); + } + if (RESULT_SUCCESS != return_code_) { + return false; + } + if (!delta_encoding_start_) { + LOG(DFATAL) << "Internal error: VCDiffHeaderParser::ParseSectionLengths " + "was called before ParseWindowLengths" << LOG_ENDL; + return_code_ = RESULT_ERROR; + return false; + } + const size_t delta_encoding_header_length = + UnparsedData() - delta_encoding_start_; + if (delta_encoding_length_ != + (delta_encoding_header_length + + *add_and_run_data_length + + *instructions_and_sizes_length + + *addresses_length)) { + LOG(ERROR) << "The length of the delta encoding does not match " + "the size of the header plus the sizes of the data sections" + << LOG_ENDL; + return_code_ = RESULT_ERROR; + return false; + } + return true; +} + +} // namespace open_vcdiff diff --git a/src/headerparser.h b/src/headerparser.h new file mode 100644 index 0000000..6271f0a --- /dev/null +++ b/src/headerparser.h @@ -0,0 +1,404 @@ +// Copyright 2008 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_VCDIFF_HEADERPARSER_H_ +#define OPEN_VCDIFF_HEADERPARSER_H_ + +#include <config.h> +#include <stdint.h> // int32_t, uint32_t +#include <cstddef> // NULL +#include "checksum.h" // VCDChecksum +#include "vcdiff_defs.h" // VCDiffResult + +namespace open_vcdiff { + +// This class contains a contiguous memory buffer with start and end pointers, +// as well as a position pointer which shows how much of the buffer has been +// parsed and how much remains. +// +// Because no virtual destructor is defined for ParseableChunk, a pointer to +// a child class of ParseableChunk must be destroyed using its specific type, +// rather than as a ParseableChunk*. +class ParseableChunk { + public: + ParseableChunk(const char* data_start, size_t data_size) { + SetDataBuffer(data_start, data_size); + } + + const char* End() const { return end_; } + + // The number of bytes remaining to be parsed. This is not necessarily the + // same as the initial size of the buffer; it changes with each call to + // Advance(). + size_t UnparsedSize() const { + return end_ - position_; + } + + // The number of bytes that have already been parsed. + size_t ParsedSize() const { + return position_ - start_; + } + + bool Empty() const { return 0 == UnparsedSize(); } + + // The start of the data remaining to be parsed. + const char* UnparsedData() const { return position_; } + + // Returns a pointer to the start of the data remaining to be parsed. + const char** UnparsedDataAddr() { return &position_; } + + // Moves the parsing position forward by number_of_bytes. 
+ void Advance(size_t number_of_bytes); + + // Jumps the parsing position to a new location. + void SetPosition(const char* position); + + // Jumps the parsing position to the end of the data chunk. + void Finish() { + position_ = end_; + } + + // Jumps the parsing position so that there are now number_of_bytes + // bytes left to parse. This number should be smaller than the size of data + // to be parsed before the function was called. + void FinishExcept(size_t number_of_bytes); + + void SetDataBuffer(const char* data_start, size_t data_size) { + start_ = data_start; + end_ = data_start + data_size; + position_ = start_; + } + + private: + const char* start_; + const char* end_; + + // The current parsing position within the data chunk. + // Must always respect start_ <= position_ <= end_. + const char* position_; + + // Making these private avoids implicit copy constructor & assignment operator + ParseableChunk(const ParseableChunk&); + void operator=(const ParseableChunk&); +}; + +// Represents one of the three sections in the delta window, as described in +// RFC section 4.3: +// * Data section for ADDs and RUNs +// * Instructions and sizes section +// * Addresses section for COPYs +// When using the interleaved format, data and addresses are pulled from the +// instructions and sizes section rather than being stored in separate sections. +// For that reason, this class allows one DeltaWindowSection to be based on +// another, such that the same position pointer is shared by both sections; +// i.e., UnparsedDataAddr() returns the same value for both objects. +// To achieve this end, one extra level of indirection (a pointer to a +// ParseableChunk object) is added. +class DeltaWindowSection { + public: + DeltaWindowSection() : parseable_chunk_(NULL), owned_(true) { } + + ~DeltaWindowSection() { + FreeChunk(); + } + + void Init(const char* data_start, size_t data_size) { + if (owned_ && parseable_chunk_) { + // Reuse the already-allocated ParseableChunk object. 
+ parseable_chunk_->SetDataBuffer(data_start, data_size); + } else { + parseable_chunk_ = new ParseableChunk(data_start, data_size); + owned_ = true; + } + } + + void Init(DeltaWindowSection* original) { + FreeChunk(); + parseable_chunk_ = original->parseable_chunk_; + owned_ = false; + } + + void Invalidate() { FreeChunk(); } + + bool IsOwned() const { return owned_; } + + // The following functions just pass their arguments to the underlying + // ParseableChunk object. + + const char* End() const { + return parseable_chunk_->End(); + } + + size_t UnparsedSize() const { + return parseable_chunk_->UnparsedSize(); + } + + size_t ParsedSize() const { + return parseable_chunk_->ParsedSize(); + } + + bool Empty() const { + return parseable_chunk_->Empty(); + } + + const char* UnparsedData() const { + return parseable_chunk_->UnparsedData(); + } + + const char** UnparsedDataAddr() { + return parseable_chunk_->UnparsedDataAddr(); + } + + void Advance(size_t number_of_bytes) { + return parseable_chunk_->Advance(number_of_bytes); + } + private: + void FreeChunk() { + if (owned_) { + delete parseable_chunk_; + } + parseable_chunk_ = NULL; + } + + // Will be NULL until Init() has been called. If owned_ is true, this will + // point to a ParseableChunk object that has been allocated with "new" and + // must be deleted by this DeltaWindowSection object. If owned_ is false, + // this points at the parseable_chunk_ owned by a different DeltaWindowSection + // object. In this case, it is important to free the DeltaWindowSection which + // does not own the ParseableChunk before (or simultaneously to) freeing the + // DeltaWindowSection that owns it, or else deleted memory may be accessed. 
+ ParseableChunk* parseable_chunk_; + bool owned_; + + // Making these private avoids implicit copy constructor & assignment operator + DeltaWindowSection(const DeltaWindowSection&); + void operator=(const DeltaWindowSection&); +}; + +// Used to parse the bytes and Varints that make up the delta file header +// or delta window header. +class VCDiffHeaderParser { + public: + // The maximum allowable size of a target window. This restricts the amount + // of memory that can be allocated by the decoder. A maliciously formulated + // delta file can create a target window of any arbitrary size, so the + // decoder needs to be sure that it can allocate this much memory using + // std::string::reserve(). + // + static const size_t kMaxTargetWindowSize = 1 << 26; // 64 MB + + // header_start should be the start of the header to be parsed; + // data_end is the position just after the last byte of available data + // (which may extend far past the end of the header.) + VCDiffHeaderParser(const char* header_start, const char* data_end); + + // One of these functions should be called for each element of the header. + // variable_description is a description of the value that we are attempting + // to parse, and will only be used to create descriptive error messages. + // If the function returns true, then the element was parsed successfully + // and its value has been placed in *value. If the function returns false, + // then *value is unchanged, and GetResult() can be called to return the + // reason that the element could not be parsed, which will be either + // RESULT_ERROR (an error occurred), or RESULT_END_OF_DATA (the limit data_end + // was reached before the end of the element to be parsed.) Once one of these + // functions has returned false, further calls to any of the Parse... + // functions will also return false without performing any additional actions. 
+ // Typical usage is as follows: + // int32_t segment_length = 0; + // if (!header_parser.ParseInt32("segment length", &segment_length)) { + // return header_parser.GetResult(); + // } + // + // The following example takes advantage of the fact that calling a Parse... + // function after an error or end-of-data condition is legal and does nothing. + // It can thus parse more than one element in a row and check the status + // afterwards. If the first call to ParseInt32() fails, the second will have + // no effect: + // + // int32_t segment_length = 0, segment_position = 0; + // header_parser.ParseInt32("segment length", &segment_length); + // header_parser.ParseInt32("segment position", &segment_position); + // if (RESULT_SUCCESS != header_parser.GetResult()) { + // return header_parser.GetResult(); + // } + // + bool ParseByte(unsigned char* value); + bool ParseInt32(const char* variable_description, int32_t* value); + bool ParseUInt32(const char* variable_description, uint32_t* value); + bool ParseChecksum(const char* variable_description, VCDChecksum* value); + bool ParseSize(const char* variable_description, size_t* value); + + // Parses the first three elements of the delta window header: + // + // Win_Indicator - byte + // [Source segment size] - integer (VarintBE format) + // [Source segment position] - integer (VarintBE format) + // + // Returns true if the values were parsed successfully and the values were + // found to be acceptable. Returns false otherwise, in which case + // GetResult() can be called to return the reason that the two values + // could not be validated. This will be either RESULT_ERROR (an error + // occurred and was logged), or RESULT_END_OF_DATA (the limit data_end was + // reached before the end of the values to be parsed.) If return value is + // true, then *win_indicator, *source_segment_length, and + // *source_segment_position are populated with the parsed values. 
Otherwise, + // the values of these output arguments are undefined. + // + // dictionary_size: The size of the dictionary (source) file. Used to + // validate the limits of source_segment_length and + // source_segment_position if the source segment is taken from the + // dictionary (i.e., if the parsed *win_indicator equals VCD_SOURCE.) + // decoded_target_size: The size of the target data that has been decoded + // so far, including all target windows. Used to validate the limits of + // source_segment_length and source_segment_position if the source segment + // is taken from the target (i.e., if the parsed *win_indicator equals + // VCD_TARGET.) + // win_indicator (output): Points to a single unsigned char (not an array) + // that will receive the parsed value of Win_Indicator. + // source_segment_length (output): The parsed length of the source segment. + // source_segment_position (output): The parsed zero-based index in the + // source/target file from which the source segment is to be taken. + bool ParseWinIndicatorAndSourceSegment(size_t dictionary_size, + size_t decoded_target_size, + unsigned char* win_indicator, + size_t* source_segment_length, + size_t* source_segment_position); + + // Parses the following two elements of the delta window header: + // + // Length of the delta encoding - integer (VarintBE format) + // Size of the target window - integer (VarintBE format) + // + // Return conditions and values are the same as for + // ParseWinIndicatorAndSourceSegment(), above. + // + bool ParseWindowLengths(size_t* target_window_length); + + // May only be called after ParseWindowLengths() has returned true. + // Returns a pointer to the end of the delta window (which might not point to + // a valid memory location if there is insufficient input data.) 
+ // + const char* EndOfDeltaWindow() const; + + // Parses the following element of the delta window header: + // + // Delta_Indicator - byte + // + // Because none of the bits in Delta_Indicator are used by this implementation + // of VCDIFF, this function does not have an output argument to return the + // value of that field. Like the other Parse...() functions it returns a + // bool; afterwards GetResult() yields RESULT_SUCCESS, RESULT_ERROR, or + // RESULT_END_OF_DATA. + // + bool ParseDeltaIndicator(); + + // Parses the following 3 elements of the delta window header: + // + // Length of data for ADDs and RUNs - integer (VarintBE format) + // Length of instructions and sizes - integer (VarintBE format) + // Length of addresses for COPYs - integer (VarintBE format) + // + // If has_checksum is true, it also looks for the following element: + // + // Adler32 checksum - unsigned 32-bit integer (VarintBE format) + // + // Return conditions and values are the same as for + // ParseWinIndicatorAndSourceSegment(), above. + // + bool ParseSectionLengths(bool has_checksum, + size_t* add_and_run_data_length, + size_t* instructions_and_sizes_length, + size_t* addresses_length, + VCDChecksum* checksum); + + // If one of the Parse... functions returned false, this function + // can be used to find the result code (RESULT_ERROR or RESULT_END_OF_DATA) + // describing the reason for the most recent parse failure. If none of the + // Parse... functions has returned false, returns RESULT_SUCCESS. + VCDiffResult GetResult() const { + return return_code_; + } + + // The following functions just pass their arguments to the underlying + // ParseableChunk object. 
+ + const char* End() const { + return parseable_chunk_.End(); + } + + size_t UnparsedSize() const { + return parseable_chunk_.UnparsedSize(); + } + + size_t ParsedSize() const { + return parseable_chunk_.ParsedSize(); + } + + const char* UnparsedData() const { + return parseable_chunk_.UnparsedData(); + } + + private: + // Parses two variable-length integers representing the source segment length + // and source segment position (== offset.) Checks whether the source segment + // length and position would cause it to exceed the size of the source file or + // target file. Returns true if the values were parsed successfully and the + // values were found to be acceptable. Returns false otherwise, in which case + // GetResult() can be called to return the reason that the two values could + // not be validated, which will be either RESULT_ERROR (an error occurred and + // was logged), or RESULT_END_OF_DATA (the limit data_end was reached before + // the end of the integers to be parsed.) + // from_size: The requested size of the source segment. + // from_boundary_name: A NULL-terminated string naming the end of the + // source or target file, used in error messages. + // from_name: A NULL-terminated string naming the source or target file, + // also used in error messages. + // source_segment_length (output): The parsed length of the source segment. + // source_segment_position (output): The parsed zero-based index in the + // source/target file from which the source segment is to be taken. + // + bool ParseSourceSegmentLengthAndPosition(size_t from_size, + const char* from_boundary_name, + const char* from_name, + size_t* source_segment_length, + size_t* source_segment_position); + + ParseableChunk parseable_chunk_; + + // Contains the result code of the last Parse...() operation that failed + // (RESULT_ERROR or RESULT_END_OF_DATA). If no Parse...() method has been + // called, or if all calls to Parse...() were successful, then this contains + // RESULT_SUCCESS. 
+ VCDiffResult return_code_; + + // Will be zero until ParseWindowLengths() has been called. After + // ParseWindowLengths() has been called successfully, this contains the + // parsed length of the delta encoding. + size_t delta_encoding_length_; + + // Will be NULL until ParseWindowLengths() has been called. After + // ParseWindowLengths() has been called successfully, this points to the + // beginning of the section of the current window titled "The delta encoding" + // in the RFC, i.e., to the position just after the length of the delta + // encoding. + const char* delta_encoding_start_; + + // Making these private avoids implicit copy constructor & assignment operator + VCDiffHeaderParser(const VCDiffHeaderParser&); + void operator=(const VCDiffHeaderParser&); +}; + +} // namespace open_vcdiff + +#endif // OPEN_VCDIFF_HEADERPARSER_H_ diff --git a/src/headerparser_test.cc b/src/headerparser_test.cc new file mode 100644 index 0000000..30a7519 --- /dev/null +++ b/src/headerparser_test.cc @@ -0,0 +1,209 @@ +// Copyright 2008 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <config.h> +#include "headerparser.h" +#include <cstdlib> // rand, srand +#include <string> +#include <vector> +#include "testing.h" +#include "varint_bigendian.h" + +namespace open_vcdiff { +namespace { // anonymous + +using std::string; +using std::vector; + +class VCDiffHeaderParserTest : public testing::Test { + protected: + static const int kTestSize = 1024; + + VCDiffHeaderParserTest() : parser(NULL) { } + + virtual ~VCDiffHeaderParserTest() { + delete parser; + } + + virtual void SetUp() { + srand(1); // make sure each test uses the same data set + } + + void StartParsing() { + parser = new VCDiffHeaderParser( + encoded_buffer_.data(), + encoded_buffer_.data() + encoded_buffer_.size()); + EXPECT_EQ(encoded_buffer_.data(), parser->UnparsedData()); + } + + void VerifyByte(unsigned char expected_value) { + unsigned char decoded_byte = 0; + const char* prior_position = parser->UnparsedData(); + EXPECT_TRUE(parser->ParseByte(&decoded_byte)); + EXPECT_EQ(expected_value, decoded_byte); + EXPECT_EQ(RESULT_SUCCESS, parser->GetResult()); + EXPECT_EQ(prior_position + sizeof(unsigned char), + parser->UnparsedData()); + } + + void VerifyInt32(int32_t expected_value) { + int32_t decoded_integer = 0; + const char* prior_position = parser->UnparsedData(); + EXPECT_TRUE(parser->ParseInt32("decoded int32", &decoded_integer)); + EXPECT_EQ(expected_value, decoded_integer); + EXPECT_EQ(RESULT_SUCCESS, parser->GetResult()); + EXPECT_EQ(prior_position + VarintBE<int32_t>::Length(decoded_integer), + parser->UnparsedData()); + } + + void VerifyUInt32(uint32_t expected_value) { + uint32_t decoded_integer = 0; + const char* prior_position = parser->UnparsedData(); + EXPECT_TRUE(parser->ParseUInt32("decoded uint32", &decoded_integer)); + EXPECT_EQ(expected_value, decoded_integer); + EXPECT_EQ(RESULT_SUCCESS, parser->GetResult()); + EXPECT_EQ(prior_position + VarintBE<int64_t>::Length(decoded_integer), + parser->UnparsedData()); + } + + void VerifyChecksum(VCDChecksum 
expected_value) { + VCDChecksum decoded_checksum = 0; + const char* prior_position = parser->UnparsedData(); + EXPECT_TRUE(parser->ParseChecksum("decoded checksum", &decoded_checksum)); + EXPECT_EQ(expected_value, decoded_checksum); + EXPECT_EQ(RESULT_SUCCESS, parser->GetResult()); + EXPECT_EQ(prior_position + VarintBE<int64_t>::Length(decoded_checksum), + parser->UnparsedData()); + } + + string encoded_buffer_; + VCDiffHeaderParser* parser; +}; + +TEST_F(VCDiffHeaderParserTest, ParseRandomBytes) { + vector<unsigned char> byte_values; + for (int i = 0; i < kTestSize; ++i) { + unsigned char random_byte = PortableRandomInRange<unsigned char>(0xFF); + encoded_buffer_.push_back(random_byte); + byte_values.push_back(random_byte); + } + StartParsing(); + for (int position = 0; position < kTestSize; ++position) { + VerifyByte(byte_values[position]); + } + unsigned char decoded_byte = 0; + EXPECT_FALSE(parser->ParseByte(&decoded_byte)); + EXPECT_EQ(RESULT_END_OF_DATA, parser->GetResult()); + EXPECT_EQ(encoded_buffer_.data() + encoded_buffer_.size(), + parser->UnparsedData()); +} + +TEST_F(VCDiffHeaderParserTest, ParseRandomInt32) { + vector<int32_t> integer_values; + for (int i = 0; i < kTestSize; ++i) { + int32_t random_integer = PortableRandomInRange<int32_t>(0x7FFFFFFF); + VarintBE<int32_t>::AppendToString(random_integer, &encoded_buffer_); + integer_values.push_back(random_integer); + } + StartParsing(); + for (int i = 0; i < kTestSize; ++i) { + VerifyInt32(integer_values[i]); + } + int32_t decoded_integer = 0; + EXPECT_FALSE(parser->ParseInt32("decoded integer", &decoded_integer)); + EXPECT_EQ(RESULT_END_OF_DATA, parser->GetResult()); + EXPECT_EQ(encoded_buffer_.data() + encoded_buffer_.size(), + parser->UnparsedData()); +} + +TEST_F(VCDiffHeaderParserTest, ParseRandomUInt32) { + vector<uint32_t> integer_values; + for (int i = 0; i < kTestSize; ++i) { + uint32_t random_integer = PortableRandomInRange<uint32_t>(0xFFFFFFFF); + 
VarintBE<int64_t>::AppendToString(random_integer, &encoded_buffer_); + integer_values.push_back(random_integer); + } + StartParsing(); + uint32_t decoded_integer = 0; + for (int i = 0; i < kTestSize; ++i) { + VerifyUInt32(integer_values[i]); + } + EXPECT_FALSE(parser->ParseUInt32("decoded integer", &decoded_integer)); + EXPECT_EQ(RESULT_END_OF_DATA, parser->GetResult()); + EXPECT_EQ(encoded_buffer_.data() + encoded_buffer_.size(), + parser->UnparsedData()); +} + +TEST_F(VCDiffHeaderParserTest, ParseRandomChecksum) { + vector<VCDChecksum> checksum_values; + for (int i = 0; i < kTestSize; ++i) { + VCDChecksum random_checksum = + PortableRandomInRange<VCDChecksum>(0xFFFFFFFF); + VarintBE<int64_t>::AppendToString(random_checksum, &encoded_buffer_); + checksum_values.push_back(random_checksum); + } + StartParsing(); + for (int i = 0; i < kTestSize; ++i) { + VerifyChecksum(checksum_values[i]); + } + VCDChecksum decoded_checksum = 0; + EXPECT_FALSE(parser->ParseChecksum("decoded checksum", &decoded_checksum)); + EXPECT_EQ(RESULT_END_OF_DATA, parser->GetResult()); + EXPECT_EQ(encoded_buffer_.data() + encoded_buffer_.size(), + parser->UnparsedData()); +} + +TEST_F(VCDiffHeaderParserTest, ParseMixed) { + VarintBE<int64_t>::AppendToString(0xCAFECAFE, &encoded_buffer_); + encoded_buffer_.push_back(0xFF); + VarintBE<int32_t>::AppendToString(0x02020202, &encoded_buffer_); + VarintBE<int64_t>::AppendToString(0xCAFECAFE, &encoded_buffer_); + encoded_buffer_.push_back(0xFF); + encoded_buffer_.push_back(0xFF); + StartParsing(); + VerifyUInt32(0xCAFECAFE); + VerifyByte(0xFF); + VerifyInt32(0x02020202); + VerifyChecksum(0xCAFECAFE); + int32_t incomplete_int32 = 0; + EXPECT_FALSE(parser->ParseInt32("incomplete Varint", &incomplete_int32)); + EXPECT_EQ(0, incomplete_int32); + EXPECT_EQ(RESULT_END_OF_DATA, parser->GetResult()); + EXPECT_EQ(encoded_buffer_.data() + encoded_buffer_.size() - 2, + parser->UnparsedData()); +} + +TEST_F(VCDiffHeaderParserTest, ParseInvalidVarint) { + // Start 
with a byte that has the continuation bit plus a high-order bit set + encoded_buffer_.append(1, static_cast<char>(0xC0)); + // Add too many bytes with continuation bits + encoded_buffer_.append(6, static_cast<char>(0x80)); + StartParsing(); + int32_t invalid_int32 = 0; + EXPECT_FALSE(parser->ParseInt32("invalid Varint", &invalid_int32)); + EXPECT_EQ(0, invalid_int32); + EXPECT_EQ(RESULT_ERROR, parser->GetResult()); + EXPECT_EQ(encoded_buffer_.data(), parser->UnparsedData()); + // After the parse failure, any other call to Parse... should return an error, + // even though there is still a byte that could be read as valid. + unsigned char decoded_byte = 0; + EXPECT_FALSE(parser->ParseByte(&decoded_byte)); + EXPECT_EQ(0, decoded_byte); + EXPECT_EQ(RESULT_ERROR, parser->GetResult()); + EXPECT_EQ(encoded_buffer_.data(), parser->UnparsedData()); +} + +} // anonymous namespace +} // namespace open_vcdiff diff --git a/src/instruction_map.cc b/src/instruction_map.cc new file mode 100644 index 0000000..9d5fd5f --- /dev/null +++ b/src/instruction_map.cc @@ -0,0 +1,196 @@ +// Copyright 2008 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <config.h> +#include "instruction_map.h" +#include "addrcache.h" +#include "vcdiff_defs.h" + +namespace open_vcdiff { + +// VCDiffInstructionMap members and methods + +VCDiffInstructionMap* VCDiffInstructionMap::default_instruction_map = NULL; + +VCDiffInstructionMap* VCDiffInstructionMap::GetDefaultInstructionMap() { + if (!default_instruction_map) { + default_instruction_map = new VCDiffInstructionMap( + VCDiffCodeTableData::kDefaultCodeTableData, + VCDiffAddressCache::DefaultLastMode()); + } + return default_instruction_map; +} + +static unsigned char FindMaxSize( + const unsigned char size_array[VCDiffCodeTableData::kCodeTableSize]) { + unsigned char max_size = size_array[0]; + for (int i = 1; i < VCDiffCodeTableData::kCodeTableSize; ++i) { + if (size_array[i] > max_size) { + max_size = size_array[i]; + } + } + return max_size; +} + +static void ClearSizeOpcodeArray(int length, OpcodeOrNone* array) { + for (int i = 0; i < length; ++i) { + array[i] = kNoOpcode; + } +} + +static OpcodeOrNone* NewSizeOpcodeArray(int length) { + OpcodeOrNone* array = new OpcodeOrNone[length]; + ClearSizeOpcodeArray(length, array); + return array; +} + +VCDiffInstructionMap::FirstInstructionMap::FirstInstructionMap( + int num_insts_and_modes, + int max_size_1) + : num_instruction_type_modes_(num_insts_and_modes), + max_size_1_(max_size_1) { + first_opcodes_ = new OpcodeOrNone*[num_instruction_type_modes_]; + for (int i = 0; i < num_instruction_type_modes_; ++i) { + // There must be at least (max_size_1_ + 1) elements in first_opcodes_ + // because the element first_opcodes[max_size_1_] will be referenced. 
+ first_opcodes_[i] = NewSizeOpcodeArray(max_size_1_ + 1); + } +} + +VCDiffInstructionMap::FirstInstructionMap::~FirstInstructionMap() { + for (int i = 0; i < num_instruction_type_modes_; ++i) { + delete[] first_opcodes_[i]; + } + delete[] first_opcodes_; +} + +VCDiffInstructionMap::SecondInstructionMap::SecondInstructionMap( + int num_insts_and_modes, + int max_size_2) + : num_instruction_type_modes_(num_insts_and_modes), + max_size_2_(max_size_2) { + memset(second_opcodes_, 0, sizeof(second_opcodes_)); +} + + +VCDiffInstructionMap::SecondInstructionMap::~SecondInstructionMap() { + for (int opcode = 0; opcode < VCDiffCodeTableData::kCodeTableSize; ++opcode) { + if (second_opcodes_[opcode] != NULL) { + for (int inst_mode = 0; + inst_mode < num_instruction_type_modes_; + ++inst_mode) { + // No need to check for NULL + delete[] second_opcodes_[opcode][inst_mode]; + } + delete[] second_opcodes_[opcode]; + } + } +} + +void VCDiffInstructionMap::SecondInstructionMap::Add( + unsigned char first_opcode, + unsigned char inst, + unsigned char size, + unsigned char mode, + unsigned char second_opcode) { + OpcodeOrNone**& inst_mode_array = second_opcodes_[first_opcode]; + if (!inst_mode_array) { + inst_mode_array = new OpcodeOrNone*[num_instruction_type_modes_]; + memset(inst_mode_array, + 0, + num_instruction_type_modes_ * sizeof(inst_mode_array[0])); + } + OpcodeOrNone*& size_array = inst_mode_array[inst + mode]; + if (!size_array) { + // There must be at least (max_size_2_ + 1) elements in size_array + // because the element size_array[max_size_2_] will be referenced. 
+ size_array = NewSizeOpcodeArray(max_size_2_ + 1); + } + if (size_array[size] == kNoOpcode) { + size_array[size] = second_opcode; + } +} + +OpcodeOrNone VCDiffInstructionMap::SecondInstructionMap::Lookup( + unsigned char first_opcode, + unsigned char inst, + unsigned char size, + unsigned char mode) const { + if (size > max_size_2_) { + return kNoOpcode; + } + const OpcodeOrNone* const * const inst_mode_array = + second_opcodes_[first_opcode]; + if (!inst_mode_array) { + return kNoOpcode; + } + int inst_mode = (inst == VCD_COPY) ? (inst + mode) : inst; + const OpcodeOrNone* const size_array = inst_mode_array[inst_mode]; + if (!size_array) { + return kNoOpcode; + } + return size_array[size]; +} + +// Because a constructor should never fail, the caller must already +// have run ValidateCodeTable() against the code table data. +// +VCDiffInstructionMap::VCDiffInstructionMap( + const VCDiffCodeTableData& code_table_data, + unsigned char max_mode) + : first_instruction_map_(VCD_LAST_INSTRUCTION_TYPE + max_mode + 1, + FindMaxSize(code_table_data.size1)), + second_instruction_map_(VCD_LAST_INSTRUCTION_TYPE + max_mode + 1, + FindMaxSize(code_table_data.size2)) { + // First pass to fill up first_instruction_map_ + for (int opcode = 0; opcode < VCDiffCodeTableData::kCodeTableSize; ++opcode) { + if (code_table_data.inst2[opcode] == VCD_NOOP) { + // Single instruction. If there is more than one opcode for the same + // inst, mode, and size, then the lowest-numbered opcode will always + // be used by the encoder: this loop visits opcodes in ascending order + // and Add() never overwrites a slot that already holds an opcode. + first_instruction_map_.Add(code_table_data.inst1[opcode], + code_table_data.size1[opcode], + code_table_data.mode1[opcode], + opcode); + } else if (code_table_data.inst1[opcode] == VCD_NOOP) { + // An unusual case where inst1 == NOOP and inst2 == ADD, RUN, or COPY. + // This is valid under the standard, but unlikely to be used. + // Add it to the first instruction map as if inst1 and inst2 were swapped. 
+ first_instruction_map_.Add(code_table_data.inst2[opcode], + code_table_data.size2[opcode], + code_table_data.mode2[opcode], + opcode); + } + } + // Second pass to fill up second_instruction_map_ (depends on first pass) + for (int opcode = 0; opcode < VCDiffCodeTableData::kCodeTableSize; ++opcode) { + if ((code_table_data.inst1[opcode] != VCD_NOOP) && + (code_table_data.inst2[opcode] != VCD_NOOP)) { + // Double instruction. Find the corresponding single instruction opcode + const OpcodeOrNone single_opcode = + LookupFirstOpcode(code_table_data.inst1[opcode], + code_table_data.size1[opcode], + code_table_data.mode1[opcode]); + if (single_opcode == kNoOpcode) continue; // No single opcode found + second_instruction_map_.Add(static_cast<unsigned char>(single_opcode), + code_table_data.inst2[opcode], + code_table_data.size2[opcode], + code_table_data.mode2[opcode], + opcode); + } + } +} + +}; // namespace open_vcdiff diff --git a/src/instruction_map.h b/src/instruction_map.h new file mode 100644 index 0000000..00747a9 --- /dev/null +++ b/src/instruction_map.h @@ -0,0 +1,208 @@ +// Copyright 2008 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// There are two different representations of a Code Table's contents: +// VCDiffCodeTableData is the same as the format given in section 7 +// of the RFC, and is used for transmission and decoding. 
However, +// on the encoding side, it is useful to have a representation that +// can map efficiently from delta instructions to opcodes: +// VCDiffInstructionMap. A VCDiffInstructionMap is constructed +// using a VCDiffCodeTableData. For a custom code table, it is recommended +// that the VCDiffCodeTableData be defined as a static struct and that the +// VCDiffInstructionMap be a static pointer that gets initialized only once. + +#ifndef OPEN_VCDIFF_INSTRUCTION_MAP_H_ +#define OPEN_VCDIFF_INSTRUCTION_MAP_H_ + +#include <config.h> +#include "codetable.h" +#include "vcdiff_defs.h" + +namespace open_vcdiff { + +// An alternate representation of the data in a VCDiffCodeTableData that +// optimizes for fast encoding, that is, for taking a delta instruction +// inst (also known as instruction type), size, and mode and arriving at +// the corresponding opcode. +// +class VCDiffInstructionMap { + public: + // Create a VCDiffInstructionMap from the information in code_table_data. + // Does not save a pointer to code_table_data after using its contents + // to create the instruction->opcode mappings. The caller *must* have + // verified that code_table_data->Validate() returned true before + // attempting to use this constructor. + // max_mode is the maximum value for the mode of a COPY instruction. + // + VCDiffInstructionMap(const VCDiffCodeTableData& code_table_data, + unsigned char max_mode); + + static VCDiffInstructionMap* GetDefaultInstructionMap(); + + // Finds an opcode that has the given inst, size, and mode for its first + // instruction and NOOP for its second instruction (or vice versa.) + // Returns kNoOpcode if the code table does not have any matching + // opcode. Otherwise, returns an opcode value between 0 and 255. + // + // If this function returns kNoOpcode for size > 0, the caller will + // usually want to try again with size == 0 to find an opcode that + // doesn't have a fixed size value. 
+ // + // If this function returns kNoOpcode for size == 0, it is an error condition, + // because any code table that passed the Validate() check should have a way + // of expressing all combinations of inst and mode with size=0. + // + OpcodeOrNone LookupFirstOpcode(unsigned char inst, + unsigned char size, + unsigned char mode) const { + return first_instruction_map_.Lookup(inst, size, mode); + } + + // Given a first opcode (presumed to have been returned by a previous call to + // LookupFirstOpcode), finds an opcode that has the same first instruction as + // the first opcode, and has the given inst, size, and mode for its second + // instruction. + // + // If this function returns kNoOpcode for size > 0, the caller will + // usually want to try again with size == 0 to find an opcode that + // doesn't have a fixed size value. + // + OpcodeOrNone LookupSecondOpcode(unsigned char first_opcode, + unsigned char inst, + unsigned char size, + unsigned char mode) const { + return second_instruction_map_.Lookup(first_opcode, inst, size, mode); + } + + private: + // Data structure used to implement LookupFirstOpcode efficiently. + // + class FirstInstructionMap { + public: + FirstInstructionMap(int num_insts_and_modes, int max_size_1); + ~FirstInstructionMap(); + + // Stores opcode in the slot indexed by (inst + mode, size), unless that + // slot already holds an opcode, in which case the existing entry is kept. + // + void Add(unsigned char inst, + unsigned char size, + unsigned char mode, + unsigned char opcode) { + OpcodeOrNone* opcode_slot = &first_opcodes_[inst + mode][size]; + if (*opcode_slot == kNoOpcode) { + *opcode_slot = opcode; + } + } + + // See comments for LookupFirstOpcode, above. + // + OpcodeOrNone Lookup(unsigned char inst, + unsigned char size, + unsigned char mode) const { + int inst_mode = (inst == VCD_COPY) ? (inst + mode) : inst; + if (size > max_size_1_) { + return kNoOpcode; + } + // Lookup specific-sized opcode + return first_opcodes_[inst_mode][size]; + } + + private: + // The number of possible combinations of inst (a VCDiffInstructionType) and + // mode. 
Since the mode is only used for COPY instructions, this number + // is not (number of VCDiffInstructionType values) * (number of modes), but + // rather (number of VCDiffInstructionType values other than VCD_COPY) + // + (number of COPY modes). + // + // Compressing inst and mode into a single integer relies on + // VCD_COPY being the last instruction type. The inst+mode values are: + // 0 (NOOP), 1 (ADD), 2 (RUN), 3 (COPY mode 0), 4 (COPY mode 1), ... + // + const int num_instruction_type_modes_; + + // The maximum value of a size1 element in code_table_data + // + const int max_size_1_; + + // There are two levels to first_opcodes_: + // 1) A dynamically-allocated pointer array of size + // num_instruction_type_modes_ (one element for each combination of inst + // and mode.) Every element of this array is non-NULL and contains + // a pointer to: + // 2) A dynamically-allocated array of OpcodeOrNone values, with one element + // for each possible first instruction size (size1) in the code table. + // (In the default code table, for example, the maximum size used is 18, + // so these arrays would have 19 elements representing values 0 + // through 18.) + // + OpcodeOrNone** first_opcodes_; + + // Making these private avoids implicit copy constructor + // and assignment operator + FirstInstructionMap(const FirstInstructionMap&); // NOLINT + void operator=(const FirstInstructionMap&); + } first_instruction_map_; + + // Data structure used to implement LookupSecondOpcode efficiently. + // + class SecondInstructionMap { + public: + SecondInstructionMap(int num_insts_and_modes, int max_size_2); + ~SecondInstructionMap(); + void Add(unsigned char first_opcode, + unsigned char inst, + unsigned char size, + unsigned char mode, + unsigned char second_opcode); + + // See comments for LookupSecondOpcode, above. 
+ OpcodeOrNone Lookup(unsigned char first_opcode, + unsigned char inst, + unsigned char size, + unsigned char mode) const; + private: + // See the member of the same name in FirstInstructionMap. + const int num_instruction_type_modes_; + + // The maximum value of a size2 element in code_table_data + const int max_size_2_; + + // There are three levels to second_opcodes_: + // 1) A statically-allocated pointer array with one element + // for each possible opcode. Each element can be NULL, or can point to: + // 2) A dynamically-allocated pointer array of size + // num_instruction_type_modes_ (one element for each combination of inst + // and mode.) Each element can be NULL, or can point to: + // 3) A dynamically-allocated array with one element for each possible + // second instruction size in the code table. (In the default code + // table, for example, the maximum size used is 6, so these arrays would + // have 7 elements representing values 0 through 6.) + // + OpcodeOrNone** second_opcodes_[VCDiffCodeTableData::kCodeTableSize]; + + // Making these private avoids implicit copy constructor + // and assignment operator + SecondInstructionMap(const SecondInstructionMap&); // NOLINT + void operator=(const SecondInstructionMap&); + } second_instruction_map_; + + static VCDiffInstructionMap* default_instruction_map; + + // Making these private avoids implicit copy constructor & assignment operator + VCDiffInstructionMap(const VCDiffInstructionMap&); // NOLINT + void operator=(const VCDiffInstructionMap&); +}; + +}; // namespace open_vcdiff + +#endif // OPEN_VCDIFF_INSTRUCTION_MAP_H_ diff --git a/src/instruction_map_test.cc b/src/instruction_map_test.cc new file mode 100644 index 0000000..c505e49 --- /dev/null +++ b/src/instruction_map_test.cc @@ -0,0 +1,603 @@ +// Copyright 2008 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Unit tests for the class VCDiffInstructionMap, found in instruction_map.h. + +#include <config.h> +#include "instruction_map.h" +#include "codetable.h" +#include "testing.h" +#include "vcdiff_defs.h" + +namespace open_vcdiff { +namespace { + +class InstructionMapTest : public testing::Test { + protected: + virtual ~InstructionMapTest() { } + + static void AddExerciseOpcode(unsigned char inst1, + unsigned char mode1, + unsigned char size1, + unsigned char inst2, + unsigned char mode2, + unsigned char size2, + int opcode) { + g_exercise_code_table_->inst1[opcode] = inst1; + g_exercise_code_table_->mode1[opcode] = mode1; + g_exercise_code_table_->size1[opcode] = (inst1 == VCD_NOOP) ? 0 : size1; + g_exercise_code_table_->inst2[opcode] = inst2; + g_exercise_code_table_->mode2[opcode] = mode2; + g_exercise_code_table_->size2[opcode] = (inst2 == VCD_NOOP) ? 
0 : size2; + } + + static void SetUpTestCase() { + g_exercise_code_table_ = new VCDiffCodeTableData; + int opcode = 0; + for (unsigned char inst_mode1 = 0; + inst_mode1 <= VCD_LAST_INSTRUCTION_TYPE + kLastExerciseMode; + ++inst_mode1) { + unsigned char inst1 = inst_mode1; + unsigned char mode1 = 0; + if (inst_mode1 > VCD_COPY) { + inst1 = VCD_COPY; + mode1 = inst_mode1 - VCD_COPY; + } + for (unsigned char inst_mode2 = 0; + inst_mode2 <= VCD_LAST_INSTRUCTION_TYPE + kLastExerciseMode; + ++inst_mode2) { + unsigned char inst2 = inst_mode2; + unsigned char mode2 = 0; + if (inst_mode2 > VCD_COPY) { + inst2 = VCD_COPY; + mode2 = inst_mode2 - VCD_COPY; + } + AddExerciseOpcode(inst1, mode1, 0, inst2, mode2, 0, opcode++); + AddExerciseOpcode(inst1, mode1, 0, inst2, mode2, 255, opcode++); + AddExerciseOpcode(inst1, mode1, 255, inst2, mode2, 0, opcode++); + AddExerciseOpcode(inst1, mode1, 255, inst2, mode2, 255, opcode++); + } + } + // This is a CHECK rather than an EXPECT because it validates only + // the logic of the test, not of the code being tested. 
+ CHECK_EQ(VCDiffCodeTableData::kCodeTableSize, opcode); + + EXPECT_TRUE(VCDiffCodeTableData::kDefaultCodeTableData.Validate()); + EXPECT_TRUE(g_exercise_code_table_->Validate(kLastExerciseMode)); + default_map = VCDiffInstructionMap::GetDefaultInstructionMap(); + exercise_map = new VCDiffInstructionMap(*g_exercise_code_table_, + kLastExerciseMode); + } + + static void TearDownTestCase() { + delete exercise_map; + delete g_exercise_code_table_; + } + + void VerifyExerciseFirstInstruction(unsigned char expected_opcode, + unsigned char inst, + unsigned char size, + unsigned char mode) { + int found_opcode = exercise_map->LookupFirstOpcode(inst, size, mode); + if (g_exercise_code_table_->inst1[found_opcode] == VCD_NOOP) { + // The opcode is backwards: (VCD_NOOP, [instruction]) + EXPECT_GE(expected_opcode, found_opcode); + EXPECT_EQ(inst, g_exercise_code_table_->inst2[found_opcode]); + EXPECT_EQ(size, g_exercise_code_table_->size2[found_opcode]); + EXPECT_EQ(mode, g_exercise_code_table_->mode2[found_opcode]); + EXPECT_EQ(VCD_NOOP, g_exercise_code_table_->inst1[found_opcode]); + EXPECT_EQ(0, g_exercise_code_table_->size1[found_opcode]); + EXPECT_EQ(0, g_exercise_code_table_->mode1[found_opcode]); + } else { + EXPECT_EQ(expected_opcode, found_opcode); + EXPECT_EQ(inst, g_exercise_code_table_->inst1[found_opcode]); + EXPECT_EQ(size, g_exercise_code_table_->size1[found_opcode]); + EXPECT_EQ(mode, g_exercise_code_table_->mode1[found_opcode]); + EXPECT_EQ(VCD_NOOP, g_exercise_code_table_->inst2[found_opcode]); + EXPECT_EQ(0, g_exercise_code_table_->size2[found_opcode]); + EXPECT_EQ(0, g_exercise_code_table_->mode2[found_opcode]); + } + } + + void VerifyExerciseSecondInstruction(unsigned char expected_opcode, + unsigned char inst1, + unsigned char size1, + unsigned char mode1, + unsigned char inst2, + unsigned char size2, + unsigned char mode2) { + int first_opcode = exercise_map->LookupFirstOpcode(inst1, size1, mode1); + EXPECT_NE(kNoOpcode, first_opcode); + 
EXPECT_EQ(expected_opcode, + exercise_map->LookupSecondOpcode(first_opcode, + inst2, + size2, + mode2)); + } + + // This value is designed so that the total number of inst values and modes + // will equal 8 (VCD_NOOP, VCD_ADD, VCD_RUN, VCD_COPY modes 0 - 4). + // Eight combinations of inst and mode, times two possible size values, + // squared (because there are two instructions per opcode), makes + // exactly 256 possible instruction combinations, which fits kCodeTableSize + // (the number of opcodes in the table.) + static const int kLastExerciseMode = 4; + + // A code table that exercises as many combinations as possible: + // 2 instructions, each is a NOOP, ADD, RUN, or one of 5 copy modes + // (== 8 total combinations of inst and mode), and each has + // size == 0 or 255 (2 possibilities.) + static VCDiffCodeTableData* g_exercise_code_table_; + + // The instruction map corresponding to kDefaultCodeTableData. + static const VCDiffInstructionMap* default_map; + + // The instruction map corresponding to g_exercise_code_table_. 
+ static const VCDiffInstructionMap* exercise_map; + + size_t out_index; +}; + +VCDiffCodeTableData* InstructionMapTest::g_exercise_code_table_ = NULL; +const VCDiffInstructionMap* InstructionMapTest::default_map = NULL; +const VCDiffInstructionMap* InstructionMapTest::exercise_map = NULL; + +TEST_F(InstructionMapTest, DefaultMapLookupFirstNoop) { + EXPECT_EQ(kNoOpcode, default_map->LookupFirstOpcode(VCD_NOOP, 0, 0)); + EXPECT_EQ(kNoOpcode, default_map->LookupFirstOpcode(VCD_NOOP, 0, 255)); + EXPECT_EQ(kNoOpcode, default_map->LookupFirstOpcode(VCD_NOOP, 255, 0)); + EXPECT_EQ(kNoOpcode, default_map->LookupFirstOpcode(VCD_NOOP, 255, 255)); +} + +TEST_F(InstructionMapTest, DefaultMapLookupFirstAdd) { + EXPECT_EQ(2, default_map->LookupFirstOpcode(VCD_ADD, 1, 0)); + EXPECT_EQ(3, default_map->LookupFirstOpcode(VCD_ADD, 2, 0)); + EXPECT_EQ(4, default_map->LookupFirstOpcode(VCD_ADD, 3, 0)); + EXPECT_EQ(5, default_map->LookupFirstOpcode(VCD_ADD, 4, 0)); + EXPECT_EQ(6, default_map->LookupFirstOpcode(VCD_ADD, 5, 0)); + EXPECT_EQ(7, default_map->LookupFirstOpcode(VCD_ADD, 6, 0)); + EXPECT_EQ(8, default_map->LookupFirstOpcode(VCD_ADD, 7, 0)); + EXPECT_EQ(9, default_map->LookupFirstOpcode(VCD_ADD, 8, 0)); + EXPECT_EQ(10, default_map->LookupFirstOpcode(VCD_ADD, 9, 0)); + EXPECT_EQ(11, default_map->LookupFirstOpcode(VCD_ADD, 10, 0)); + EXPECT_EQ(12, default_map->LookupFirstOpcode(VCD_ADD, 11, 0)); + EXPECT_EQ(13, default_map->LookupFirstOpcode(VCD_ADD, 12, 0)); + EXPECT_EQ(14, default_map->LookupFirstOpcode(VCD_ADD, 13, 0)); + EXPECT_EQ(15, default_map->LookupFirstOpcode(VCD_ADD, 14, 0)); + EXPECT_EQ(16, default_map->LookupFirstOpcode(VCD_ADD, 15, 0)); + EXPECT_EQ(17, default_map->LookupFirstOpcode(VCD_ADD, 16, 0)); + EXPECT_EQ(18, default_map->LookupFirstOpcode(VCD_ADD, 17, 0)); + EXPECT_EQ(kNoOpcode, default_map->LookupFirstOpcode(VCD_ADD, 100, 0)); + EXPECT_EQ(kNoOpcode, default_map->LookupFirstOpcode(VCD_ADD, 255, 0)); + EXPECT_EQ(1, default_map->LookupFirstOpcode(VCD_ADD, 0, 
0)); + // Value of "mode" should not matter + EXPECT_EQ(2, default_map->LookupFirstOpcode(VCD_ADD, 1, 2)); + EXPECT_EQ(2, default_map->LookupFirstOpcode(VCD_ADD, 1, 255)); +} + +TEST_F(InstructionMapTest, DefaultMapLookupFirstRun) { + EXPECT_EQ(0, default_map->LookupFirstOpcode(VCD_RUN, 0, 0)); + EXPECT_EQ(kNoOpcode, default_map->LookupFirstOpcode(VCD_RUN, 1, 0)); + EXPECT_EQ(kNoOpcode, default_map->LookupFirstOpcode(VCD_RUN, 255, 0)); + // Value of "mode" should not matter + EXPECT_EQ(0, default_map->LookupFirstOpcode(VCD_RUN, 0, 2)); +} + +TEST_F(InstructionMapTest, DefaultMapLookupFirstCopyMode0) { + EXPECT_EQ(19, default_map->LookupFirstOpcode(VCD_COPY, 0, 0)); + EXPECT_EQ(20, default_map->LookupFirstOpcode(VCD_COPY, 4, 0)); + EXPECT_EQ(21, default_map->LookupFirstOpcode(VCD_COPY, 5, 0)); + EXPECT_EQ(22, default_map->LookupFirstOpcode(VCD_COPY, 6, 0)); + EXPECT_EQ(23, default_map->LookupFirstOpcode(VCD_COPY, 7, 0)); + EXPECT_EQ(24, default_map->LookupFirstOpcode(VCD_COPY, 8, 0)); + EXPECT_EQ(25, default_map->LookupFirstOpcode(VCD_COPY, 9, 0)); + EXPECT_EQ(26, default_map->LookupFirstOpcode(VCD_COPY, 10, 0)); + EXPECT_EQ(27, default_map->LookupFirstOpcode(VCD_COPY, 11, 0)); + EXPECT_EQ(28, default_map->LookupFirstOpcode(VCD_COPY, 12, 0)); + EXPECT_EQ(29, default_map->LookupFirstOpcode(VCD_COPY, 13, 0)); + EXPECT_EQ(30, default_map->LookupFirstOpcode(VCD_COPY, 14, 0)); + EXPECT_EQ(31, default_map->LookupFirstOpcode(VCD_COPY, 15, 0)); + EXPECT_EQ(32, default_map->LookupFirstOpcode(VCD_COPY, 16, 0)); + EXPECT_EQ(33, default_map->LookupFirstOpcode(VCD_COPY, 17, 0)); + EXPECT_EQ(34, default_map->LookupFirstOpcode(VCD_COPY, 18, 0)); +} + +TEST_F(InstructionMapTest, DefaultMapLookupFirstCopyMode1) { + EXPECT_EQ(35, default_map->LookupFirstOpcode(VCD_COPY, 0, 1)); + EXPECT_EQ(36, default_map->LookupFirstOpcode(VCD_COPY, 4, 1)); + EXPECT_EQ(37, default_map->LookupFirstOpcode(VCD_COPY, 5, 1)); + EXPECT_EQ(38, default_map->LookupFirstOpcode(VCD_COPY, 6, 1)); + 
EXPECT_EQ(39, default_map->LookupFirstOpcode(VCD_COPY, 7, 1)); + EXPECT_EQ(40, default_map->LookupFirstOpcode(VCD_COPY, 8, 1)); + EXPECT_EQ(41, default_map->LookupFirstOpcode(VCD_COPY, 9, 1)); + EXPECT_EQ(42, default_map->LookupFirstOpcode(VCD_COPY, 10, 1)); + EXPECT_EQ(43, default_map->LookupFirstOpcode(VCD_COPY, 11, 1)); + EXPECT_EQ(44, default_map->LookupFirstOpcode(VCD_COPY, 12, 1)); + EXPECT_EQ(45, default_map->LookupFirstOpcode(VCD_COPY, 13, 1)); + EXPECT_EQ(46, default_map->LookupFirstOpcode(VCD_COPY, 14, 1)); + EXPECT_EQ(47, default_map->LookupFirstOpcode(VCD_COPY, 15, 1)); + EXPECT_EQ(48, default_map->LookupFirstOpcode(VCD_COPY, 16, 1)); + EXPECT_EQ(49, default_map->LookupFirstOpcode(VCD_COPY, 17, 1)); + EXPECT_EQ(50, default_map->LookupFirstOpcode(VCD_COPY, 18, 1)); +} + +TEST_F(InstructionMapTest, DefaultMapLookupFirstCopyMode2) { + EXPECT_EQ(51, default_map->LookupFirstOpcode(VCD_COPY, 0, 2)); + EXPECT_EQ(52, default_map->LookupFirstOpcode(VCD_COPY, 4, 2)); + EXPECT_EQ(53, default_map->LookupFirstOpcode(VCD_COPY, 5, 2)); + EXPECT_EQ(54, default_map->LookupFirstOpcode(VCD_COPY, 6, 2)); + EXPECT_EQ(55, default_map->LookupFirstOpcode(VCD_COPY, 7, 2)); + EXPECT_EQ(56, default_map->LookupFirstOpcode(VCD_COPY, 8, 2)); + EXPECT_EQ(57, default_map->LookupFirstOpcode(VCD_COPY, 9, 2)); + EXPECT_EQ(58, default_map->LookupFirstOpcode(VCD_COPY, 10, 2)); + EXPECT_EQ(59, default_map->LookupFirstOpcode(VCD_COPY, 11, 2)); + EXPECT_EQ(60, default_map->LookupFirstOpcode(VCD_COPY, 12, 2)); + EXPECT_EQ(61, default_map->LookupFirstOpcode(VCD_COPY, 13, 2)); + EXPECT_EQ(62, default_map->LookupFirstOpcode(VCD_COPY, 14, 2)); + EXPECT_EQ(63, default_map->LookupFirstOpcode(VCD_COPY, 15, 2)); + EXPECT_EQ(64, default_map->LookupFirstOpcode(VCD_COPY, 16, 2)); + EXPECT_EQ(65, default_map->LookupFirstOpcode(VCD_COPY, 17, 2)); + EXPECT_EQ(66, default_map->LookupFirstOpcode(VCD_COPY, 18, 2)); +} + +TEST_F(InstructionMapTest, DefaultMapLookupFirstCopyMode3) { + EXPECT_EQ(67, 
default_map->LookupFirstOpcode(VCD_COPY, 0, 3)); + EXPECT_EQ(68, default_map->LookupFirstOpcode(VCD_COPY, 4, 3)); + EXPECT_EQ(69, default_map->LookupFirstOpcode(VCD_COPY, 5, 3)); + EXPECT_EQ(70, default_map->LookupFirstOpcode(VCD_COPY, 6, 3)); + EXPECT_EQ(71, default_map->LookupFirstOpcode(VCD_COPY, 7, 3)); + EXPECT_EQ(72, default_map->LookupFirstOpcode(VCD_COPY, 8, 3)); + EXPECT_EQ(73, default_map->LookupFirstOpcode(VCD_COPY, 9, 3)); + EXPECT_EQ(74, default_map->LookupFirstOpcode(VCD_COPY, 10, 3)); + EXPECT_EQ(75, default_map->LookupFirstOpcode(VCD_COPY, 11, 3)); + EXPECT_EQ(76, default_map->LookupFirstOpcode(VCD_COPY, 12, 3)); + EXPECT_EQ(77, default_map->LookupFirstOpcode(VCD_COPY, 13, 3)); + EXPECT_EQ(78, default_map->LookupFirstOpcode(VCD_COPY, 14, 3)); + EXPECT_EQ(79, default_map->LookupFirstOpcode(VCD_COPY, 15, 3)); + EXPECT_EQ(80, default_map->LookupFirstOpcode(VCD_COPY, 16, 3)); + EXPECT_EQ(81, default_map->LookupFirstOpcode(VCD_COPY, 17, 3)); + EXPECT_EQ(82, default_map->LookupFirstOpcode(VCD_COPY, 18, 3)); +} + +TEST_F(InstructionMapTest, DefaultMapLookupFirstCopyMode4) { + EXPECT_EQ(83, default_map->LookupFirstOpcode(VCD_COPY, 0, 4)); + EXPECT_EQ(84, default_map->LookupFirstOpcode(VCD_COPY, 4, 4)); + EXPECT_EQ(85, default_map->LookupFirstOpcode(VCD_COPY, 5, 4)); + EXPECT_EQ(86, default_map->LookupFirstOpcode(VCD_COPY, 6, 4)); + EXPECT_EQ(87, default_map->LookupFirstOpcode(VCD_COPY, 7, 4)); + EXPECT_EQ(88, default_map->LookupFirstOpcode(VCD_COPY, 8, 4)); + EXPECT_EQ(89, default_map->LookupFirstOpcode(VCD_COPY, 9, 4)); + EXPECT_EQ(90, default_map->LookupFirstOpcode(VCD_COPY, 10, 4)); + EXPECT_EQ(91, default_map->LookupFirstOpcode(VCD_COPY, 11, 4)); + EXPECT_EQ(92, default_map->LookupFirstOpcode(VCD_COPY, 12, 4)); + EXPECT_EQ(93, default_map->LookupFirstOpcode(VCD_COPY, 13, 4)); + EXPECT_EQ(94, default_map->LookupFirstOpcode(VCD_COPY, 14, 4)); + EXPECT_EQ(95, default_map->LookupFirstOpcode(VCD_COPY, 15, 4)); + EXPECT_EQ(96, 
default_map->LookupFirstOpcode(VCD_COPY, 16, 4)); + EXPECT_EQ(97, default_map->LookupFirstOpcode(VCD_COPY, 17, 4)); + EXPECT_EQ(98, default_map->LookupFirstOpcode(VCD_COPY, 18, 4)); +} + +TEST_F(InstructionMapTest, DefaultMapLookupFirstCopyMode5) { + EXPECT_EQ(99, default_map->LookupFirstOpcode(VCD_COPY, 0, 5)); + EXPECT_EQ(100, default_map->LookupFirstOpcode(VCD_COPY, 4, 5)); + EXPECT_EQ(101, default_map->LookupFirstOpcode(VCD_COPY, 5, 5)); + EXPECT_EQ(102, default_map->LookupFirstOpcode(VCD_COPY, 6, 5)); + EXPECT_EQ(103, default_map->LookupFirstOpcode(VCD_COPY, 7, 5)); + EXPECT_EQ(104, default_map->LookupFirstOpcode(VCD_COPY, 8, 5)); + EXPECT_EQ(105, default_map->LookupFirstOpcode(VCD_COPY, 9, 5)); + EXPECT_EQ(106, default_map->LookupFirstOpcode(VCD_COPY, 10, 5)); + EXPECT_EQ(107, default_map->LookupFirstOpcode(VCD_COPY, 11, 5)); + EXPECT_EQ(108, default_map->LookupFirstOpcode(VCD_COPY, 12, 5)); + EXPECT_EQ(109, default_map->LookupFirstOpcode(VCD_COPY, 13, 5)); + EXPECT_EQ(110, default_map->LookupFirstOpcode(VCD_COPY, 14, 5)); + EXPECT_EQ(111, default_map->LookupFirstOpcode(VCD_COPY, 15, 5)); + EXPECT_EQ(112, default_map->LookupFirstOpcode(VCD_COPY, 16, 5)); + EXPECT_EQ(113, default_map->LookupFirstOpcode(VCD_COPY, 17, 5)); + EXPECT_EQ(114, default_map->LookupFirstOpcode(VCD_COPY, 18, 5)); +} + +TEST_F(InstructionMapTest, DefaultMapLookupFirstCopyMode6) { + EXPECT_EQ(115, default_map->LookupFirstOpcode(VCD_COPY, 0, 6)); + EXPECT_EQ(116, default_map->LookupFirstOpcode(VCD_COPY, 4, 6)); + EXPECT_EQ(117, default_map->LookupFirstOpcode(VCD_COPY, 5, 6)); + EXPECT_EQ(118, default_map->LookupFirstOpcode(VCD_COPY, 6, 6)); + EXPECT_EQ(119, default_map->LookupFirstOpcode(VCD_COPY, 7, 6)); + EXPECT_EQ(120, default_map->LookupFirstOpcode(VCD_COPY, 8, 6)); + EXPECT_EQ(121, default_map->LookupFirstOpcode(VCD_COPY, 9, 6)); + EXPECT_EQ(122, default_map->LookupFirstOpcode(VCD_COPY, 10, 6)); + EXPECT_EQ(123, default_map->LookupFirstOpcode(VCD_COPY, 11, 6)); + EXPECT_EQ(124, 
default_map->LookupFirstOpcode(VCD_COPY, 12, 6)); + EXPECT_EQ(125, default_map->LookupFirstOpcode(VCD_COPY, 13, 6)); + EXPECT_EQ(126, default_map->LookupFirstOpcode(VCD_COPY, 14, 6)); + EXPECT_EQ(127, default_map->LookupFirstOpcode(VCD_COPY, 15, 6)); + EXPECT_EQ(128, default_map->LookupFirstOpcode(VCD_COPY, 16, 6)); + EXPECT_EQ(129, default_map->LookupFirstOpcode(VCD_COPY, 17, 6)); + EXPECT_EQ(130, default_map->LookupFirstOpcode(VCD_COPY, 18, 6)); +} + +TEST_F(InstructionMapTest, DefaultMapLookupFirstCopyMode7) { + EXPECT_EQ(131, default_map->LookupFirstOpcode(VCD_COPY, 0, 7)); + EXPECT_EQ(132, default_map->LookupFirstOpcode(VCD_COPY, 4, 7)); + EXPECT_EQ(133, default_map->LookupFirstOpcode(VCD_COPY, 5, 7)); + EXPECT_EQ(134, default_map->LookupFirstOpcode(VCD_COPY, 6, 7)); + EXPECT_EQ(135, default_map->LookupFirstOpcode(VCD_COPY, 7, 7)); + EXPECT_EQ(136, default_map->LookupFirstOpcode(VCD_COPY, 8, 7)); + EXPECT_EQ(137, default_map->LookupFirstOpcode(VCD_COPY, 9, 7)); + EXPECT_EQ(138, default_map->LookupFirstOpcode(VCD_COPY, 10, 7)); + EXPECT_EQ(139, default_map->LookupFirstOpcode(VCD_COPY, 11, 7)); + EXPECT_EQ(140, default_map->LookupFirstOpcode(VCD_COPY, 12, 7)); + EXPECT_EQ(141, default_map->LookupFirstOpcode(VCD_COPY, 13, 7)); + EXPECT_EQ(142, default_map->LookupFirstOpcode(VCD_COPY, 14, 7)); + EXPECT_EQ(143, default_map->LookupFirstOpcode(VCD_COPY, 15, 7)); + EXPECT_EQ(144, default_map->LookupFirstOpcode(VCD_COPY, 16, 7)); + EXPECT_EQ(145, default_map->LookupFirstOpcode(VCD_COPY, 17, 7)); + EXPECT_EQ(146, default_map->LookupFirstOpcode(VCD_COPY, 18, 7)); +} + +TEST_F(InstructionMapTest, DefaultMapLookupFirstCopyMode8) { + EXPECT_EQ(147, default_map->LookupFirstOpcode(VCD_COPY, 0, 8)); + EXPECT_EQ(148, default_map->LookupFirstOpcode(VCD_COPY, 4, 8)); + EXPECT_EQ(149, default_map->LookupFirstOpcode(VCD_COPY, 5, 8)); + EXPECT_EQ(150, default_map->LookupFirstOpcode(VCD_COPY, 6, 8)); + EXPECT_EQ(151, default_map->LookupFirstOpcode(VCD_COPY, 7, 8)); + EXPECT_EQ(152, 
default_map->LookupFirstOpcode(VCD_COPY, 8, 8)); + EXPECT_EQ(153, default_map->LookupFirstOpcode(VCD_COPY, 9, 8)); + EXPECT_EQ(154, default_map->LookupFirstOpcode(VCD_COPY, 10, 8)); + EXPECT_EQ(155, default_map->LookupFirstOpcode(VCD_COPY, 11, 8)); + EXPECT_EQ(156, default_map->LookupFirstOpcode(VCD_COPY, 12, 8)); + EXPECT_EQ(157, default_map->LookupFirstOpcode(VCD_COPY, 13, 8)); + EXPECT_EQ(158, default_map->LookupFirstOpcode(VCD_COPY, 14, 8)); + EXPECT_EQ(159, default_map->LookupFirstOpcode(VCD_COPY, 15, 8)); + EXPECT_EQ(160, default_map->LookupFirstOpcode(VCD_COPY, 16, 8)); + EXPECT_EQ(161, default_map->LookupFirstOpcode(VCD_COPY, 17, 8)); + EXPECT_EQ(162, default_map->LookupFirstOpcode(VCD_COPY, 18, 8)); +} + +TEST_F(InstructionMapTest, DefaultMapLookupFirstCopyInvalid) { + EXPECT_EQ(kNoOpcode, default_map->LookupFirstOpcode(VCD_COPY, 3, 0)); + EXPECT_EQ(kNoOpcode, default_map->LookupFirstOpcode(VCD_COPY, 3, 3)); + EXPECT_EQ(kNoOpcode, default_map->LookupFirstOpcode(VCD_COPY, 255, 0)); +} + +TEST_F(InstructionMapTest, DefaultMapLookupSecondNoop) { + // The second opcode table does not store entries for NOOP instructions. + // Just make sure that a NOOP does not crash the lookup code. 
+ EXPECT_EQ(kNoOpcode, default_map->LookupSecondOpcode(20, VCD_NOOP, 0, 0)); + EXPECT_EQ(kNoOpcode, default_map->LookupSecondOpcode(20, VCD_NOOP, 0, 255)); + EXPECT_EQ(kNoOpcode, default_map->LookupSecondOpcode(20, VCD_NOOP, 255, 0)); + EXPECT_EQ(kNoOpcode, default_map->LookupSecondOpcode(20, VCD_NOOP, 255, 255)); +} + +TEST_F(InstructionMapTest, DefaultMapLookupSecondAdd) { + EXPECT_EQ(247, default_map->LookupSecondOpcode(20, VCD_ADD, 1, 0)); + EXPECT_EQ(248, default_map->LookupSecondOpcode(36, VCD_ADD, 1, 0)); + EXPECT_EQ(249, default_map->LookupSecondOpcode(52, VCD_ADD, 1, 0)); + EXPECT_EQ(250, default_map->LookupSecondOpcode(68, VCD_ADD, 1, 0)); + EXPECT_EQ(251, default_map->LookupSecondOpcode(84, VCD_ADD, 1, 0)); + EXPECT_EQ(252, default_map->LookupSecondOpcode(100, VCD_ADD, 1, 0)); + EXPECT_EQ(253, default_map->LookupSecondOpcode(116, VCD_ADD, 1, 0)); + EXPECT_EQ(254, default_map->LookupSecondOpcode(132, VCD_ADD, 1, 0)); + EXPECT_EQ(255, default_map->LookupSecondOpcode(148, VCD_ADD, 1, 0)); + // Value of "mode" should not matter + EXPECT_EQ(247, default_map->LookupSecondOpcode(20, VCD_ADD, 1, 2)); + EXPECT_EQ(247, default_map->LookupSecondOpcode(20, VCD_ADD, 1, 255)); + // Only valid 2nd ADD opcode has size 1 + EXPECT_EQ(kNoOpcode, default_map->LookupSecondOpcode(20, VCD_ADD, 0, 0)); + EXPECT_EQ(kNoOpcode, default_map->LookupSecondOpcode(20, VCD_ADD, 0, 255)); + EXPECT_EQ(kNoOpcode, default_map->LookupSecondOpcode(20, VCD_ADD, 255, 0)); + EXPECT_EQ(kNoOpcode, default_map->LookupSecondOpcode(0, VCD_ADD, 1, 0)); + EXPECT_EQ(kNoOpcode, default_map->LookupSecondOpcode(1, VCD_ADD, 1, 0)); + EXPECT_EQ(kNoOpcode, default_map->LookupSecondOpcode(247, VCD_ADD, 1, 0)); + EXPECT_EQ(kNoOpcode, default_map->LookupSecondOpcode(255, VCD_ADD, 1, 0)); +} + +TEST_F(InstructionMapTest, DefaultMapLookupSecondRun) { + EXPECT_EQ(kNoOpcode, default_map->LookupSecondOpcode(0, VCD_RUN, 0, 0)); + EXPECT_EQ(kNoOpcode, default_map->LookupSecondOpcode(20, VCD_RUN, 0, 0)); + 
EXPECT_EQ(kNoOpcode, default_map->LookupSecondOpcode(20, VCD_RUN, 0, 255)); + EXPECT_EQ(kNoOpcode, default_map->LookupSecondOpcode(20, VCD_RUN, 255, 0)); + EXPECT_EQ(kNoOpcode, default_map->LookupSecondOpcode(20, VCD_RUN, 255, 255)); + EXPECT_EQ(kNoOpcode, default_map->LookupSecondOpcode(255, VCD_RUN, 0, 0)); +} + +TEST_F(InstructionMapTest, DefaultMapLookupSecondCopyMode0) { + EXPECT_EQ(163, default_map->LookupSecondOpcode(2, VCD_COPY, 4, 0)); + EXPECT_EQ(164, default_map->LookupSecondOpcode(2, VCD_COPY, 5, 0)); + EXPECT_EQ(165, default_map->LookupSecondOpcode(2, VCD_COPY, 6, 0)); + EXPECT_EQ(166, default_map->LookupSecondOpcode(3, VCD_COPY, 4, 0)); + EXPECT_EQ(167, default_map->LookupSecondOpcode(3, VCD_COPY, 5, 0)); + EXPECT_EQ(168, default_map->LookupSecondOpcode(3, VCD_COPY, 6, 0)); + EXPECT_EQ(169, default_map->LookupSecondOpcode(4, VCD_COPY, 4, 0)); + EXPECT_EQ(170, default_map->LookupSecondOpcode(4, VCD_COPY, 5, 0)); + EXPECT_EQ(171, default_map->LookupSecondOpcode(4, VCD_COPY, 6, 0)); + EXPECT_EQ(172, default_map->LookupSecondOpcode(5, VCD_COPY, 4, 0)); + EXPECT_EQ(173, default_map->LookupSecondOpcode(5, VCD_COPY, 5, 0)); + EXPECT_EQ(174, default_map->LookupSecondOpcode(5, VCD_COPY, 6, 0)); +} + +TEST_F(InstructionMapTest, DefaultMapLookupSecondCopyMode1) { + EXPECT_EQ(175, default_map->LookupSecondOpcode(2, VCD_COPY, 4, 1)); + EXPECT_EQ(176, default_map->LookupSecondOpcode(2, VCD_COPY, 5, 1)); + EXPECT_EQ(177, default_map->LookupSecondOpcode(2, VCD_COPY, 6, 1)); + EXPECT_EQ(178, default_map->LookupSecondOpcode(3, VCD_COPY, 4, 1)); + EXPECT_EQ(179, default_map->LookupSecondOpcode(3, VCD_COPY, 5, 1)); + EXPECT_EQ(180, default_map->LookupSecondOpcode(3, VCD_COPY, 6, 1)); + EXPECT_EQ(181, default_map->LookupSecondOpcode(4, VCD_COPY, 4, 1)); + EXPECT_EQ(182, default_map->LookupSecondOpcode(4, VCD_COPY, 5, 1)); + EXPECT_EQ(183, default_map->LookupSecondOpcode(4, VCD_COPY, 6, 1)); + EXPECT_EQ(184, default_map->LookupSecondOpcode(5, VCD_COPY, 4, 1)); + 
EXPECT_EQ(185, default_map->LookupSecondOpcode(5, VCD_COPY, 5, 1)); + EXPECT_EQ(186, default_map->LookupSecondOpcode(5, VCD_COPY, 6, 1)); +} + +TEST_F(InstructionMapTest, DefaultMapLookupSecondCopyMode2) { + EXPECT_EQ(187, default_map->LookupSecondOpcode(2, VCD_COPY, 4, 2)); + EXPECT_EQ(188, default_map->LookupSecondOpcode(2, VCD_COPY, 5, 2)); + EXPECT_EQ(189, default_map->LookupSecondOpcode(2, VCD_COPY, 6, 2)); + EXPECT_EQ(190, default_map->LookupSecondOpcode(3, VCD_COPY, 4, 2)); + EXPECT_EQ(191, default_map->LookupSecondOpcode(3, VCD_COPY, 5, 2)); + EXPECT_EQ(192, default_map->LookupSecondOpcode(3, VCD_COPY, 6, 2)); + EXPECT_EQ(193, default_map->LookupSecondOpcode(4, VCD_COPY, 4, 2)); + EXPECT_EQ(194, default_map->LookupSecondOpcode(4, VCD_COPY, 5, 2)); + EXPECT_EQ(195, default_map->LookupSecondOpcode(4, VCD_COPY, 6, 2)); + EXPECT_EQ(196, default_map->LookupSecondOpcode(5, VCD_COPY, 4, 2)); + EXPECT_EQ(197, default_map->LookupSecondOpcode(5, VCD_COPY, 5, 2)); + EXPECT_EQ(198, default_map->LookupSecondOpcode(5, VCD_COPY, 6, 2)); +} + +TEST_F(InstructionMapTest, DefaultMapLookupSecondCopyMode3) { + EXPECT_EQ(199, default_map->LookupSecondOpcode(2, VCD_COPY, 4, 3)); + EXPECT_EQ(200, default_map->LookupSecondOpcode(2, VCD_COPY, 5, 3)); + EXPECT_EQ(201, default_map->LookupSecondOpcode(2, VCD_COPY, 6, 3)); + EXPECT_EQ(202, default_map->LookupSecondOpcode(3, VCD_COPY, 4, 3)); + EXPECT_EQ(203, default_map->LookupSecondOpcode(3, VCD_COPY, 5, 3)); + EXPECT_EQ(204, default_map->LookupSecondOpcode(3, VCD_COPY, 6, 3)); + EXPECT_EQ(205, default_map->LookupSecondOpcode(4, VCD_COPY, 4, 3)); + EXPECT_EQ(206, default_map->LookupSecondOpcode(4, VCD_COPY, 5, 3)); + EXPECT_EQ(207, default_map->LookupSecondOpcode(4, VCD_COPY, 6, 3)); + EXPECT_EQ(208, default_map->LookupSecondOpcode(5, VCD_COPY, 4, 3)); + EXPECT_EQ(209, default_map->LookupSecondOpcode(5, VCD_COPY, 5, 3)); + EXPECT_EQ(210, default_map->LookupSecondOpcode(5, VCD_COPY, 6, 3)); +} + +TEST_F(InstructionMapTest, 
DefaultMapLookupSecondCopyMode4) { + EXPECT_EQ(211, default_map->LookupSecondOpcode(2, VCD_COPY, 4, 4)); + EXPECT_EQ(212, default_map->LookupSecondOpcode(2, VCD_COPY, 5, 4)); + EXPECT_EQ(213, default_map->LookupSecondOpcode(2, VCD_COPY, 6, 4)); + EXPECT_EQ(214, default_map->LookupSecondOpcode(3, VCD_COPY, 4, 4)); + EXPECT_EQ(215, default_map->LookupSecondOpcode(3, VCD_COPY, 5, 4)); + EXPECT_EQ(216, default_map->LookupSecondOpcode(3, VCD_COPY, 6, 4)); + EXPECT_EQ(217, default_map->LookupSecondOpcode(4, VCD_COPY, 4, 4)); + EXPECT_EQ(218, default_map->LookupSecondOpcode(4, VCD_COPY, 5, 4)); + EXPECT_EQ(219, default_map->LookupSecondOpcode(4, VCD_COPY, 6, 4)); + EXPECT_EQ(220, default_map->LookupSecondOpcode(5, VCD_COPY, 4, 4)); + EXPECT_EQ(221, default_map->LookupSecondOpcode(5, VCD_COPY, 5, 4)); + EXPECT_EQ(222, default_map->LookupSecondOpcode(5, VCD_COPY, 6, 4)); +} + +TEST_F(InstructionMapTest, DefaultMapLookupSecondCopyMode5) { + EXPECT_EQ(223, default_map->LookupSecondOpcode(2, VCD_COPY, 4, 5)); + EXPECT_EQ(224, default_map->LookupSecondOpcode(2, VCD_COPY, 5, 5)); + EXPECT_EQ(225, default_map->LookupSecondOpcode(2, VCD_COPY, 6, 5)); + EXPECT_EQ(226, default_map->LookupSecondOpcode(3, VCD_COPY, 4, 5)); + EXPECT_EQ(227, default_map->LookupSecondOpcode(3, VCD_COPY, 5, 5)); + EXPECT_EQ(228, default_map->LookupSecondOpcode(3, VCD_COPY, 6, 5)); + EXPECT_EQ(229, default_map->LookupSecondOpcode(4, VCD_COPY, 4, 5)); + EXPECT_EQ(230, default_map->LookupSecondOpcode(4, VCD_COPY, 5, 5)); + EXPECT_EQ(231, default_map->LookupSecondOpcode(4, VCD_COPY, 6, 5)); + EXPECT_EQ(232, default_map->LookupSecondOpcode(5, VCD_COPY, 4, 5)); + EXPECT_EQ(233, default_map->LookupSecondOpcode(5, VCD_COPY, 5, 5)); + EXPECT_EQ(234, default_map->LookupSecondOpcode(5, VCD_COPY, 6, 5)); +} + +TEST_F(InstructionMapTest, DefaultMapLookupSecondCopyMode6) { + EXPECT_EQ(235, default_map->LookupSecondOpcode(2, VCD_COPY, 4, 6)); + EXPECT_EQ(236, default_map->LookupSecondOpcode(3, VCD_COPY, 4, 6)); + 
EXPECT_EQ(237, default_map->LookupSecondOpcode(4, VCD_COPY, 4, 6)); + EXPECT_EQ(238, default_map->LookupSecondOpcode(5, VCD_COPY, 4, 6)); + EXPECT_EQ(239, default_map->LookupSecondOpcode(2, VCD_COPY, 4, 7)); +} + +TEST_F(InstructionMapTest, DefaultMapLookupSecondCopyMode7) { + EXPECT_EQ(240, default_map->LookupSecondOpcode(3, VCD_COPY, 4, 7)); + EXPECT_EQ(241, default_map->LookupSecondOpcode(4, VCD_COPY, 4, 7)); + EXPECT_EQ(242, default_map->LookupSecondOpcode(5, VCD_COPY, 4, 7)); +} + +TEST_F(InstructionMapTest, DefaultMapLookupSecondCopyMode8) { + EXPECT_EQ(243, default_map->LookupSecondOpcode(2, VCD_COPY, 4, 8)); + EXPECT_EQ(244, default_map->LookupSecondOpcode(3, VCD_COPY, 4, 8)); + EXPECT_EQ(245, default_map->LookupSecondOpcode(4, VCD_COPY, 4, 8)); + EXPECT_EQ(246, default_map->LookupSecondOpcode(5, VCD_COPY, 4, 8)); +} + +TEST_F(InstructionMapTest, DefaultMapLookupSecondCopyInvalid) { + EXPECT_EQ(kNoOpcode, default_map->LookupSecondOpcode(2, VCD_COPY, 0, 0)); + EXPECT_EQ(kNoOpcode, default_map->LookupSecondOpcode(2, VCD_COPY, 255, 0)); + EXPECT_EQ(kNoOpcode, default_map->LookupSecondOpcode(2, VCD_COPY, 255, 255)); + EXPECT_EQ(kNoOpcode, default_map->LookupSecondOpcode(0, VCD_COPY, 4, 0)); + EXPECT_EQ(kNoOpcode, default_map->LookupSecondOpcode(255, VCD_COPY, 4, 0)); +} + +TEST_F(InstructionMapTest, ExerciseTableLookup) { + int opcode = 0; + // This loop has the same bounds as the one in SetUpTestCase. + // Look up each instruction type and make sure it returns + // the proper opcode. 
+ for (unsigned char inst_mode1 = 0; + inst_mode1 <= VCD_LAST_INSTRUCTION_TYPE + kLastExerciseMode; + ++inst_mode1) { + unsigned char inst1 = inst_mode1; + unsigned char mode1 = 0; + if (inst_mode1 > VCD_COPY) { + inst1 = VCD_COPY; + mode1 = inst_mode1 - VCD_COPY; + } + for (unsigned char inst_mode2 = 0; + inst_mode2 <= VCD_LAST_INSTRUCTION_TYPE + kLastExerciseMode; + ++inst_mode2) { + unsigned char inst2 = inst_mode2; + unsigned char mode2 = 0; + if (inst_mode2 > VCD_COPY) { + inst2 = VCD_COPY; + mode2 = inst_mode2 - VCD_COPY; + } + if (inst2 == VCD_NOOP) { + VerifyExerciseFirstInstruction(opcode, inst1, 0, mode1); + VerifyExerciseFirstInstruction(opcode + 2, + inst1, + ((inst1 == VCD_NOOP) ? 0 : 255), + mode1); + } else if (inst1 != VCD_NOOP) { + VerifyExerciseSecondInstruction(opcode, + inst1, + 0, + mode1, + inst2, + 0, + mode2); + VerifyExerciseSecondInstruction(opcode + 1, + inst1, + 0, + mode1, + inst2, + 255, + mode2); + VerifyExerciseSecondInstruction(opcode + 2, + inst1, + 255, + mode1, + inst2, + 0, + mode2); + VerifyExerciseSecondInstruction(opcode + 3, + inst1, + 255, + mode1, + inst2, + 255, + mode2); + } + opcode += 4; + } + } + // This is a CHECK rather than an EXPECT because it validates only + // the logic of the test, not of the code being tested. + CHECK_EQ(VCDiffCodeTableData::kCodeTableSize, opcode); +} + +} // unnamed namespace +} // namespace open_vcdiff diff --git a/src/logging.cc b/src/logging.cc new file mode 100644 index 0000000..cdcc42b --- /dev/null +++ b/src/logging.cc @@ -0,0 +1,26 @@ +// Copyright 2008 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <config.h> +#include "logging.h" + +namespace open_vcdiff { + +bool g_fatal_error_occurred = false; + +static void DefaultExitFatal() { exit(1); } +void (*ExitFatal)() = &DefaultExitFatal; + +} // namespace open_vcdiff diff --git a/src/logging.h b/src/logging.h new file mode 100644 index 0000000..4b7dff6 --- /dev/null +++ b/src/logging.h @@ -0,0 +1,66 @@ +// Copyright 2008 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef OPEN_VCDIFF_LOGGING_H_ +#define OPEN_VCDIFF_LOGGING_H_ + +#include <config.h> +#include <iostream> +#include <vector> + +// Windows API defines ERROR +#ifdef ERROR +#undef ERROR +#endif // ERROR + +namespace open_vcdiff { + +enum LogLevel { + INFO, + WARNING, + ERROR, + FATAL +}; + +#ifndef NDEBUG +#define DFATAL FATAL +#else // NDEBUG +#define DFATAL ERROR +#endif // !NDEBUG + +extern bool g_fatal_error_occurred; +extern void (*ExitFatal)(); + +inline std::ostream& LogMessage(LogLevel level, const char* level_name) { + if (level == FATAL) { + g_fatal_error_occurred = true; + } + return std::cerr << level_name << ": "; +} + +inline void CheckFatalError() { + if (g_fatal_error_occurred) { + g_fatal_error_occurred = false; + (*ExitFatal)(); + } +} + +} // namespace open_vcdiff + +#define LOG(level) LogMessage(open_vcdiff::level, #level) +#define LOG_ENDL std::endl; \ + open_vcdiff::CheckFatalError(); + +#endif // OPEN_VCDIFF_LOGGING_H_ diff --git a/src/output_string_crope.h b/src/output_string_crope.h new file mode 100644 index 0000000..555f407 --- /dev/null +++ b/src/output_string_crope.h @@ -0,0 +1,40 @@ +// Copyright 2008 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Contains a class that demonstrates how to specialize OutputString for the +// crope type, so that cropes can be passed as output arguments to the encoder +// and decoder. 
+ +#ifndef OPEN_VCDIFF_OUTPUT_STRING_CROPE_H_ +#define OPEN_VCDIFF_OUTPUT_STRING_CROPE_H_ +#include <config.h> +#ifdef HAVE_EXT_ROPE +#include <ext/rope> +#include "google/output_string.h" + +namespace open_vcdiff { + +// *** OutputString interface for crope (OutputCrope) + +// crope::reserve(), if defined, does nothing +template <> +void OutputString<__gnu_cxx::crope>::ReserveAdditionalBytes( + size_t /*res_arg*/) { } + +typedef OutputString<__gnu_cxx::crope> OutputCrope; + +} // namespace open_vcdiff +#endif // HAVE_EXT_ROPE +#endif // OPEN_VCDIFF_OUTPUT_STRING_CROPE_H_ diff --git a/src/output_string_test.cc b/src/output_string_test.cc new file mode 100644 index 0000000..86df075 --- /dev/null +++ b/src/output_string_test.cc @@ -0,0 +1,117 @@ +// Copyright 2008 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
#include <config.h>
#include <string>
#include "google/output_string.h"
#include "testing.h"

#ifdef HAVE_EXT_ROPE
#include <ext/rope>
#include "output_string_crope.h"
#endif  // HAVE_EXT_ROPE

namespace open_vcdiff {

namespace {

using std::string;
#ifdef HAVE_EXT_ROPE
using __gnu_cxx::crope;
#endif  // HAVE_EXT_ROPE

// Fixture: an OutputString<string> wrapping a std::string that starts out
// holding "ab".  Each test drives the wrapper and inspects the wrapped string.
class OutputStringTest : public testing::Test {
 public:
  OutputStringTest() : string_("ab"), output_string_(&string_) { }

  virtual ~OutputStringTest() { }

 protected:
  string string_;
  OutputString<string> output_string_;
};

// append(data, n) appends only the first n characters of data.
TEST_F(OutputStringTest, Append) {
  output_string_.append("cdef", 2);
  EXPECT_EQ("abcd", string_);
}

TEST_F(OutputStringTest, Clear) {
  output_string_.clear();
  EXPECT_EQ("", string_);
}

TEST_F(OutputStringTest, PushBack) {
  output_string_.push_back('c');
  EXPECT_EQ("abc", string_);
}

TEST_F(OutputStringTest, Reserve) {
  const size_t initial_capacity = string_.capacity();
  // Fill the string to capacity so that reserving even one additional byte
  // must grow the underlying buffer.
  string_.resize(string_.capacity());
  EXPECT_EQ(initial_capacity, string_.capacity());
  output_string_.ReserveAdditionalBytes(1);
  EXPECT_LE(initial_capacity + 1, string_.capacity());
}

// size() must track changes made directly to the wrapped string.
TEST_F(OutputStringTest, Size) {
  EXPECT_EQ(string_.size(), output_string_.size());
  string_.push_back('c');
  EXPECT_EQ(string_.size(), output_string_.size());
  string_.clear();
  EXPECT_EQ(string_.size(), output_string_.size());
}

#ifdef HAVE_EXT_ROPE
// Same fixture and tests as above, using a crope instead of a std::string.
// There is no Reserve test for the crope flavor.
class OutputCRopeTest : public testing::Test {
 public:
  OutputCRopeTest() : crope_("ab"), output_crope_(&crope_) { }

  virtual ~OutputCRopeTest() { }

 protected:
  crope crope_;
  OutputCrope output_crope_;
};

TEST_F(OutputCRopeTest, Append) {
  output_crope_.append("cdef", 2);
  crope expected_abcd("abcd");
  EXPECT_EQ(expected_abcd, crope_);
}

TEST_F(OutputCRopeTest, Clear) {
  output_crope_.clear();
  crope expected_empty;
  EXPECT_EQ(expected_empty, crope_);
}

TEST_F(OutputCRopeTest, PushBack) {
  output_crope_.push_back('c');
  crope expected_abc("abc");
  EXPECT_EQ(expected_abc, crope_);
}

TEST_F(OutputCRopeTest, Size) {
  EXPECT_EQ(crope_.size(), output_crope_.size());
  crope_.push_back('c');
  EXPECT_EQ(crope_.size(), output_crope_.size());
  crope_.clear();
  EXPECT_EQ(crope_.size(), output_crope_.size());
}
#endif  // HAVE_EXT_ROPE

}  // anonymous namespace
}  // namespace open_vcdiff
diff --git a/src/rolling_hash.h b/src/rolling_hash.h
new file mode 100644
index 0000000..c757c83
--- /dev/null
+++ b/src/rolling_hash.h
@@ -0,0 +1,237 @@
// Copyright 2007, 2008 Google Inc.
// Authors: Jeff Dean, Sanjay Ghemawat, Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef OPEN_VCDIFF_ROLLING_HASH_H_
#define OPEN_VCDIFF_ROLLING_HASH_H_

#include <config.h>
#include <stdint.h>  // uint32_t
#include "logging.h"

namespace open_vcdiff {

// Rabin-Karp hasher module -- this is a faster version with different
// constants, so it's not quite Rabin-Karp fingerprinting, but its behavior is
// close enough for most applications.

// Definitions common to all hash window sizes.
class RollingHashUtil {
 public:
  // Multiplier for incremental hashing.  The compiler should be smart enough to
  // convert (val * kMult) into ((val << 8) + val).
  static const uint32_t kMult = 257;

  // All hashes are returned modulo "kBase".  Current implementation requires
  // kBase <= 2^32/kMult to avoid overflow.
Also, kBase must be a power of two + // so that we can compute modulus efficiently. + static const uint32_t kBase = (1 << 23); + + // Returns operand % kBase, assuming that kBase is a power of two. + static inline uint32_t ModBase(uint32_t operand) { + return operand & (kBase - 1); + } + + // Given an unsigned integer "operand", returns an unsigned integer "result" + // such that + // result < kBase + // and + // ModBase(operand + result) == 0 + static inline uint32_t FindModBaseInverse(uint32_t operand) { + // The subtraction (0 - operand) produces an unsigned underflow for any + // operand except 0. The underflow results in a (very large) unsigned + // number. Binary subtraction is used instead of unary negation because + // some compilers (e.g. Visual Studio 7+) produce a warning if an unsigned + // value is negated. + // + // The C++ mod operation (operand % kBase) may produce different results for + // different compilers if operand is negative. That is not a problem in + // this case, since all numbers used are unsigned, and ModBase does its work + // using bitwise arithmetic rather than the % operator. + return ModBase(uint32_t(0) - operand); + } + + // Here's the heart of the hash algorithm. Start with a partial_hash value of + // 0, and run this HashStep once against each byte in the data window to be + // hashed. The result will be the hash value for the entire data window. The + // Hash() function, below, does exactly this, albeit with some refinements. + static inline uint32_t HashStep(uint32_t partial_hash, + unsigned char next_byte) { + return ModBase((partial_hash * kMult) + next_byte); + } + + // Use this function to start computing a new hash value based on the first + // two bytes in the window. It is equivalent to calling + // HashStep(HashStep(0, ptr[0]), ptr[1]) + // but takes advantage of the fact that the maximum value of + // (ptr[0] * kMult) + ptr[1] is not large enough to exceed kBase, thus + // avoiding an unnecessary ModBase operation. 
+ static inline uint32_t HashFirstTwoBytes(const char* ptr) { + return (static_cast<unsigned char>(ptr[0]) * kMult) + + static_cast<unsigned char>(ptr[1]); + } + private: + // Making these private avoids copy constructor and assignment operator. + // No objects of this type should be constructed. + RollingHashUtil(); + RollingHashUtil(const RollingHashUtil&); // NOLINT + void operator=(const RollingHashUtil&); +}; + +// window_size must be >= 2. +template<int window_size> +class RollingHash { + public: + // Perform global initialization that is required in order to instantiate a + // RollingHash. This function *must* be called (preferably on startup) by any + // program that uses a RollingHash. It is harmless to call this function more + // than once. It is not thread-safe, but calling it from two different + // threads at the same time can only cause a memory leak, not incorrect + // behavior. Make sure to call it before spawning any threads that could use + // RollingHash. The function returns true if initialization succeeds, or + // false if initialization fails, in which case the caller should not proceed + // to construct any objects of type RollingHash. + static bool Init(); + + // Initialize hasher to maintain a window of the specified size. You need an + // instance of this type to use UpdateHash(), but Hash() does not depend on + // remove_table_, so it is static. + RollingHash() { + if (!remove_table_) { + LOG(DFATAL) << "RollingHash object instantiated" + " before calling RollingHash::Init()" << LOG_ENDL; + } + } + + // Compute a hash of the window "ptr[0, window_size - 1]". + static uint32_t Hash(const char* ptr) { + uint32_t h = RollingHashUtil::HashFirstTwoBytes(ptr); + for (int i = 2; i < window_size; ++i) { + h = RollingHashUtil::HashStep(h, ptr[i]); + } + return h; + } + + // Update a hash by removing the oldest byte and adding a new byte. + // + // UpdateHash takes the hash value of buffer[0] ... 
buffer[window_size -1] + // along with the value of buffer[0] (the "old_first_byte" argument) + // and the value of buffer[window_size] (the "new_last_byte" argument). + // It quickly computes the hash value of buffer[1] ... buffer[window_size] + // without having to run Hash() on the entire window. + // + // The larger the window, the more advantage comes from using UpdateHash() + // (which runs in time independent of window_size) instead of Hash(). + // Each time window_size doubles, the time to execute Hash() also doubles, + // while the time to execute UpdateHash() remains constant. Empirical tests + // have borne out this statement. + uint32_t UpdateHash(uint32_t old_hash, + const char old_first_byte, + const char new_last_byte) const { + uint32_t partial_hash = RemoveFirstByteFromHash(old_hash, old_first_byte); + return RollingHashUtil::HashStep(partial_hash, new_last_byte); + } + + protected: + // Given a full hash value for buffer[0] ... buffer[window_size -1], plus the + // value of the first byte buffer[0], this function returns a *partial* hash + // value for buffer[1] ... buffer[window_size -1]. See the comments in + // Init(), below, for a description of how the contents of remove_table_ are + // computed. + static uint32_t RemoveFirstByteFromHash(uint32_t full_hash, + unsigned char first_byte) { + return RollingHashUtil::ModBase(full_hash + remove_table_[first_byte]); + } + + private: + // We keep a table that maps from any byte "b" to + // (- b * pow(kMult, window_size - 1)) % kBase + static const uint32_t* remove_table_; +}; + +// For each window_size, fill a 256-entry table such that +// the hash value of buffer[0] ... buffer[window_size - 1] +// + remove_table_[buffer[0]] +// == the hash value of buffer[1] ... buffer[window_size - 1] +// See the comments in Init(), below, for a description of how the contents of +// remove_table_ are computed. 
+template<int window_size> +const uint32_t* RollingHash<window_size>::remove_table_ = NULL; + +// Init() checks to make sure that the static object remove_table_ has been +// initialized; if not, it does the considerable work of populating it. Once +// it's ready, the table can be used for any number of RollingHash objects of +// the same window_size. +// +template<int window_size> +bool RollingHash<window_size>::Init() { + if (window_size < 2) { + LOG(ERROR) << "RollingHash window size " << window_size + << " is too small" << LOG_ENDL; + return false; + } + if (remove_table_ == NULL) { + // The new object is placed into a local pointer instead of directly into + // remove_table_, for two reasons: + // 1. remove_table_ is a pointer to const. The table is populated using + // the non-const local pointer and then assigned to the global const + // pointer once it's ready. + // 2. No other thread will ever see remove_table_ pointing to a + // partially-initialized table. If two threads happen to call Init() + // at the same time, two tables with the same contents may be created + // (causing a memory leak), but the results will be consistent + // no matter which of the two tables is used. + uint32_t* new_remove_table = new uint32_t[256]; + // Compute multiplier. Concisely, it is: + // pow(kMult, (window_size - 1)) % kBase, + // but we compute the power in integer form. + uint32_t multiplier = 1; + for (int i = 0; i < window_size - 1; ++i) { + multiplier = + RollingHashUtil::ModBase(multiplier * RollingHashUtil::kMult); + } + // For each character removed_byte, compute + // remove_table_[removed_byte] == + // (- (removed_byte * pow(kMult, (window_size - 1)))) % kBase + // where the power operator "pow" is taken in integer form. + // + // If you take a hash value fp representing the hash of + // buffer[0] ... buffer[window_size - 1] + // and add the value of remove_table_[buffer[0]] to it, the result will be + // a partial hash value for + // buffer[1] ... 
buffer[window_size - 1] + // that is to say, it no longer includes buffer[0]. + // + // The following byte at buffer[window_size] can then be merged with this + // partial hash value to arrive quickly at the hash value for a window that + // has advanced by one byte, to + // buffer[1] ... buffer[window_size] + // In fact, that is precisely what happens in UpdateHash, above. + uint32_t byte_times_multiplier = 0; + for (int removed_byte = 0; removed_byte < 256; ++removed_byte) { + new_remove_table[removed_byte] = + RollingHashUtil::FindModBaseInverse(byte_times_multiplier); + // Iteratively adding the multiplier in this loop is equivalent to + // computing (removed_byte * multiplier), and is faster + byte_times_multiplier = + RollingHashUtil::ModBase(byte_times_multiplier + multiplier); + } + remove_table_ = new_remove_table; + } + return true; +} + +} // namespace open_vcdiff + +#endif // OPEN_VCDIFF_ROLLING_HASH_H_ diff --git a/src/rolling_hash_test.cc b/src/rolling_hash_test.cc new file mode 100644 index 0000000..1a7da9a --- /dev/null +++ b/src/rolling_hash_test.cc @@ -0,0 +1,231 @@ +// Copyright 2007 Google Inc. +// Authors: Jeff Dean, Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <config.h> +#include "rolling_hash.h" +#include <stdint.h> // uint32_t +#include <cstdlib> // rand, srand +#include <vector> +#include "testing.h" + +namespace open_vcdiff { +namespace { + +static const uint32_t kBase = RollingHashUtil::kBase; + +class RollingHashSimpleTest : public testing::Test { + protected: + RollingHashSimpleTest() { } + virtual ~RollingHashSimpleTest() { } + + void TestModBase(uint32_t operand) { + EXPECT_EQ(operand % kBase, RollingHashUtil::ModBase(operand)); + EXPECT_EQ(static_cast<uint32_t>((-static_cast<int32_t>(operand)) % kBase), + RollingHashUtil::FindModBaseInverse(operand)); + EXPECT_EQ(0U, RollingHashUtil::ModBase( + operand + RollingHashUtil::FindModBaseInverse(operand))); + } + + void TestHashFirstTwoBytes(char first_value, char second_value) { + char buf[2]; + buf[0] = first_value; + buf[1] = second_value; + EXPECT_EQ(RollingHashUtil::HashFirstTwoBytes(buf), + RollingHashUtil::HashStep(RollingHashUtil::HashStep(0, + first_value), + second_value)); + EXPECT_EQ(RollingHashUtil::HashFirstTwoBytes(buf), + RollingHashUtil::HashStep(static_cast<unsigned char>(first_value), + second_value)); + } +}; + +#ifdef GTEST_HAS_DEATH_TEST +typedef RollingHashSimpleTest RollingHashDeathTest; +#endif // GTEST_HAS_DEATH_TEST + +TEST_F(RollingHashSimpleTest, KBaseIsAPowerOfTwo) { + EXPECT_EQ(0U, kBase & (kBase - 1)); +} + +TEST_F(RollingHashSimpleTest, TestModBaseForValues) { + TestModBase(0); + TestModBase(10); + TestModBase(static_cast<uint32_t>(-10)); + TestModBase(kBase - 1); + TestModBase(kBase); + TestModBase(kBase + 1); + TestModBase(0x7FFFFFFF); + TestModBase(0x80000000); + TestModBase(0xFFFFFFFE); + TestModBase(0xFFFFFFFF); +} + +TEST_F(RollingHashSimpleTest, VerifyHashFirstTwoBytes) { + TestHashFirstTwoBytes(0x00, 0x00); + TestHashFirstTwoBytes(0x00, 0xFF); + TestHashFirstTwoBytes(0xFF, 0x00); + TestHashFirstTwoBytes(0xFF, 0xFF); + TestHashFirstTwoBytes(0x00, 0x80); + TestHashFirstTwoBytes(0x7F, 0xFF); + 
TestHashFirstTwoBytes(0x7F, 0x80); + TestHashFirstTwoBytes(0x01, 0x8F); +} + +#ifdef GTEST_HAS_DEATH_TEST +TEST_F(RollingHashDeathTest, InstantiateBlockHashWithoutCallingInit) { + EXPECT_DEBUG_DEATH(RollingHash<16> bad_hash, "Init"); +} +#endif // GTEST_HAS_DEATH_TEST + +class RollingHashTest : public testing::Test { + public: + static const int kUpdateHashBlocks = 1000; + static const int kLargestBlockSize = 128; + + static void MakeRandomBuffer(char* buffer, int buffer_size) { + for (int i = 0; i < buffer_size; ++i) { + buffer[i] = PortableRandomInRange<unsigned char>(0xFF); + } + } + + template<int kBlockSize> static void BM_DefaultHash(int iterations, + const char *buffer) { + RollingHash<kBlockSize> hasher; + static uint32_t result_array[kUpdateHashBlocks]; + for (int iter = 0; iter < iterations; ++iter) { + for (int i = 0; i < kUpdateHashBlocks; ++i) { + result_array[i] = hasher.Hash(&buffer[i]); + } + } + } + + template<int kBlockSize> static void BM_UpdateHash(int iterations, + const char *buffer) { + RollingHash<kBlockSize> hasher; + static uint32_t result_array[kUpdateHashBlocks]; + for (int iter = 0; iter < iterations; ++iter) { + uint32_t running_hash = hasher.Hash(buffer); + for (int i = 0; i < kUpdateHashBlocks; ++i) { + running_hash = hasher.UpdateHash(running_hash, + buffer[i], + buffer[i + kBlockSize]); + result_array[i] = running_hash; + } + } + } + + protected: + static const int kUpdateHashTestIterations = 400; + static const int kTimingTestSize = 1 << 14; // 16K iterations + + RollingHashTest() { } + virtual ~RollingHashTest() { } + + template<int kBlockSize> void UpdateHashMatchesHashForBlockSize() { + RollingHash<kBlockSize>::Init(); + RollingHash<kBlockSize> hasher; + for (int x = 0; x < kUpdateHashTestIterations; ++x) { + int random_buffer_size = + PortableRandomInRange(kUpdateHashBlocks - 1) + kBlockSize; + MakeRandomBuffer(buffer_, random_buffer_size); + uint32_t running_hash = hasher.Hash(buffer_); + for (int i = kBlockSize; i < 
random_buffer_size; ++i) { + // UpdateHash() calculates the hash value incrementally. + running_hash = hasher.UpdateHash(running_hash, + buffer_[i - kBlockSize], + buffer_[i]); + // Hash() calculates the hash value from scratch. Verify that both + // methods return the same hash value. + EXPECT_EQ(running_hash, hasher.Hash(&buffer_[i + 1 - kBlockSize])); + } + } + } + + template<int kBlockSize> double DefaultHashTimingTest() { + // Execution time is expected to be O(kBlockSize) per hash operation, + // so scale the number of iterations accordingly + const int kTimingTestIterations = kTimingTestSize / kBlockSize; + CycleTimer timer; + timer.Start(); + BM_DefaultHash<kBlockSize>(kTimingTestIterations, buffer_); + timer.Stop(); + return static_cast<double>(timer.GetInUsec()) + / (kTimingTestIterations * kUpdateHashBlocks); + } + + template<int kBlockSize> double RollingTimingTest() { + // Execution time is expected to be O(1) per hash operation, + // so leave the number of iterations constant + const int kTimingTestIterations = kTimingTestSize; + CycleTimer timer; + timer.Start(); + BM_UpdateHash<kBlockSize>(kTimingTestIterations, buffer_); + timer.Stop(); + return static_cast<double>(timer.GetInUsec()) + / (kTimingTestIterations * kUpdateHashBlocks); + } + + double FindPercentage(double original, double modified) { + if (original < 0.0001) { + return 0.0; + } else { + return ((modified - original) / original) * 100.0; + } + } + + template<int kBlockSize> void RunTimingTestForBlockSize() { + RollingHash<kBlockSize>::Init(); + MakeRandomBuffer(buffer_, sizeof(buffer_)); + const double time_for_default_hash = DefaultHashTimingTest<kBlockSize>(); + const double time_for_rolling_hash = RollingTimingTest<kBlockSize>(); + printf("%d\t%f\t%f (%f%%)\n", + kBlockSize, + time_for_default_hash, + time_for_rolling_hash, + FindPercentage(time_for_default_hash, time_for_rolling_hash)); + CHECK_GT(time_for_default_hash, 0.0); + CHECK_GT(time_for_rolling_hash, 0.0); + if (kBlockSize 
> 16) { + EXPECT_GT(time_for_default_hash, time_for_rolling_hash); + } + } + + char buffer_[kUpdateHashBlocks + kLargestBlockSize]; +}; + +TEST_F(RollingHashTest, UpdateHashMatchesHashFromScratch) { + srand(1); // test should be deterministic, including calls to rand() + UpdateHashMatchesHashForBlockSize<4>(); + UpdateHashMatchesHashForBlockSize<8>(); + UpdateHashMatchesHashForBlockSize<16>(); + UpdateHashMatchesHashForBlockSize<32>(); + UpdateHashMatchesHashForBlockSize<64>(); + UpdateHashMatchesHashForBlockSize<128>(); +} + +TEST_F(RollingHashTest, TimingTests) { + srand(1); // test should be deterministic, including calls to rand() + printf("BlkSize\tHash (us)\tUpdateHash (us)\n"); + RunTimingTestForBlockSize<4>(); + RunTimingTestForBlockSize<8>(); + RunTimingTestForBlockSize<16>(); + RunTimingTestForBlockSize<32>(); + RunTimingTestForBlockSize<64>(); + RunTimingTestForBlockSize<128>(); +} + +} // anonymous namespace +} // namespace open_vcdiff diff --git a/src/testing.h b/src/testing.h new file mode 100644 index 0000000..205871b --- /dev/null +++ b/src/testing.h @@ -0,0 +1,180 @@ +// Copyright 2008 Google Inc. +// Authors: Craig Silverstein, Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef OPEN_VCDIFF_TESTING_H_ +#define OPEN_VCDIFF_TESTING_H_ + +#include <config.h> +#include <stdint.h> // int64_t, uint64_t +#include <time.h> // gettimeofday +#include <cassert> +#include <cstdlib> // rand +#include "gtest/gtest.h" + +#ifdef HAVE_SYS_TIME_H +#include <sys/time.h> // struct timeval +#endif // HAVE_SYS_TIME_H + +#ifdef HAVE_WINDOWS_H +#include <windows.h> // QueryPerformanceCounter +#endif // HAVE_WINDOWS_H + +// CHECK is used for assertions that verify the consistency of the test itself, +// rather than correctness of the code that is being tested. +// +// It is better to use a preprocessor macro for CHECK +// than an inline function, because assert() may report +// the source file and line where the failure occurred. +// +// Putting parentheses around the macro arguments +// (e.g. "assert((X) == (Y))") would be good practice +// but would produce error messages that are inconsistent +// with those expected in the unit tests. + +#define CHECK(CONDITION) assert(CONDITION) +#define CHECK_EQ(X, Y) assert(X == Y) +#define CHECK_NE(X, Y) assert(X != Y) +#define CHECK_GE(X, Y) assert(X >= Y) +#define CHECK_GT(X, Y) assert(X > Y) +#define CHECK_LE(X, Y) assert(X <= Y) +#define CHECK_LT(X, Y) assert(X < Y) + +namespace open_vcdiff { + +// Support for timing tests +#if defined(HAVE_GETTIMEOFDAY) +class CycleTimer { + public: + inline CycleTimer() { + Reset(); + } + + inline void Reset() { + start_time_.tv_sec = 0; + start_time_.tv_usec = 0; + cumulative_time_in_usec_ = 0; + } + + inline void Start() { + CHECK(!IsStarted()); + gettimeofday(&start_time_, NULL); + } + + inline void Restart() { + Reset(); + Start(); + } + + inline void Stop() { + struct timeval end_time; + gettimeofday(&end_time, NULL); + CHECK(IsStarted()); + cumulative_time_in_usec_ += + (1000000 * (end_time.tv_sec - start_time_.tv_sec)) + + end_time.tv_usec - start_time_.tv_usec; + start_time_.tv_sec = 0; + start_time_.tv_usec = 0; + } + + inline int64_t GetInUsec() { + return 
cumulative_time_in_usec_; + } + + private: + inline bool IsStarted() { + return (start_time_.tv_usec > 0) || (start_time_.tv_sec > 0); + } + + struct timeval start_time_; + int64_t cumulative_time_in_usec_; +}; +#elif defined(HAVE_QUERYPERFORMANCECOUNTER) +class CycleTimer { + public: + inline CycleTimer() { + LARGE_INTEGER frequency; + QueryPerformanceFrequency(&frequency); // counts per second + usecs_per_count_ = 1000000.0 / static_cast<double>(frequency.QuadPart); + Reset(); + } + + inline void Reset() { + start_time_.QuadPart = 0; + cumulative_time_in_usec_ = 0; + } + + inline void Start() { + CHECK(!IsStarted()); + QueryPerformanceCounter(&start_time_); + } + + inline void Restart() { + Reset(); + Start(); + } + + inline void Stop() { + LARGE_INTEGER end_time; + QueryPerformanceCounter(&end_time); + CHECK(IsStarted()); + double count_diff = static_cast<double>( + end_time.QuadPart - start_time_.QuadPart); + cumulative_time_in_usec_ += + static_cast<int64_t>(count_diff * usecs_per_count_); + start_time_.QuadPart = 0; + } + + inline int64_t GetInUsec() { + return cumulative_time_in_usec_; + } + + private: + inline bool IsStarted() { + return start_time_.QuadPart > 0; + } + + LARGE_INTEGER start_time_; + int64_t cumulative_time_in_usec_; + double usecs_per_count_; +}; +#else +#error CycleTimer needs an implementation that does not use gettimeofday or QueryPerformanceCounter +#endif // HAVE_GETTIMEOFDAY + +// This function returns a pseudo-random value of type IntType between 0 and +// limit. It uses the standard rand() function to produce the value, and makes +// as many calls to rand() as needed to ensure that the values returned can fall +// within the full range specified. It is slow, so don't include calls to this +// function when calculating the execution time of tests. 
+// +template<typename IntType> +inline IntType PortableRandomInRange(IntType limit) { + uint64_t value = rand(); + double rand_limit = RAND_MAX; // The maximum possible value + while (rand_limit < limit) { + // value is multiplied by (RAND_MAX + 1) each iteration. This factor will be + // canceled out when we divide by rand_limit to get scaled_value, below. + value = (value * (static_cast<uint64_t>(RAND_MAX) + 1)) + rand(); + rand_limit = (rand_limit * (RAND_MAX + 1.0)) + RAND_MAX; + } + // Translate the random 64-bit integer into a floating-point value between + // 0.0 (inclusive) and 1.0 (inclusive). + const double scaled_value = value / rand_limit; + return static_cast<IntType>(limit * scaled_value); +} + +} // namespace open_vcdiff + +#endif // OPEN_VCDIFF_TESTING_H_ diff --git a/src/varint_bigendian.cc b/src/varint_bigendian.cc new file mode 100644 index 0000000..4dbf7bb --- /dev/null +++ b/src/varint_bigendian.cc @@ -0,0 +1,187 @@ +// Copyright 2008 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <config.h> +#include "varint_bigendian.h" +#include <stdint.h> // int32_t, int64_t +#include <string> +#include "logging.h" +#include "google/output_string.h" + +namespace open_vcdiff { + +using std::string; + +template<> const int32_t VarintBE<int32_t>::kMaxVal = 0x7FFFFFFF; +template<> const int64_t VarintBE<int64_t>::kMaxVal = 0x7FFFFFFFFFFFFFFFULL; + +// Check whether the variable-length integer runs off the end +// of the available input. If there are few bytes left in the input, +// then AND together their most significant bits (the continuation bits) +// and see if they are all set; if so, the integer runs off the end of the +// available input. Only kMaxBytes - 1 bytes need be checked; if there are +// kMaxBytes bytes with their continuation bits set, it is an error condition +// and it will be caught later. +// +// The function is specific to a particular integer type (number of bits.) +// +template <> +inline bool VarintBE<int32_t>::ReachedEndOfData(const char* parse_ptr, + const char* limit) { + unsigned char reached_end_of_data_flag = 0x80; + const unsigned char* const bitwise_ptr = + reinterpret_cast<const unsigned char*>(parse_ptr); + switch (limit - parse_ptr) { + case 4: reached_end_of_data_flag &= bitwise_ptr[3]; + // fall through + case 3: reached_end_of_data_flag &= bitwise_ptr[2]; + // fall through + case 2: reached_end_of_data_flag &= bitwise_ptr[1]; + // fall through + case 1: reached_end_of_data_flag &= bitwise_ptr[0]; + return (reached_end_of_data_flag == 0x80); + case 0: return true; + } + return false; +} + +template <> +inline bool VarintBE<int64_t>::ReachedEndOfData(const char* parse_ptr, + const char* limit) { + unsigned char reached_end_of_data_flag = 0x80; + const unsigned char* const bitwise_ptr = + reinterpret_cast<const unsigned char*>(parse_ptr); + switch (limit - parse_ptr) { + case 8: reached_end_of_data_flag &= bitwise_ptr[7]; + // fall through + case 7: reached_end_of_data_flag &= bitwise_ptr[6]; + // fall through 
+ case 6: reached_end_of_data_flag &= bitwise_ptr[5]; + // fall through + case 5: reached_end_of_data_flag &= bitwise_ptr[4]; + // fall through + case 4: reached_end_of_data_flag &= bitwise_ptr[3]; + // fall through + case 3: reached_end_of_data_flag &= bitwise_ptr[2]; + // fall through + case 2: reached_end_of_data_flag &= bitwise_ptr[1]; + // fall through + case 1: reached_end_of_data_flag &= bitwise_ptr[0]; + return (reached_end_of_data_flag == 0x80); + case 0: return true; + } + return false; +} + +// Reads a variable-length integer from **varint_ptr +// and returns it in a fixed-length representation. Increments +// *varint_ptr by the number of bytes read. Will only read +// a maximum of kMaxBytes bytes from **varint_ptr even if the input data +// has the continuation bit set for more bytes. Will not read +// past limit. Returns RESULT_ERROR if the value parsed +// does not fit in a non-negative signed integer. +// Returns RESULT_END_OF_DATA if address_stream_end is reached +// before the whole integer can be decoded. +// +template <typename SignedIntegerType> +SignedIntegerType VarintBE<SignedIntegerType>::Parse(const char* limit, + const char** varint_ptr) { + const char* parse_ptr = *varint_ptr; + if (ReachedEndOfData(parse_ptr, limit)) { + return RESULT_END_OF_DATA; + } + SignedIntegerType result = *parse_ptr & 0x7F; + while (*parse_ptr & 0x80) { + if (result > (kMaxVal >> 7)) { + // Shifting result by 7 bits would produce a number too large + // to be stored in a non-negative SignedIntegerType (an overflow.) 
+ return RESULT_ERROR; + } + ++parse_ptr; + result = (result << 7) + (*parse_ptr & 0x7F); + } + *varint_ptr = parse_ptr + 1; + return result; +} + +template <typename SignedIntegerType> +int VarintBE<SignedIntegerType>::EncodeInternal(SignedIntegerType v, + char* varint_buf) { + if (v < 0) { + LOG(DFATAL) << "Negative value " << v + << " passed to VarintBE::EncodeInternal," + " which requires non-negative argument" << LOG_ENDL; + return 0; + } + int length = 1; + char* buf_ptr = &varint_buf[kMaxBytes - 1]; + *buf_ptr = static_cast<char>(v & 0x7F); + --buf_ptr; + v >>= 7; + while (v) { + *buf_ptr = static_cast<char>((v & 0x7F) | 0x80); // add continuation bit + --buf_ptr; + ++length; + v >>= 7; + } + return length; +} + +template <typename SignedIntegerType> +int VarintBE<SignedIntegerType>::Encode(SignedIntegerType v, char* ptr) { + char varint_buf[kMaxBytes]; + const int length = EncodeInternal(v, varint_buf); + memcpy(ptr, &varint_buf[kMaxBytes - length], length); + return length; +} + +template <typename SignedIntegerType> +void VarintBE<SignedIntegerType>::AppendToString(SignedIntegerType value, + string* s) { + char varint_buf[kMaxBytes]; + const int length = EncodeInternal(value, varint_buf); + s->append(&varint_buf[kMaxBytes - length], length); +} + +template <typename SignedIntegerType> +void VarintBE<SignedIntegerType>::AppendToOutputString( + SignedIntegerType value, + OutputStringInterface* output_string) { + char varint_buf[kMaxBytes]; + const int length = EncodeInternal(value, varint_buf); + output_string->append(&varint_buf[kMaxBytes - length], length); +} + +// Returns the encoding length of the specified value. 
+template <typename SignedIntegerType> +int VarintBE<SignedIntegerType>::Length(SignedIntegerType v) { + if (v < 0) { + LOG(DFATAL) << "Negative value " << v + << " passed to VarintBE::Length," + " which requires non-negative argument" << LOG_ENDL; + return 0; + } + int length = 0; + do { + v >>= 7; + ++length; + } while (v); + return length; +} + +template class VarintBE<int32_t>; +template class VarintBE<int64_t>; + +} // namespace open_vcdiff diff --git a/src/varint_bigendian.h b/src/varint_bigendian.h new file mode 100644 index 0000000..34e5035 --- /dev/null +++ b/src/varint_bigendian.h @@ -0,0 +1,140 @@ +// Copyright 2008 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_VCDIFF_VARINT_BIGENDIAN_H_ +#define OPEN_VCDIFF_VARINT_BIGENDIAN_H_ + +// Functions for manipulating variable-length integers as described in +// RFC 3284, section 2. (See http://www.ietf.org/rfc/rfc3284.txt) +// This is the same format used by the Sfio library +// and by the public-domain Sqlite package. +// +// The implementation found in this file contains buffer bounds checks +// (not available in sqlite) and its goal is to improve speed +// by using as few test-and-branch instructions as possible. +// +// The Sqlite format has the refinement that, if a 64-bit value is expected, +// the ninth byte of the varint does not have a continuation bit, but instead +// encodes 8 bits of information. 
// This allows 64 bits to be encoded compactly
// in nine bytes.  However, that refinement does not appear in the format
// description in RFC 3284, and so it is not used here.  In any case,
// this header file deals only with *signed* integer types, and so a
// "64-bit" integer is allowed to have only 63 significant bits; an additional
// 64th bit would indicate a negative value and therefore an error.
//

#include <config.h>
#include <stdint.h>  // int32_t, int64_t
#include <string>
#include "vcdiff_defs.h"  // RESULT_ERROR

namespace open_vcdiff {

using std::string;

class OutputStringInterface;

// This helper class is needed in order to ensure that
// VarintBE<SignedIntegerType>::kMaxBytes is treated
// as a compile-time constant when it is used as the size
// of a static array.
// Only the int32_t and int64_t specializations below are defined in this
// header; the primary template is declared but given no body here.
template <typename SignedIntegerType> class VarintMaxBytes;

// 31 bits of data / 7 bits per byte <= 5 bytes
template<> class VarintMaxBytes<int32_t> {
 public:
  static const int kMaxBytes = 5;
};

// 63 bits of data / 7 bits per byte == 9 bytes
template<> class VarintMaxBytes<int64_t> {
 public:
  static const int kMaxBytes = 9;
};

// Objects of type VarintBE should not be instantiated.  The class is a
// container for big-endian constant values and functions used to parse
// and write a particular signed integer type.
// Example: to parse a 32-bit integer value stored as a big-endian varint, use
//     int32_t value = VarintBE<int32_t>::Parse(limit, &ptr);
// Only 32-bit and 64-bit signed integers (int32_t and int64_t) are supported.
// Using a different type as the template argument will likely result
// in a link-time error for an undefined Parse() or AppendToString() function.
//
template <typename SignedIntegerType>
class VarintBE {  // BE stands for Big-Endian
 public:
  // The maximum positive value represented by a SignedIntegerType.
  static const SignedIntegerType kMaxVal;

  // The maximum number of bytes needed to store a varint
  // representation of a <SignedIntegerType> value.
  static const int kMaxBytes = VarintMaxBytes<SignedIntegerType>::kMaxBytes;

  // Attempts to parse a big-endian varint from a prefix of the bytes
  // in [*ptr,limit-1] and convert it into a non-negative
  // <SignedIntegerType> value.  Never reads a character at or beyond limit,
  // and never reads a character at or beyond (*ptr + kMaxBytes).
  // On success, returns the parsed value and advances *ptr to the byte
  // just past the end of the varint.
  // If a parsed varint would exceed the maximum value of
  // a <SignedIntegerType>, returns RESULT_ERROR and does not modify *ptr.
  // If parsing a varint at *ptr (without exceeding the capacity of
  // a <SignedIntegerType>) would require reading past limit,
  // returns RESULT_END_OF_DATA and does not modify *ptr.
  // If limit == NULL, or limit < *ptr, no error will be signalled, but it is
  // recommended that a limit always be supplied for security reasons.
  static SignedIntegerType Parse(const char* limit, const char** ptr);

  // Returns the encoding length of the specified value.
  // The value of v must not be negative.
  static int Length(SignedIntegerType v);

  // Encodes "v" into "ptr" (which points to a buffer of length sufficient
  // to hold "v"; kMaxBytes bytes is always enough) and returns the length
  // of the encoding.
  // The value of v must not be negative.
  static int Encode(SignedIntegerType v, char* ptr);

  // Appends the varint representation of "value" to "*s".
  // The value must not be negative.
  static void AppendToString(SignedIntegerType value, string* s);

  // Appends the varint representation of "value" to output_string.
  // The value must not be negative.
  static void AppendToOutputString(SignedIntegerType value,
                                   OutputStringInterface* output_string);

 private:
  // Encodes "v" into the LAST few bytes of varint_buf (which is a char array
  // of size kMaxBytes) and returns the length of the encoding.
  // The result will be stored in
  // varint_buf[(kMaxBytes - length) : (kMaxBytes - 1)],
  // rather than in varint_buf[0 : length].
  // The value of v must not be negative.
  static int EncodeInternal(SignedIntegerType v, char* varint_buf);

  // Returns true if bytes_left <= kMaxBytes AND the next bytes_left bytes
  // starting at parse_ptr all have their continuation bit (most significant
  // bit) set.  This means that there may or may not be a valid encoded
  // varint at parse_ptr, but it cannot be read or validated until more
  // input is available.
  // NOTE(review): "bytes_left" is not a parameter; presumably it means
  // (limit - parse_ptr) -- confirm against the definition.
  static inline bool ReachedEndOfData(const char* parse_ptr,
                                      const char* limit);

  // These are private to avoid constructing any objects of this type
  VarintBE();
  VarintBE(const VarintBE&);  // NOLINT
  void operator=(const VarintBE&);
};

}  // namespace open_vcdiff

#endif  // OPEN_VCDIFF_VARINT_BIGENDIAN_H_
diff --git a/src/varint_bigendian_test.cc b/src/varint_bigendian_test.cc
new file mode 100644
index 0000000..83e180e
--- /dev/null
+++ b/src/varint_bigendian_test.cc
@@ -0,0 +1,322 @@
// Copyright 2008 Google Inc.
// Author: Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+ +#include <config.h> +#include "varint_bigendian.h" +#include <cstdlib> // rand, srand +#include <string> +#include <vector> +#include "logging.h" +#include "testing.h" + +namespace open_vcdiff { +namespace { + +using std::string; + +class VarintBETestCommon : public testing::Test { + protected: + VarintBETestCommon() + : varint_buf_(VarintBE<int64_t>::kMaxBytes), + verify_encoded_byte_index_(0), + verify_expected_length_(0), + parse_data_ptr_(parse_data_all_FFs) { + } + + virtual ~VarintBETestCommon() { } + + void ExpectEncodedByte(char expected_byte) { + EXPECT_EQ(expected_byte, varint_buf_[verify_encoded_byte_index_]); + EXPECT_EQ(expected_byte, s_[verify_encoded_byte_index_]); + ++verify_encoded_byte_index_; + } + + static const char parse_data_all_FFs[]; + static const char parse_data_CADA1[]; + + std::vector<char> varint_buf_; + string s_; + int verify_encoded_byte_index_; + int verify_expected_length_; + const char* parse_data_ptr_; +}; + +template <typename SignedIntegerType> +class VarintBETestTemplate : public VarintBETestCommon { + protected: + VarintBETestTemplate() { } + + virtual ~VarintBETestTemplate() { } + + typedef SignedIntegerType SignedIntType; + typedef VarintBE<SignedIntegerType> VarintType; + + void StartEncodingTest(SignedIntegerType v, int expected_length) { + verify_expected_length_ = expected_length; + EXPECT_EQ(expected_length, VarintType::Length(v)); + EXPECT_EQ(expected_length, VarintType::Encode(v, &varint_buf_[0])); + VarintType::AppendToString(v, &s_); + EXPECT_EQ(static_cast<size_t>(expected_length), s_.length()); + } + + void TestEncodeInvalid(SignedIntegerType v) { + EXPECT_DEATH(VarintType::Length(v), "v >= 0"); + EXPECT_DEATH(VarintType::Encode(v, &varint_buf_[0]), "v >= 0"); + EXPECT_DEATH(VarintType::AppendToString(v, &s_), ">= 0"); + } + + // Need one function for each test type that will be applied to + // multiple classes + void TemplateTestDISABLED_EncodeNegative(); + void TemplateTestEncodeZero(); + void 
TemplateTestEncodeEightBits(); + void TemplateTestEncodeCADAD1A(); + void TemplateTestEncode32BitMaxInt(); + void TemplateTestEncodeDoesNotOverwriteExistingString(); + void TemplateTestParseNullPointer(); + void TemplateTestEndPointerPrecedesBeginning(); + void TemplateTestParseVarintTooLong(); + void TemplateTestParseIncompleteVarint(); + void TemplateTestParseZero(); + void TemplateTestParseCADA1(); + void TemplateTestParseEmpty(); + void TemplateTestParse123456789(); + void TemplateTestDecode31Bits(); + void TemplateTestEncodeDecodeRandom(); +}; + +typedef VarintBETestTemplate<int32_t> VarintBEInt32Test; +typedef VarintBETestTemplate<int64_t> VarintBEInt64Test; + +#ifdef GTEST_HAS_DEATH_TEST +// These synonyms are needed for the tests that use ASSERT_DEATH +typedef VarintBEInt32Test VarintBEInt32DeathTest; +typedef VarintBEInt64Test VarintBEInt64DeathTest; +#endif // GTEST_HAS_DEATH_TEST + +const char VarintBETestCommon::parse_data_all_FFs[] = + { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; + +const char VarintBETestCommon::parse_data_CADA1[] = + { 0xCA, 0xDA, 0x01 }; + +// A macro to allow defining tests once and having them run against +// both VarintBE<int32_t> and VarintBE<int64_t>. 
+// +#define TEMPLATE_TEST_F(TEST_TYPE, TEST_NAME) \ + TEST_F(VarintBEInt32##TEST_TYPE, TEST_NAME) { \ + TemplateTest##TEST_NAME(); \ + } \ + TEST_F(VarintBEInt64##TEST_TYPE, TEST_NAME) { \ + TemplateTest##TEST_NAME(); \ + } \ + template <class CacheType> \ + void VarintBETestTemplate<CacheType>::TemplateTest##TEST_NAME() + +// Encoding tests: Length(), Encode(), AppendToString(), AppendToBuffer() + +#ifdef GTEST_HAS_DEATH_TEST +// This test hangs for non-debug build (DeathTest threading problem) +TEMPLATE_TEST_F(DeathTest, DISABLED_EncodeNegative) { + TestEncodeInvalid(-1); +} +#endif // GTEST_HAS_DEATH_TEST + +TEMPLATE_TEST_F(Test, EncodeZero) { + StartEncodingTest(/* value */ 0x00, /* expected length */ 1); + ExpectEncodedByte(0x00); + EXPECT_EQ(verify_expected_length_, verify_encoded_byte_index_); +} + +TEMPLATE_TEST_F(Test, EncodeEightBits) { + StartEncodingTest(/* value */ 0xFF, /* expected length */ 2); + ExpectEncodedByte(0x81); + ExpectEncodedByte(0x7F); + EXPECT_EQ(verify_expected_length_, verify_encoded_byte_index_); +} + +TEMPLATE_TEST_F(Test, EncodeCADAD1A) { + StartEncodingTest(/* value */ 0x0CADAD1A, /* expected length */ 4); + ExpectEncodedByte(0xE5); + ExpectEncodedByte(0xB6); + ExpectEncodedByte(0xDA); + ExpectEncodedByte(0x1A); + EXPECT_EQ(verify_expected_length_, verify_encoded_byte_index_); +} + +TEMPLATE_TEST_F(Test, Encode32BitMaxInt) { + StartEncodingTest(/* value */ 0x7FFFFFFF, /* expected length */ 5); + ExpectEncodedByte(0x87); + ExpectEncodedByte(0xFF); + ExpectEncodedByte(0xFF); + ExpectEncodedByte(0xFF); + ExpectEncodedByte(0x7F); + EXPECT_EQ(verify_expected_length_, verify_encoded_byte_index_); +} + +#ifdef GTEST_HAS_DEATH_TEST +// This test hangs for non-debug build (DeathTest threading problem) +TEST_F(VarintBEInt32DeathTest, DISABLED_Encode32BitsTooBig) { + TestEncodeInvalid(0x80000000); +} +#endif // GTEST_HAS_DEATH_TEST + +TEST_F(VarintBEInt64Test, Encode32Bits) { + StartEncodingTest(/* value */ 0x80000000, /* expected length */ 
5); + ExpectEncodedByte(0x88); + ExpectEncodedByte(0x80); + ExpectEncodedByte(0x80); + ExpectEncodedByte(0x80); + ExpectEncodedByte(0x00); + EXPECT_EQ(verify_expected_length_, verify_encoded_byte_index_); +} + +TEST_F(VarintBEInt64Test, Encode63Bits) { + StartEncodingTest(/* value */ 0x7FFFFFFFFFFFFFFFULL, /* expected length */ 9); + ExpectEncodedByte(0xFF); + ExpectEncodedByte(0xFF); + ExpectEncodedByte(0xFF); + ExpectEncodedByte(0xFF); + ExpectEncodedByte(0xFF); + ExpectEncodedByte(0xFF); + ExpectEncodedByte(0xFF); + ExpectEncodedByte(0xFF); + ExpectEncodedByte(0x7F); + EXPECT_EQ(verify_expected_length_, verify_encoded_byte_index_); +} + +#ifdef GTEST_HAS_DEATH_TEST +// This test hangs for non-debug build (DeathTest threading problem) +TEST_F(VarintBEInt64DeathTest, DISABLED_Encode64BitsTooBig) { + TestEncodeInvalid(0x8000000000000000ULL); +} +#endif // GTEST_HAS_DEATH_TEST + +TEMPLATE_TEST_F(Test, EncodeDoesNotOverwriteExistingString) { + s_.append("Test"); + VarintType::AppendToString('1', &s_); + EXPECT_EQ(strlen("Test1"), s_.length()); + EXPECT_EQ("Test1", s_); +} + +// Decoding tests: Parse(), ParseFromBuffer() + +TEMPLATE_TEST_F(Test, ParseVarintTooLong) { + EXPECT_EQ(RESULT_ERROR, + VarintType::Parse(parse_data_ptr_ + VarintType::kMaxBytes, + &parse_data_ptr_)); +} + +TEMPLATE_TEST_F(Test, ParseIncompleteVarint) { + EXPECT_EQ(RESULT_END_OF_DATA, + VarintType::Parse(parse_data_ptr_ + VarintType::kMaxBytes - 1, + &parse_data_ptr_)); +} + +TEMPLATE_TEST_F(Test, ParseZero) { + const char zero_data[] = { 0x00 }; + parse_data_ptr_ = zero_data; + EXPECT_EQ(0x00, VarintType::Parse(parse_data_ptr_ + 1, &parse_data_ptr_)); + EXPECT_EQ(zero_data + 1, parse_data_ptr_); +} + +TEMPLATE_TEST_F(Test, ParseCADA1) { + parse_data_ptr_ = parse_data_CADA1; + EXPECT_EQ(0x12AD01, + VarintType::Parse(parse_data_CADA1 + sizeof(parse_data_CADA1), + &parse_data_ptr_)); + EXPECT_EQ(parse_data_CADA1 + 3, parse_data_ptr_); +} + +#ifdef GTEST_HAS_DEATH_TEST +TEMPLATE_TEST_F(DeathTest, 
ParseNullPointer) { + // limit == NULL is not an error + parse_data_ptr_ = parse_data_CADA1; + EXPECT_EQ(0x12AD01, VarintType::Parse((const char*) NULL, &parse_data_ptr_)); +} +#endif // GTEST_HAS_DEATH_TEST + +TEMPLATE_TEST_F(Test, EndPointerPrecedesBeginning) { + // This is not an error. + parse_data_ptr_ = parse_data_CADA1; + EXPECT_EQ(0x12AD01, VarintType::Parse(parse_data_ptr_ - 1, &parse_data_ptr_)); +} + +TEMPLATE_TEST_F(Test, ParseEmpty) { + EXPECT_EQ(RESULT_END_OF_DATA, + VarintType::Parse(parse_data_ptr_, &parse_data_ptr_)); +} + +// This example is taken from the Varint description in RFC 3284, section 2. +TEMPLATE_TEST_F(Test, Parse123456789) { + const char parse_data_123456789[] = { 0x80 + 58, 0x80 + 111, 0x80 + 26, 21 }; + parse_data_ptr_ = parse_data_123456789; + EXPECT_EQ(123456789, VarintType::Parse(parse_data_123456789 + + sizeof(parse_data_123456789), + &parse_data_ptr_)); +} + +TEMPLATE_TEST_F(Test, Decode31Bits) { + const char parse_data_31_bits[] = { 0x87, 0xFF, 0xFF, 0xFF, 0x7F }; + parse_data_ptr_ = parse_data_31_bits; + EXPECT_EQ(0x7FFFFFFF, + VarintType::Parse(parse_data_31_bits + sizeof(parse_data_31_bits), + &parse_data_ptr_)); +} + +TEST_F(VarintBEInt32Test, Decode32Bits) { + const char parse_data_32_bits[] = { 0x88, 0x80, 0x80, 0x80, 0x00 }; + parse_data_ptr_ = parse_data_32_bits; + EXPECT_EQ(RESULT_ERROR, + VarintType::Parse(parse_data_32_bits + sizeof(parse_data_32_bits), + &parse_data_ptr_)); +} + +TEST_F(VarintBEInt64Test, Decode32Bits) { + const char parse_data_32_bits[] = { 0x88, 0x80, 0x80, 0x80, 0x00 }; + parse_data_ptr_ = parse_data_32_bits; + EXPECT_EQ(0x80000000, + VarintType::Parse(parse_data_32_bits + sizeof(parse_data_32_bits), + &parse_data_ptr_)); +} + +TEMPLATE_TEST_F(Test, EncodeDecodeRandom) { + const int test_size = 1024; // 1K random encode/decode operations + char encode_buffer[VarintType::kMaxBytes]; + srand(1); + for (int i = 0; i < test_size; ++i) { + SignedIntType value = 
PortableRandomInRange(VarintType::kMaxVal); + int length = VarintType::Encode(value, encode_buffer); + EXPECT_EQ(length, VarintType::Length(value)); + const char* parse_pointer = encode_buffer; + EXPECT_EQ(value, VarintType::Parse(encode_buffer + sizeof(encode_buffer), + &parse_pointer)); + EXPECT_EQ(encode_buffer + length, parse_pointer); + } + for (int i = 0; i < test_size; ++i) { + s_.clear(); + SignedIntType value = PortableRandomInRange(VarintType::kMaxVal); + VarintType::AppendToString(value, &s_); + const int varint_length = static_cast<int>(s_.length()); + EXPECT_EQ(VarintType::Length(value), varint_length); + const char* parse_pointer = s_.c_str(); + const char* const buffer_end_pointer = s_.c_str() + s_.length(); + EXPECT_EQ(value, VarintType::Parse(buffer_end_pointer, &parse_pointer)); + EXPECT_EQ(buffer_end_pointer, parse_pointer); + } +} + +} // anonymous namespace +} // namespace open_vcdiff diff --git a/src/vcdecoder.cc b/src/vcdecoder.cc new file mode 100644 index 0000000..b5ce997 --- /dev/null +++ b/src/vcdecoder.cc @@ -0,0 +1,1401 @@ +// Copyright 2008 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Implements a Decoder for the format described in +// RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format. 
// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
//
// The RFC describes the possibility of using a secondary compressor
// to further reduce the size of each section of the VCDIFF output.
// That feature is not supported in this implementation of the encoder
// and decoder.
// No secondary compressor types have been publicly registered with
// the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
// in the more than five years since the registry was created, so there
// is no standard set of compressor IDs which would be generated by other
// encoders or accepted by other decoders.

#include <config.h>
#include "google/vcdecoder.h"
#include <stdint.h>  // int32_t
#include <cstddef>  // size_t, ptrdiff_t
#include <memory>  // auto_ptr
#include <string>
#include "addrcache.h"
#include "checksum.h"
#include "codetable.h"
#include "decodetable.h"
#include "headerparser.h"
#include "logging.h"
#include "google/output_string.h"
#include "varint_bigendian.h"
#include "vcdiff_defs.h"

namespace open_vcdiff {

namespace {

using std::string;

// The kinds of decoded data that can be marked up in annotated output.
// The enumerators take the values 0, 1, 2, in the same order as the entries
// of the two tag tables below.
enum VCDiffAnnotationType {
  VCD_ANNOTATION_LITERAL,
  VCD_ANNOTATION_DMATCH,
  VCD_ANNOTATION_BMATCH
};

// Opening tag for each annotation type.
// NOTE(review): presumably indexed by VCDiffAnnotationType -- confirm
// against the code that writes annotated output.
static const char* kAnnotationStartTags[] = {
  "<literal>",
  "<dmatch>",
  "<bmatch>"
};

// Closing tag for each annotation type, in the same order as
// kAnnotationStartTags.
static const char* kAnnotationEndTags[] = {
  "</literal>",
  "</dmatch>",
  "</bmatch>"
};

}  // anonymous namespace

// This class is used to parse delta file windows as described
// in RFC sections 4.2 and 4.3.  Its methods are not thread-safe.
//
// Here is the window format copied from the RFC:
//
// Window1
//     Win_Indicator                            - byte
//     [Source segment size]                    - integer
//     [Source segment position]                - integer
//     The delta encoding of the target window
//         Length of the delta encoding         - integer
//         The delta encoding
//             Size of the target window        - integer
//             Delta_Indicator                  - byte
//             Length of data for ADDs and RUNs - integer
//             Length of instructions and sizes - integer
//             Length of addresses for COPYs    - integer
//             Data section for ADDs and RUNs   - array of bytes
//             Instructions and sizes section   - array of bytes
//             Addresses section for COPYs      - array of bytes
// Window2
// ...
//
// Sample usage:
//
// VCDiffDeltaFileWindow delta_window_;
// delta_window_.Init(parent);
// ParseableChunk parseable_chunk(input_buffer,
//                                input_size,
//                                leftover_unencoded_bytes);
// switch (delta_window_.DecodeWindows(&parseable_chunk)) {
//   case RESULT_END_OF_DATA:
//     <Read more input and retry DecodeWindows later.>
//   case RESULT_ERROR:
//     <Handle error case.  An error log message has already been generated.>
// }
//
// DecodeWindows consumes as many windows from the input as it can.  It only
// needs to be placed within a loop if the loop is used to obtain more input
// (delta file) data.
//
class VCDiffDeltaFileWindow {
 public:
  VCDiffDeltaFileWindow();
  ~VCDiffDeltaFileWindow();

  // Init() should be called immediately after constructing the
  // VCDiffDeltaFileWindow().  It must be called before DecodeWindows() can be
  // invoked, or an error will occur.
  void Init(VCDiffStreamingDecoderImpl* parent);

  // Resets the pointers to the data sections in the current window.
  void Reset();

  // Forwards to reader_.UseCodeTable() and returns its result.
  bool UseCodeTable(const VCDiffCodeTableData& code_table_data,
                    unsigned char max_mode) {
    return reader_.UseCodeTable(code_table_data, max_mode);
  }

  // Decodes as many delta windows as possible using the input data from
  // *parseable_chunk.  Appends the decoded target windows to
  // parent_->decoded_target().  If annotated output is enabled, appends
  // annotated output to parent_->annotated_output().  Returns RESULT_SUCCESS on
  // success, or RESULT_END_OF_DATA if the end of input was reached before the
  // entire window could be decoded and more input is expected (only possible if
  // IsInterleaved() is true), or RESULT_ERROR if an error occurred during
  // decoding.  In the RESULT_ERROR case, the value of parseable_chunk->pointer_
  // is undefined; otherwise, parseable_chunk->Advance() is called to point to
  // the input data position just after the data that has been decoded.
  //
  // If expected_target_bytes is not set to kUnlimitedBytes, then the decoder
  // expects *exactly* this number of target bytes to be decoded from one or
  // more delta file windows.  If this number is met exactly after finishing a
  // delta window, this function will return RESULT_SUCCESS without processing
  // any more bytes from *parseable_chunk.  If this number is exceeded while
  // decoding a window, but was not met before starting that window,
  // then RESULT_ERROR will be returned.
  //
  VCDiffResult DecodeWindows(ParseableChunk* parseable_chunk);

  // Returns the current value of found_header_ (see its description below).
  bool FoundWindowHeader() const {
    return found_header_;
  }

  bool MoreDataExpected() const {
    // When parsing an interleaved-format delta file,
    // every time DecodeBody() exits, interleaved_bytes_expected_
    // will be decremented by the number of bytes parsed.  If it
    // reaches zero, then there is no more data expected because
    // the size of the interleaved section (given in the window
    // header) has been reached.
    return IsInterleaved() && (interleaved_bytes_expected_ > 0);
  }

  // Returns the number of bytes remaining to be decoded in the target window.
  // If not in the process of decoding a window, returns 0.
  size_t TargetBytesRemaining();

  // Allocates the annotated output string if it does not already exist.
  // A non-NULL annotated_output_ is what marks the feature as enabled.
  void EnableAnnotatedOutput() {
    if (!annotated_output_.get()) {
      annotated_output_.reset(new string);
    }
  }

  // Releases the annotated output string, which disables the feature.
  void DisableAnnotatedOutput() {
    annotated_output_.reset(NULL);
  }

 private:
  // Reads the header of the window section as described in RFC sections 4.2 and
  // 4.3, up to and including the value "Length of addresses for COPYs".  If the
  // entire header is found, this function sets up the DeltaWindowSections
  // instructions_and_sizes_, data_for_add_and_run_, and addresses_for_copy_ so
  // that the decoder can begin decoding the opcodes in these sections.  Returns
  // RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA if the end of
  // available data was reached before the entire header could be read.  (The
  // latter may be an error condition if there is no more data available.)
  // Otherwise, returns RESULT_SUCCESS and advances parseable_chunk past the
  // parsed header.
  //
  VCDiffResult ReadHeader(ParseableChunk* parseable_chunk);

  // After the window header has been parsed as far as the Delta_Indicator,
  // this function is called to parse the following delta window header fields:
  //
  //     Length of data for ADDs and RUNs - integer (VarintBE format)
  //     Length of instructions and sizes - integer (VarintBE format)
  //     Length of addresses for COPYs    - integer (VarintBE format)
  //
  // If has_checksum_ is true, it also looks for the following element:
  //
  //     Adler32 checksum            - unsigned 32-bit integer (VarintBE format)
  //
  // It sets up the DeltaWindowSections instructions_and_sizes_,
  // data_for_add_and_run_, and addresses_for_copy_.  If the interleaved format
  // is being used, all three sections will include the entire window body; if
  // the standard format is used, three non-overlapping window sections will be
  // defined.  Returns RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA
  // if standard format is being used and there is not enough input data to read
  // the entire window body.  Otherwise, returns RESULT_SUCCESS.
  VCDiffResult SetUpWindowSections(VCDiffHeaderParser* header_parser);

  // Decodes the body of the window section as described in RFC sections 4.3,
  // including the sections "Data section for ADDs and RUNs", "Instructions
  // and sizes section", and "Addresses section for COPYs".  These sections
  // must already have been set up by ReadHeader().  Returns a
  // non-negative value on success, or RESULT_END_OF_DATA if the end of input
  // was reached before the entire window could be decoded (only possible if
  // IsInterleaved() is true), or RESULT_ERROR if an error occurred during
  // decoding.  Appends as much of the decoded target window as possible to
  // parent->decoded_target().
  //
  int DecodeBody(ParseableChunk* parseable_chunk);

  // Returns the number of bytes already decoded into the target window.
  size_t TargetBytesDecoded();

  // Decodes a single ADD instruction, updating parent_->decoded_target_.
  VCDiffResult DecodeAdd(size_t size);

  // Decodes a single RUN instruction, updating parent_->decoded_target_.
  VCDiffResult DecodeRun(size_t size);

  // Decodes a single COPY instruction, updating parent_->decoded_target_.
  VCDiffResult DecodeCopy(size_t size, unsigned char mode);

  // When using the interleaved format, this function is called both on parsing
  // the header and on resuming after a RESULT_END_OF_DATA was returned from a
  // previous call to DecodeBody().  It sets up all three section pointers to
  // reference the same interleaved stream of instructions, sizes, addresses,
  // and data.  These pointers must be reset every time that work resumes on a
  // delta window, because the input data string may have been changed or
  // resized since DecodeBody() last returned.
  void UpdateInterleavedSectionPointers(const char* data_pos,
                                        const char* data_end) {
    const ptrdiff_t available_data = data_end - data_pos;
    // Don't read past the end of currently-available data
    if (available_data > interleaved_bytes_expected_) {
      instructions_and_sizes_.Init(data_pos, interleaved_bytes_expected_);
    } else {
      instructions_and_sizes_.Init(data_pos, available_data);
    }
    // The other two sections are initialized from instructions_and_sizes_
    // itself rather than from separate data ranges.
    data_for_add_and_run_.Init(&instructions_and_sizes_);
    addresses_for_copy_.Init(&instructions_and_sizes_);
  }

  // If true, the interleaved format described in AllowInterleaved() is used
  // for the current delta file.  Only valid after ReadHeader() has been
  // called and returned RESULT_SUCCESS (i.e., the whole header was parsed),
  // but before the window has finished decoding.
  //
  bool IsInterleaved() const {
    // If the sections are interleaved, both addresses_for_copy_ and
    // data_for_add_and_run_ should point at instructions_and_sizes_.
    return !addresses_for_copy_.IsOwned();
  }

  // Executes a single COPY or ADD instruction, appending data to
  // parent_->decoded_target().
  void CopyBytes(const char* data,
                 size_t size,
                 VCDiffAnnotationType annotation_type);

  // Executes a single RUN instruction, appending data to
  // parent_->decoded_target().
  void RunByte(unsigned char byte, size_t size);

  // Appends the annotated output gathered so far to *annotated_output.
  // Does nothing if annotated output is disabled.
  void AppendAnnotatedOutput(string* annotated_output) {
    if (annotated_output_.get()) {
      annotated_output->append(*annotated_output_.get());
    }
  }

  // Advance *parseable_chunk to point to the current position in the
  // instructions/sizes section.  If interleaved format is used, then
  // decrement the number of expected bytes in the instructions/sizes section
  // by the number of instruction/size bytes parsed.
  void UpdateInstructionPointer(ParseableChunk* parseable_chunk);

  // The parent object which was passed to Init().
  VCDiffStreamingDecoderImpl* parent_;

  // This value will be true if VCDiffDeltaFileWindow::ReadHeader()
  // has been called and succeeded in parsing the delta window header, but the
  // entire window has not yet been decoded.
  bool found_header_;

  // Contents and length of the current source window.  source_segment_ptr_
  // will be non-NULL if (a) the window section header for the current window
  // has been read, but the window has not yet finished decoding; or
  // (b) the window did not specify a source segment.
  const char* source_segment_ptr_;
  size_t source_segment_length_;

  // The delta encoding window sections as defined in RFC section 4.3.
  // The pointer for each section will be incremented as data is consumed and
  // decoded from that section.  If the interleaved format is used,
  // data_for_add_and_run_ and addresses_for_copy_ will both point to
  // instructions_and_sizes_; otherwise, they will be separate data sections.
  //
  DeltaWindowSection instructions_and_sizes_;
  DeltaWindowSection data_for_add_and_run_;
  DeltaWindowSection addresses_for_copy_;

  // The expected bytes left to decode in instructions_and_sizes_.  Only used
  // for the interleaved format.
  int interleaved_bytes_expected_;

  // The expected length of the target window once it has been decoded.
  size_t target_window_length_;

  // The index in decoded_target at which the first byte of the current
  // target window was/will be written.
  size_t target_window_start_pos_;

  // If has_checksum_ is true, then expected_checksum_ contains an Adler32
  // checksum of the target window data.  This is an extension included in the
  // VCDIFF 'S' (SDCH) format, but is not part of the RFC 3284 draft standard.
  bool has_checksum_;
  VCDChecksum expected_checksum_;

  VCDiffCodeTableReader reader_;

  // This value is initialized to NULL, which means that annotated output is
  // disabled.  If EnableAnnotatedOutput() is called, it will be set to point
  // to a new string object, and annotated output will be gathered into that
  // string.
  std::auto_ptr<string> annotated_output_;

  // Making these private avoids implicit copy constructor & assignment operator
  VCDiffDeltaFileWindow(const VCDiffDeltaFileWindow&);  // NOLINT
  void operator=(const VCDiffDeltaFileWindow&);
};

class VCDiffStreamingDecoderImpl {
 public:
  // A constant that is the default value for expected_target_bytes_,
  // indicating that the decoder does not have an expected length
  // for the target data.
  static const size_t kUnlimitedBytes = static_cast<size_t>(-3);

  VCDiffStreamingDecoderImpl();
  ~VCDiffStreamingDecoderImpl();

  // Resets all member variables to their initial states.
  void Reset();

  // These functions are identical to their counterparts
  // in VCDiffStreamingDecoder.
  //
  void StartDecoding(const char* dictionary_ptr, size_t dictionary_size);

  bool DecodeChunk(const char* data,
                   size_t len,
                   OutputStringInterface* output_string);

  bool FinishDecoding();

  // If true, the version of VCDIFF used in the current delta file allows
  // for the interleaved format, in which instructions, addresses and data
  // are all sent interleaved in the instructions section of each window
  // rather than being sent in separate sections.  This is not part of
  // the VCDIFF draft standard, so we've defined a special version code
  // 'S' which implies that this feature is available.  Even if interleaving
  // is supported, it is not mandatory; interleaved format will be implied
  // if the address and data sections are both zero-length.
  //
  bool AllowInterleaved() const { return vcdiff_version_code_ == 'S'; }

  // If true, the version of VCDIFF used in the current delta file allows
  // each delta window to contain an Adler32 checksum of the target window data.
  // If the bit 0x08 (VCD_CHECKSUM) is set in the Win_Indicator flags, then
  // this checksum will appear as a variable-length integer, just after the
  // "length of addresses for COPYs" value and before the window data sections.
  // It is possible for some windows in a delta file to use the checksum feature
  // and for others not to use it (and leave the flag bit set to 0.)
  // Just as with AllowInterleaved(), this extension is not part of the draft
  // standard and is only available when the version code 'S' is specified.
  //
  bool AllowChecksum() const { return vcdiff_version_code_ == 'S'; }

  // See description of expected_target_bytes_, below.
  // Returns true unless expected_target_bytes_ holds kUnlimitedBytes.
  bool HasTargetByteLimit() const {
    return expected_target_bytes_ != kUnlimitedBytes;
  }

  // Stores expected_target_bytes as the target byte limit.  Passing
  // kUnlimitedBytes makes HasTargetByteLimit() return false again.
  void SetTargetByteLimit(size_t expected_target_bytes) {
    expected_target_bytes_ = expected_target_bytes;
  }

  // Checks to see whether the decoded target data has reached the expected
  // size.  Always returns false when no target byte limit is set.
  bool MetTargetByteLimit() const {
    if (!HasTargetByteLimit()) {
      return false;
    }
    // The target byte limit should not have been exceeded, because each target
    // window size is checked against that limit in ReadHeader(), and
    // DecodeBody() will return RESULT_ERROR if the actual decoded output ever
    // exceeds the advertised target window size.
    if (decoded_target_.size() > expected_target_bytes_) {
      LOG(DFATAL) << "Internal error: Decoded data size "
                  << decoded_target_.size()
                  << " exceeds target byte limit "
                  << expected_target_bytes_ << LOG_ENDL;
      // Reached only if LOG(DFATAL) returns; an exceeded limit is then
      // reported as "met".
      return true;
    }
    return decoded_target_.size() == expected_target_bytes_;
  }

  // Checks to see whether adding a new target window of the specified size
  // would exceed the expected target size.  If so, logs an error and returns
  // true; otherwise, returns false.
+ bool TargetWindowWouldExceedTargetByteLimit(size_t window_size) const { + if (!HasTargetByteLimit()) { + return false; + } + // The logical expression to check would be: + // + // decoded_target_.size() + target_bytes_to_add > expected_target_bytes_ + // + // but the addition might cause an integer overflow if target_bytes_to_add + // is very large. So it is better to check target_bytes_to_add against + // the remaining expected target bytes. + size_t remaining_expected_target_bytes = + expected_target_bytes_ - decoded_target_.size(); + if (window_size > remaining_expected_target_bytes) { + LOG(ERROR) << "Length of target window (" << window_size + << " bytes) plus previous windows (" << decoded_target_.size() + << " bytes) would exceed expected size of " + << expected_target_bytes_ << " bytes" << LOG_ENDL; + return true; + } else { + return false; + } + } + + // Returns the amount of input data passed to the last DecodeChunk() + // that was not consumed by the decoder. This is essential if + // SetExpectedTargetBytes() is being used, in order to preserve + // the input data stream beyond the expected encoding. + size_t GetUnconsumedDataSize() const { + return unparsed_bytes_.size(); + } + + // This function will return true if the decoder has parsed a complete delta + // file header plus zero or more delta file windows, with no data left over. + // It will also return true if no delta data at all was decoded. If these + // conditions are not met, then FinishDecoding() should not be called. + bool IsDecodingComplete() const { + if (!FoundFileHeader()) { + // No complete delta file header has been parsed yet. DecodeChunk() + // may have received some data that it hasn't yet parsed, in which case + // decoding is incomplete. + return unparsed_bytes_.empty(); + } else if (custom_code_table_decoder_.get()) { + // The decoder is in the middle of parsing a custom code table. 
+ return false; + } else if (delta_window_.FoundWindowHeader()) { + // The decoder is in the middle of parsing an interleaved format delta + // window. + return false; + } else if (MetTargetByteLimit()) { + // The decoder found exactly the expected number of bytes. In this case + // it is OK for unparsed_bytes_ to be non-empty; it contains the leftover + // data after the end of the delta file. + return true; + } else { + // No complete delta file window has been parsed yet. DecodeChunk() + // may have received some data that it hasn't yet parsed, in which case + // decoding is incomplete. + return unparsed_bytes_.empty(); + } + } + + const char* dictionary_ptr() const { return dictionary_ptr_; } + + size_t dictionary_size() const { return dictionary_size_; } + + VCDiffAddressCache* addr_cache() { return addr_cache_.get(); } + + string* decoded_target() { return &decoded_target_; } + + string* annotated_output() { return &annotated_output_; } + + // The variable that determines whether annotated output is enabled is + // delta_window_.annotated_output_. If that member is NULL, then the feature + // is disabled. + void EnableAnnotatedOutput() { + delta_window_.EnableAnnotatedOutput(); + } + + void DisableAnnotatedOutput() { + delta_window_.DisableAnnotatedOutput(); + } + + void GetAnnotatedOutput(OutputStringInterface* annotated_output) { + // We could use annotated_output->assign(), but that method is not defined + // for some output string types, so use clear() + append() to accomplish the + // same thing. + annotated_output->clear(); + annotated_output->append(annotated_output_.data(), + annotated_output_.size()); + } + + private: + // Reads the VCDiff delta file header section as described in RFC section 4.1, + // except the custom code table data. Returns RESULT_ERROR if an error + // occurred, or RESULT_END_OF_DATA if the end of available data was reached + // before the entire header could be read. 
(The latter may be an error + // condition if there is no more data available.) Otherwise, advances + // data->position_ past the header and returns RESULT_SUCCESS. + // + VCDiffResult ReadDeltaFileHeader(ParseableChunk* data); + + // Indicates whether or not the header has already been read. + bool FoundFileHeader() const { return addr_cache_.get() != NULL; } + + // If ReadDeltaFileHeader() finds the VCD_CODETABLE flag set within the delta + // file header, this function parses the custom cache sizes and initializes + // a nested VCDiffStreamingDecoderImpl object that will be used to parse the + // custom code table in ReadCustomCodeTable(). Returns RESULT_ERROR if an + // error occurred, or RESULT_END_OF_DATA if the end of available data was + // reached before the custom cache sizes could be read. Otherwise, returns + // the number of bytes read. + // + int InitCustomCodeTable(const char* data_start, const char* data_end); + + // If a custom code table was specified in the header section that was parsed + // by ReadDeltaFileHeader(), this function makes a recursive call to another + // VCDiffStreamingDecoderImpl object (custom_code_table_decoder_), since the + // custom code table is expected to be supplied as an embedded VCDIFF + // encoding that uses the standard code table. Returns RESULT_ERROR if an + // error occurs, or RESULT_END_OF_DATA if the end of available data was + // reached before the entire custom code table could be read. Otherwise, + // returns RESULT_SUCCESS and sets *data_ptr to the position after the encoded + // custom code table. If the function returns RESULT_SUCCESS or + // RESULT_END_OF_DATA, it advances data->position_ past the parsed bytes. + // + VCDiffResult ReadCustomCodeTable(ParseableChunk* data); + + // Contents and length of the source (dictionary) data. 
+ const char* dictionary_ptr_; + size_t dictionary_size_; + + // This string will be used to store any unparsed bytes left over when + // DecodeChunk() reaches the end of its input and returns RESULT_END_OF_DATA. + // It will also be used to concatenate those unparsed bytes with the data + // supplied to the next call to DecodeChunk(), so that they appear in + // contiguous memory. + string unparsed_bytes_; + + // The portion of the target file that has been decoded so far. This will be + // used to fill the output string for DecodeChunk(), and will also be used to + // execute COPY instructions that reference target data. Since the source + // window can come from a range of addresses in the previously decoded target + // data, the entire target file needs to be available to the decoder, not just + // the current target window. + string decoded_target_; + + // The VCDIFF version byte (also known as "header4") from the + // delta file header. + unsigned char vcdiff_version_code_; + + VCDiffDeltaFileWindow delta_window_; + + std::auto_ptr<VCDiffAddressCache> addr_cache_; + + // Will be NULL unless a custom code table has been defined. + std::auto_ptr<VCDiffCodeTableData> custom_code_table_; + + // Used to receive the decoded custom code table. + string custom_code_table_string_; + + // If a custom code table is specified, it will be expressed + // as an embedded VCDIFF delta file which uses the default code table + // as the source file (dictionary). Use a child decoder object + // to decode that delta file. + std::auto_ptr<VCDiffStreamingDecoderImpl> custom_code_table_decoder_; + + // If set, then the decoder is expecting *exactly* this number of + // target bytes to be decoded from one or more delta file windows. + // If this number is exceeded while decoding a window, but was not met + // before starting on that window, an error will be reported. + // If FinishDecoding() is called before this number is met, an error + // will also be reported. 
This feature is used for decoding the + // embedded code table data within a VCDIFF delta file; we want to + // stop processing the embedded data once the entire code table has + // been decoded, and treat the rest of the available data as part + // of the enclosing delta file. + size_t expected_target_bytes_; + + // This string will always be empty until EnableAnnotatedOutput() is called, + // at which point it will start to accumulate annotated delta windows each + // time DecodeChunk() finishes a window. It will be cleared each time that + // StartDecoding() is called. + string annotated_output_; + + // This value is used to ensure the correct order of calls to the interface + // functions, i.e., a single call to StartDecoding(), followed by zero or + // more calls to DecodeChunk(), followed by a single call to + // FinishDecoding(). + bool start_decoding_was_called_; + + // Making these private avoids implicit copy constructor & assignment operator + VCDiffStreamingDecoderImpl(const VCDiffStreamingDecoderImpl&); // NOLINT + void operator=(const VCDiffStreamingDecoderImpl&); +}; + +// *** Methods for VCDiffStreamingDecoderImpl + +VCDiffStreamingDecoderImpl::VCDiffStreamingDecoderImpl() { + delta_window_.Init(this); + Reset(); +} + +// Reset() will delete the component objects without reallocating them. 
+VCDiffStreamingDecoderImpl::~VCDiffStreamingDecoderImpl() { Reset(); } + +void VCDiffStreamingDecoderImpl::Reset() { + start_decoding_was_called_ = false; + dictionary_ptr_ = NULL; + dictionary_size_ = 0; + vcdiff_version_code_ = '\0'; + expected_target_bytes_ = kUnlimitedBytes; + addr_cache_.reset(); + custom_code_table_.reset(); + custom_code_table_decoder_.reset(); + delta_window_.Reset(); +} + +void VCDiffStreamingDecoderImpl::StartDecoding(const char* dictionary_ptr, + size_t dictionary_size) { + if (start_decoding_was_called_) { + LOG(DFATAL) << "StartDecoding() called twice without FinishDecoding()" + << LOG_ENDL; + return; + } + unparsed_bytes_.clear(); + decoded_target_.clear(); // delta_window_.Reset() depends on this + annotated_output_.clear(); + Reset(); + dictionary_ptr_ = dictionary_ptr; + dictionary_size_ = dictionary_size; + start_decoding_was_called_ = true; +} + +// Reads the VCDiff delta file header section as described in RFC section 4.1: +// +// Header1 - byte = 0xD6 (ASCII 'V' | 0x80) +// Header2 - byte = 0xC3 (ASCII 'C' | 0x80) +// Header3 - byte = 0xC4 (ASCII 'D' | 0x80) +// Header4 - byte +// Hdr_Indicator - byte +// [Secondary compressor ID] - byte +// [Length of code table data] - integer +// [Code table data] +// +// Initializes the code table and address cache objects. Returns RESULT_ERROR +// if an error occurred, and RESULT_END_OF_DATA if the end of available data was +// reached before the entire header could be read. (The latter may be an error +// condition if there is no more data available.) Otherwise, returns +// RESULT_SUCCESS, and removes the header bytes from the data string. +// +// It's relatively inefficient to expect this function to parse any number of +// input bytes available, down to 1 byte, but it is necessary in case the input +// is not a properly formatted VCDIFF delta file. 
If the entire input consists +// of two bytes "12", then we should recognize that it does not match the +// initial VCDIFF magic number "VCD" and report an error, rather than waiting +// indefinitely for more input that will never arrive. +// +VCDiffResult VCDiffStreamingDecoderImpl::ReadDeltaFileHeader( + ParseableChunk* data) { + if (FoundFileHeader()) { + return RESULT_SUCCESS; + } + size_t data_size = data->UnparsedSize(); + const DeltaFileHeader* header = + reinterpret_cast<const DeltaFileHeader*>(data->UnparsedData()); + bool wrong_magic_number = false; + switch (data_size) { + // Verify only the bytes that are available. + default: + // Found header contents up to and including VCDIFF version + vcdiff_version_code_ = header->header4; + if ((vcdiff_version_code_ != 0x00) && // Draft standard VCDIFF (RFC 3284) + (vcdiff_version_code_ != 'S')) { // Enhancements for SDCH protocol + LOG(ERROR) << "Unrecognized VCDIFF format version" << LOG_ENDL; + return RESULT_ERROR; + } + // fall through + case 3: + if (header->header3 != 0xC4) { // magic value 'D' | 0x80 + wrong_magic_number = true; + } + // fall through + case 2: + if (header->header2 != 0xC3) { // magic value 'C' | 0x80 + wrong_magic_number = true; + } + // fall through + case 1: + if (header->header1 != 0xD6) { // magic value 'V' | 0x80 + wrong_magic_number = true; + } + // fall through + case 0: + if (wrong_magic_number) { + LOG(ERROR) << "Did not find VCDIFF header bytes; " + "input is not a VCDIFF delta file" << LOG_ENDL; + return RESULT_ERROR; + } + if (data_size < sizeof(DeltaFileHeader)) return RESULT_END_OF_DATA; + } + // Secondary compressor not supported. 
+ if (header->hdr_indicator & VCD_DECOMPRESS) { + LOG(ERROR) << "Secondary compression is not supported" << LOG_ENDL; + return RESULT_ERROR; + } + if (header->hdr_indicator & VCD_CODETABLE) { + int bytes_parsed = InitCustomCodeTable( + data->UnparsedData() + sizeof(DeltaFileHeader), + data->End()); + switch (bytes_parsed) { + case RESULT_ERROR: + return RESULT_ERROR; + case RESULT_END_OF_DATA: + return RESULT_END_OF_DATA; + default: + data->Advance(sizeof(DeltaFileHeader) + bytes_parsed); + } + } else { + addr_cache_.reset(new VCDiffAddressCache); + // addr_cache_->Init() will be called + // from VCDiffStreamingDecoderImpl::DecodeChunk() + data->Advance(sizeof(DeltaFileHeader)); + } + return RESULT_SUCCESS; +} + +int VCDiffStreamingDecoderImpl::InitCustomCodeTable(const char* data_start, + const char* data_end) { + // A custom code table is being specified. Parse the variable-length + // cache sizes and begin parsing the encoded custom code table. + int32_t near_cache_size = 0, same_cache_size = 0; + VCDiffHeaderParser header_parser(data_start, data_end); + if (!header_parser.ParseInt32("size of near cache", &near_cache_size)) { + return header_parser.GetResult(); + } + if (!header_parser.ParseInt32("size of same cache", &same_cache_size)) { + return header_parser.GetResult(); + } + custom_code_table_.reset(new struct VCDiffCodeTableData); + memset(custom_code_table_.get(), 0, sizeof(struct VCDiffCodeTableData)); + custom_code_table_string_.clear(); + addr_cache_.reset(new VCDiffAddressCache(near_cache_size, same_cache_size)); + // addr_cache_->Init() will be called + // from VCDiffStreamingDecoderImpl::DecodeChunk() + + // If we reach this point (the start of the custom code table) + // without encountering a RESULT_END_OF_DATA condition, then we won't call + // ReadDeltaFileHeader() again for this delta file. + // + // Instantiate a recursive decoder to interpret the custom code table + // as a VCDIFF encoding of the default code table. 
+ custom_code_table_decoder_.reset(new VCDiffStreamingDecoderImpl); + custom_code_table_decoder_->StartDecoding( + reinterpret_cast<const char*>( + &VCDiffCodeTableData::kDefaultCodeTableData), + sizeof(VCDiffCodeTableData::kDefaultCodeTableData)); + custom_code_table_decoder_->SetTargetByteLimit(sizeof(*custom_code_table_)); + return static_cast<int>(header_parser.ParsedSize()); +} + +VCDiffResult VCDiffStreamingDecoderImpl::ReadCustomCodeTable( + ParseableChunk* data) { + if (!custom_code_table_decoder_.get()) { + return RESULT_SUCCESS; + } + if (!custom_code_table_.get()) { + LOG(DFATAL) << "Internal error: custom_code_table_decoder_ is set," + " but custom_code_table_ is NULL" << LOG_ENDL; + return RESULT_ERROR; + } + OutputString<string> output_string(&custom_code_table_string_); + if (!custom_code_table_decoder_->DecodeChunk(data->UnparsedData(), + data->UnparsedSize(), + &output_string)) { + return RESULT_ERROR; + } + if (custom_code_table_string_.length() < sizeof(*custom_code_table_)) { + // Skip over the consumed data. + data->Finish(); + return RESULT_END_OF_DATA; + } + if (!custom_code_table_decoder_->FinishDecoding()) { + return RESULT_ERROR; + } + if (custom_code_table_string_.length() != sizeof(*custom_code_table_)) { + LOG(DFATAL) << "Decoded custom code table size " + << custom_code_table_string_.length() + << " does not match expected size " + << sizeof(*custom_code_table_) << LOG_ENDL; + return RESULT_ERROR; + } + memcpy(custom_code_table_.get(), + custom_code_table_string_.data(), + sizeof(*custom_code_table_)); + custom_code_table_string_.clear(); + // Skip over the consumed data. 
+ data->FinishExcept(custom_code_table_decoder_->GetUnconsumedDataSize()); + custom_code_table_decoder_.reset(); + delta_window_.UseCodeTable(*custom_code_table_, addr_cache_->LastMode()); + return RESULT_SUCCESS; +} + +namespace { + +class TrackNewOutputText { + public: + explicit TrackNewOutputText(const string& decoded_target) + : decoded_target_(decoded_target), + initial_decoded_target_size_(decoded_target.size()) { } + + void AppendNewOutputText(size_t target_bytes_remaining, + OutputStringInterface* output_string) { + const size_t bytes_decoded_this_chunk = + decoded_target_.size() - initial_decoded_target_size_; + if (bytes_decoded_this_chunk > 0) { + if (target_bytes_remaining > 0) { + // The decoder is midway through decoding a target window. Resize + // output_string to match the expected length. The interface guarantees + // not to resize the output_string more than once per target window + // decoded. + output_string->ReserveAdditionalBytes(bytes_decoded_this_chunk + + target_bytes_remaining); + } + output_string->append( + decoded_target_.data() + initial_decoded_target_size_, + bytes_decoded_this_chunk); + } + } + + private: + const string& decoded_target_; + size_t initial_decoded_target_size_; +}; + +} // anonymous namespace + +bool VCDiffStreamingDecoderImpl::DecodeChunk( + const char* data, + size_t len, + OutputStringInterface* output_string) { + if (!start_decoding_was_called_) { + LOG(DFATAL) << "DecodeChunk() called without StartDecoding()" << LOG_ENDL; + Reset(); + return false; + } + ParseableChunk parseable_chunk(data, len); + if (!unparsed_bytes_.empty()) { + unparsed_bytes_.append(data, len); + parseable_chunk.SetDataBuffer(unparsed_bytes_.data(), + unparsed_bytes_.size()); + } + TrackNewOutputText output_tracker(decoded_target_); + VCDiffResult result = ReadDeltaFileHeader(&parseable_chunk); + if (RESULT_SUCCESS == result) { + result = ReadCustomCodeTable(&parseable_chunk); + } + if (RESULT_SUCCESS == result) { + result = 
delta_window_.DecodeWindows(&parseable_chunk); + } + if (RESULT_ERROR == result) { + Reset(); // Don't allow further DecodeChunk calls + return false; + } + unparsed_bytes_.assign(parseable_chunk.UnparsedData(), + parseable_chunk.UnparsedSize()); + output_tracker.AppendNewOutputText(delta_window_.TargetBytesRemaining(), + output_string); + return true; +} + +// Finishes decoding after all data has been received. Returns true +// if decoding of the entire stream was successful. +bool VCDiffStreamingDecoderImpl::FinishDecoding() { + bool success = true; + if (!start_decoding_was_called_) { + LOG(WARNING) << "FinishDecoding() called before StartDecoding()," + " or called after DecodeChunk() returned false" + << LOG_ENDL; + success = false; + } else if (!IsDecodingComplete()) { + LOG(ERROR) << "FinishDecoding() called before parsing entire" + " delta file window" << LOG_ENDL; + success = false; + } + // Reset the object state for the next decode operation + Reset(); + return success; +} + +// *** Methods for VCDiffDeltaFileWindow + +inline VCDiffDeltaFileWindow::VCDiffDeltaFileWindow() : parent_(NULL) { + Reset(); +} + +inline VCDiffDeltaFileWindow::~VCDiffDeltaFileWindow() { } + +inline void VCDiffDeltaFileWindow::Init(VCDiffStreamingDecoderImpl* parent) { + parent_ = parent; +} + +void VCDiffDeltaFileWindow::Reset() { + found_header_ = false; + + // Mark the start of the current target window. + target_window_start_pos_ = parent_ ? 
parent_->decoded_target()->size() : 0U; + target_window_length_ = 0; + + source_segment_ptr_ = NULL; + source_segment_length_ = 0; + + instructions_and_sizes_.Invalidate(); + data_for_add_and_run_.Invalidate(); + addresses_for_copy_.Invalidate(); + + interleaved_bytes_expected_ = 0; + + has_checksum_ = false; + expected_checksum_ = 0; + if (annotated_output_.get()) { + annotated_output_->clear(); + } +} + +VCDiffResult VCDiffDeltaFileWindow::SetUpWindowSections( + VCDiffHeaderParser* header_parser) { + size_t add_and_run_data_length = 0; + size_t instructions_and_sizes_length = 0; + size_t addresses_length = 0; + if (!header_parser->ParseSectionLengths(has_checksum_, + &add_and_run_data_length, + &instructions_and_sizes_length, + &addresses_length, + &expected_checksum_)) { + return header_parser->GetResult(); + } + if (parent_->AllowInterleaved() && + (add_and_run_data_length == 0) && + (addresses_length == 0)) { + // The interleaved format is being used. + interleaved_bytes_expected_ = + static_cast<int>(instructions_and_sizes_length); + UpdateInterleavedSectionPointers(header_parser->UnparsedData(), + header_parser->End()); + } else { + // If interleaved format is not used, then the whole window contents + // must be available before decoding can begin. If only part of + // the current window is available, then report end of data + // and re-parse the whole header when DecodeChunk() is called again. 
+ if (header_parser->UnparsedSize() < (add_and_run_data_length + + instructions_and_sizes_length + + addresses_length)) { + return RESULT_END_OF_DATA; + } + data_for_add_and_run_.Init(header_parser->UnparsedData(), + add_and_run_data_length); + instructions_and_sizes_.Init(data_for_add_and_run_.End(), + instructions_and_sizes_length); + addresses_for_copy_.Init(instructions_and_sizes_.End(), addresses_length); + if (addresses_for_copy_.End() != header_parser->EndOfDeltaWindow()) { + LOG(ERROR) << "The end of the instructions section " + "does not match the end of the delta window" << LOG_ENDL; + return RESULT_ERROR; + } + } + reader_.Init(instructions_and_sizes_.UnparsedDataAddr(), + instructions_and_sizes_.End()); + return RESULT_SUCCESS; +} + +// Here are the elements of the delta window header to be parsed, +// from section 4 of the RFC: +// +// Window1 +// Win_Indicator - byte +// [Source segment size] - integer +// [Source segment position] - integer +// The delta encoding of the target window +// Length of the delta encoding - integer +// The delta encoding +// Size of the target window - integer +// Delta_Indicator - byte +// Length of data for ADDs and RUNs - integer +// Length of instructions and sizes - integer +// Length of addresses for COPYs - integer +// Data section for ADDs and RUNs - array of bytes +// Instructions and sizes section - array of bytes +// Addresses section for COPYs - array of bytes +// +VCDiffResult VCDiffDeltaFileWindow::ReadHeader( + ParseableChunk* parseable_chunk) { + string* decoded_target = parent_->decoded_target(); + VCDiffHeaderParser header_parser(parseable_chunk->UnparsedData(), + parseable_chunk->End()); + size_t source_segment_position = 0; + unsigned char win_indicator = 0; + if (!header_parser.ParseWinIndicatorAndSourceSegment( + parent_->dictionary_size(), + decoded_target->size(), + &win_indicator, + &source_segment_length_, + &source_segment_position)) { + return header_parser.GetResult(); + } + has_checksum_ = 
parent_->AllowChecksum() && (win_indicator & VCD_CHECKSUM); + if (!header_parser.ParseWindowLengths(&target_window_length_)) { + return header_parser.GetResult(); + } + if (parent_->TargetWindowWouldExceedTargetByteLimit(target_window_length_)) { + // An error has been logged by TargetWindowWouldExceedTargetByteLimit(). + return RESULT_ERROR; + } + header_parser.ParseDeltaIndicator(); + VCDiffResult setup_return_code = SetUpWindowSections(&header_parser); + if (RESULT_SUCCESS != setup_return_code) { + return setup_return_code; + } + // Reserve enough space in the output string for the current target window. + decoded_target->reserve(target_window_start_pos_ + target_window_length_); + // Get a pointer to the start of the source segment. + if (win_indicator & VCD_SOURCE) { + source_segment_ptr_ = parent_->dictionary_ptr() + source_segment_position; + } else if (win_indicator & VCD_TARGET) { + // This assignment must happen after the reserve(). + // decoded_target should not be resized again while processing this window, + // so source_segment_ptr_ should remain valid. + source_segment_ptr_ = decoded_target->data() + source_segment_position; + } + // The whole window header was found and parsed successfully. 
+ found_header_ = true; + parseable_chunk->Advance(header_parser.ParsedSize()); + return RESULT_SUCCESS; +} + +void VCDiffDeltaFileWindow::UpdateInstructionPointer( + ParseableChunk* parseable_chunk) { + if (IsInterleaved()) { + size_t bytes_parsed = instructions_and_sizes_.ParsedSize(); + // Reduce expected instruction segment length by bytes parsed + interleaved_bytes_expected_ -= static_cast<int>(bytes_parsed); + parseable_chunk->Advance(bytes_parsed); + } +} + +size_t VCDiffDeltaFileWindow::TargetBytesDecoded() { + return parent_->decoded_target()->size() - target_window_start_pos_; +} + +size_t VCDiffDeltaFileWindow::TargetBytesRemaining() { + if (target_window_length_ == 0) { + // There is no window being decoded at present + return 0; + } else { + return target_window_length_ - TargetBytesDecoded(); + } +} + +void VCDiffDeltaFileWindow::CopyBytes(const char* data, + size_t size, + VCDiffAnnotationType annotation_type) { + parent_->decoded_target()->append(data, size); + if (annotated_output_.get()) { + annotated_output_->append(kAnnotationStartTags[annotation_type]); + annotated_output_->append(data, size); + annotated_output_->append(kAnnotationEndTags[annotation_type]); + } +} + +void VCDiffDeltaFileWindow::RunByte(unsigned char byte, size_t size) { + parent_->decoded_target()->append(size, byte); + if (annotated_output_.get()) { + annotated_output_->append(kAnnotationStartTags[VCD_ANNOTATION_LITERAL]); + annotated_output_->append(size, byte); + annotated_output_->append(kAnnotationEndTags[VCD_ANNOTATION_LITERAL]); + } +} + +VCDiffResult VCDiffDeltaFileWindow::DecodeAdd(size_t size) { + if (size > data_for_add_and_run_.UnparsedSize()) { + return RESULT_END_OF_DATA; + } + // Write the next "size" data bytes + CopyBytes(data_for_add_and_run_.UnparsedData(), size, VCD_ANNOTATION_LITERAL); + data_for_add_and_run_.Advance(size); + return RESULT_SUCCESS; +} + +VCDiffResult VCDiffDeltaFileWindow::DecodeRun(size_t size) { + if (data_for_add_and_run_.Empty()) { + 
return RESULT_END_OF_DATA; + } + // Write "size" copies of the next data byte + RunByte(*data_for_add_and_run_.UnparsedData(), size); + data_for_add_and_run_.Advance(1); + return RESULT_SUCCESS; +} + +VCDiffResult VCDiffDeltaFileWindow::DecodeCopy(size_t size, + unsigned char mode) { + // Keep track of the number of target bytes decoded as a local variable + // to avoid recalculating it each time it is needed. + size_t target_bytes_decoded = TargetBytesDecoded(); + const VCDAddress here_address = + static_cast<VCDAddress>(source_segment_length_ + target_bytes_decoded); + const VCDAddress decoded_address = parent_->addr_cache()->DecodeAddress( + here_address, + mode, + addresses_for_copy_.UnparsedDataAddr(), + addresses_for_copy_.End()); + switch (decoded_address) { + case RESULT_ERROR: + LOG(ERROR) << "Unable to decode address for COPY" << LOG_ENDL; + return RESULT_ERROR; + case RESULT_END_OF_DATA: + return RESULT_END_OF_DATA; + default: + if ((decoded_address < 0) || (decoded_address > here_address)) { + LOG(DFATAL) << "Internal error: unexpected address " << decoded_address + << " returned from DecodeAddress, with here_address = " + << here_address << LOG_ENDL; + return RESULT_ERROR; + } + break; + } + size_t address = static_cast<size_t>(decoded_address); + if ((address + size) <= source_segment_length_) { + // Copy all data from source segment + CopyBytes(&source_segment_ptr_[address], size, VCD_ANNOTATION_DMATCH); + return RESULT_SUCCESS; + } + // Copy some data from target window... + if (address < source_segment_length_) { + // ... 
plus some data from source segment + const size_t partial_copy_size = source_segment_length_ - address; + CopyBytes(&source_segment_ptr_[address], + partial_copy_size, + VCD_ANNOTATION_DMATCH); + target_bytes_decoded += partial_copy_size; + address += partial_copy_size; + size -= partial_copy_size; + } + address -= source_segment_length_; + // address is now based at start of target window + const char* const target_segment_ptr = parent_->decoded_target()->data() + + target_window_start_pos_; + while (size > (target_bytes_decoded - address)) { + // Recursive copy that extends into the yet-to-be-copied target data + const size_t partial_copy_size = target_bytes_decoded - address; + CopyBytes(&target_segment_ptr[address], + partial_copy_size, + VCD_ANNOTATION_BMATCH); + target_bytes_decoded += partial_copy_size; + address += partial_copy_size; + size -= partial_copy_size; + } + CopyBytes(&target_segment_ptr[address], size, VCD_ANNOTATION_BMATCH); + return RESULT_SUCCESS; +} + +int VCDiffDeltaFileWindow::DecodeBody(ParseableChunk* parseable_chunk) { + if (IsInterleaved() && (instructions_and_sizes_.UnparsedData() + != parseable_chunk->UnparsedData())) { + LOG(DFATAL) << "Internal error: interleaved format is used, but the" + " input pointer does not point to the instructions section" + << LOG_ENDL; + return RESULT_ERROR; + } + while (TargetBytesDecoded() < target_window_length_) { + int32_t decoded_size = VCD_INSTRUCTION_ERROR; + unsigned char mode = 0; + VCDiffInstructionType instruction = + reader_.GetNextInstruction(&decoded_size, &mode); + switch (instruction) { + case VCD_INSTRUCTION_END_OF_DATA: + UpdateInstructionPointer(parseable_chunk); + return RESULT_END_OF_DATA; + case VCD_INSTRUCTION_ERROR: + return RESULT_ERROR; + default: + break; + } + const size_t size = static_cast<size_t>(decoded_size); + // The value of "size" itself could be enormous (say, INT32_MAX) + // so check it individually against the limit to protect against + // overflow when adding it to 
something else. + if ((size > target_window_length_) || + ((size + TargetBytesDecoded()) > target_window_length_)) { + LOG(ERROR) << VCDiffInstructionName(instruction) + << " with size " << size + << " plus existing " << TargetBytesDecoded() + << " bytes of target data exceeds length of target" + " window (" << target_window_length_ << " bytes)" + << LOG_ENDL; + return RESULT_ERROR; + } + VCDiffResult result = RESULT_SUCCESS; + switch (instruction) { + case VCD_ADD: + result = DecodeAdd(size); + break; + case VCD_RUN: + result = DecodeRun(size); + break; + case VCD_COPY: + result = DecodeCopy(size, mode); + break; + default: + LOG(DFATAL) << "Unexpected instruction type " << instruction + << "in opcode stream" << LOG_ENDL; + return RESULT_ERROR; + } + switch (result) { + case RESULT_END_OF_DATA: + reader_.UnGetInstruction(); + UpdateInstructionPointer(parseable_chunk); + return RESULT_END_OF_DATA; + case RESULT_ERROR: + return RESULT_ERROR; + case RESULT_SUCCESS: + break; + } + } + if (TargetBytesDecoded() != target_window_length_) { + LOG(ERROR) << "Decoded target window size (" << TargetBytesDecoded() + << " bytes) does not match expected size (" + << target_window_length_ << " bytes)" << LOG_ENDL; + return RESULT_ERROR; + } + const char* const target_window_start = + parent_->decoded_target()->data() + target_window_start_pos_; + if (has_checksum_ && + (ComputeAdler32(target_window_start, target_window_length_) + != expected_checksum_)) { + LOG(ERROR) << "Target data does not match checksum; this could mean " + "that the wrong dictionary was used" << LOG_ENDL; + return RESULT_ERROR; + } + if (!instructions_and_sizes_.Empty()) { + LOG(ERROR) << "Excess instructions and sizes left over " + "after decoding target window" << LOG_ENDL; + return RESULT_ERROR; + } + if (!IsInterleaved()) { + // Standard format is being used, with three separate sections for the + // instructions, data, and addresses. 
+ if (!data_for_add_and_run_.Empty()) { + LOG(ERROR) << "Excess ADD/RUN data left over " + "after decoding target window" << LOG_ENDL; + return RESULT_ERROR; + } + if (!addresses_for_copy_.Empty()) { + LOG(ERROR) << "Excess COPY addresses left over " + "after decoding target window" << LOG_ENDL; + return RESULT_ERROR; + } + // Reached the end of the window. Update the ParseableChunk to point to the + // end of the addresses section, which is the last section in the window. + parseable_chunk->SetPosition(addresses_for_copy_.End()); + } else { + // Interleaved format is being used. The window may have been only + // partially decoded. + UpdateInstructionPointer(parseable_chunk); + } + return RESULT_SUCCESS; +} + +VCDiffResult VCDiffDeltaFileWindow::DecodeWindows( + ParseableChunk* parseable_chunk) { + if (!parent_) { + LOG(DFATAL) << "Internal error: VCDiffDeltaFileWindow::DecodeWindows() " + "called before VCDiffDeltaFileWindow::Init()" << LOG_ENDL; + return RESULT_ERROR; + } + while (!parseable_chunk->Empty()) { + if (!found_header_) { + switch (ReadHeader(parseable_chunk)) { + case RESULT_END_OF_DATA: + return RESULT_END_OF_DATA; + case RESULT_ERROR: + return RESULT_ERROR; + default: + // Reset address cache between windows (RFC section 5.1) + if (!parent_->addr_cache()->Init()) { + LOG(DFATAL) << "Error initializing address cache" << LOG_ENDL; + return RESULT_ERROR; + } + } + } else { + // We are resuming a window that was partially decoded before a + // RESULT_END_OF_DATA was returned. This can only happen on the first + // loop iteration, and only if the interleaved format is enabled and used. 
+ if (!IsInterleaved()) { + LOG(DFATAL) << "Internal error: Resumed decoding of a delta file window" + " when interleaved format is not being used" << LOG_ENDL; + return RESULT_ERROR; + } + UpdateInterleavedSectionPointers(parseable_chunk->UnparsedData(), + parseable_chunk->End()); + reader_.UpdatePointers(instructions_and_sizes_.UnparsedDataAddr(), + instructions_and_sizes_.End()); + } + switch (DecodeBody(parseable_chunk)) { + case RESULT_END_OF_DATA: + if (MoreDataExpected()) { + return RESULT_END_OF_DATA; + } else { + LOG(ERROR) << "End of data reached while decoding VCDIFF delta file" + << LOG_ENDL; + // fall through to RESULT_ERROR case + } + case RESULT_ERROR: + return RESULT_ERROR; + default: + break; // DecodeBody succeeded + } + AppendAnnotatedOutput(parent_->annotated_output()); + // Get ready to read a new delta window + Reset(); + if (parent_->MetTargetByteLimit()) { + // Found exactly the length expected. Stop decoding. + return RESULT_SUCCESS; + } + } + return RESULT_SUCCESS; +} + +// *** Methods for VCDiffStreamingDecoder + +VCDiffStreamingDecoder::VCDiffStreamingDecoder() +: impl_(new VCDiffStreamingDecoderImpl) { } + +VCDiffStreamingDecoder::~VCDiffStreamingDecoder() { delete impl_; } + +void VCDiffStreamingDecoder::StartDecoding(const char* source, size_t len) { + impl_->StartDecoding(source, len); +} + +bool VCDiffStreamingDecoder::DecodeChunkToInterface( + const char* data, + size_t len, + OutputStringInterface* output_string) { + return impl_->DecodeChunk(data, len, output_string); +} + +bool VCDiffStreamingDecoder::FinishDecoding() { + return impl_->FinishDecoding(); +} + +void VCDiffStreamingDecoder::EnableAnnotatedOutput() { + impl_->EnableAnnotatedOutput(); +} + +void VCDiffStreamingDecoder::DisableAnnotatedOutput() { + impl_->DisableAnnotatedOutput(); +} + +void VCDiffStreamingDecoder::GetAnnotatedOutputToInterface( + OutputStringInterface* annotated_output) { + impl_->GetAnnotatedOutput(annotated_output); +} + +bool 
VCDiffDecoder::DecodeToInterface(const char* dictionary_ptr, + size_t dictionary_size, + const string& encoding, + OutputStringInterface* target) { + target->clear(); + decoder_.StartDecoding(dictionary_ptr, dictionary_size); + if (!decoder_.DecodeChunkToInterface(encoding.data(), + encoding.size(), + target)) { + return false; + } + return decoder_.FinishDecoding(); +} + +} // namespace open_vcdiff diff --git a/src/vcdecoder_test.cc b/src/vcdecoder_test.cc new file mode 100644 index 0000000..10ae599 --- /dev/null +++ b/src/vcdecoder_test.cc @@ -0,0 +1,294 @@ +// Copyright 2008 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <config.h> +#include "vcdecoder_test.h" +#include <string> +#include "checksum.h" +#include "codetable.h" +#include "testing.h" +#include "varint_bigendian.h" +#include "vcdiff_defs.h" + +namespace open_vcdiff { + +using std::string; + +const char VCDiffDecoderTest::kStandardFileHeader[] = { + 0xD6, // 'V' | 0x80 + 0xC3, // 'C' | 0x80 + 0xC4, // 'D' | 0x80 + 0x00, // Draft standard version number + 0x00 // Hdr_Indicator: no custom code table, no compression + }; + +const char VCDiffDecoderTest::kInterleavedFileHeader[] = { + 0xD6, // 'V' | 0x80 + 0xC3, // 'C' | 0x80 + 0xC4, // 'D' | 0x80 + 'S', // SDCH version code + 0x00 // Hdr_Indicator: no custom code table, no compression + }; + +const char VCDiffDecoderTest::kDictionary[] = + "\"Just the place for a Snark!\" the Bellman cried,\n" + "As he landed his crew with care;\n" + "Supporting each man on the top of the tide\n" + "By a finger entwined in his hair.\n"; + +const char VCDiffDecoderTest::kExpectedTarget[] = + "\"Just the place for a Snark! I have said it twice:\n" + "That alone should encourage the crew.\n" + "Just the place for a Snark! I have said it thrice:\n" + "What I tell you three times is true.\"\n"; + +const char VCDiffDecoderTest::kExpectedAnnotatedTarget[] = + "<dmatch>\"Just the place for a Snark!</dmatch>" + "<literal> I have said it twice:\n" + "That alone should encourage the crew.\n</literal>" + "<bmatch>Just the place for a Snark! 
I have said it t</bmatch>" + "<literal>hr</literal>" + "<bmatch>ice:\n</bmatch>" + "<literal>What I te</literal>" + "<literal>ll</literal>" + "<literal> you three times is true.\"\n</literal>"; + +VCDiffDecoderTest::VCDiffDecoderTest() : fuzzer_(0), fuzzed_byte_position_(0) { + dictionary_ = kDictionary; + expected_target_ = kExpectedTarget; + expected_annotated_target_ = kExpectedAnnotatedTarget; +} + +void VCDiffDecoderTest::SetUp() { + InitializeDeltaFile(); +} + +void VCDiffDecoderTest::UseStandardFileHeader() { + delta_file_header_.assign(kStandardFileHeader, + sizeof(kStandardFileHeader)); +} + +void VCDiffDecoderTest::UseInterleavedFileHeader() { + delta_file_header_.assign(kInterleavedFileHeader, + sizeof(kInterleavedFileHeader)); +} + +void VCDiffDecoderTest::InitializeDeltaFile() { + delta_file_ = delta_file_header_ + delta_window_header_ + delta_window_body_; +} + +char VCDiffDecoderTest::GetByteFromStringLength(const char* s, + int which_byte) { + char varint_buf[VarintBE<int32_t>::kMaxBytes]; + VarintBE<int32_t>::Encode(static_cast<int32_t>(strlen(s)), varint_buf); + return varint_buf[which_byte]; +} + +void VCDiffDecoderTest::AddChecksum(VCDChecksum checksum) { + int32_t checksum_as_int32 = static_cast<int32_t>(checksum); + delta_window_header_[0] |= VCD_CHECKSUM; + VarintBE<int32_t>::AppendToString(checksum_as_int32, &delta_window_header_); + // Adjust delta window size to include checksum. + // This method wouldn't work if adding to the length caused the VarintBE + // value to spill over into another byte. Luckily, this test data happens + // not to cause such an overflow. + delta_window_header_[4] += VarintBE<int32_t>::Length(checksum_as_int32); +} + +void VCDiffDecoderTest::ComputeAndAddChecksum() { + AddChecksum(ComputeAdler32(expected_target_.data(), + expected_target_.size())); +} + +// Write the maximum expressible positive 32-bit VarintBE +// (0x7FFFFFFF) at the given offset in the delta window. 
+void VCDiffDecoderTest::WriteMaxVarintAtOffset(int offset, + int bytes_to_replace) { + static const char kMaxVarint[] = { 0x87, 0xFF, 0xFF, 0xFF, 0x7F }; + delta_file_.replace(delta_file_header_.size() + offset, + bytes_to_replace, + kMaxVarint, + sizeof(kMaxVarint)); +} + +// Write a negative 32-bit VarintBE (0x80000000) at the given offset +// in the delta window. +void VCDiffDecoderTest::WriteNegativeVarintAtOffset(int offset, + int bytes_to_replace) { + static const char kNegativeVarint[] = { 0x88, 0x80, 0x80, 0x80, 0x00 }; + delta_file_.replace(delta_file_header_.size() + offset, + bytes_to_replace, + kNegativeVarint, + sizeof(kNegativeVarint)); +} + +// Write a VarintBE that has too many continuation bytes +// at the given offset in the delta window. +void VCDiffDecoderTest::WriteInvalidVarintAtOffset(int offset, + int bytes_to_replace) { + static const char kInvalidVarint[] = { 0x87, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F }; + delta_file_.replace(delta_file_header_.size() + offset, + bytes_to_replace, + kInvalidVarint, + sizeof(kInvalidVarint)); +} + +bool VCDiffDecoderTest::FuzzOneByteInDeltaFile() { + static const struct Fuzzer { + char _and; + char _or; + char _xor; + } fuzzers[] = { + { 0xff, 0x80, 0x00 }, + { 0xff, 0xff, 0x00 }, + { 0xff, 0x00, 0x80 }, + { 0xff, 0x00, 0xff }, + { 0xff, 0x01, 0x00 }, + { 0x7f, 0x00, 0x00 }, + }; + + for (; fuzzer_ < (sizeof(fuzzers) / sizeof(fuzzers[0])); ++fuzzer_) { + for (; fuzzed_byte_position_ < delta_file_.size(); + ++fuzzed_byte_position_) { + char fuzzed_byte = (((delta_file_[fuzzed_byte_position_] + & fuzzers[fuzzer_]._and) + | fuzzers[fuzzer_]._or) + ^ fuzzers[fuzzer_]._xor); + if (fuzzed_byte != delta_file_[fuzzed_byte_position_]) { + delta_file_[fuzzed_byte_position_] = fuzzed_byte; + ++fuzzed_byte_position_; + return true; + } + } + fuzzed_byte_position_ = 0; + } + return false; +} + +const char VCDiffStandardDecoderTest::kWindowHeader[] = { + VCD_SOURCE, // Win_Indicator: take source from dictionary + 
FirstByteOfStringLength(kDictionary), // Source segment size + SecondByteOfStringLength(kDictionary), + 0x00, // Source segment position: start of dictionary + 0x79, // Length of the delta encoding + FirstByteOfStringLength(kExpectedTarget), // Size of the target window + SecondByteOfStringLength(kExpectedTarget), + 0x00, // Delta_indicator (no compression) + 0x64, // length of data for ADDs and RUNs + 0x0C, // length of instructions section + 0x03 // length of addresses for COPYs + }; + +const char VCDiffStandardDecoderTest::kWindowBody[] = { + // Data for ADDs: 1st section (length 61) + ' ', 'I', ' ', 'h', 'a', 'v', 'e', ' ', 's', 'a', 'i', 'd', ' ', + 'i', 't', ' ', 't', 'w', 'i', 'c', 'e', ':', '\n', + 'T', 'h', 'a', 't', ' ', + 'a', 'l', 'o', 'n', 'e', ' ', 's', 'h', 'o', 'u', 'l', 'd', ' ', + 'e', 'n', 'c', 'o', 'u', 'r', 'a', 'g', 'e', ' ', + 't', 'h', 'e', ' ', 'c', 'r', 'e', 'w', '.', '\n', + // Data for ADDs: 2nd section (length 2) + 'h', 'r', + // Data for ADDs: 3rd section (length 9) + 'W', 'h', 'a', 't', ' ', + 'I', ' ', 't', 'e', + // Data for RUN: 4th section (length 1) + 'l', + // Data for ADD: 4th section (length 27) + ' ', 'y', 'o', 'u', ' ', + 't', 'h', 'r', 'e', 'e', ' ', 't', 'i', 'm', 'e', 's', ' ', 'i', 's', ' ', + 't', 'r', 'u', 'e', '.', '\"', '\n', + // Instructions and sizes (length 13) + 0x13, // VCD_COPY mode VCD_SELF, size 0 + 0x1C, // Size of COPY (28) + 0x01, // VCD_ADD size 0 + 0x3D, // Size of ADD (61) + 0x23, // VCD_COPY mode VCD_HERE, size 0 + 0x2C, // Size of COPY (44) + 0xCB, // VCD_ADD size 2 + VCD_COPY mode NEAR(1), size 5 + 0x0A, // VCD_ADD size 9 + 0x00, // VCD_RUN size 0 + 0x02, // Size of RUN (2) + 0x01, // VCD_ADD size 0 + 0x1B, // Size of ADD (27) + // Addresses for COPYs (length 3) + 0x00, // Start of dictionary + 0x58, // HERE mode address for 2nd copy (27+61 back from here_address) + 0x2D // NEAR(1) mode address for 2nd copy (45 after prior address) + }; + +VCDiffStandardDecoderTest::VCDiffStandardDecoderTest() { + 
UseStandardFileHeader(); + delta_window_header_.assign(kWindowHeader, sizeof(kWindowHeader)); + delta_window_body_.assign(kWindowBody, sizeof(kWindowBody)); +} + +const char VCDiffInterleavedDecoderTest::kWindowHeader[] = { + VCD_SOURCE, // Win_Indicator: take source from dictionary + FirstByteOfStringLength(kDictionary), // Source segment size + SecondByteOfStringLength(kDictionary), + 0x00, // Source segment position: start of dictionary + 0x79, // Length of the delta encoding + FirstByteOfStringLength(kExpectedTarget), // Size of the target window + SecondByteOfStringLength(kExpectedTarget), + 0x00, // Delta_indicator (no compression) + 0x00, // length of data for ADDs and RUNs (unused) + 0x73, // length of interleaved section + 0x00 // length of addresses for COPYs (unused) + }; + +const char VCDiffInterleavedDecoderTest::kWindowBody[] = { + 0x13, // VCD_COPY mode VCD_SELF, size 0 + 0x1C, // Size of COPY (28) + 0x00, // Address of COPY: Start of dictionary + 0x01, // VCD_ADD size 0 + 0x3D, // Size of ADD (61) + // Data for ADD (length 61) + ' ', 'I', ' ', 'h', 'a', 'v', 'e', ' ', 's', 'a', 'i', 'd', ' ', + 'i', 't', ' ', 't', 'w', 'i', 'c', 'e', ':', '\n', + 'T', 'h', 'a', 't', ' ', + 'a', 'l', 'o', 'n', 'e', ' ', 's', 'h', 'o', 'u', 'l', 'd', ' ', + 'e', 'n', 'c', 'o', 'u', 'r', 'a', 'g', 'e', ' ', + 't', 'h', 'e', ' ', 'c', 'r', 'e', 'w', '.', '\n', + 0x23, // VCD_COPY mode VCD_HERE, size 0 + 0x2C, // Size of COPY (44) + 0x58, // HERE mode address (27+61 back from here_address) + 0xCB, // VCD_ADD size 2 + VCD_COPY mode NEAR(1), size 5 + // Data for ADDs: 2nd section (length 2) + 'h', 'r', + 0x2D, // NEAR(1) mode address (45 after prior address) + 0x0A, // VCD_ADD size 9 + // Data for ADDs: 3rd section (length 9) + 'W', 'h', 'a', 't', ' ', + 'I', ' ', 't', 'e', + 0x00, // VCD_RUN size 0 + 0x02, // Size of RUN (2) + // Data for RUN: 4th section (length 1) + 'l', + 0x01, // VCD_ADD size 0 + 0x1B, // Size of ADD (27) + // Data for ADD: 4th section (length 27) + ' 
', 'y', 'o', 'u', ' ', + 't', 'h', 'r', 'e', 'e', ' ', 't', 'i', 'm', 'e', 's', ' ', 'i', 's', ' ', + 't', 'r', 'u', 'e', '.', '\"', '\n' + }; + +VCDiffInterleavedDecoderTest::VCDiffInterleavedDecoderTest() { + UseInterleavedFileHeader(); + delta_window_header_.assign(kWindowHeader, sizeof(kWindowHeader)); + delta_window_body_.assign(kWindowBody, sizeof(kWindowBody)); +} + +} // namespace open_vcdiff diff --git a/src/vcdecoder_test.h b/src/vcdecoder_test.h new file mode 100644 index 0000000..f3c5195 --- /dev/null +++ b/src/vcdecoder_test.h @@ -0,0 +1,161 @@ +// Copyright 2008 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_VCDIFF_VCDECODER_TEST_H_ +#define OPEN_VCDIFF_VCDECODER_TEST_H_ + +#include "google/vcdecoder.h" +#include <string> +#include "checksum.h" +#include "testing.h" + +namespace open_vcdiff { + +// A base class used for all the decoder tests. Most tests use the same +// dictionary and target and construct the delta file in the same way. +// Those elements are provided as string members and can be modified or +// overwritten by each specific decoder test as needed. 
+class VCDiffDecoderTest : public testing::Test { + protected: + static const char kDictionary[]; + static const char kExpectedTarget[]; + static const char kExpectedAnnotatedTarget[]; + + VCDiffDecoderTest(); + + virtual ~VCDiffDecoderTest() {} + + virtual void SetUp(); + + // These functions populate delta_file_header_ with a standard or interleaved + // file header. + void UseStandardFileHeader(); + void UseInterleavedFileHeader(); + + // This function is called by SetUp(). It populates delta_file_ with the + // concatenated delta file header, delta window header, and delta window + // body, plus (if UseChecksum() is true) the corresponding checksum. + // It can be called again by a test that has modified the contents of + // delta_file_ and needs to restore them to their original state. + virtual void InitializeDeltaFile(); + + // This function adds an Adler32 checksum to the delta window header. + void AddChecksum(VCDChecksum checksum); + + // This function computes the Adler32 checksum for the expected target + // and adds it to the delta window header. + void ComputeAndAddChecksum(); + + // Write the maximum expressible positive 32-bit VarintBE + // (0x7FFFFFFF) at the given offset in the delta window. + void WriteMaxVarintAtOffset(int offset, int bytes_to_replace); + + // Write a negative 32-bit VarintBE (0x80000000) at the given offset + // in the delta window. + void WriteNegativeVarintAtOffset(int offset, int bytes_to_replace); + + // Write a VarintBE that has too many continuation bytes + // at the given offset in the delta window. + void WriteInvalidVarintAtOffset(int offset, int bytes_to_replace); + + // This function iterates through a list of fuzzers (bit masks used to corrupt + // bytes) and through positions in the delta file. Each time it is called, it + // attempts to corrupt a different byte in delta_file_ in a different way. If + // successful, it returns true. 
Once it exhausts the list of fuzzers and of + // byte positions in delta_file_, it returns false. + bool FuzzOneByteInDeltaFile(); + + // Assuming the length of the given string can be expressed as a VarintBE + // of length N, this function returns the byte at position which_byte, where + // 0 <= which_byte < N. + static char GetByteFromStringLength(const char* s, int which_byte); + + // Assuming the length of the given string can be expressed as a one-byte + // VarintBE, this function returns that byte value. + static char StringLengthAsByte(const char* s) { + return GetByteFromStringLength(s, 0); + } + + // Assuming the length of the given string can be expressed as a two-byte + // VarintBE, this function returns the first byte of its representation. + static char FirstByteOfStringLength(const char* s) { + return GetByteFromStringLength(s, 0); + } + + // Assuming the length of the given string can be expressed as a two-byte + // VarintBE, this function returns the second byte of its representation. + static char SecondByteOfStringLength(const char* s) { + return GetByteFromStringLength(s, 1); + } + + VCDiffStreamingDecoder decoder_; + + // delta_file_ will be populated by InitializeDeltaFile() using the components + // delta_file_header_, delta_window_header_, and delta_window_body_. + string delta_file_; + + // This string is not populated during setup, but is used to receive the + // decoded target file in each test. + string output_; + + // Test fixtures that inherit from VCDiffDecoderTest can set these strings in + // their constructors to override their default values (which come from + // kDictionary, kExpectedTarget, etc.) + string dictionary_; + string expected_target_; + string expected_annotated_target_; + + // The components that will be used to construct delta_file_. 
+ string delta_file_header_; + string delta_window_header_; + string delta_window_body_; + + private: + // These values should only be accessed via UseStandardFileHeader() and + // UseInterleavedFileHeader(). + static const char kStandardFileHeader[]; + static const char kInterleavedFileHeader[]; + + // These two counters are used by FuzzOneByteInDeltaFile() to iterate through + // different ways to corrupt the delta file. + size_t fuzzer_; + size_t fuzzed_byte_position_; +}; + +// The "standard" decoder test, which decodes a delta file that uses the +// standard VCDIFF (RFC 3284) format with no extensions. +class VCDiffStandardDecoderTest : public VCDiffDecoderTest { + protected: + VCDiffStandardDecoderTest(); + virtual ~VCDiffStandardDecoderTest() {} + + private: + static const char kWindowHeader[]; + static const char kWindowBody[]; +}; + +class VCDiffInterleavedDecoderTest : public VCDiffDecoderTest { + protected: + VCDiffInterleavedDecoderTest(); + virtual ~VCDiffInterleavedDecoderTest() {} + + private: + static const char kWindowHeader[]; + static const char kWindowBody[]; +}; + +} // namespace open_vcdiff + +#endif // OPEN_VCDIFF_VCDECODER_TEST_H_ diff --git a/src/vcdecoder_test1.cc b/src/vcdecoder_test1.cc new file mode 100644 index 0000000..f0dae41 --- /dev/null +++ b/src/vcdecoder_test1.cc @@ -0,0 +1,777 @@ +// Copyright 2008 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
#include <config.h>
#include "google/vcdecoder.h"
#include <string>
#include "testing.h"
#include "vcdecoder_test.h"
#include "vcdiff_defs.h"

namespace open_vcdiff {

using std::string;

// Feeding only the file header (no delta window) is accepted and produces an
// empty target.
TEST_F(VCDiffStandardDecoderTest, DecodeHeaderOnly) {
  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
  EXPECT_TRUE(decoder_.DecodeChunk(delta_file_header_.data(),
                                   delta_file_header_.size(),
                                   &output_));
  EXPECT_TRUE(decoder_.FinishDecoding());
  EXPECT_EQ("", output_);
}

// Decoding the complete, unmodified delta file in a single chunk reproduces
// the expected target.
TEST_F(VCDiffStandardDecoderTest, Decode) {
  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
  EXPECT_TRUE(decoder_.DecodeChunk(delta_file_.data(),
                                   delta_file_.size(),
                                   &output_));
  EXPECT_TRUE(decoder_.FinishDecoding());
  EXPECT_EQ(expected_target_, output_);
}

// If we add a checksum to a standard-format delta file (without using format
// extensions), it will be interpreted as random bytes inserted into the middle
// of the file.  The decode operation should fail, but where exactly it fails is
// not easy to predict.
TEST_F(VCDiffStandardDecoderTest, StandardFormatDoesNotSupportChecksum) {
  ComputeAndAddChecksum();
  // The checksum was added to the window header; rebuild delta_file_ so that
  // it is actually part of the data handed to the decoder.
  InitializeDeltaFile();
  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
                                    delta_file_.size(),
                                    &output_));
  EXPECT_EQ("", output_);
}

// Remove one byte from the length of the chunk to process, and
// verify that an error is returned for FinishDecoding().
TEST_F(VCDiffStandardDecoderTest, FinishAfterDecodingPartialWindow) {
  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
  // The truncated chunk itself is accepted; only FinishDecoding() is expected
  // to report the problem.
  EXPECT_TRUE(decoder_.DecodeChunk(delta_file_.data(),
                                   delta_file_.size() - 1,
                                   &output_));
  EXPECT_FALSE(decoder_.FinishDecoding());
  // The decoder should not create more target bytes than were expected.
  EXPECT_GE(expected_target_.size(), output_.size());
}

// Same as above, but the chunk stops one byte before the end of the window
// header, so no target data is expected at all.
TEST_F(VCDiffStandardDecoderTest, FinishAfterDecodingPartialWindowHeader) {
  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
  EXPECT_TRUE(decoder_.DecodeChunk(delta_file_.data(),
                                   delta_file_header_.size()
                                       + delta_window_header_.size() - 1,
                                   &output_));
  EXPECT_FALSE(decoder_.FinishDecoding());
  EXPECT_EQ("", output_);
}

// Fuzz bits to make sure decoder does not violently crash.
// This test has no expected behavior except that no crashes should occur.
// In some cases, changing bits will still decode to the correct target;
// for example, changing unused bits within a bitfield.
TEST_F(VCDiffStandardDecoderTest, FuzzBits) {
  while (FuzzOneByteInDeltaFile()) {
    decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
    // FinishDecoding() is only called when the chunk was accepted; its result
    // is deliberately ignored.
    if (decoder_.DecodeChunk(delta_file_.data(),
                             delta_file_.size(),
                             &output_)) {
      decoder_.FinishDecoding();
    }
    // Undo the corruption and discard any output before the next iteration.
    InitializeDeltaFile();
    output_.clear();
  }
}

// With annotated output enabled before decoding starts, the annotated target
// retrieved after a successful decode matches expected_annotated_target_.
TEST_F(VCDiffStandardDecoderTest, CheckAnnotatedOutput) {
  decoder_.EnableAnnotatedOutput();
  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
  EXPECT_TRUE(decoder_.DecodeChunk(delta_file_.data(),
                                   delta_file_.size(),
                                   &output_));
  EXPECT_TRUE(decoder_.FinishDecoding());
  string annotated_output;
  decoder_.GetAnnotatedOutput(&annotated_output);
  EXPECT_EQ(expected_annotated_target_, annotated_output);
}

// Change each element of the delta file window to an erroneous value
// and make sure it's caught as an error.
+ +TEST_F(VCDiffStandardDecoderTest, WinIndicatorHasBothSourceAndTarget) { + delta_file_[delta_file_header_.size()] = VCD_SOURCE + VCD_TARGET; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, OkayToSetUpperBitsOfWinIndicator) { + // It is not an error to set any of the other bits in Win_Indicator + // besides VCD_SOURCE and VCD_TARGET. + delta_file_[delta_file_header_.size()] = 0xFD; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_TRUE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_TRUE(decoder_.FinishDecoding()); + EXPECT_EQ(expected_target_, output_); +} + +TEST_F(VCDiffStandardDecoderTest, CopyInstructionsShouldFailIfNoSourceSegment) { + // Replace the Win_Indicator and the source size and source offset with a + // single 0 byte (a Win_Indicator for a window with no source segment.) 
+ delta_window_header_.replace(0, 4, "\0", 1); + InitializeDeltaFile(); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + // The first COPY instruction should fail, so there should be no output + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, SourceSegmentSizeExceedsDictionarySize) { + ++delta_file_[delta_file_header_.size() + 2]; // increment size + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, SourceSegmentSizeMaxInt) { + WriteMaxVarintAtOffset(1, 2); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, SourceSegmentSizeNegative) { + WriteNegativeVarintAtOffset(1, 2); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, SourceSegmentSizeInvalid) { + WriteInvalidVarintAtOffset(1, 2); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, SourceSegmentEndExceedsDictionarySize) { + ++delta_file_[delta_file_header_.size() + 3]; // increment start pos + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, SourceSegmentPosMaxInt) { + WriteMaxVarintAtOffset(3, 1); + 
decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, SourceSegmentPosNegative) { + WriteNegativeVarintAtOffset(3, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, SourceSegmentPosInvalid) { + WriteInvalidVarintAtOffset(3, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, DeltaEncodingLengthZero) { + delta_file_[delta_file_header_.size() + 4] = 0; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, DeltaEncodingLengthTooLargeByOne) { + ++delta_file_[delta_file_header_.size() + 4]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, DeltaEncodingLengthTooSmallByOne) { + --delta_file_[delta_file_header_.size() + 4]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, DeltaEncodingLengthMaxInt) { + WriteMaxVarintAtOffset(4, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, 
DeltaEncodingLengthNegative) { + WriteNegativeVarintAtOffset(4, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, DeltaEncodingLengthInvalid) { + WriteInvalidVarintAtOffset(4, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, TargetWindowSizeZero) { + static const char zero_size[] = { 0x00 }; + delta_file_.replace(delta_file_header_.size() + 5, 2, zero_size, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, TargetWindowSizeTooLargeByOne) { + ++delta_file_[delta_file_header_.size() + 6]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, TargetWindowSizeTooSmallByOne) { + --delta_file_[delta_file_header_.size() + 6]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, TargetWindowSizeMaxInt) { + WriteMaxVarintAtOffset(5, 2); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, TargetWindowSizeNegative) { + WriteNegativeVarintAtOffset(5, 2); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + 
EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, TargetWindowSizeInvalid) { + WriteInvalidVarintAtOffset(5, 2); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, OkayToSetUpperBitsOfDeltaIndicator) { + delta_file_[delta_file_header_.size() + 7] = 0xF8; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_TRUE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_TRUE(decoder_.FinishDecoding()); + EXPECT_EQ(expected_target_, output_); +} + +TEST_F(VCDiffStandardDecoderTest, DataCompressionNotSupported) { + delta_file_[delta_file_header_.size() + 7] = 0x01; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, InstructionCompressionNotSupported) { + delta_file_[delta_file_header_.size() + 7] = 0x02; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, AddressCompressionNotSupported) { + delta_file_[delta_file_header_.size() + 7] = 0x04; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, AddRunDataSizeZero) { + delta_file_[delta_file_header_.size() + 8] = 0; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", 
output_); +} + +TEST_F(VCDiffStandardDecoderTest, AddRunDataSizeTooLargeByOne) { + ++delta_file_[delta_file_header_.size() + 8]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, AddRunDataSizeTooSmallByOne) { + --delta_file_[delta_file_header_.size() + 8]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, AddRunDataSizeMaxInt) { + WriteMaxVarintAtOffset(8, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, AddRunDataSizeNegative) { + WriteNegativeVarintAtOffset(8, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, AddRunDataSizeInvalid) { + WriteInvalidVarintAtOffset(8, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, InstructionsSizeZero) { + delta_file_[delta_file_header_.size() + 9] = 0; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, InstructionsSizeTooLargeByOne) { + ++delta_file_[delta_file_header_.size() + 9]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + 
EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, InstructionsSizeTooSmallByOne) { + --delta_file_[delta_file_header_.size() + 9]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, InstructionsSizeMaxInt) { + WriteMaxVarintAtOffset(9, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, InstructionsSizeNegative) { + WriteNegativeVarintAtOffset(9, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, InstructionsSizeInvalid) { + WriteInvalidVarintAtOffset(9, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, CopyAddressSizeZero) { + delta_file_[delta_file_header_.size() + 10] = 0; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, CopyAddressSizeTooLargeByOne) { + ++delta_file_[delta_file_header_.size() + 10]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, CopyAddressSizeTooSmallByOne) { + --delta_file_[delta_file_header_.size() + 
10]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, CopyAddressSizeMaxInt) { + WriteMaxVarintAtOffset(10, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, CopyAddressSizeNegative) { + WriteNegativeVarintAtOffset(10, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, CopyAddressSizeInvalid) { + WriteInvalidVarintAtOffset(10, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, InstructionsEndEarly) { + --delta_file_[delta_file_header_.size() + 9]; + ++delta_file_[delta_file_header_.size() + 10]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +// From this point on, the tests should also be run against the interleaved +// format. 
+ +TEST_F(VCDiffStandardDecoderTest, CopyMoreThanExpectedTarget) { + delta_file_[delta_file_header_.size() + 0x70] = + FirstByteOfStringLength(kExpectedTarget); + delta_file_[delta_file_header_.size() + 0x71] = + SecondByteOfStringLength(kExpectedTarget) + 1; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, CopySizeZero) { + delta_file_[delta_file_header_.size() + 0x70] = 0; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, CopySizeTooLargeByOne) { + ++delta_file_[delta_file_header_.size() + 0x70]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, CopySizeTooSmallByOne) { + --delta_file_[delta_file_header_.size() + 0x70]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, CopySizeMaxInt) { + WriteMaxVarintAtOffset(0x70, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, CopySizeNegative) { + WriteNegativeVarintAtOffset(0x70, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, CopySizeInvalid) { + WriteInvalidVarintAtOffset(0x70, 1); + 
decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +// Overwrites the bytes at offsets 0x7B and 0x7C (relative to the end of the +// file header) with the two length bytes derived from kDictionary; decoding +// must fail and produce no output. +TEST_F(VCDiffStandardDecoderTest, CopyAddressBeyondHereAddress) { + delta_file_[delta_file_header_.size() + 0x7B] = + FirstByteOfStringLength(kDictionary); + delta_file_[delta_file_header_.size() + 0x7C] = + SecondByteOfStringLength(kDictionary); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +// Writes a maximum-value varint at offset 0x7B; decoding must fail and +// produce no output. +TEST_F(VCDiffStandardDecoderTest, CopyAddressMaxInt) { + WriteMaxVarintAtOffset(0x7B, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +// Writes a negative varint at offset 0x7B, the same offset the other +// CopyAddress tests corrupt. Offset 0x70 is the one used by the CopySize +// tests, so using it here would only repeat CopySizeNegative. +TEST_F(VCDiffStandardDecoderTest, CopyAddressNegative) { + WriteNegativeVarintAtOffset(0x7B, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +// Writes an invalid varint at offset 0x7B, the same offset the other +// CopyAddress tests corrupt. Offset 0x70 is the one used by the CopySize +// tests, so using it here would only repeat CopySizeInvalid. +TEST_F(VCDiffStandardDecoderTest, CopyAddressInvalid) { + WriteInvalidVarintAtOffset(0x7B, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +// Overwrites the bytes at offsets 0x72 and 0x73 with the length bytes derived +// from kExpectedTarget, the second one incremented by one; decoding must fail +// and produce no output. +TEST_F(VCDiffStandardDecoderTest, AddMoreThanExpectedTarget) { + delta_file_[delta_file_header_.size() + 0x72] = + FirstByteOfStringLength(kExpectedTarget); + delta_file_[delta_file_header_.size() + 0x73] = + SecondByteOfStringLength(kExpectedTarget) + 1; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, AddSizeZero) { + 
delta_file_[delta_file_header_.size() + 0x72] = 0; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, AddSizeTooLargeByOne) { + ++delta_file_[delta_file_header_.size() + 0x72]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, AddSizeTooSmallByOne) { + --delta_file_[delta_file_header_.size() + 0x72]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, AddSizeMaxInt) { + WriteMaxVarintAtOffset(0x72, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, AddSizeNegative) { + WriteNegativeVarintAtOffset(0x72, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, AddSizeInvalid) { + WriteInvalidVarintAtOffset(0x72, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, RunMoreThanExpectedTarget) { + delta_file_[delta_file_header_.size() + 0x78] = + FirstByteOfStringLength(kExpectedTarget); + delta_file_[delta_file_header_.size() + 0x79] = + SecondByteOfStringLength(kExpectedTarget) + 1; + decoder_.StartDecoding(dictionary_.data(), 
dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, RunSizeZero) { + delta_file_[delta_file_header_.size() + 0x78] = 0; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, RunSizeTooLargeByOne) { + ++delta_file_[delta_file_header_.size() + 0x78]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, RunSizeTooSmallByOne) { + --delta_file_[delta_file_header_.size() + 0x78]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, RunSizeMaxInt) { + WriteMaxVarintAtOffset(0x78, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, RunSizeNegative) { + WriteNegativeVarintAtOffset(0x78, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTest, RunSizeInvalid) { + WriteInvalidVarintAtOffset(0x78, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +} // namespace open_vcdiff diff --git a/src/vcdecoder_test2.cc b/src/vcdecoder_test2.cc new file mode 100644 index 
0000000..a051b12 --- /dev/null +++ b/src/vcdecoder_test2.cc @@ -0,0 +1,1263 @@ +// Copyright 2008 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <config.h> +#include "google/vcdecoder.h" +#include <string> +#include "testing.h" +#include "vcdecoder_test.h" +#include "vcdiff_defs.h" // VCD_SOURCE + +namespace open_vcdiff { + +using std::string; + +// These are the same tests as for VCDiffStandardDecoderTest, with the added +// complication that instead of calling DecodeChunk() once with the entire data +// set, DecodeChunk() is called once for each byte of input. This is intended +// to shake out any bugs with rewind and resume while parsing chunked data. 
+ +typedef VCDiffStandardDecoderTest VCDiffStandardDecoderTestByteByByte; + +TEST_F(VCDiffStandardDecoderTestByteByByte, DecodeHeaderOnly) { + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + for (size_t i = 0; i < delta_file_header_.size(); ++i) { + EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_header_[i], 1, &output_)); + } + EXPECT_TRUE(decoder_.FinishDecoding()); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, Decode) { + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + for (size_t i = 0; i < delta_file_.size(); ++i) { + EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[i], 1, &output_)); + } + EXPECT_TRUE(decoder_.FinishDecoding()); + EXPECT_EQ(expected_target_, output_); +} + +// Remove one byte from the length of the chunk to process, and +// verify that an error is returned for FinishDecoding(). +TEST_F(VCDiffStandardDecoderTestByteByByte, FinishAfterDecodingPartialWindow) { + delta_file_.resize(delta_file_.size() - 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + for (size_t i = 0; i < delta_file_.size(); ++i) { + EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[i], 1, &output_)); + } + EXPECT_FALSE(decoder_.FinishDecoding()); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, + FinishAfterDecodingPartialWindowHeader) { + delta_file_.resize(delta_file_header_.size() + + delta_window_header_.size() - 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + for (size_t i = 0; i < delta_file_.size(); ++i) { + EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[i], 1, &output_)); + } + EXPECT_FALSE(decoder_.FinishDecoding()); + EXPECT_EQ("", output_); +} + +// If we add a checksum to a standard-format delta file (without using format +// extensions), it will be interpreted as random bytes inserted into the middle +// of the file. 
The decode operation should fail, but where exactly it fails is +// undefined. +TEST_F(VCDiffStandardDecoderTestByteByByte, + StandardFormatDoesNotSupportChecksum) { + ComputeAndAddChecksum(); + InitializeDeltaFile(); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +// Fuzz bits to make sure decoder does not violently crash. +// This test has no expected behavior except that no crashes should occur. +// In some cases, changing bits will still decode to the correct target; +// for example, changing unused bits within a bitfield. +TEST_F(VCDiffStandardDecoderTestByteByByte, FuzzBits) { + while (FuzzOneByteInDeltaFile()) { + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + if (!failed) { + decoder_.FinishDecoding(); + } + // The decoder should not create more target bytes than were expected. 
+ EXPECT_GE(expected_target_.size(), output_.size()); + InitializeDeltaFile(); + output_.clear(); + } +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, CheckAnnotatedOutput) { + decoder_.EnableAnnotatedOutput(); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + for (size_t i = 0; i < delta_file_.size(); ++i) { + EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[i], 1, &output_)); + } + EXPECT_TRUE(decoder_.FinishDecoding()); + string annotated_output; + decoder_.GetAnnotatedOutput(&annotated_output); + EXPECT_EQ(expected_annotated_target_, annotated_output); +} + +// Change each element of the delta file window to an erroneous value +// and make sure it's caught as an error. + +TEST_F(VCDiffStandardDecoderTestByteByByte, + WinIndicatorHasBothSourceAndTarget) { + delta_file_[delta_file_header_.size()] = VCD_SOURCE + VCD_TARGET; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail at the position that was altered + EXPECT_EQ(delta_file_header_.size(), i); + break; + } + } + EXPECT_TRUE(failed); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, OkayToSetUpperBitsOfWinIndicator) { + // It is not an error to set any of the other bits in Win_Indicator + // besides VCD_SOURCE and VCD_TARGET. + delta_file_[delta_file_header_.size()] = 0xFD; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + for (size_t i = 0; i < delta_file_.size(); ++i) { + EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[i], 1, &output_)); + } + EXPECT_TRUE(decoder_.FinishDecoding()); + EXPECT_EQ(expected_target_, output_); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, + CopyInstructionsShouldFailIfNoSourceSegment) { + // Replace the Win_Indicator and the source size and source offset with a + // single 0 byte (a Win_Indicator for a window with no source segment.) 
+ delta_window_header_.replace(0, 4, "\0", 1); + InitializeDeltaFile(); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // The first COPY instruction should fail. With the standard format, + // it may need to see the whole delta window before knowing that it is + // invalid. + break; + } + } + EXPECT_TRUE(failed); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, + SourceSegmentSizeExceedsDictionarySize) { + ++delta_file_[delta_file_header_.size() + 2]; // increment size + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail after decoding the source segment size + EXPECT_EQ(delta_file_header_.size() + 2, i); + break; + } + } + EXPECT_TRUE(failed); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, SourceSegmentSizeMaxInt) { + WriteMaxVarintAtOffset(1, 2); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail after decoding the source segment size + EXPECT_EQ(delta_file_header_.size() + 5, i); + break; + } + } + EXPECT_TRUE(failed); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, SourceSegmentSizeNegative) { + WriteNegativeVarintAtOffset(1, 2); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail after decoding the source segment size + EXPECT_EQ(delta_file_header_.size() + 
5, i); + break; + } + } + EXPECT_TRUE(failed); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, SourceSegmentSizeInvalid) { + WriteInvalidVarintAtOffset(1, 2); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail after decoding the source segment size + EXPECT_GE(delta_file_header_.size() + 6, i); + break; + } + } + EXPECT_TRUE(failed); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, + SourceSegmentEndExceedsDictionarySize) { + ++delta_file_[delta_file_header_.size() + 3]; // increment start pos + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail after decoding the source segment end + EXPECT_EQ(delta_file_header_.size() + 3, i); + break; + } + } + EXPECT_TRUE(failed); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, SourceSegmentPosMaxInt) { + WriteMaxVarintAtOffset(3, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail after decoding the source segment pos + EXPECT_EQ(delta_file_header_.size() + 7, i); + break; + } + } + EXPECT_TRUE(failed); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, SourceSegmentPosNegative) { + WriteNegativeVarintAtOffset(3, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail after decoding 
the source segment pos + EXPECT_EQ(delta_file_header_.size() + 7, i); + break; + } + } + EXPECT_TRUE(failed); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, SourceSegmentPosInvalid) { + WriteInvalidVarintAtOffset(3, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail after decoding the source segment pos + EXPECT_GE(delta_file_header_.size() + 8, i); + break; + } + } + EXPECT_TRUE(failed); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, DeltaEncodingLengthZero) { + delta_file_[delta_file_header_.size() + 4] = 0; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail after decoding the copy address segment size + EXPECT_EQ(delta_file_header_.size() + 10, i); + break; + } + } + EXPECT_TRUE(failed); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, DeltaEncodingLengthTooLargeByOne) { + ++delta_file_[delta_file_header_.size() + 4]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail after decoding the copy address segment size + EXPECT_EQ(delta_file_header_.size() + 10, i); + break; + } + } + EXPECT_TRUE(failed); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, DeltaEncodingLengthTooSmallByOne) { + --delta_file_[delta_file_header_.size() + 4]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if 
(!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail after decoding the copy address segment size + EXPECT_EQ(delta_file_header_.size() + 10, i); + break; + } + } + EXPECT_TRUE(failed); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, DeltaEncodingLengthMaxInt) { + WriteMaxVarintAtOffset(4, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail before finishing the window header + EXPECT_GE(delta_file_header_.size() + delta_window_header_.size() + 4, + i); + break; + } + } + EXPECT_TRUE(failed); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, DeltaEncodingLengthNegative) { + WriteNegativeVarintAtOffset(4, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail after decoding the delta encoding length + EXPECT_EQ(delta_file_header_.size() + 8, i); + break; + } + } + EXPECT_TRUE(failed); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, DeltaEncodingLengthInvalid) { + WriteInvalidVarintAtOffset(4, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail after decoding the delta encoding length + EXPECT_GE(delta_file_header_.size() + 9, i); + break; + } + } + EXPECT_TRUE(failed); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, TargetWindowSizeZero) { + static const char zero_size[] = { 0x00 }; + delta_file_.replace(delta_file_header_.size() + 5, 2, zero_size, 1); + 
decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, TargetWindowSizeTooLargeByOne) { + ++delta_file_[delta_file_header_.size() + 6]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, TargetWindowSizeTooSmallByOne) { + --delta_file_[delta_file_header_.size() + 6]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. 
+ EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, TargetWindowSizeMaxInt) { + WriteMaxVarintAtOffset(5, 2); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail after decoding the target window size + EXPECT_EQ(delta_file_header_.size() + 9, i); + break; + } + } + EXPECT_TRUE(failed); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, TargetWindowSizeNegative) { + WriteNegativeVarintAtOffset(5, 2); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail after decoding the target window size + EXPECT_EQ(delta_file_header_.size() + 9, i); + break; + } + } + EXPECT_TRUE(failed); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, TargetWindowSizeInvalid) { + WriteInvalidVarintAtOffset(5, 2); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail after decoding the target window size + EXPECT_GE(delta_file_header_.size() + 10, i); + break; + } + } + EXPECT_TRUE(failed); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, + OkayToSetUpperBitsOfDeltaIndicator) { + delta_file_[delta_file_header_.size() + 7] = 0xF8; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + for (size_t i = 0; i < delta_file_.size(); ++i) { + EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[i], 1, &output_)); + } + EXPECT_TRUE(decoder_.FinishDecoding()); + EXPECT_EQ(expected_target_, output_); +} + 
+// Sets the byte at window offset 7 to 0x01 and expects the byte-by-byte
+// decode to fail exactly on that byte.
+TEST_F(VCDiffStandardDecoderTestByteByByte, DataCompressionNotSupported) {
+  delta_file_[delta_file_header_.size() + 7] = 0x01;
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  bool decode_failed = false;
+  for (size_t pos = 0; pos < delta_file_.size(); ++pos) {
+    if (decoder_.DecodeChunk(&delta_file_[pos], 1, &output_)) {
+      continue;  // Byte accepted; move on to the next one.
+    }
+    decode_failed = true;
+    // The failure is expected right after the delta indicator is decoded.
+    EXPECT_EQ(delta_file_header_.size() + 7, pos);
+    break;
+  }
+  EXPECT_TRUE(decode_failed);
+  EXPECT_EQ("", output_);
+}
+
+// Same as above with the value 0x02.
+TEST_F(VCDiffStandardDecoderTestByteByByte,
+       InstructionCompressionNotSupported) {
+  delta_file_[delta_file_header_.size() + 7] = 0x02;
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  bool decode_failed = false;
+  for (size_t pos = 0; pos < delta_file_.size(); ++pos) {
+    if (decoder_.DecodeChunk(&delta_file_[pos], 1, &output_)) {
+      continue;  // Byte accepted; move on to the next one.
+    }
+    decode_failed = true;
+    // The failure is expected right after the delta indicator is decoded.
+    EXPECT_EQ(delta_file_header_.size() + 7, pos);
+    break;
+  }
+  EXPECT_TRUE(decode_failed);
+  EXPECT_EQ("", output_);
+}
+
+// Same as above with the value 0x04.
+TEST_F(VCDiffStandardDecoderTestByteByByte, AddressCompressionNotSupported) {
+  delta_file_[delta_file_header_.size() + 7] = 0x04;
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  bool decode_failed = false;
+  for (size_t pos = 0; pos < delta_file_.size(); ++pos) {
+    if (decoder_.DecodeChunk(&delta_file_[pos], 1, &output_)) {
+      continue;  // Byte accepted; move on to the next one.
+    }
+    decode_failed = true;
+    // The failure is expected right after the delta indicator is decoded.
+    EXPECT_EQ(delta_file_header_.size() + 7, pos);
+    break;
+  }
+  EXPECT_TRUE(decode_failed);
+  EXPECT_EQ("", output_);
+}
+
+// Zeroes the byte at window offset 8.
+TEST_F(VCDiffStandardDecoderTestByteByByte, AddRunDataSizeZero) {
+  delta_file_[delta_file_header_.size() + 8] = 0;
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  bool decode_failed = false;
+  for (size_t pos = 0; pos < delta_file_.size(); ++pos) {
+    if (decoder_.DecodeChunk(&delta_file_[pos], 1, &output_)) {
+      continue;  // Byte accepted; move on to the next one.
+    }
+    decode_failed = true;
+    // The failure is expected after the copy address segment size is decoded.
+    EXPECT_EQ(delta_file_header_.size() + 10, pos);
+    break;
+  }
+  EXPECT_TRUE(decode_failed);
+  EXPECT_EQ("", output_);
+}
+
+// Increments the byte at window offset 8.
+TEST_F(VCDiffStandardDecoderTestByteByByte, AddRunDataSizeTooLargeByOne) {
+  ++delta_file_[delta_file_header_.size() + 8];
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  bool decode_failed = false;
+  for (size_t pos = 0; pos < delta_file_.size(); ++pos) {
+    if (decoder_.DecodeChunk(&delta_file_[pos], 1, &output_)) {
+      continue;  // Byte accepted; move on to the next one.
+    }
+    decode_failed = true;
+    // The failure is expected after the copy address segment size is decoded.
+    EXPECT_EQ(delta_file_header_.size() + 10, pos);
+    break;
+  }
+  EXPECT_TRUE(decode_failed);
+  EXPECT_EQ("", output_);
+}
+
+// Decrements the byte at window offset 8.
+TEST_F(VCDiffStandardDecoderTestByteByByte, AddRunDataSizeTooSmallByOne) {
+  --delta_file_[delta_file_header_.size() + 8];
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  bool decode_failed = false;
+  for (size_t pos = 0; pos < delta_file_.size(); ++pos) {
+    if (decoder_.DecodeChunk(&delta_file_[pos], 1, &output_)) {
+      continue;  // Byte accepted; move on to the next one.
+    }
+    decode_failed = true;
+    // The failure is expected after the copy address segment size is decoded.
+    EXPECT_EQ(delta_file_header_.size() + 10, pos);
+    break;
+  }
+  EXPECT_TRUE(decode_failed);
+  EXPECT_EQ("", output_);
+}
+
+// Replaces the byte at window offset 8 with a maximum-value varint.
+TEST_F(VCDiffStandardDecoderTestByteByByte, AddRunDataSizeMaxInt) {
+  WriteMaxVarintAtOffset(8, 1);
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  bool decode_failed = false;
+  for (size_t pos = 0; pos < delta_file_.size(); ++pos) {
+    if (decoder_.DecodeChunk(&delta_file_[pos], 1, &output_)) {
+      continue;  // Byte accepted; move on to the next one.
+    }
+    decode_failed = true;
+    // The failure must occur before the window header has been consumed.
+    EXPECT_GE(delta_file_header_.size() + delta_window_header_.size() + 4,
+              pos);
+    break;
+  }
+  EXPECT_TRUE(decode_failed);
+  EXPECT_EQ("", output_);
+}
+
+// Replaces the byte at window offset 8 with a negative varint.
+TEST_F(VCDiffStandardDecoderTestByteByByte, AddRunDataSizeNegative) {
+  WriteNegativeVarintAtOffset(8, 1);
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  bool decode_failed = false;
+  for (size_t pos = 0; pos < delta_file_.size(); ++pos) {
+    if (decoder_.DecodeChunk(&delta_file_[pos], 1, &output_)) {
+      continue;  // Byte accepted; move on to the next one.
+    }
+    decode_failed = true;
+    // The failure is expected after the add/run data segment size is decoded.
+    EXPECT_EQ(delta_file_header_.size() + 12, pos);
+    break;
+  }
+  EXPECT_TRUE(decode_failed);
+  EXPECT_EQ("", output_);
+}
+
+// Replaces the byte at window offset 8 with an invalid varint.
+TEST_F(VCDiffStandardDecoderTestByteByByte, AddRunDataSizeInvalid) {
+  WriteInvalidVarintAtOffset(8, 1);
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  bool decode_failed = false;
+  for (size_t pos = 0; pos < delta_file_.size(); ++pos) {
+    if (decoder_.DecodeChunk(&delta_file_[pos], 1, &output_)) {
+      continue;  // Byte accepted; move on to the next one.
+    }
+    decode_failed = true;
+    // The failure must occur no later than the end of the add/run data
+    // segment size.
+    EXPECT_GE(delta_file_header_.size() + 13, pos);
+    break;
+  }
+  EXPECT_TRUE(decode_failed);
+  EXPECT_EQ("", output_);
+}
+
+// Zeroes the byte at window offset 9.
+TEST_F(VCDiffStandardDecoderTestByteByByte, InstructionsSizeZero) {
+  delta_file_[delta_file_header_.size() + 9] = 0;
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  bool decode_failed = false;
+  for (size_t pos = 0; pos < delta_file_.size(); ++pos) {
+    if (decoder_.DecodeChunk(&delta_file_[pos], 1, &output_)) {
+      continue;  // Byte accepted; move on to the next one.
+    }
+    decode_failed = true;
+    // The failure is expected after the copy address segment size is decoded.
+    EXPECT_EQ(delta_file_header_.size() + 10, pos);
+    break;
+  }
+  EXPECT_TRUE(decode_failed);
+  EXPECT_EQ("", output_);
+}
+
+// Increments the byte at window offset 9.
+TEST_F(VCDiffStandardDecoderTestByteByByte, InstructionsSizeTooLargeByOne) {
+  ++delta_file_[delta_file_header_.size() + 9];
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  bool decode_failed = false;
+  for (size_t pos = 0; pos < delta_file_.size(); ++pos) {
+    if (decoder_.DecodeChunk(&delta_file_[pos], 1, &output_)) {
+      continue;  // Byte accepted; move on to the next one.
+    }
+    decode_failed = true;
+    // The failure is expected after the copy address segment size is decoded.
+    EXPECT_EQ(delta_file_header_.size() + 10, pos);
+    break;
+  }
+  EXPECT_TRUE(decode_failed);
+  EXPECT_EQ("", output_);
+}
+
+// Decrements the byte at window offset 9.
+TEST_F(VCDiffStandardDecoderTestByteByByte, InstructionsSizeTooSmallByOne) {
+  --delta_file_[delta_file_header_.size() + 9];
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  bool decode_failed = false;
+  for (size_t pos = 0; pos < delta_file_.size(); ++pos) {
+    if (decoder_.DecodeChunk(&delta_file_[pos], 1, &output_)) {
+      continue;  // Byte accepted; move on to the next one.
+    }
+    decode_failed = true;
+    // The failure is expected after the copy address segment size is decoded.
+    EXPECT_EQ(delta_file_header_.size() + 10, pos);
+    break;
+  }
+  EXPECT_TRUE(decode_failed);
+  EXPECT_EQ("", output_);
+}
+
+// Replaces the byte at window offset 9 with a maximum-value varint.
+TEST_F(VCDiffStandardDecoderTestByteByByte, InstructionsSizeMaxInt) {
+  WriteMaxVarintAtOffset(9, 1);
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  bool decode_failed = false;
+  for (size_t pos = 0; pos < delta_file_.size(); ++pos) {
+    if (decoder_.DecodeChunk(&delta_file_[pos], 1, &output_)) {
+      continue;  // Byte accepted; move on to the next one.
+    }
+    decode_failed = true;
+    // The failure must occur before the window header has been consumed.
+    EXPECT_GE(delta_file_header_.size() + delta_window_header_.size() + 4,
+              pos);
+    break;
+  }
+  EXPECT_TRUE(decode_failed);
+  EXPECT_EQ("", output_);
+}
+
+// Replaces the byte at window offset 9 with a negative varint.
+TEST_F(VCDiffStandardDecoderTestByteByByte, InstructionsSizeNegative) {
+  WriteNegativeVarintAtOffset(9, 1);
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  bool decode_failed = false;
+  for (size_t pos = 0; pos < delta_file_.size(); ++pos) {
+    if (decoder_.DecodeChunk(&delta_file_[pos], 1, &output_)) {
+      continue;  // Byte accepted; move on to the next one.
+    }
+    decode_failed = true;
+    // The failure is expected after the instructions segment size is decoded.
+    EXPECT_EQ(delta_file_header_.size() + 13, pos);
+    break;
+  }
+  EXPECT_TRUE(decode_failed);
+  EXPECT_EQ("", output_);
+}
+
+// Replaces the byte at window offset 9 with an invalid varint.
+TEST_F(VCDiffStandardDecoderTestByteByByte, InstructionsSizeInvalid) {
+  WriteInvalidVarintAtOffset(9, 1);
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  bool decode_failed = false;
+  for (size_t pos = 0; pos < delta_file_.size(); ++pos) {
+    if (decoder_.DecodeChunk(&delta_file_[pos], 1, &output_)) {
+      continue;  // Byte accepted; move on to the next one.
+    }
+    decode_failed = true;
+    // The failure must occur no later than the end of the instructions
+    // segment size.
+    EXPECT_GE(delta_file_header_.size() + 14, pos);
+    break;
+  }
+  EXPECT_TRUE(decode_failed);
+  EXPECT_EQ("", output_);
+}
+
+// Zeroes the byte at window offset 10.
+TEST_F(VCDiffStandardDecoderTestByteByByte, CopyAddressSizeZero) {
+  delta_file_[delta_file_header_.size() + 10] = 0;
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  bool decode_failed = false;
+  for (size_t pos = 0; pos < delta_file_.size(); ++pos) {
+    if (decoder_.DecodeChunk(&delta_file_[pos], 1, &output_)) {
+      continue;  // Byte accepted; move on to the next one.
+    }
+    decode_failed = true;
+    // The failure is expected after the copy address segment size is decoded.
+    EXPECT_EQ(delta_file_header_.size() + 10, pos);
+    break;
+  }
+  EXPECT_TRUE(decode_failed);
+  EXPECT_EQ("", output_);
+}
+
+// Increments the byte at window offset 10.
+TEST_F(VCDiffStandardDecoderTestByteByByte, CopyAddressSizeTooLargeByOne) {
+  ++delta_file_[delta_file_header_.size() + 10];
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  bool decode_failed = false;
+  for (size_t pos = 0; pos < delta_file_.size(); ++pos) {
+    if (decoder_.DecodeChunk(&delta_file_[pos], 1, &output_)) {
+      continue;  // Byte accepted; move on to the next one.
+    }
+    decode_failed = true;
+    // The failure is expected after the copy address segment size is decoded.
+    EXPECT_EQ(delta_file_header_.size() + 10, pos);
+    break;
+  }
+  EXPECT_TRUE(decode_failed);
+  EXPECT_EQ("", output_);
+}
+
+// Decrements the byte at window offset 10.
+TEST_F(VCDiffStandardDecoderTestByteByByte, CopyAddressSizeTooSmallByOne) {
+  --delta_file_[delta_file_header_.size() + 10];
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  bool decode_failed = false;
+  for (size_t pos = 0; pos < delta_file_.size(); ++pos) {
+    if (decoder_.DecodeChunk(&delta_file_[pos], 1, &output_)) {
+      continue;  // Byte accepted; move on to the next one.
+    }
+    decode_failed = true;
+    // The failure is expected after the copy address segment size is decoded.
+    EXPECT_EQ(delta_file_header_.size() + 10, pos);
+    break;
+  }
+  EXPECT_TRUE(decode_failed);
+  EXPECT_EQ("", output_);
+}
+
+// Replaces the byte at window offset 10 with a maximum-value varint.
+TEST_F(VCDiffStandardDecoderTestByteByByte, CopyAddressSizeMaxInt) {
+  WriteMaxVarintAtOffset(10, 1);
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  bool decode_failed = false;
+  for (size_t pos = 0; pos < delta_file_.size(); ++pos) {
+    if (decoder_.DecodeChunk(&delta_file_[pos], 1, &output_)) {
+      continue;  // Byte accepted; move on to the next one.
+    }
+    decode_failed = true;
+    // The failure is expected after the copy address segment size is decoded.
+    EXPECT_EQ(delta_file_header_.size() + 14, pos);
+    break;
+  }
+  EXPECT_TRUE(decode_failed);
+  EXPECT_EQ("", output_);
+}
+
+// Replaces the byte at window offset 10 with a negative varint.
+TEST_F(VCDiffStandardDecoderTestByteByByte, CopyAddressSizeNegative) {
+  WriteNegativeVarintAtOffset(10, 1);
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  bool decode_failed = false;
+  for (size_t pos = 0; pos < delta_file_.size(); ++pos) {
+    if (decoder_.DecodeChunk(&delta_file_[pos], 1, &output_)) {
+      continue;  // Byte accepted; move on to the next one.
+    }
+    decode_failed = true;
+    // The failure is expected after the copy address segment size is decoded.
+    EXPECT_EQ(delta_file_header_.size() + 14, pos);
+    break;
+  }
+  EXPECT_TRUE(decode_failed);
+  EXPECT_EQ("", output_);
+}
+
+// Replaces the byte at window offset 10 with an invalid varint.
+TEST_F(VCDiffStandardDecoderTestByteByByte, CopyAddressSizeInvalid) {
+  WriteInvalidVarintAtOffset(10, 1);
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  bool decode_failed = false;
+  for (size_t pos = 0; pos < delta_file_.size(); ++pos) {
+    if (decoder_.DecodeChunk(&delta_file_[pos], 1, &output_)) {
+      continue;  // Byte accepted; move on to the next one.
+    }
+    decode_failed = true;
+    // The failure must occur no later than the end of the copy address
+    // segment size.
+    EXPECT_GE(delta_file_header_.size() + 15, pos);
+    break;
+  }
+  EXPECT_TRUE(decode_failed);
+  EXPECT_EQ("", output_);
+}
+
+// Decrements the byte at window offset 9 and increments the byte at window
+// offset 10; the byte-by-byte decode is then expected to fail somewhere.
+TEST_F(VCDiffStandardDecoderTestByteByByte, InstructionsEndEarly) {
+  const size_t window_start = delta_file_header_.size();
+  --delta_file_[window_start + 9];
+  ++delta_file_[window_start + 10];
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  bool decode_failed = false;
+  for (size_t pos = 0; pos < delta_file_.size(); ++pos) {
+    if (decoder_.DecodeChunk(&delta_file_[pos], 1, &output_)) {
+      continue;  // Byte accepted; move on to the next one.
+    }
+    decode_failed = true;
+    break;
+  }
+  EXPECT_TRUE(decode_failed);
+  // Whatever was emitted before the failure must not exceed the size of the
+  // expected target.
+  EXPECT_GE(expected_target_.size(), output_.size());
+}
+
+// From this point on, the tests should also be run against the interleaved
+// format.
+ +TEST_F(VCDiffStandardDecoderTestByteByByte, CopyMoreThanExpectedTarget) { + delta_file_[delta_file_header_.size() + 0x70] = + FirstByteOfStringLength(kExpectedTarget); + delta_file_[delta_file_header_.size() + 0x71] = + SecondByteOfStringLength(kExpectedTarget) + 1; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, CopySizeZero) { + delta_file_[delta_file_header_.size() + 0x70] = 0; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, CopySizeTooLargeByOne) { + ++delta_file_[delta_file_header_.size() + 0x70]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. 
+ EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, CopySizeTooSmallByOne) { + --delta_file_[delta_file_header_.size() + 0x70]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, CopySizeMaxInt) { + WriteMaxVarintAtOffset(0x70, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, CopySizeNegative) { + WriteNegativeVarintAtOffset(0x70, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, CopySizeInvalid) { + WriteInvalidVarintAtOffset(0x70, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. 
+ EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, CopyAddressBeyondHereAddress) { + delta_file_[delta_file_header_.size() + 0x7B] = + FirstByteOfStringLength(kDictionary); + delta_file_[delta_file_header_.size() + 0x7C] = + SecondByteOfStringLength(kDictionary); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, CopyAddressMaxInt) { + WriteMaxVarintAtOffset(0x7B, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, CopyAddressNegative) { + WriteNegativeVarintAtOffset(0x70, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. 
+ EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, CopyAddressInvalid) { + WriteInvalidVarintAtOffset(0x70, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, AddMoreThanExpectedTarget) { + delta_file_[delta_file_header_.size() + 0x72] = + FirstByteOfStringLength(kExpectedTarget); + delta_file_[delta_file_header_.size() + 0x73] = + SecondByteOfStringLength(kExpectedTarget) + 1; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, AddSizeZero) { + delta_file_[delta_file_header_.size() + 0x72] = 0; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. 
+ EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, AddSizeTooLargeByOne) { + ++delta_file_[delta_file_header_.size() + 0x72]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, AddSizeTooSmallByOne) { + --delta_file_[delta_file_header_.size() + 0x72]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, AddSizeMaxInt) { + WriteMaxVarintAtOffset(0x72, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. 
+ EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, AddSizeNegative) { + WriteNegativeVarintAtOffset(0x72, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, AddSizeInvalid) { + WriteInvalidVarintAtOffset(0x72, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, RunMoreThanExpectedTarget) { + delta_file_[delta_file_header_.size() + 0x78] = + FirstByteOfStringLength(kExpectedTarget); + delta_file_[delta_file_header_.size() + 0x79] = + SecondByteOfStringLength(kExpectedTarget) + 1; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. 
+ EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, RunSizeZero) { + delta_file_[delta_file_header_.size() + 0x78] = 0; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, RunSizeTooLargeByOne) { + ++delta_file_[delta_file_header_.size() + 0x78]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, RunSizeTooSmallByOne) { + --delta_file_[delta_file_header_.size() + 0x78]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. 
+ EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, RunSizeMaxInt) { + WriteMaxVarintAtOffset(0x78, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, RunSizeNegative) { + WriteNegativeVarintAtOffset(0x78, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffStandardDecoderTestByteByByte, RunSizeInvalid) { + WriteInvalidVarintAtOffset(0x78, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +} // namespace open_vcdiff diff --git a/src/vcdecoder_test3.cc b/src/vcdecoder_test3.cc new file mode 100644 index 0000000..c470d0d --- /dev/null +++ b/src/vcdecoder_test3.cc @@ -0,0 +1,1178 @@ +// Copyright 2008 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <config.h> +#include "google/vcdecoder.h" +#include <cstdlib> // free, posix_memalign +#include <cstring> // memcpy +#include <string> +#include "testing.h" +#include "varint_bigendian.h" +#include "vcdecoder_test.h" +#include "vcdiff_defs.h" // VCD_SOURCE + +#ifdef HAVE_MALLOC_H +#include <malloc.h> +#endif // HAVE_MALLOC_H + +#ifdef HAVE_SYS_MMAN_H +#define _XOPEN_SOURCE 600 // posix_memalign +#include <sys/mman.h> // mprotect +#endif // HAVE_SYS_MMAN_H + +#ifdef HAVE_UNISTD_H +#include <unistd.h> // getpagesize +#endif // HAVE_UNISTD_H + +namespace open_vcdiff { + +using std::string; + +// Test headers, valid and invalid. 
+
+// Decoding only the file header succeeds and produces no output.
+TEST_F(VCDiffInterleavedDecoderTest, DecodeHeaderOnly) {
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_TRUE(decoder_.DecodeChunk(delta_file_header_.data(),
+                                   delta_file_header_.size(),
+                                   &output_));
+  EXPECT_TRUE(decoder_.FinishDecoding());
+  EXPECT_EQ("", output_);
+}
+
+// A header truncated by two bytes is accepted as a chunk, but
+// FinishDecoding() must then report failure.
+TEST_F(VCDiffInterleavedDecoderTest, PartialHeaderNotEnough) {
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_TRUE(decoder_.DecodeChunk(delta_file_header_.data(),
+                                   delta_file_header_.size() - 2,
+                                   &output_));
+  EXPECT_FALSE(decoder_.FinishDecoding());
+  EXPECT_EQ("", output_);
+}
+
+// Overwrites byte 1 of the delta file; decoding must fail.
+TEST_F(VCDiffInterleavedDecoderTest, BadMagicNumber) {
+  delta_file_[1] = 'Q' | 0x80;
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// Overwrites byte 3 of the delta file; decoding must fail.
+TEST_F(VCDiffInterleavedDecoderTest, BadVersionNumber) {
+  delta_file_[3] = 0x01;
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// Overwrites byte 4 of the delta file; decoding must fail.
+TEST_F(VCDiffInterleavedDecoderTest, SecondaryCompressionNotSupported) {
+  delta_file_[4] = 0x01;
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// The unmodified delta file decodes to the expected target.
+TEST_F(VCDiffInterleavedDecoderTest, Decode) {
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_TRUE(decoder_.DecodeChunk(delta_file_.data(),
+                                   delta_file_.size(),
+                                   &output_));
+  EXPECT_TRUE(decoder_.FinishDecoding());
+  EXPECT_EQ(expected_target_, output_);
+}
+
+// Same as Decode, after adding a computed checksum to the delta file.
+TEST_F(VCDiffInterleavedDecoderTest, DecodeWithChecksum) {
+  ComputeAndAddChecksum();
+  InitializeDeltaFile();
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_TRUE(decoder_.DecodeChunk(delta_file_.data(),
+                                   delta_file_.size(),
+                                   &output_));
+  EXPECT_TRUE(decoder_.FinishDecoding());
+  EXPECT_EQ(expected_target_, output_);
+}
+
+// An arbitrary checksum value must make decoding fail with no output.
+TEST_F(VCDiffInterleavedDecoderTest, ChecksumDoesNotMatch) {
+  AddChecksum(0xBADBAD);
+  InitializeDeltaFile();
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// Appends a ten-byte invalid varint to the window header in place of a
+// checksum; decoding must fail with no output.
+TEST_F(VCDiffInterleavedDecoderTest, ChecksumIsInvalid64BitVarint) {
+  // The values 0x81 and 0x80 do not fit in a signed char, so initializing a
+  // char array from them in a braced list is a narrowing conversion, which
+  // is ill-formed in C++11 and later.  The explicit casts keep the same byte
+  // values while remaining valid in every language version.
+  static const char kInvalidVarint[] = {
+      static_cast<char>(0x81), static_cast<char>(0x80),
+      static_cast<char>(0x80), static_cast<char>(0x80),
+      static_cast<char>(0x80), static_cast<char>(0x80),
+      static_cast<char>(0x80), static_cast<char>(0x80),
+      static_cast<char>(0x80), 0x00 };
+  delta_window_header_[0] |= VCD_CHECKSUM;
+  delta_window_header_.append(kInvalidVarint, sizeof(kInvalidVarint));
+  // Adjust delta window size to include size of invalid Varint.
+  string size_of_invalid_varint;
+  VarintBE<int32_t>::AppendToString(
+      static_cast<int32_t>(delta_window_header_[4] + sizeof(kInvalidVarint)),
+      &size_of_invalid_varint);
+  delta_window_header_.replace(4, 1, size_of_invalid_varint);
+  InitializeDeltaFile();
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// Remove one byte from the length of the chunk to process, and
+// verify that an error is returned for FinishDecoding().
+TEST_F(VCDiffInterleavedDecoderTest, FinishAfterDecodingPartialWindow) {
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_TRUE(decoder_.DecodeChunk(delta_file_.data(),
+                                   delta_file_.size() - 1,
+                                   &output_));
+  EXPECT_FALSE(decoder_.FinishDecoding());
+  // The decoder should not create more target bytes than were expected.
+  EXPECT_GE(expected_target_.size(), output_.size());
+}
+
+// Same as above, but the chunk stops one byte short of the end of the
+// window header.
+TEST_F(VCDiffInterleavedDecoderTest, FinishAfterDecodingPartialWindowHeader) {
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_TRUE(decoder_.DecodeChunk(delta_file_.data(),
+                                   delta_file_header_.size()
+                                       + delta_window_header_.size() - 1,
+                                   &output_));
+  EXPECT_FALSE(decoder_.FinishDecoding());
+  // The decoder should not create more target bytes than were expected.
+  EXPECT_GE(expected_target_.size(), output_.size());
+}
+
+// Fuzz bits to make sure decoder does not violently crash.
+// This test has no expected behavior except that no crashes should occur.
+// In some cases, changing bits will still decode to the correct target;
+// for example, changing unused bits within a bitfield.
+TEST_F(VCDiffInterleavedDecoderTest, FuzzBits) {
+  while (FuzzOneByteInDeltaFile()) {
+    decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+    if (decoder_.DecodeChunk(delta_file_.data(),
+                             delta_file_.size(),
+                             &output_)) {
+      // The result is deliberately ignored: only crashes matter here.
+      decoder_.FinishDecoding();
+    }
+    // Restore the delta file and the output before the next iteration.
+    InitializeDeltaFile();
+    output_.clear();
+  }
+}
+
+// If a checksum is present, then fuzzing any of the bits may produce an error,
+// but it should not result in an incorrect target being produced without
+// an error.
+TEST_F(VCDiffInterleavedDecoderTest, FuzzBitsWithChecksum) {
+  ComputeAndAddChecksum();
+  InitializeDeltaFile();
+  while (FuzzOneByteInDeltaFile()) {
+    decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+    if (decoder_.DecodeChunk(delta_file_.data(),
+                             delta_file_.size(),
+                             &output_)) {
+      if (decoder_.FinishDecoding()) {
+        // Decoding succeeded.  Make sure the correct target was produced.
+        EXPECT_EQ(expected_target_, output_);
+      }
+    } else {
+      EXPECT_EQ("", output_);
+    }
+    // Restore the delta file and the output before the next iteration.
+    InitializeDeltaFile();
+    output_.clear();
+  }
+}
+
+// With annotated output enabled, the annotated result must match the
+// fixture's expected annotated target.
+TEST_F(VCDiffInterleavedDecoderTest, CheckAnnotatedOutput) {
+  decoder_.EnableAnnotatedOutput();
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_TRUE(decoder_.DecodeChunk(delta_file_.data(),
+                                   delta_file_.size(),
+                                   &output_));
+  EXPECT_TRUE(decoder_.FinishDecoding());
+  string annotated_output;
+  decoder_.GetAnnotatedOutput(&annotated_output);
+  EXPECT_EQ(expected_annotated_target_, annotated_output);
+}
+
+// The remaining tests corrupt a single field of the delta window and expect
+// decoding of the whole file in one chunk to fail without any output.
+
+// Overwrites the two bytes at window offsets 0x0C/0x0D with a length one
+// larger than that of kExpectedTarget.
+TEST_F(VCDiffInterleavedDecoderTest, CopyMoreThanExpectedTarget) {
+  delta_file_[delta_file_header_.size() + 0x0C] =
+      FirstByteOfStringLength(kExpectedTarget);
+  delta_file_[delta_file_header_.size() + 0x0D] =
+      SecondByteOfStringLength(kExpectedTarget) + 1;
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// Zeroes the byte at window offset 0x0C.
+TEST_F(VCDiffInterleavedDecoderTest, CopySizeZero) {
+  delta_file_[delta_file_header_.size() + 0x0C] = 0;
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// Increments the byte at window offset 0x0C.
+TEST_F(VCDiffInterleavedDecoderTest, CopySizeTooLargeByOne) {
+  ++delta_file_[delta_file_header_.size() + 0x0C];
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// Decrements the byte at window offset 0x0C.
+TEST_F(VCDiffInterleavedDecoderTest, CopySizeTooSmallByOne) {
+  --delta_file_[delta_file_header_.size() + 0x0C];
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// Replaces the byte at window offset 0x0C with a maximum-value varint.
+TEST_F(VCDiffInterleavedDecoderTest, CopySizeMaxInt) {
+  WriteMaxVarintAtOffset(0x0C, 1);
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// Replaces the byte at window offset 0x0C with a negative varint.
+TEST_F(VCDiffInterleavedDecoderTest, CopySizeNegative) {
+  WriteNegativeVarintAtOffset(0x0C, 1);
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// Replaces the byte at window offset 0x0C with an invalid varint.
+TEST_F(VCDiffInterleavedDecoderTest, CopySizeInvalid) {
+  WriteInvalidVarintAtOffset(0x0C, 1);
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// Overwrites the two bytes at window offsets 0x0D/0x0E with the length of
+// kDictionary.
+TEST_F(VCDiffInterleavedDecoderTest, CopyAddressBeyondHereAddress) {
+  delta_file_[delta_file_header_.size() + 0x0D] =
+      FirstByteOfStringLength(kDictionary);
+  delta_file_[delta_file_header_.size() + 0x0E] =
+      SecondByteOfStringLength(kDictionary);
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// Replaces the byte at window offset 0x0D with a maximum-value varint.
+TEST_F(VCDiffInterleavedDecoderTest, CopyAddressMaxInt) {
+  WriteMaxVarintAtOffset(0x0D, 1);
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// Replaces the byte at window offset 0x0D with a negative varint.
+TEST_F(VCDiffInterleavedDecoderTest, CopyAddressNegative) {
+  WriteNegativeVarintAtOffset(0x0D, 1);
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// Replaces the byte at window offset 0x0D with an invalid varint.
+TEST_F(VCDiffInterleavedDecoderTest, CopyAddressInvalid) {
+  WriteInvalidVarintAtOffset(0x0D, 1);
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// Overwrites the two bytes at window offsets 0x0F/0x10 with a length one
+// larger than that of kExpectedTarget.
+TEST_F(VCDiffInterleavedDecoderTest, AddMoreThanExpectedTarget) {
+  delta_file_[delta_file_header_.size() + 0x0F] =
+      FirstByteOfStringLength(kExpectedTarget);
+  delta_file_[delta_file_header_.size() + 0x10] =
+      SecondByteOfStringLength(kExpectedTarget) + 1;
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// Zeroes the byte at window offset 0x0F.
+TEST_F(VCDiffInterleavedDecoderTest, AddSizeZero) {
+  delta_file_[delta_file_header_.size() + 0x0F] = 0;
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// Increments the byte at window offset 0x0F.
+TEST_F(VCDiffInterleavedDecoderTest, AddSizeTooLargeByOne) {
+  ++delta_file_[delta_file_header_.size() + 0x0F];
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// Decrements the byte at window offset 0x0F.
+TEST_F(VCDiffInterleavedDecoderTest, AddSizeTooSmallByOne) {
+  --delta_file_[delta_file_header_.size() + 0x0F];
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// Replaces the byte at window offset 0x0F with a maximum-value varint.
+TEST_F(VCDiffInterleavedDecoderTest, AddSizeMaxInt) {
+  WriteMaxVarintAtOffset(0x0F, 1);
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// Replaces the byte at window offset 0x0F with a negative varint.
+TEST_F(VCDiffInterleavedDecoderTest, AddSizeNegative) {
+  WriteNegativeVarintAtOffset(0x0F, 1);
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// Replaces the byte at window offset 0x0F with an invalid varint.
+TEST_F(VCDiffInterleavedDecoderTest, AddSizeInvalid) {
+  WriteInvalidVarintAtOffset(0x0F, 1);
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// Overwrites the two bytes at window offsets 0x5F/0x60 with a length one
+// larger than that of kExpectedTarget.
+TEST_F(VCDiffInterleavedDecoderTest, RunMoreThanExpectedTarget) {
+  delta_file_[delta_file_header_.size() + 0x5F] =
+      FirstByteOfStringLength(kExpectedTarget);
+  delta_file_[delta_file_header_.size() + 0x60] =
+      SecondByteOfStringLength(kExpectedTarget) + 1;
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// Zeroes the byte at window offset 0x5F.
+TEST_F(VCDiffInterleavedDecoderTest, RunSizeZero) {
+  delta_file_[delta_file_header_.size() + 0x5F] = 0;
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// Increments the byte at window offset 0x5F.
+TEST_F(VCDiffInterleavedDecoderTest, RunSizeTooLargeByOne) {
+  ++delta_file_[delta_file_header_.size() + 0x5F];
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// Decrements the byte at window offset 0x5F.
+TEST_F(VCDiffInterleavedDecoderTest, RunSizeTooSmallByOne) {
+  --delta_file_[delta_file_header_.size() + 0x5F];
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+// Replaces the byte at window offset 0x5F with a maximum-value varint.
+TEST_F(VCDiffInterleavedDecoderTest, RunSizeMaxInt) {
+  WriteMaxVarintAtOffset(0x5F, 1);
+  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
+  EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
+                                    delta_file_.size(),
+                                    &output_));
+  EXPECT_EQ("", output_);
+}
+
+TEST_F(VCDiffInterleavedDecoderTest, 
RunSizeNegative) { + WriteNegativeVarintAtOffset(0x5F, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffInterleavedDecoderTest, RunSizeInvalid) { + WriteInvalidVarintAtOffset(0x5F, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +#if defined(HAVE_MPROTECT) && \ + (defined(HAVE_MEMALIGN) || defined(HAVE_POSIX_MEMALIGN)) +TEST_F(VCDiffInterleavedDecoderTest, ShouldNotReadPastEndOfBuffer) { + // Allocate two memory pages. + const int page_size = getpagesize(); + void* two_pages = NULL; +#ifdef HAVE_POSIX_MEMALIGN + posix_memalign(&two_pages, page_size, 2 * page_size); +#else // !HAVE_POSIX_MEMALIGN + two_pages = memalign(page_size, 2 * page_size); +#endif // HAVE_POSIX_MEMALIGN + char* const first_page = reinterpret_cast<char*>(two_pages); + char* const second_page = first_page + page_size; + + // Place the delta string at the end of the first page. + char* delta_with_guard = second_page - delta_file_.size(); + memcpy(delta_with_guard, delta_file_.data(), delta_file_.size()); + + // Make the second page unreadable. + mprotect(second_page, page_size, PROT_NONE); + + // Now perform the decode operation, which will cause a segmentation fault + // if it reads past the end of the buffer. + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_TRUE(decoder_.DecodeChunk(delta_with_guard, + delta_file_.size(), + &output_)); + EXPECT_TRUE(decoder_.FinishDecoding()); + EXPECT_EQ(expected_target_, output_); + + // Undo the mprotect. + mprotect(second_page, page_size, PROT_READ|PROT_WRITE); + free(two_pages); +} + +TEST_F(VCDiffInterleavedDecoderTest, ShouldNotReadPastBeginningOfBuffer) { + // Allocate two memory pages. 
+ const int page_size = getpagesize(); + void* two_pages = NULL; +#ifdef HAVE_POSIX_MEMALIGN + posix_memalign(&two_pages, page_size, 2 * page_size); +#else // !HAVE_POSIX_MEMALIGN + two_pages = memalign(page_size, 2 * page_size); +#endif // HAVE_POSIX_MEMALIGN + char* const first_page = reinterpret_cast<char*>(two_pages); + char* const second_page = first_page + page_size; + + // Make the first page unreadable. + mprotect(first_page, page_size, PROT_NONE); + + // Place the delta string at the beginning of the second page. + char* delta_with_guard = second_page; + memcpy(delta_with_guard, delta_file_.data(), delta_file_.size()); + + // Now perform the decode operation, which will cause a segmentation fault + // if it reads past the beginning of the buffer. + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_TRUE(decoder_.DecodeChunk(delta_with_guard, + delta_file_.size(), + &output_)); + EXPECT_TRUE(decoder_.FinishDecoding()); + EXPECT_EQ(expected_target_, output_); + + // Undo the mprotect. + mprotect(first_page, page_size, PROT_READ|PROT_WRITE); + free(two_pages); +} +#endif // HAVE_MPROTECT && (HAVE_MEMALIGN || HAVE_POSIX_MEMALIGN) + +// These are the same tests as for VCDiffInterleavedDecoderTest, with the added +// complication that instead of calling DecodeChunk() once with the entire data +// set, DecodeChunk() is called once for each byte of input. This is intended +// to shake out any bugs with rewind and resume while parsing chunked data. + +typedef VCDiffInterleavedDecoderTest VCDiffInterleavedDecoderTestByteByByte; + +// Test headers, valid and invalid. 
+ +TEST_F(VCDiffInterleavedDecoderTestByteByByte, DecodeHeaderOnly) { + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + for (size_t i = 0; i < delta_file_header_.size(); ++i) { + EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_header_[i], 1, &output_)); + } + EXPECT_TRUE(decoder_.FinishDecoding()); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, PartialHeaderNotEnough) { + delta_file_.resize(delta_file_header_.size() - 2); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + for (size_t i = 0; i < delta_file_.size(); ++i) { + EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[i], 1, &output_)); + } + EXPECT_FALSE(decoder_.FinishDecoding()); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, BadMagicNumber) { + delta_file_[1] = 'Q' | 0x80; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + // It should fail at the position that was altered + EXPECT_EQ(1U, i); + failed = true; + break; + } + } + EXPECT_TRUE(failed); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, BadVersionNumber) { + delta_file_[3] = 0x01; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail at the position that was altered + EXPECT_EQ(3U, i); + break; + } + } + EXPECT_TRUE(failed); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, + SecondaryCompressionNotSupported) { + delta_file_[4] = 0x01; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should 
fail at the position that was altered + EXPECT_EQ(4U, i); + break; + } + } + EXPECT_TRUE(failed); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, Decode) { + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + for (size_t i = 0; i < delta_file_.size(); ++i) { + EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[i], 1, &output_)); + } + EXPECT_TRUE(decoder_.FinishDecoding()); + EXPECT_EQ(expected_target_, output_); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, DecodeWithChecksum) { + ComputeAndAddChecksum(); + InitializeDeltaFile(); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + for (size_t i = 0; i < delta_file_.size(); ++i) { + EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[i], 1, &output_)); + } + EXPECT_TRUE(decoder_.FinishDecoding()); + EXPECT_EQ(expected_target_, output_); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, ChecksumDoesNotMatch) { + AddChecksum(0xBADBAD); + InitializeDeltaFile(); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail after decoding the entire delta file + EXPECT_EQ(delta_file_.size() - 1, i); + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, ChecksumIsInvalid64BitVarint) { + static const char kInvalidVarint[] = { 0x81, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x00 }; + delta_window_header_[0] |= VCD_CHECKSUM; + delta_window_header_.append(kInvalidVarint, sizeof(kInvalidVarint)); + // Adjust delta window size to include size of invalid Varint. 
+ string size_of_invalid_varint; + VarintBE<int32_t>::AppendToString( + static_cast<int32_t>(delta_window_header_[4] + sizeof(kInvalidVarint)), + &size_of_invalid_varint); + delta_window_header_.replace(4, 1, size_of_invalid_varint); + InitializeDeltaFile(); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail while trying to interpret the checksum. + EXPECT_EQ(delta_file_header_.size() + delta_window_header_.size() - 2, i); + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +// Fuzz bits to make sure decoder does not violently crash. +// This test has no expected behavior except that no crashes should occur. +// In some cases, changing bits will still decode to the correct target; +// for example, changing unused bits within a bitfield. +TEST_F(VCDiffInterleavedDecoderTestByteByByte, FuzzBits) { + while (FuzzOneByteInDeltaFile()) { + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + if (!failed) { + decoder_.FinishDecoding(); + } + InitializeDeltaFile(); + output_.clear(); + } +} + +// If a checksum is present, then fuzzing any of the bits may produce an error, +// but it should not result in an incorrect target being produced without +// an error. 
+TEST_F(VCDiffInterleavedDecoderTestByteByByte, FuzzBitsWithChecksum) { + ComputeAndAddChecksum(); + InitializeDeltaFile(); + while (FuzzOneByteInDeltaFile()) { + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + if (!failed) { + if (decoder_.FinishDecoding()) { + // Decoding succeeded. Make sure the correct target was produced. + EXPECT_EQ(expected_target_, output_); + } + } + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); + InitializeDeltaFile(); + output_.clear(); + } +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, CheckAnnotatedOutput) { + decoder_.EnableAnnotatedOutput(); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + for (size_t i = 0; i < delta_file_.size(); ++i) { + EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[i], 1, &output_)); + } + EXPECT_TRUE(decoder_.FinishDecoding()); + string annotated_output; + decoder_.GetAnnotatedOutput(&annotated_output); + EXPECT_EQ(expected_annotated_target_, annotated_output); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, + CopyInstructionsShouldFailIfNoSourceSegment) { + // Replace the Win_Indicator and the source size and source offset with a + // single 0 byte (a Win_Indicator for a window with no source segment.) + delta_window_header_.replace(0, 4, "\0", 1); + InitializeDeltaFile(); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // The first COPY instruction should fail. 
+ EXPECT_EQ(delta_file_header_.size() + delta_window_header_.size() + 2, i); + break; + } + } + EXPECT_TRUE(failed); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, CopyMoreThanExpectedTarget) { + delta_file_[delta_file_header_.size() + 0x0C] = + FirstByteOfStringLength(kExpectedTarget); + delta_file_[delta_file_header_.size() + 0x0D] = + SecondByteOfStringLength(kExpectedTarget) + 1; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail at the position that was altered + EXPECT_EQ(delta_file_header_.size() + 0x0D, i); + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +// A COPY instruction with an explicit size of 0 is not illegal according to the +// standard, although it is inefficient and should not be generated by any +// reasonable encoder. Changing the size of a COPY instruction to zero will +// cause a failure because the generated target window size will not match the +// expected target size. +TEST_F(VCDiffInterleavedDecoderTestByteByByte, CopySizeZero) { + delta_file_[delta_file_header_.size() + 0x0C] = 0; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. 
+ EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, CopySizeTooLargeByOne) { + ++delta_file_[delta_file_header_.size() + 0x0C]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, CopySizeTooSmallByOne) { + --delta_file_[delta_file_header_.size() + 0x0C]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, CopySizeMaxInt) { + WriteMaxVarintAtOffset(0x0C, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail at the position that was altered + EXPECT_EQ(delta_file_header_.size() + 0x10, i); + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. 
+ EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, CopySizeNegative) { + WriteNegativeVarintAtOffset(0x0C, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail at the position that was altered + EXPECT_EQ(delta_file_header_.size() + 0x10, i); + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, CopySizeInvalid) { + WriteInvalidVarintAtOffset(0x0C, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail at the position that was altered + EXPECT_EQ(delta_file_header_.size() + 0x10, i); + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, CopyAddressBeyondHereAddress) { + delta_file_[delta_file_header_.size() + 0x0D] = + FirstByteOfStringLength(kDictionary); + delta_file_[delta_file_header_.size() + 0x0E] = + SecondByteOfStringLength(kDictionary); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail at the position that was altered + EXPECT_EQ(delta_file_header_.size() + 0x0E, i); + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. 
+ EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, CopyAddressMaxInt) { + WriteMaxVarintAtOffset(0x0D, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail at the position that was altered + EXPECT_EQ(delta_file_header_.size() + 0x11, i); + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, CopyAddressNegative) { + WriteNegativeVarintAtOffset(0x0D, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail at the position that was altered + EXPECT_EQ(delta_file_header_.size() + 0x11, i); + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, CopyAddressInvalid) { + WriteInvalidVarintAtOffset(0x0D, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail at the position that was altered + EXPECT_EQ(delta_file_header_.size() + 0x11, i); + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. 
+ EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, AddMoreThanExpectedTarget) { + delta_file_[delta_file_header_.size() + 0x0F] = + FirstByteOfStringLength(kExpectedTarget); + delta_file_[delta_file_header_.size() + 0x10] = + SecondByteOfStringLength(kExpectedTarget) + 1; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail at the position that was altered + EXPECT_EQ(delta_file_header_.size() + 0x10, i); + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +// An ADD instruction with an explicit size of 0 is not illegal according to the +// standard, although it is inefficient and should not be generated by any +// reasonable encoder. Changing the size of an ADD instruction to zero will +// cause a failure because the generated target window size will not match the +// expected target size. +TEST_F(VCDiffInterleavedDecoderTestByteByByte, AddSizeZero) { + delta_file_[delta_file_header_.size() + 0x0F] = 0; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. 
+ EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, AddSizeTooLargeByOne) { + ++delta_file_[delta_file_header_.size() + 0x0F]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, AddSizeTooSmallByOne) { + --delta_file_[delta_file_header_.size() + 0x0F]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, AddSizeMaxInt) { + WriteMaxVarintAtOffset(0x0F, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail at the position that was altered + EXPECT_EQ(delta_file_header_.size() + 0x13, i); + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. 
+ EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, AddSizeNegative) { + WriteNegativeVarintAtOffset(0x0F, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail at the position that was altered + EXPECT_EQ(delta_file_header_.size() + 0x13, i); + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, AddSizeInvalid) { + WriteInvalidVarintAtOffset(0x0F, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail at the position that was altered + EXPECT_EQ(delta_file_header_.size() + 0x13, i); + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, RunMoreThanExpectedTarget) { + delta_file_[delta_file_header_.size() + 0x5F] = + FirstByteOfStringLength(kExpectedTarget); + delta_file_[delta_file_header_.size() + 0x60] = + SecondByteOfStringLength(kExpectedTarget) + 1; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail at the position that was altered + EXPECT_EQ(delta_file_header_.size() + 0x60, i); + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. 
+ EXPECT_GE(expected_target_.size(), output_.size()); +} + +// A RUN instruction with an explicit size of 0 is not illegal according to the +// standard, although it is inefficient and should not be generated by any +// reasonable encoder. Changing the size of a RUN instruction to zero will +// cause a failure because the generated target window size will not match the +// expected target size. +TEST_F(VCDiffInterleavedDecoderTestByteByByte, RunSizeZero) { + delta_file_[delta_file_header_.size() + 0x5F] = 0; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, RunSizeTooLargeByOne) { + ++delta_file_[delta_file_header_.size() + 0x5F]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, RunSizeTooSmallByOne) { + --delta_file_[delta_file_header_.size() + 0x5F]; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. 
+ EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, RunSizeMaxInt) { + WriteMaxVarintAtOffset(0x5F, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail at the position that was altered + EXPECT_EQ(delta_file_header_.size() + 0x63, i); + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, RunSizeNegative) { + WriteNegativeVarintAtOffset(0x5F, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail at the position that was altered + EXPECT_EQ(delta_file_header_.size() + 0x63, i); + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +TEST_F(VCDiffInterleavedDecoderTestByteByByte, RunSizeInvalid) { + WriteInvalidVarintAtOffset(0x5F, 1); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + // It should fail at the position that was altered + EXPECT_EQ(delta_file_header_.size() + 0x63, i); + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. 
+ EXPECT_GE(expected_target_.size(), output_.size()); +} + +} // namespace open_vcdiff diff --git a/src/vcdecoder_test4.cc b/src/vcdecoder_test4.cc new file mode 100644 index 0000000..adb99e8 --- /dev/null +++ b/src/vcdecoder_test4.cc @@ -0,0 +1,961 @@ +// Copyright 2008 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <config.h> +#include "google/vcdecoder.h" +#include <string> +#include "codetable.h" +#include "testing.h" +#include "vcdecoder_test.h" +#include "vcdiff_defs.h" // VCD_SOURCE + +namespace open_vcdiff { +namespace { + +using std::string; + +// Use the interleaved file header with the standard encoding. Should work. 
+class VCDiffDecoderInterleavedAllowedButNotUsed + : public VCDiffStandardDecoderTest { + public: + VCDiffDecoderInterleavedAllowedButNotUsed() { + UseInterleavedFileHeader(); + } + virtual ~VCDiffDecoderInterleavedAllowedButNotUsed() { } +}; + +TEST_F(VCDiffDecoderInterleavedAllowedButNotUsed, Decode) { + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_TRUE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_TRUE(decoder_.FinishDecoding()); + EXPECT_EQ(expected_target_, output_); +} + +TEST_F(VCDiffDecoderInterleavedAllowedButNotUsed, DecodeWithChecksum) { + ComputeAndAddChecksum(); + InitializeDeltaFile(); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_TRUE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_TRUE(decoder_.FinishDecoding()); + EXPECT_EQ(expected_target_, output_); +} + +typedef VCDiffDecoderInterleavedAllowedButNotUsed + VCDiffDecoderInterleavedAllowedButNotUsedByteByByte; + +TEST_F(VCDiffDecoderInterleavedAllowedButNotUsedByteByByte, Decode) { + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + for (size_t i = 0; i < delta_file_.size(); ++i) { + EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[i], 1, &output_)); + } + EXPECT_TRUE(decoder_.FinishDecoding()); + EXPECT_EQ(expected_target_, output_); +} + +TEST_F(VCDiffDecoderInterleavedAllowedButNotUsedByteByByte, + DecodeWithChecksum) { + ComputeAndAddChecksum(); + InitializeDeltaFile(); + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + for (size_t i = 0; i < delta_file_.size(); ++i) { + EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[i], 1, &output_)); + } + EXPECT_TRUE(decoder_.FinishDecoding()); + EXPECT_EQ(expected_target_, output_); +} + +// Use the standard file header with the interleaved encoding. Should fail. 
+class VCDiffDecoderInterleavedUsedButNotSupported + : public VCDiffInterleavedDecoderTest { + public: + VCDiffDecoderInterleavedUsedButNotSupported() { + UseStandardFileHeader(); + } + virtual ~VCDiffDecoderInterleavedUsedButNotSupported() { } +}; + +TEST_F(VCDiffDecoderInterleavedUsedButNotSupported, DecodeShouldFail) { + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_EQ("", output_); +} + +TEST_F(VCDiffDecoderInterleavedUsedButNotSupported, + DecodeByteByByteShouldFail) { + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + bool failed = false; + for (size_t i = 0; i < delta_file_.size(); ++i) { + if (!decoder_.DecodeChunk(&delta_file_[i], 1, &output_)) { + failed = true; + break; + } + } + EXPECT_TRUE(failed); + // The decoder should not create more target bytes than were expected. + EXPECT_GE(expected_target_.size(), output_.size()); +} + +// Divides up the standard encoding into eight separate delta file windows. +// Each delta instruction appears in its own window. 
class VCDiffStandardWindowDecoderTest : public VCDiffDecoderTest {
 protected:
  // Selects the standard file header, then installs kExpectedAnnotatedTarget
  // and kWindowBody into the fixture (see the definition below).
  VCDiffStandardWindowDecoderTest();
  virtual ~VCDiffStandardWindowDecoderTest() {}

 private:
  // Annotated form of the target expected from the multi-window encoding.
  static const char kExpectedAnnotatedTarget[];
  // Headers and bodies of all seven delta windows, back to back.
  static const char kWindowBody[];
};

// Each window below is: window header fields, then the data section, the
// instructions section and the addresses section, in that order.
const char VCDiffStandardWindowDecoderTest::kWindowBody[] = {
// Window 1:
    VCD_SOURCE,  // Win_Indicator: take source from dictionary
    FirstByteOfStringLength(kDictionary),  // Source segment size
    SecondByteOfStringLength(kDictionary),
    0x00,  // Source segment position: start of dictionary
    0x08,  // Length of the delta encoding
    0x1C,  // Size of the target window (28)
    0x00,  // Delta_indicator (no compression)
    0x00,  // length of data for ADDs and RUNs
    0x02,  // length of instructions section
    0x01,  // length of addresses for COPYs
    // No data for ADDs and RUNs
    // Instructions and sizes (length 2)
    0x13,  // VCD_COPY mode VCD_SELF, size 0
    0x1C,  // Size of COPY (28)
    // Addresses for COPYs (length 1)
    0x00,  // Start of dictionary
// Window 2:
    0x00,  // Win_Indicator: No source segment (ADD only)
    0x44,  // Length of the delta encoding
    0x3D,  // Size of the target window (61)
    0x00,  // Delta_indicator (no compression)
    0x3D,  // length of data for ADDs and RUNs
    0x02,  // length of instructions section
    0x00,  // length of addresses for COPYs
    // Data for ADD (length 61)
    ' ', 'I', ' ', 'h', 'a', 'v', 'e', ' ', 's', 'a', 'i', 'd', ' ',
    'i', 't', ' ', 't', 'w', 'i', 'c', 'e', ':', '\n',
    'T', 'h', 'a', 't', ' ',
    'a', 'l', 'o', 'n', 'e', ' ', 's', 'h', 'o', 'u', 'l', 'd', ' ',
    'e', 'n', 'c', 'o', 'u', 'r', 'a', 'g', 'e', ' ',
    't', 'h', 'e', ' ', 'c', 'r', 'e', 'w', '.', '\n',
    // Instructions and sizes (length 2)
    0x01,  // VCD_ADD size 0
    0x3D,  // Size of ADD (61)
    // No addresses for COPYs
// Window 3:
    VCD_TARGET,  // Win_Indicator: take source from decoded data
    0x59,  // Source segment size: length of data decoded so far
    0x00,  // Source segment position: start of decoded data
    0x08,  // Length of the delta encoding
    0x2C,  // Size of the target window
    0x00,  // Delta_indicator (no compression)
    0x00,  // length of data for ADDs and RUNs
    0x02,  // length of instructions section
    0x01,  // length of addresses for COPYs
    // No data for ADDs and RUNs
    // Instructions and sizes (length 2)
    0x23,  // VCD_COPY mode VCD_HERE, size 0
    0x2C,  // Size of COPY (44)
    // Addresses for COPYs (length 1)
    0x58,  // HERE mode address (27+61 back from here_address)
// Window 4:
    VCD_TARGET,  // Win_Indicator: take source from decoded data
    0x05,  // Source segment size: only 5 bytes needed for this COPY
    0x2E,  // Source segment position: offset for COPY
    0x09,  // Length of the delta encoding
    0x07,  // Size of the target window
    0x00,  // Delta_indicator (no compression)
    0x02,  // length of data for ADDs and RUNs
    0x01,  // length of instructions section
    0x01,  // length of addresses for COPYs
    // Data for ADD (length 2)
    'h', 'r',
    // Instructions and sizes (length 1)
    0xA7,  // VCD_ADD size 2 + VCD_COPY mode SELF size 5
    // Addresses for COPYs (length 1)
    0x00,  // SELF mode address (start of source segment)
// Window 5:
    0x00,  // Win_Indicator: No source segment (ADD only)
    0x0F,  // Length of the delta encoding
    0x09,  // Size of the target window
    0x00,  // Delta_indicator (no compression)
    0x09,  // length of data for ADDs and RUNs
    0x01,  // length of instructions section
    0x00,  // length of addresses for COPYs
    // Data for ADD (length 9)
    'W', 'h', 'a', 't', ' ', 'I', ' ', 't', 'e',
    // Instructions and sizes (length 1)
    0x0A,       // VCD_ADD size 9
    // No addresses for COPYs
// Window 6:
    0x00,  // Win_Indicator: No source segment (RUN only)
    0x08,  // Length of the delta encoding
    0x02,  // Size of the target window
    0x00,  // Delta_indicator (no compression)
    0x01,  // length of data for ADDs and RUNs
    0x02,  // length of instructions section
    0x00,  // length of addresses for COPYs
    // Data for RUN (length 1)
    'l',
    // Instructions and sizes (length 2)
    0x00,  // VCD_RUN size 0
    0x02,  // Size of RUN (2)
    // No addresses for COPYs
// Window 7:
    0x00,  // Win_Indicator: No source segment (ADD only)
    0x22,  // Length of the delta encoding
    0x1B,  // Size of the target window
    0x00,  // Delta_indicator (no compression)
    0x1B,  // length of data for ADDs and RUNs
    0x02,  // length of instructions section
    0x00,  // length of addresses for COPYs
    // Data for ADD: 4th section (length 27)
    ' ', 'y', 'o', 'u', ' ',
    't', 'h', 'r', 'e', 'e', ' ', 't', 'i', 'm', 'e', 's', ' ', 'i', 's', ' ',
    't', 'r', 'u', 'e', '.', '\"', '\n',
    // Instructions and sizes (length 2)
    0x01,  // VCD_ADD size 0
    0x1B,  // Size of ADD (27)
    // No addresses for COPYs
  };

// The window encoding should produce the same target file as the standard
// encoding, but the annotated target will be different because some of the
// <bmatch> tags (copying from the previously decoded data in the current target
// window) are changed to <dmatch> (copying from the previously decoded data in
// another target window, which is used as the source window for the current
// delta window.)
const char VCDiffStandardWindowDecoderTest::kExpectedAnnotatedTarget[] =
    "<dmatch>\"Just the place for a Snark!</dmatch>"
    "<literal> I have said it twice:\n"
    "That alone should encourage the crew.\n</literal>"
    "<dmatch>Just the place for a Snark! I have said it t</dmatch>"
    "<literal>hr</literal>"
    "<dmatch>ice:\n</dmatch>"
    "<literal>What I te</literal>"
    "<literal>ll</literal>"
    "<literal> you three times is true.\"\n</literal>";

// delta_window_body_ receives every window (headers included); sizeof() is
// used so the full byte table is copied regardless of embedded 0x00 bytes.
VCDiffStandardWindowDecoderTest::VCDiffStandardWindowDecoderTest() {
  UseStandardFileHeader();
  expected_annotated_target_.assign(kExpectedAnnotatedTarget);
  delta_window_body_.assign(kWindowBody, sizeof(kWindowBody));
}

// Decodes all seven windows from a single DecodeChunk() call.
TEST_F(VCDiffStandardWindowDecoderTest, Decode) {
  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
  EXPECT_TRUE(decoder_.DecodeChunk(delta_file_.data(),
                                   delta_file_.size(),
                                   &output_));
  EXPECT_TRUE(decoder_.FinishDecoding());
  EXPECT_EQ(expected_target_, output_);
}

// Bug 1287926: If DecodeChunk() stops in the middle of the window header,
// and the expected size of the current target window is smaller than the
// cumulative target bytes decoded so far, an underflow occurs and the decoder
// tries to allocate ~MAX_INT bytes.
TEST_F(VCDiffStandardWindowDecoderTest, DecodeBreakInFourthWindowHeader) {
  // Parse file header + first two windows.
  // 83 = 13 bytes of window 1 + 70 bytes of window 2 in kWindowBody.
  const int chunk_1_size = delta_file_header_.size() + 83;
  // Parse third window, plus everything up to "Size of the target window" field
  // of fourth window, but do not parse complete header of fourth window.
  // 12 = all of window 3; 5 = the first five header bytes of window 4.
  const int chunk_2_size = 12 + 5;
  // Sanity-check the offsets: chunk 2 must start at window 3's Win_Indicator
  // and chunk 3 must start at window 4's Delta_indicator byte.
  CHECK_EQ(VCD_TARGET, static_cast<unsigned char>(delta_file_[chunk_1_size]));
  CHECK_EQ(0x00, static_cast<int>(delta_file_[chunk_1_size + chunk_2_size]));
  string output_chunk1, output_chunk2, output_chunk3;
  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
  EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[0],
                                   chunk_1_size,
                                   &output_chunk1));
  EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[chunk_1_size],
                                   chunk_2_size,
                                   &output_chunk2));
  EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[chunk_1_size + chunk_2_size],
                                   delta_file_.size()
                                       - (chunk_1_size + chunk_2_size),
                                   &output_chunk3));
  EXPECT_TRUE(decoder_.FinishDecoding());
  EXPECT_EQ(expected_target_, output_chunk1 + output_chunk2 + output_chunk3);
}

// Splits the delta file at every interior offset and decodes the two halves
// with separate DecodeChunk() calls; the joined output must match each time.
TEST_F(VCDiffStandardWindowDecoderTest, DecodeInTwoParts) {
  const size_t delta_file_size = delta_file_.size();
  for (size_t i = 1; i < delta_file_size; i++) {
    string output_chunk1, output_chunk2;
    // StartDecoding() is called again on every iteration, so each split
    // begins a fresh decode with the same decoder object.
    decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
    EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[0],
                                     i,
                                     &output_chunk1));
    EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[i],
                                     delta_file_size - i,
                                     &output_chunk2));
    EXPECT_TRUE(decoder_.FinishDecoding());
    EXPECT_EQ(expected_target_, output_chunk1 + output_chunk2);
  }
}

// Tries every pair of split points (i < j) and decodes the three resulting
// pieces with separate DecodeChunk() calls.
TEST_F(VCDiffStandardWindowDecoderTest, DecodeInThreeParts) {
  const size_t delta_file_size = delta_file_.size();
  for (size_t i = 1; i < delta_file_size - 1; i++) {
    for (size_t j = i + 1; j < delta_file_size; j++) {
      string output_chunk1, output_chunk2, output_chunk3;
      decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
      EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[0],
                                       i,
                                       &output_chunk1));
      EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[i],
                                       j - i,
                                       &output_chunk2));
      EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[j],
                                       delta_file_size - j,
                                       &output_chunk3));
      EXPECT_TRUE(decoder_.FinishDecoding());
      EXPECT_EQ(expected_target_,
                output_chunk1 + output_chunk2 + output_chunk3);
    }
  }
}

typedef VCDiffStandardWindowDecoderTest
    VCDiffStandardWindowDecoderTestByteByByte;

// Feeds the multi-window delta file one byte per DecodeChunk() call.
TEST_F(VCDiffStandardWindowDecoderTestByteByByte, Decode) {
  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
  for (size_t i = 0; i < delta_file_.size(); ++i) {
    EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[i], 1, &output_));
  }
  EXPECT_TRUE(decoder_.FinishDecoding());
  EXPECT_EQ(expected_target_, output_);
}

// Divides up the interleaved encoding into seven separate delta file windows
// (numbered 1 through 7 in kWindowBody below).
class VCDiffInterleavedWindowDecoderTest
    : public VCDiffStandardWindowDecoderTest {
 protected:
  // Runs after the VCDiffStandardWindowDecoderTest constructor, then switches
  // to the interleaved file header and this class's own kWindowBody.
  VCDiffInterleavedWindowDecoderTest();
  virtual ~VCDiffInterleavedWindowDecoderTest() {}
 private:
  // Interleaved-format counterpart of the base class's window table.
  static const char kWindowBody[];
};

// In every window the data and address section lengths are 0x00 and the
// instruction opcodes, sizes, ADD/RUN data and COPY addresses all appear
// inline within the instructions section.
const char VCDiffInterleavedWindowDecoderTest::kWindowBody[] = {
// Window 1:
    VCD_SOURCE,  // Win_Indicator: take source from dictionary
    FirstByteOfStringLength(kDictionary),  // Source segment size
    SecondByteOfStringLength(kDictionary),
    0x00,  // Source segment position: start of dictionary
    0x08,  // Length of the delta encoding
    0x1C,  // Size of the target window (28)
    0x00,  // Delta_indicator (no compression)
    0x00,  // length of data for ADDs and RUNs
    0x03,  // length of instructions section
    0x00,  // length of addresses for COPYs
    0x13,  // VCD_COPY mode VCD_SELF, size 0
    0x1C,  // Size of COPY (28)
    0x00,  // Start of dictionary
// Window 2:
    0x00,  // Win_Indicator: No source segment (ADD only)
    0x44,  // Length of the delta encoding
    0x3D,  // Size of the target window (61)
    0x00,  // Delta_indicator (no compression)
    0x00,  // length of data for ADDs and RUNs
    0x3F,  // length of instructions section
    0x00,  // length of addresses for COPYs
    0x01,  // VCD_ADD size 0
    0x3D,  // Size of ADD (61)
    ' ', 'I', ' ', 'h', 'a', 'v', 'e', ' ', 's', 'a', 'i', 'd', ' ',
    'i', 't', ' ', 't', 'w', 'i', 'c', 'e', ':', '\n',
    'T', 'h', 'a', 't', ' ',
    'a', 'l', 'o', 'n', 'e', ' ', 's', 'h', 'o', 'u', 'l', 'd', ' ',
    'e', 'n', 'c', 'o', 'u', 'r', 'a', 'g', 'e', ' ',
    't', 'h', 'e', ' ', 'c', 'r', 'e', 'w', '.', '\n',
// Window 3:
    VCD_TARGET,  // Win_Indicator: take source from decoded data
    0x59,  // Source segment size: length of data decoded so far
    0x00,  // Source segment position: start of decoded data
    0x08,  // Length of the delta encoding
    0x2C,  // Size of the target window
    0x00,  // Delta_indicator (no compression)
    0x00,  // length of data for ADDs and RUNs
    0x03,  // length of instructions section
    0x00,  // length of addresses for COPYs
    0x23,  // VCD_COPY mode VCD_HERE, size 0
    0x2C,  // Size of COPY (44)
    0x58,  // HERE mode address (27+61 back from here_address)
// Window 4:
    VCD_TARGET,  // Win_Indicator: take source from decoded data
    0x05,  // Source segment size: only 5 bytes needed for this COPY
    0x2E,  // Source segment position: offset for COPY
    0x09,  // Length of the delta encoding
    0x07,  // Size of the target window
    0x00,  // Delta_indicator (no compression)
    0x00,  // length of data for ADDs and RUNs
    0x04,  // length of instructions section
    0x00,  // length of addresses for COPYs
    0xA7,  // VCD_ADD size 2 + VCD_COPY mode SELF, size 5
    'h', 'r',
    0x00,  // SELF mode address (start of source segment)
// Window 5:
    0x00,  // Win_Indicator: No source segment (ADD only)
    0x0F,  // Length of the delta encoding
    0x09,  // Size of the target window
    0x00,  // Delta_indicator (no compression)
    0x00,  // length of data for ADDs and RUNs
    0x0A,  // length of instructions section
    0x00,  // length of addresses for COPYs
    0x0A,       // VCD_ADD size 9
    'W', 'h', 'a', 't', ' ', 'I', ' ', 't', 'e',
// Window 6:
    0x00,  // Win_Indicator: No source segment (RUN only)
    0x08,  // Length of the delta encoding
    0x02,  // Size of the target window
    0x00,  // Delta_indicator (no compression)
    0x00,  // length of data for ADDs and RUNs
    0x03,  // length of instructions section
    0x00,  // length of addresses for COPYs
    0x00,  // VCD_RUN size 0
    0x02,  // Size of RUN (2)
    'l',
// Window 7:
    0x00,  // Win_Indicator: No source segment (ADD only)
    0x22,  // Length of the delta encoding
    0x1B,  // Size of the target window
    0x00,  // Delta_indicator (no compression)
    0x00,  // length of data for ADDs and RUNs
    0x1D,  // length of instructions section
    0x00,  // length of addresses for COPYs
    0x01,  // VCD_ADD size 0
    0x1B,  // Size of ADD (27)
    ' ', 'y', 'o', 'u', ' ',
    't', 'h', 'r', 'e', 'e', ' ', 't', 'i', 'm', 'e', 's', ' ', 'i', 's', ' ',
    't', 'r', 'u', 'e', '.', '\"', '\n',
  };

VCDiffInterleavedWindowDecoderTest::VCDiffInterleavedWindowDecoderTest() {
  UseInterleavedFileHeader();
  // delta_window_header_ is left blank.  All window headers and bodies are
  // lumped together in delta_window_body_.  This means that AddChecksum()
  // cannot be used to test the checksum feature.
  delta_window_body_.assign(kWindowBody, sizeof(kWindowBody));
}

// Decodes the interleaved multi-window file from a single DecodeChunk() call.
TEST_F(VCDiffInterleavedWindowDecoderTest, Decode) {
  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
  EXPECT_TRUE(decoder_.DecodeChunk(delta_file_.data(),
                                   delta_file_.size(),
                                   &output_));
  EXPECT_TRUE(decoder_.FinishDecoding());
  EXPECT_EQ(expected_target_, output_);
}

// Splits the interleaved delta file at every interior offset and decodes the
// two halves separately.
TEST_F(VCDiffInterleavedWindowDecoderTest, DecodeInTwoParts) {
  const size_t delta_file_size = delta_file_.size();
  for (size_t i = 1; i < delta_file_size; i++) {
    string output_chunk1, output_chunk2;
    decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
    EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[0],
                                     i,
                                     &output_chunk1));
    EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[i],
                                     delta_file_size - i,
                                     &output_chunk2));
    EXPECT_TRUE(decoder_.FinishDecoding());
    EXPECT_EQ(expected_target_, output_chunk1 + output_chunk2);
  }
}

// Tries every pair of split points (i < j) on the interleaved delta file and
// decodes the three pieces separately.
TEST_F(VCDiffInterleavedWindowDecoderTest, DecodeInThreeParts) {
  const size_t delta_file_size = delta_file_.size();
  for (size_t i = 1; i < delta_file_size - 1; i++) {
    for (size_t j = i + 1; j < delta_file_size; j++) {
      string output_chunk1, output_chunk2, output_chunk3;
      decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
      EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[0],
                                       i,
                                       &output_chunk1));
      EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[i],
                                       j - i,
                                       &output_chunk2));
      EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[j],
                                       delta_file_size - j,
                                       &output_chunk3));
      EXPECT_TRUE(decoder_.FinishDecoding());
      EXPECT_EQ(expected_target_,
                output_chunk1 + output_chunk2 + output_chunk3);
    }
  }
}

typedef VCDiffInterleavedWindowDecoderTest
    VCDiffInterleavedWindowDecoderTestByteByByte;

// Feeds the interleaved multi-window file one byte per DecodeChunk() call.
TEST_F(VCDiffInterleavedWindowDecoderTestByteByByte, Decode) {
  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
  for (size_t i = 0; i < delta_file_.size(); ++i) {
    EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[i], 1, &output_));
  }
  EXPECT_TRUE(decoder_.FinishDecoding());
  EXPECT_EQ(expected_target_, output_);
}

// The original version of VCDiffDecoder did not allow the caller to modify the
// contents of output_string between calls to DecodeChunk().  That restriction
// has been removed.  Verify that the same result is still produced if the
// output string is cleared after each call to DecodeChunk().  Use the window
// encoding because it refers back to the previously decoded target data, which
// is the feature that would fail if the restriction still applied.
+// +TEST_F(VCDiffInterleavedWindowDecoderTest, OutputStringCanBeModified) { + string temp_output; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + for (size_t i = 0; i < delta_file_.size(); ++i) { + EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[i], 1, &temp_output)); + output_.append(temp_output); + temp_output.clear(); + } + EXPECT_TRUE(decoder_.FinishDecoding()); + EXPECT_EQ(expected_target_, output_); +} + +TEST_F(VCDiffInterleavedWindowDecoderTest, OutputStringIsPreserved) { + const string previous_data("Previous data"); + output_ = previous_data; + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + for (size_t i = 0; i < delta_file_.size(); ++i) { + EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[i], 1, &output_)); + } + EXPECT_TRUE(decoder_.FinishDecoding()); + EXPECT_EQ(previous_data + expected_target_, output_); +} + +// A decode job that tests the ability to COPY across the boundary between +// source data and target data. +class VCDiffStandardCrossDecoderTest : public VCDiffDecoderTest { + protected: + static const char kExpectedTarget[]; + static const char kExpectedAnnotatedTarget[]; + static const char kWindowHeader[]; + static const char kWindowBody[]; + + VCDiffStandardCrossDecoderTest(); + virtual ~VCDiffStandardCrossDecoderTest() {} +}; + +const char VCDiffStandardCrossDecoderTest::kWindowHeader[] = { + VCD_SOURCE, // Win_Indicator: take source from dictionary + FirstByteOfStringLength(kDictionary), // Source segment size + SecondByteOfStringLength(kDictionary), + 0x00, // Source segment position: start of dictionary + 0x15, // Length of the delta encoding + StringLengthAsByte(kExpectedTarget), // Size of the target window + 0x00, // Delta_indicator (no compression) + 0x07, // length of data for ADDs and RUNs + 0x06, // length of instructions section + 0x03 // length of addresses for COPYs + }; + +const char VCDiffStandardCrossDecoderTest::kWindowBody[] = { + // Data for ADD (length 7) + 'S', 'p', 'i', 'd', 'e', 
'r', 's', + // Instructions and sizes (length 6) + 0x01, // VCD_ADD size 0 + 0x07, // Size of ADD (7) + 0x23, // VCD_COPY mode VCD_HERE, size 0 + 0x19, // Size of COPY (25) + 0x14, // VCD_COPY mode VCD_SELF, size 4 + 0x25, // VCD_COPY mode VCD_HERE, size 5 + // Addresses for COPYs (length 3) + 0x15, // HERE mode address for 1st copy (21 back from here_address) + 0x06, // SELF mode address for 2nd copy + 0x14 // HERE mode address for 3rd copy + }; + +const char VCDiffStandardCrossDecoderTest::kExpectedTarget[] = + "Spiders in his hair.\n" + "Spiders in the air.\n"; + +const char VCDiffStandardCrossDecoderTest::kExpectedAnnotatedTarget[] = + "<literal>Spiders</literal>" + "<dmatch> in his hair.\n</dmatch>" // crosses source-target boundary + "<bmatch>Spiders in </bmatch>" + "<dmatch>the </dmatch>" + "<bmatch>air.\n</bmatch>"; + +VCDiffStandardCrossDecoderTest::VCDiffStandardCrossDecoderTest() { + UseStandardFileHeader(); + delta_window_header_.assign(kWindowHeader, sizeof(kWindowHeader)); + delta_window_body_.assign(kWindowBody, sizeof(kWindowBody)); + expected_target_.assign(kExpectedTarget); + expected_annotated_target_.assign(kExpectedAnnotatedTarget); +} + +TEST_F(VCDiffStandardCrossDecoderTest, Decode) { + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_TRUE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_TRUE(decoder_.FinishDecoding()); + EXPECT_EQ(expected_target_, output_); +} + +typedef VCDiffStandardCrossDecoderTest VCDiffStandardCrossDecoderTestByteByByte; + +TEST_F(VCDiffStandardCrossDecoderTestByteByByte, Decode) { + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + for (size_t i = 0; i < delta_file_.size(); ++i) { + EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[i], 1, &output_)); + } + EXPECT_TRUE(decoder_.FinishDecoding()); + EXPECT_EQ(expected_target_, output_); +} + +// The same decode job that tests the ability to COPY across the boundary +// between source data and 
target data, but using the interleaved format rather +// than the standard format. +class VCDiffInterleavedCrossDecoderTest + : public VCDiffStandardCrossDecoderTest { + protected: + VCDiffInterleavedCrossDecoderTest(); + virtual ~VCDiffInterleavedCrossDecoderTest() {} + + private: + static const char kWindowHeader[]; + static const char kWindowBody[]; +}; + +const char VCDiffInterleavedCrossDecoderTest::kWindowHeader[] = { + VCD_SOURCE, // Win_Indicator: take source from dictionary + FirstByteOfStringLength(kDictionary), // Source segment size + SecondByteOfStringLength(kDictionary), + 0x00, // Source segment position: start of dictionary + 0x15, // Length of the delta encoding + StringLengthAsByte(kExpectedTarget), // Size of the target window + 0x00, // Delta_indicator (no compression) + 0x00, // length of data for ADDs and RUNs + 0x10, // length of instructions section + 0x00, // length of addresses for COPYs + }; + +const char VCDiffInterleavedCrossDecoderTest::kWindowBody[] = { + 0x01, // VCD_ADD size 0 + 0x07, // Size of ADD (7) + // Data for ADD (length 7) + 'S', 'p', 'i', 'd', 'e', 'r', 's', + 0x23, // VCD_COPY mode VCD_HERE, size 0 + 0x19, // Size of COPY (25) + 0x15, // HERE mode address for 1st copy (21 back from here_address) + 0x14, // VCD_COPY mode VCD_SELF, size 4 + 0x06, // SELF mode address for 2nd copy + 0x25, // VCD_COPY mode VCD_HERE, size 5 + 0x14 // HERE mode address for 3rd copy + }; + +VCDiffInterleavedCrossDecoderTest::VCDiffInterleavedCrossDecoderTest() { + UseInterleavedFileHeader(); + delta_window_header_.assign(kWindowHeader, sizeof(kWindowHeader)); + delta_window_body_.assign(kWindowBody, sizeof(kWindowBody)); +} + +TEST_F(VCDiffInterleavedCrossDecoderTest, Decode) { + decoder_.StartDecoding(dictionary_.data(), dictionary_.size()); + EXPECT_TRUE(decoder_.DecodeChunk(delta_file_.data(), + delta_file_.size(), + &output_)); + EXPECT_TRUE(decoder_.FinishDecoding()); + EXPECT_EQ(expected_target_, output_); +} + 
// Rebuilds the delta file with a checksum, then decodes it in one chunk.
TEST_F(VCDiffInterleavedCrossDecoderTest, DecodeWithChecksum) {
  ComputeAndAddChecksum();
  InitializeDeltaFile();
  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
  EXPECT_TRUE(decoder_.DecodeChunk(delta_file_.data(),
                                   delta_file_.size(),
                                   &output_));
  EXPECT_TRUE(decoder_.FinishDecoding());
  EXPECT_EQ(expected_target_, output_);
}

typedef VCDiffInterleavedCrossDecoderTest
    VCDiffInterleavedCrossDecoderTestByteByByte;

// Byte-at-a-time decode of the interleaved cross-boundary job.
TEST_F(VCDiffInterleavedCrossDecoderTestByteByByte, Decode) {
  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
  for (size_t i = 0; i < delta_file_.size(); ++i) {
    EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[i], 1, &output_));
  }
  EXPECT_TRUE(decoder_.FinishDecoding());
  EXPECT_EQ(expected_target_, output_);
}

// Byte-at-a-time decode of the delta file rebuilt with a checksum.
TEST_F(VCDiffInterleavedCrossDecoderTestByteByByte, DecodeWithChecksum) {
  ComputeAndAddChecksum();
  InitializeDeltaFile();
  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
  for (size_t i = 0; i < delta_file_.size(); ++i) {
    EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[i], 1, &output_));
  }
  EXPECT_TRUE(decoder_.FinishDecoding());
  EXPECT_EQ(expected_target_, output_);
}

// Test using a custom code table and custom cache sizes with interleaved
// format.
class VCDiffCustomCodeTableDecoderTest : public VCDiffInterleavedDecoderTest {
 protected:
  // File header bytes up to and including Hdr_Indicator.
  static const char kFileHeader[];
  // Header and body of the single delta window that uses the custom table.
  static const char kWindowHeader[];
  static const char kWindowBody[];
  // The custom code table, itself encoded as a delta file.
  static const char kEncodedCustomCodeTable[];

  VCDiffCustomCodeTableDecoderTest();
  virtual ~VCDiffCustomCodeTableDecoderTest() {}
};

// NOTE(review): 0xD6/0xC3/0xC4 do not fit in a signed char; under C++11
// list-initialization that would be a narrowing conversion where plain char is
// signed -- confirm which language standard this test is built with.
const char VCDiffCustomCodeTableDecoderTest::kFileHeader[] = {
    0xD6,  // 'V' | 0x80
    0xC3,  // 'C' | 0x80
    0xC4,  // 'D' | 0x80
    'S',   // SDCH version code
    0x02   // Hdr_Indicator: Use custom code table
  };

// Make a custom code table that includes exactly the instructions we need
// to encode the first test's data without using any explicit length values.
// Be careful not to replace any existing opcodes that have size 0,
// to ensure that the custom code table is valid (can express all possible
// values of inst (also known as instruction type) and mode with size 0.)
// This encoding uses interleaved format, which is easier to read.
//
// Here are the changes to the standard code table:
// ADD size 2 (opcode 3) => RUN size 2 (inst1[3] = VCD_RUN)
// ADD size 16 (opcode 17) => ADD size 27 (size1[17] = 27)
// ADD size 17 (opcode 18) => ADD size 61 (size1[18] = 61)
// COPY mode 0 size 18 (opcode 34) => COPY mode 0 size 28 (size1[34] = 28)
// COPY mode 1 size 18 (opcode 50) => COPY mode 1 size 44 (size1[50] = 44)
//
const char VCDiffCustomCodeTableDecoderTest::kEncodedCustomCodeTable[] = {
    0xD6,  // 'V' | 0x80
    0xC3,  // 'C' | 0x80
    0xC4,  // 'D' | 0x80
    'S',   // SDCH version code
    0x00,  // Hdr_Indicator: no custom code table, no compression
    VCD_SOURCE,  // Win_Indicator: take source from dictionary
    (sizeof(VCDiffCodeTableData) >> 7) | 0x80,  // First byte of table length
    sizeof(VCDiffCodeTableData) & 0x7F,  // Second byte of table length
    0x00,  // Source segment position: start of default code table
    0x1F,  // Length of the delta encoding
    (sizeof(VCDiffCodeTableData) >> 7) | 0x80,  // First byte of table length
    sizeof(VCDiffCodeTableData) & 0x7F,  // Second byte of table length
    0x00,  // Delta_indicator (no compression)
    0x00,  // length of data for ADDs and RUNs (unused)
    0x19,  // length of interleaved section
    0x00,  // length of addresses for COPYs (unused)
    0x05,  // VCD_ADD size 4
    // Data for ADD (length 4)
    VCD_RUN, VCD_ADD, VCD_ADD, VCD_RUN,
    0x13,  // VCD_COPY mode VCD_SELF size 0
    0x84,  // Size of copy: upper bits (512 - 4 + 17 = 525)
    0x0D,  // Size of copy: lower bits
    0x04,  // Address of COPY
    0x03,  // VCD_ADD size 2
    // Data for ADD (length 2)
    0x1B, 0x3D,
    0x3F,  // VCD_COPY mode VCD_NEAR(0) size 15
    0x84,  // Address of copy: upper bits (525 + 2 = 527)
    0x0F,  // Address of copy: lower bits
    0x02,  // VCD_ADD size 1
    // Data for ADD (length 1)
    0x1C,
    0x4F,  // VCD_COPY mode VCD_NEAR(1) size 15
    0x10,  // Address of copy
    0x02,  // VCD_ADD size 1
    // Data for ADD (length 1)
    0x2C,
    0x53,  // VCD_COPY mode VCD_NEAR(2) size 0
    0x87,  // Size of copy: upper bits (256 * 4 - 51 = 973)
    0x4D,  // Size of copy: lower bits
    0x10   // Address of copy
  };

// This is similar to VCDiffInterleavedDecoderTest, but uses the custom code
// table to eliminate the need to explicitly encode instruction sizes.
// Notice that NEAR(0) mode is used here where NEAR(1) mode was used in
// VCDiffInterleavedDecoderTest.  This is because the custom code table
// has the size of the NEAR cache set to 1; only the most recent
// COPY instruction is available.  This will also be a test of
// custom cache sizes.
const char VCDiffCustomCodeTableDecoderTest::kWindowHeader[] = {
    VCD_SOURCE,  // Win_Indicator: take source from dictionary
    FirstByteOfStringLength(kDictionary),  // Source segment size
    SecondByteOfStringLength(kDictionary),
    0x00,  // Source segment position: start of dictionary
    0x74,  // Length of the delta encoding
    FirstByteOfStringLength(kExpectedTarget),  // Size of the target window
    SecondByteOfStringLength(kExpectedTarget),
    0x00,  // Delta_indicator (no compression)
    0x00,  // length of data for ADDs and RUNs (unused)
    0x6E,  // length of interleaved section
    0x00   // length of addresses for COPYs (unused)
  };

// Every opcode below carries its size implicitly via the custom code table,
// so no explicit size bytes follow the opcodes.
const char VCDiffCustomCodeTableDecoderTest::kWindowBody[] = {
    0x22,  // VCD_COPY mode VCD_SELF, size 28
    0x00,  // Address of COPY: Start of dictionary
    0x12,  // VCD_ADD size 61
    // Data for ADD (length 61)
    ' ', 'I', ' ', 'h', 'a', 'v', 'e', ' ', 's', 'a', 'i', 'd', ' ',
    'i', 't', ' ', 't', 'w', 'i', 'c', 'e', ':', '\n',
    'T', 'h', 'a', 't', ' ',
    'a', 'l', 'o', 'n', 'e', ' ', 's', 'h', 'o', 'u', 'l', 'd', ' ',
    'e', 'n', 'c', 'o', 'u', 'r', 'a', 'g', 'e', ' ',
    't', 'h', 'e', ' ', 'c', 'r', 'e', 'w', '.', '\n',
    0x32,  // VCD_COPY mode VCD_HERE, size 44
    0x58,  // HERE mode address (27+61 back from here_address)
    0xBF,  // VCD_ADD size 2 + VCD_COPY mode NEAR(0), size 5
    // Data for ADDs: 2nd section (length 2)
    'h', 'r',
    0x2D,  // NEAR(0) mode address (45 after prior address)
    0x0A,       // VCD_ADD size 9
    // Data for ADDs: 3rd section (length 9)
    'W', 'h', 'a', 't', ' ',
    'I', ' ', 't', 'e',
    0x03,  // VCD_RUN size 2
    // Data for RUN: 4th section (length 1)
    'l',
    0x11,  // VCD_ADD size 27
    // Data for ADD: 4th section (length 27)
    ' ', 'y', 'o', 'u', ' ',
    't', 'h', 'r', 'e', 'e', ' ', 't', 'i', 'm', 'e', 's', ' ', 'i', 's', ' ',
    't', 'r', 'u', 'e', '.', '\"', '\n'
  };

// Builds the file header as: kFileHeader, the two custom cache sizes, then the
// encoded custom code table; the window header and body follow separately.
VCDiffCustomCodeTableDecoderTest::VCDiffCustomCodeTableDecoderTest() {
  delta_file_header_.assign(kFileHeader, sizeof(kFileHeader));
  delta_file_header_.push_back(0x01);  // NEAR cache size (custom)
  delta_file_header_.push_back(0x06);  // SAME cache size (custom)
  delta_file_header_.append(kEncodedCustomCodeTable,
                            sizeof(kEncodedCustomCodeTable));
  delta_window_header_.assign(kWindowHeader, sizeof(kWindowHeader));
  delta_window_body_.assign(kWindowBody, sizeof(kWindowBody));
}

// Decodes kEncodedCustomCodeTable against the default code table (used as the
// dictionary) and checks that the result equals the default table with the
// five modifications applied below.
TEST_F(VCDiffCustomCodeTableDecoderTest, CustomCodeTableEncodingMatches) {
  VCDiffCodeTableData custom_code_table(
    VCDiffCodeTableData::kDefaultCodeTableData);
  custom_code_table.inst1[3] = VCD_RUN;
  custom_code_table.size1[17] = 27;
  custom_code_table.size1[18] = 61;
  custom_code_table.size1[34] = 28;
  custom_code_table.size1[50] = 44;

  // The raw bytes of the default code table serve as the source dictionary.
  decoder_.StartDecoding(
      reinterpret_cast<const char*>(
          &VCDiffCodeTableData::kDefaultCodeTableData),
      sizeof(VCDiffCodeTableData::kDefaultCodeTableData));
  EXPECT_TRUE(decoder_.DecodeChunk(kEncodedCustomCodeTable,
                                   sizeof(kEncodedCustomCodeTable),
                                   &output_));
  EXPECT_TRUE(decoder_.FinishDecoding());
  EXPECT_EQ(sizeof(custom_code_table), output_.size());
  // Reinterpret the decoded bytes as a code table for field-wise comparison.
  const VCDiffCodeTableData* decoded_table =
      reinterpret_cast<const VCDiffCodeTableData*>(output_.data());
  // Spot-check the modified entries first ...
  EXPECT_EQ(VCD_RUN, decoded_table->inst1[0]);
  EXPECT_EQ(VCD_RUN, decoded_table->inst1[3]);
  EXPECT_EQ(27, decoded_table->size1[17]);
  EXPECT_EQ(61, decoded_table->size1[18]);
  EXPECT_EQ(28, decoded_table->size1[34]);
  EXPECT_EQ(44, decoded_table->size1[50]);
  // ... then compare every entry of all six arrays.
  for (int i = 0; i < VCDiffCodeTableData::kCodeTableSize; ++i) {
    EXPECT_EQ(custom_code_table.inst1[i], decoded_table->inst1[i]);
    EXPECT_EQ(custom_code_table.inst2[i], decoded_table->inst2[i]);
    EXPECT_EQ(custom_code_table.size1[i], decoded_table->size1[i]);
    EXPECT_EQ(custom_code_table.size2[i], decoded_table->size2[i]);
    EXPECT_EQ(custom_code_table.mode1[i], decoded_table->mode1[i]);
    EXPECT_EQ(custom_code_table.mode2[i], decoded_table->mode2[i]);
  }
}

// Decodes the full delta file (custom table + window) in a single chunk.
TEST_F(VCDiffCustomCodeTableDecoderTest, DecodeUsingCustomCodeTable) {
  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
  EXPECT_TRUE(decoder_.DecodeChunk(delta_file_.data(),
                                   delta_file_.size(),
                                   &output_));
  EXPECT_TRUE(decoder_.FinishDecoding());
  EXPECT_EQ(expected_target_, output_);
}

// Supplies the file header minus its final byte (the end of the encoded custom
// code table): DecodeChunk() is expected to accept the partial input, and
// FinishDecoding() is expected to fail with no output produced.
TEST_F(VCDiffCustomCodeTableDecoderTest, IncompleteCustomCodeTable) {
  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
  EXPECT_TRUE(decoder_.DecodeChunk(delta_file_header_.data(),
                                   delta_file_header_.size() - 1,
                                   &output_));
  EXPECT_FALSE(decoder_.FinishDecoding());
  EXPECT_EQ("", output_);
}

typedef VCDiffCustomCodeTableDecoderTest
    VCDiffCustomCodeTableDecoderTestByteByByte;

// Byte-at-a-time decode of the full delta file with the custom code table.
TEST_F(VCDiffCustomCodeTableDecoderTestByteByByte, DecodeUsingCustomCodeTable) {
  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
  for (size_t i = 0; i < delta_file_.size(); ++i) {
    EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[i], 1, &output_));
  }
  EXPECT_TRUE(decoder_.FinishDecoding());
  EXPECT_EQ(expected_target_, output_);
}

// Byte-at-a-time variant of IncompleteCustomCodeTable.
TEST_F(VCDiffCustomCodeTableDecoderTestByteByByte, IncompleteCustomCodeTable) {
  // Truncate the delta file to one byte short of the complete file header.
  delta_file_.resize(delta_file_header_.size() - 1);
  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
  for (size_t i = 0; i < delta_file_.size(); ++i) {
    EXPECT_TRUE(decoder_.DecodeChunk(&delta_file_[i], 1, &output_));
  }
  EXPECT_FALSE(decoder_.FinishDecoding());
  EXPECT_EQ("", output_);
}

#ifdef GTEST_HAS_DEATH_TEST
typedef VCDiffCustomCodeTableDecoderTest VCDiffCustomCodeTableDecoderDeathTest;

// Rebuilds the file header with two-byte cache sizes of 0x90 each and expects
// DecodeChunk() to fail; EXPECT_DEBUG_DEATH matches the message against
// "cache".
TEST_F(VCDiffCustomCodeTableDecoderDeathTest, BadCustomCacheSizes) {
  delta_file_header_.assign(kFileHeader, sizeof(kFileHeader));
  delta_file_header_.push_back(0x81);  // NEAR cache size (top bit)
  delta_file_header_.push_back(0x10);  // NEAR cache size (custom value 0x90)
  delta_file_header_.push_back(0x81);  // SAME cache size (top bit)
  delta_file_header_.push_back(0x10);  // SAME cache size (custom value 0x90)
  delta_file_header_.append(kEncodedCustomCodeTable,
                            sizeof(kEncodedCustomCodeTable));
  // Regenerate delta_file_ so it picks up the replaced header.
  InitializeDeltaFile();
  decoder_.StartDecoding(dictionary_.data(), dictionary_.size());
  EXPECT_DEBUG_DEATH(EXPECT_FALSE(decoder_.DecodeChunk(delta_file_.data(),
                                                       delta_file_.size(),
                                                       &output_)),
                     "cache");
  EXPECT_EQ("", output_);
}
#endif  // GTEST_HAS_DEATH_TEST

}  // unnamed namespace
}  // namespace open_vcdiff
diff --git a/src/vcdiff_defs.h b/src/vcdiff_defs.h
new file mode 100644
index 0000000..ed35f1f
--- /dev/null
+++ b/src/vcdiff_defs.h
@@ -0,0 +1,196 @@
+// Copyright 2008 Google Inc.
+// Author: Lincoln Smith
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
//
// Types and value definitions to support the implementation of RFC 3284 -
// The VCDIFF Generic Differencing and Compression Data Format.
// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
// Many of the definitions below reference sections in that text.

#ifndef OPEN_VCDIFF_VCDIFF_DEFS_H_
#define OPEN_VCDIFF_VCDIFF_DEFS_H_

#include <config.h>
#include <stdint.h>  // int32_t
#include <climits>  // UCHAR_MAX

namespace open_vcdiff {

// Status values: zero for success, and two distinct negative values that
// tell a hard error apart from simply running out of input data.
enum VCDiffResult {
  RESULT_SUCCESS = 0,
  // Many functions within open-vcdiff return signed integer types,
  // and can also return either of these special negative values:
  //
  // An error occurred while performing the requested operation.
  RESULT_ERROR = -1,
  // The end of available data was reached
  // before the requested operation could be completed.
  RESULT_END_OF_DATA = -2
};

// The delta file header section as described in section 4.1 of the RFC:
//
//    "Each delta file starts with a header section organized as below.
//     Note the convention that square-brackets enclose optional items.
//
//           Header1                                  - byte = 0xD6
//           Header2                                  - byte = 0xC3
//           Header3                                  - byte = 0xC4
//           Header4                                  - byte
//           Hdr_Indicator                            - byte
//           [Secondary compressor ID]                - byte
//           [Length of code table data]              - integer
//           [Code table data]
//
//     The first three Header bytes are the ASCII characters 'V', 'C' and
//     'D' with their most significant bits turned on (in hexadecimal, the
//     values are 0xD6, 0xC3, and 0xC4).  The fourth Header byte is
//     currently set to zero.  In the future, it might be used to indicate
//     the version of Vcdiff."
//
// Only the five fixed-size leading bytes are represented here; the optional
// items listed above are not members of this struct.
typedef struct DeltaFileHeader {
  unsigned char header1;  // Always 0xD6 ('V' | 0x80)
  unsigned char header2;  // Always 0xC3 ('C' | 0x80)
  unsigned char header3;  // Always 0xC4 ('D' | 0x80)
  unsigned char header4;  // 0x00 for standard format, 'S' if extensions used
  unsigned char hdr_indicator;  // Combination of the Hdr_Indicator bits below
} DeltaFileHeader;

// The possible values for the Hdr_Indicator field, as described
// in section 4.1 of the RFC:
//
//    "The Hdr_Indicator byte shows if there is any initialization data
//     required to aid in the reconstruction of data in the Window sections.
//     This byte MAY have non-zero values for either, both, or neither of
//     the two bits VCD_DECOMPRESS and VCD_CODETABLE below:
//
//         7 6 5 4 3 2 1 0
//        +-+-+-+-+-+-+-+-+
//        | | | | | | | | |
//        +-+-+-+-+-+-+-+-+
//                     ^ ^
//                     | |
//                     | +-- VCD_DECOMPRESS
//                     +---- VCD_CODETABLE
//
//     If bit 0 (VCD_DECOMPRESS) is non-zero, this indicates that a
//     secondary compressor may have been used to further compress certain
//     parts of the delta encoding data [...]"
// [Secondary compressors are not supported by open-vcdiff.]
//
//    "If bit 1 (VCD_CODETABLE) is non-zero, this indicates that an
//     application-defined code table is to be used for decoding the delta
//     instructions. [...]"
//
const unsigned char VCD_DECOMPRESS = 0x01;
const unsigned char VCD_CODETABLE = 0x02;

// The possible values for the Win_Indicator field, as described
// in section 4.2 of the RFC:
//
//    "Win_Indicator:
//
//     This byte is a set of bits, as shown:
//
//         7 6 5 4 3 2 1 0
//        +-+-+-+-+-+-+-+-+
//        | | | | | | | | |
//        +-+-+-+-+-+-+-+-+
//                     ^ ^
//                     | |
//                     | +-- VCD_SOURCE
//                     +---- VCD_TARGET
//
//     If bit 0 (VCD_SOURCE) is non-zero, this indicates that a
//     segment of data from the "source" file was used as the
//     corresponding source window of data to encode the target
//     window.  The decoder will use this same source data segment to
//     decode the target window.
//
//     If bit 1 (VCD_TARGET) is non-zero, this indicates that a
//     segment of data from the "target" file was used as the
//     corresponding source window of data to encode the target
//     window.  As above, this same source data segment is used to
//     decode the target window.
//
//     The Win_Indicator byte MUST NOT have more than one of the bits
//     set (non-zero).  It MAY have none of these bits set."
//
const unsigned char VCD_SOURCE = 0x01;
const unsigned char VCD_TARGET = 0x02;
// If this flag is set, the delta window includes an Adler32 checksum
// of the target window data.  Not part of the RFC draft standard.
// Its value (0x04) is bit 2 of the byte, which the diagram above leaves blank.
const unsigned char VCD_CHECKSUM = 0x04;

// The possible values for the Delta_Indicator field, as described
// in section 4.3 of the RFC:
//
//    "Delta_Indicator:
//     This byte is a set of bits, as shown:
//
//         7 6 5 4 3 2 1 0
//        +-+-+-+-+-+-+-+-+
//        | | | | | | | | |
//        +-+-+-+-+-+-+-+-+
//                   ^ ^ ^
//                   | | |
//                   | | +-- VCD_DATACOMP
//                   | +---- VCD_INSTCOMP
//                   +------ VCD_ADDRCOMP
//
//          VCD_DATACOMP:   bit value 1.
//          VCD_INSTCOMP:   bit value 2.
//          VCD_ADDRCOMP:   bit value 4.
//
//     [...] If the bit VCD_DECOMPRESS (Section 4.1) was on, each of these
//     sections may have been compressed using the specified secondary
//     compressor.  The bit positions 0 (VCD_DATACOMP), 1
//     (VCD_INSTCOMP), and 2 (VCD_ADDRCOMP) respectively indicate, if
//     non-zero, that the corresponding parts are compressed."
// [Secondary compressors are not supported, so open-vcdiff decoding will fail
//  if these bits are not all zero.]
//
const unsigned char VCD_DATACOMP = 0x01;
const unsigned char VCD_INSTCOMP = 0x02;
const unsigned char VCD_ADDRCOMP = 0x04;

// A COPY address has 32 bits, which places a limit
// of 2GB on the maximum combined size of the dictionary plus
// the target window (= the chunk of data to be encoded.)
// The type is signed (int32_t), so the largest representable
// address is 2^31 - 1.
typedef int32_t VCDAddress;

// The address modes used for COPY instructions, as defined in
// section 5.3 of the RFC.
//
// The first two modes (0 and 1) are defined as SELF (addressing forward
// from the beginning of the source window) and HERE (addressing backward
// from the current position in the source window + previously decoded
// target data.)
//
// After those first two modes, there are a variable number of NEAR modes
// (which take a recently-used address and add a positive offset to it)
// and SAME modes (which match a previously-used address using a "hash" of
// the lowest bits of the address.)  The number of NEAR and SAME modes
// depends on the defined size of the address cache; since this number is
// variable, these modes cannot be specified as enum values.
//
// VCD_FIRST_NEAR_MODE is the value of the first NEAR mode.  VCD_MAX_MODES is
// one more than the largest value an unsigned char can hold.
enum VCDiffModes {
  VCD_SELF_MODE = 0,
  VCD_HERE_MODE = 1,
  VCD_FIRST_NEAR_MODE = 2,
  VCD_MAX_MODES = UCHAR_MAX + 1  // 256
};

}  // namespace open_vcdiff

#endif  // OPEN_VCDIFF_VCDIFF_DEFS_H_
diff --git a/src/vcdiff_main.cc b/src/vcdiff_main.cc
new file mode 100644
index 0000000..714e6f8
--- /dev/null
+++ b/src/vcdiff_main.cc
@@ -0,0 +1,609 @@
// Copyright 2008 Google Inc.
// Author: Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// A command-line interface to the open-vcdiff library.
+ +#include <config.h> +#include <cassert> +#include <cerrno> +#include <cstdio> +#include <cstring> // strerror +#include <memory> +#include <string> +#include <vector> +#include "gflags/gflags.h" +#include "logging.h" +#include "google/vcdecoder.h" +#include "google/vcencoder.h" + +using std::string; +using google::GetCommandLineFlagInfoOrDie; +using google::ShowUsageWithFlagsRestrict; + +// The buffer size, which determines the maximum allowable size +// of a target window, based on how much memory can be allocated. +// Both of these can be increased (and the default can be decreased) +// using the --buffersize flag. +static const size_t kDefaultBufferSize = 1 << 20; // 1 MB +static const size_t kMaxBufferSize = 1 << 26; // 64 MB + +// Definitions of command-line flags +DEFINE_string(dictionary, "", + "File containing dictionary data (required)"); +DEFINE_string(target, "", + "Target file (default is stdin for encode, stdout for decode"); +DEFINE_string(delta, "", + "Encoded delta file (default is stdout for encode, " + "stdin for decode"); +DEFINE_uint64(buffersize, kDefaultBufferSize, + "Buffer size for reading input file"); +DEFINE_bool(checksum, false, + "Include an Adler32 checksum of the target data when encoding"); +DEFINE_bool(interleaved, false, "Use interleaved format"); +DEFINE_bool(stats, false, "Report compression percentage"); +DEFINE_bool(target_matches, false, "Find duplicate strings in target data" + " as well as dictionary data"); + +static const char* const kUsageString = + " {encode | delta | decode | patch }[ <options> ]\n" + "encode or delta: create delta file from dictionary and target file\n" + "decode or patch: reconstruct target file from dictionary and delta file"; + +namespace open_vcdiff { + +class VCDiffFileBasedCoder { + public: + VCDiffFileBasedCoder(); + ~VCDiffFileBasedCoder(); + + // Once the command-line arguments have been parsed, these functions + // will use the supplied options to carry out a file-based encode + // or 
decode operation. + bool Encode(); + bool Decode(); + bool DecodeAndCompare(); // for "vcdiff test"; compare target with original + + private: + // Determines the size of the file. The given file must be an input file + // opened for reading only, not an input stream such as stdin. The function + // returns true and populates file_size if successful; otherwise, it returns + // false. + static bool FileSize(FILE* file, size_t* file_size); + + // Opens a file for incremental reading. file_name is the name of the file + // to be opened. file_type should be a descriptive name (like "target") for + // use in log messages. If successful, returns true and sets *file to a + // valid input file, *buffer to a region of memory allocated using malloc() + // (so the caller must release it using free()), and buffer_size to the size + // of the buffer, which will not be larger than the size of the file, and + // will not be smaller than the --buffersize option. If the function fails, + // it outputs a log message and returns false. + bool OpenFileForReading(const string& file_name, + const char* file_type, + FILE** file, + std::vector<char>* buffer); + + // Opens the dictionary file and reads it into a newly allocated buffer. + // If successful, returns true and populates dictionary_ with the dictionary + // contents; otherwise, returns false. + bool OpenDictionary(); + + // Opens the input file (the delta or target file) for reading. + // Allocates space for the input buffer. If successful, + // input_file_ will be valid and input_buffer_ will be allocated. + bool OpenInputFile() { + return OpenFileForReading(input_file_name_, + input_file_type_, + &input_file_, + &input_buffer_); + } + + // Opens the output file (the target or delta file) for writing. + // If successful, output_file_ will be valid. + bool OpenOutputFile(); + + // Opens the output file (the target file) for comparison against the decoded + // output when using "vcdiff test". 
+ bool OpenOutputFileForCompare() { + return OpenFileForReading(output_file_name_, + output_file_type_, + &output_file_, + &compare_buffer_); + } + + // Reads as much input data as possible from the input file + // into input_buffer_. If successful, returns true and sets *bytes_read + // to the number of bytes read into input_buffer_. If an error occurs, + // writes an error log message and returns false. + bool ReadInput(size_t* bytes_read); + + // Writes the contents of output to output_file_. If successful, returns + // true. If an error occurs, writes an error log message and returns false. + bool WriteOutput(const string& output); + + // Reads a number of bytes from output_file_ equal to the size of output, + // and compares to make sure they match the contents of output. If the bytes + // do not match, or if end of file is reached before the expected number of + // bytes have been read, or a read error occurs, the function returns false; + // otherwise, returns true. + bool CompareOutput(const string& output); + + // Dictionary contents. The entire dictionary file will be read into memory. + std::vector<char> dictionary_; + + std::auto_ptr<open_vcdiff::HashedDictionary> hashed_dictionary_; + + // These should be set to either "delta" or "target". They are only + // used in log messages such as "Error opening delta file..." + const char* input_file_type_; + const char* output_file_type_; + + // The filenames used for input and output. Will be empty if stdin + // or stdout is being used. + string input_file_name_; + string output_file_name_; + + // stdio-style file handles for the input and output files and the dictionary. + // When encoding, input_file_ is the target file and output_file_ is the delta + // file; when decoding, the reverse is true. The dictionary is always read + // from a file rather than from standard input. + FILE* input_file_; + FILE* output_file_; + + // A memory buffer used to load the input file into memory. 
If the input + // comes from stdin because no input file was specified, then the size of + // input_buffer_ will be the value specified by the --buffersize option. + // If the input comes from a file, then the buffer will be allocated to match + // the file size, if possible. However, the buffer will not exceed + // kMaxBufferSize bytes in length, unless the user specifies the --buffersize + // option to override that limit. + std::vector<char> input_buffer_; + + // A memory buffer used to load the output file into memory for comparison + // if "vcdiff test" is specified. + std::vector<char> compare_buffer_; + + // Making these private avoids implicit copy constructor & assignment operator + VCDiffFileBasedCoder(const VCDiffFileBasedCoder&); // NOLINT + void operator=(const VCDiffFileBasedCoder&); +}; + +inline VCDiffFileBasedCoder::VCDiffFileBasedCoder() + : input_file_type_(""), + output_file_type_(""), + input_file_(NULL), + output_file_(NULL) { } + +VCDiffFileBasedCoder::~VCDiffFileBasedCoder() { + if (input_file_ && (input_file_ != stdin)) { + fclose(input_file_); + input_file_ = NULL; + } + if (output_file_ && (output_file_ != stdout)) { + fclose(output_file_); + output_file_ = NULL; + } +} + +bool VCDiffFileBasedCoder::FileSize(FILE* file, size_t* file_size) { + long initial_position = ftell(file); + if (fseek(file, 0, SEEK_END) != 0) { + return false; + } + *file_size = static_cast<size_t>(ftell(file)); + if (fseek(file, initial_position, SEEK_SET) != 0) { + return false; + } + return true; +} + +bool VCDiffFileBasedCoder::OpenDictionary() { + assert(dictionary_.empty()); + assert(!FLAGS_dictionary.empty()); + FILE* dictionary_file = fopen(FLAGS_dictionary.c_str(), "rb"); + if (!dictionary_file) { + LOG(ERROR) << "Error opening dictionary file '" << FLAGS_dictionary + << "': " << strerror(errno) << LOG_ENDL; + return false; + } + size_t dictionary_size = 0U; + if (!FileSize(dictionary_file, &dictionary_size)) { + LOG(ERROR) << "Error finding size of 
dictionary file '" << FLAGS_dictionary + << "': " << strerror(errno) << LOG_ENDL; + return false; + } + dictionary_.resize(dictionary_size); + if (fread(&dictionary_[0], 1, dictionary_size, dictionary_file) + != dictionary_size) { + LOG(ERROR) << "Unable to read dictionary file '" << FLAGS_dictionary + << "': " << strerror(errno) << LOG_ENDL; + fclose(dictionary_file); + dictionary_.clear(); + return false; + } + fclose(dictionary_file); + return true; +} + +bool VCDiffFileBasedCoder::OpenFileForReading(const string& file_name, + const char* file_type, + FILE** file, + std::vector<char>* buffer) { + assert(buffer->empty()); + size_t buffer_size = 0U; + if (!*file && file_name.empty()) { + *file = stdin; + buffer_size = static_cast<size_t>(FLAGS_buffersize); + } else { + if (!*file) { + *file = fopen(file_name.c_str(), "rb"); + if (!*file) { + LOG(ERROR) << "Error opening " << file_type << " file '" + << file_name << "': " << strerror(errno) << LOG_ENDL; + return false; + } + } + size_t file_size = 0U; + if (!FileSize(*file, &file_size)) { + LOG(ERROR) << "Error finding size of " << file_type << " file '" + << file_name << "': " << strerror(errno) << LOG_ENDL; + return false; + } + buffer_size = kMaxBufferSize; + if (FLAGS_buffersize > buffer_size) { + buffer_size = static_cast<size_t>(FLAGS_buffersize); + } + if (file_size < buffer_size) { + // Allocate just enough memory to store the entire file + buffer_size = file_size; + } + } + buffer->resize(buffer_size); + return true; +} + +// Opens the output file for streamed read operations using the +// standard C I/O library, i.e., fopen(), fwrite(), fclose(). +// No output buffer is allocated because the encoded/decoded output +// is constructed progressively using a std::string object +// whose buffer is resized as needed. 
+bool VCDiffFileBasedCoder::OpenOutputFile() { + if (output_file_name_.empty()) { + output_file_ = stdout; + } else { + output_file_ = fopen(output_file_name_.c_str(), "wb"); + if (!output_file_) { + LOG(ERROR) << "Error opening " << output_file_type_ << " file '" + << output_file_name_ + << "': " << strerror(errno) << LOG_ENDL; + return false; + } + } + return true; +} + +bool VCDiffFileBasedCoder::ReadInput(size_t* bytes_read) { + // Read from file or stdin + *bytes_read = fread(&input_buffer_[0], 1, input_buffer_.size(), input_file_); + if (ferror(input_file_)) { + LOG(ERROR) << "Error reading from " << input_file_type_ << " file '" + << input_file_name_ + << "': " << strerror(errno) << LOG_ENDL; + return false; + } + return true; +} + +bool VCDiffFileBasedCoder::WriteOutput(const string& output) { + if (!output.empty()) { + // Some new output has been generated and is ready to be written + // to the output file or to stdout. + fwrite(output.data(), 1, output.size(), output_file_); + if (ferror(output_file_)) { + LOG(ERROR) << "Error writing " << output.size() << " bytes to " + << output_file_type_ << " file '" << output_file_name_ + << "': " << strerror(errno) << LOG_ENDL; + return false; + } + } + return true; +} + +bool VCDiffFileBasedCoder::CompareOutput(const string& output) { + if (!output.empty()) { + size_t output_size = output.size(); + // Some new output has been generated and is ready to be compared against + // the output file. 
+ if (output_size > compare_buffer_.size()) { + compare_buffer_.resize(output_size); + } + size_t bytes_read = fread(&compare_buffer_[0], + 1, + output_size, + output_file_); + if (ferror(output_file_)) { + LOG(ERROR) << "Error reading from " << output_file_type_ << " file '" + << output_file_name_ << "': " << strerror(errno) << LOG_ENDL; + return false; + } + if (bytes_read < output_size) { + LOG(ERROR) << "Decoded target is longer than original target file" + << LOG_ENDL; + return false; + } + if (output.compare(0, output_size, &compare_buffer_[0], bytes_read) != 0) { + LOG(ERROR) << "Original target file does not match decoded target" + << LOG_ENDL; + return false; + } + } + return true; +} + +bool VCDiffFileBasedCoder::Encode() { + input_file_type_ = "target"; + input_file_name_ = FLAGS_target; + output_file_type_ = "delta"; + output_file_name_ = FLAGS_delta; + if (!OpenDictionary() || !OpenInputFile() || !OpenOutputFile()) { + return false; + } + hashed_dictionary_.reset( + new open_vcdiff::HashedDictionary(&dictionary_[0], dictionary_.size())); + if (!hashed_dictionary_->Init()) { + LOG(ERROR) << "Error initializing hashed dictionary" << LOG_ENDL; + return false; + } + VCDiffFormatExtensionFlags format_flags = open_vcdiff::VCD_STANDARD_FORMAT; + if (FLAGS_interleaved) { + format_flags |= open_vcdiff::VCD_FORMAT_INTERLEAVED; + } + if (FLAGS_checksum) { + format_flags |= open_vcdiff::VCD_FORMAT_CHECKSUM; + } + open_vcdiff::VCDiffStreamingEncoder encoder(hashed_dictionary_.get(), + format_flags, + FLAGS_target_matches); + string output; + size_t input_size = 0; + size_t output_size = 0; + { + if (!encoder.StartEncoding(&output)) { + LOG(ERROR) << "Error during encoder initialization" << LOG_ENDL; + return false; + } + } + do { + size_t bytes_read = 0; + if (!WriteOutput(output) || !ReadInput(&bytes_read)) { + return false; + } + output_size += output.size(); + output.clear(); + if (bytes_read > 0) { + input_size += bytes_read; + if 
(!encoder.EncodeChunk(&input_buffer_[0], bytes_read, &output)) { + LOG(ERROR) << "Error trying to encode data chunk of length " + << bytes_read << LOG_ENDL; + return false; + } + } + } while (!feof(input_file_)); + encoder.FinishEncoding(&output); + if (!WriteOutput(output)) { + return false; + } + output_size += output.size(); + output.clear(); + if (FLAGS_stats && (input_size > 0)) { + LOG(INFO) << "Original size: " << input_size + << "\tCompressed size: " << output_size << " (" + << ((static_cast<double>(output_size) / input_size) * 100) + << "% of original)" << LOG_ENDL; + } + return true; +} + +bool VCDiffFileBasedCoder::Decode() { + input_file_type_ = "delta"; + input_file_name_ = FLAGS_delta; + output_file_type_ = "target"; + output_file_name_ = FLAGS_target; + if (!OpenDictionary() || !OpenInputFile() || !OpenOutputFile()) { + return false; + } + + open_vcdiff::VCDiffStreamingDecoder decoder; + string output; + size_t input_size = 0; + size_t output_size = 0; + decoder.StartDecoding(&dictionary_[0], dictionary_.size()); + + do { + size_t bytes_read = 0; + if (!ReadInput(&bytes_read)) { + return false; + } + if (bytes_read > 0) { + input_size += bytes_read; + if (!decoder.DecodeChunk(&input_buffer_[0], bytes_read, &output)) { + LOG(ERROR) << "Error trying to decode data chunk of length " + << bytes_read << LOG_ENDL; + return false; + } + } + if (!WriteOutput(output)) { + return false; + } + output_size += output.size(); + output.clear(); + } while (!feof(input_file_)); + if (!decoder.FinishDecoding()) { + LOG(ERROR) << "Decode error; '" << FLAGS_delta + << " may not be a valid VCDIFF delta file" << LOG_ENDL; + return false; + } + if (!WriteOutput(output)) { + return false; + } + output_size += output.size(); + output.clear(); + if (FLAGS_stats && (output_size > 0)) { + LOG(INFO) << "Decompressed size: " << output_size + << "\tCompressed size: " << input_size << " (" + << ((static_cast<double>(input_size) / output_size) * 100) + << "% of original)" << 
LOG_ENDL; + } + return true; +} + +bool VCDiffFileBasedCoder::DecodeAndCompare() { + input_file_type_ = "delta"; + input_file_name_ = FLAGS_delta; + output_file_type_ = "target"; + output_file_name_ = FLAGS_target; + if (!OpenDictionary() || !OpenInputFile() || !OpenOutputFileForCompare()) { + return false; + } + + open_vcdiff::VCDiffStreamingDecoder decoder; + string output; + size_t input_size = 0; + size_t output_size = 0; + decoder.StartDecoding(&dictionary_[0], dictionary_.size()); + + do { + size_t bytes_read = 0; + if (!ReadInput(&bytes_read)) { + return false; + } + if (bytes_read > 0) { + input_size += bytes_read; + if (!decoder.DecodeChunk(&input_buffer_[0], bytes_read, &output)) { + LOG(ERROR) << "Error trying to decode data chunk of length " + << bytes_read << LOG_ENDL; + return false; + } + } + if (!CompareOutput(output)) { + return false; + } + output_size += output.size(); + output.clear(); + } while (!feof(input_file_)); + if (!decoder.FinishDecoding()) { + LOG(ERROR) << "Decode error; '" << FLAGS_delta + << " may not be a valid VCDIFF delta file" << LOG_ENDL; + return false; + } + if (!CompareOutput(output)) { + return false; + } + output_size += output.size(); + output.clear(); + if (fgetc(output_file_) != EOF) { + LOG(ERROR) << "Decoded target is shorter than original target file" + << LOG_ENDL; + return false; + } + if (ferror(output_file_)) { + LOG(ERROR) << "Error reading end-of-file indicator from target file" + << LOG_ENDL; + return false; + } + if (FLAGS_stats && (output_size > 0)) { + LOG(INFO) << "Decompressed size: " << output_size + << "\tCompressed size: " << input_size << " (" + << ((static_cast<double>(input_size) / output_size) * 100) + << "% of original)" << LOG_ENDL; + } + return true; +} + +} // namespace open_vcdiff + +int main(int argc, char** argv) { + const char* const command_name = argv[0]; + google::SetUsageMessage(kUsageString); + google::ParseCommandLineFlags(&argc, &argv, true); + if (argc != 2) { + LOG(ERROR) << 
command_name << ": Must specify exactly one command option" + << LOG_ENDL; + ShowUsageWithFlagsRestrict(command_name, "vcdiff"); + return 1; + } + const char* const command_option = argv[1]; + if (FLAGS_dictionary.empty()) { + LOG(ERROR) << command_name << " " << command_option + << ": Must specify --dictionary <file-name>" << LOG_ENDL; + ShowUsageWithFlagsRestrict(command_name, "vcdiff"); + return 1; + } + if (!GetCommandLineFlagInfoOrDie("buffersize").is_default && + (FLAGS_buffersize == 0)) { + LOG(ERROR) << command_name << ": Option --buffersize cannot be 0" + << LOG_ENDL; + ShowUsageWithFlagsRestrict(command_name, "vcdiff"); + return 1; + } + if ((strcmp(command_option, "encode") == 0) || + (strcmp(command_option, "delta") == 0)) { + open_vcdiff::VCDiffFileBasedCoder coder; + if (!coder.Encode()) { + return 1; + } + // The destructor for VCDiffFileBasedCoder will clean up the open files + // and allocated memory. + } else if ((strcmp(command_option, "decode") == 0) || + (strcmp(command_option, "patch") == 0)) { + open_vcdiff::VCDiffFileBasedCoder coder; + if (!coder.Decode()) { + return 1; + } + } else if ((strcmp(command_option, "test") == 0)) { + // "vcdiff test" does not appear in the usage string, but can be + // used for debugging. It encodes, then decodes, then compares the result + // with the original target. It expects the same arguments as + // "vcdiff encode", with the additional requirement that the --target + // and --delta file arguments must be specified, rather than using stdin + // or stdout. It produces a delta file just as for "vcdiff encode". + if (FLAGS_target.empty() || FLAGS_delta.empty()) { + LOG(ERROR) << command_name + << " test: Must specify both --target <file-name>" + " and --delta <file-name>" << LOG_ENDL; + return 1; + } + const string original_target(FLAGS_target); + // Put coder into a separate scope. 
+ { + open_vcdiff::VCDiffFileBasedCoder coder; + if (!coder.Encode()) { + return 1; + } + } + { + open_vcdiff::VCDiffFileBasedCoder coder; + if (!coder.DecodeAndCompare()) { + return 1; + } + } + } else { + LOG(ERROR) << command_name << ": Unrecognized command option " + << command_option << LOG_ENDL; + ShowUsageWithFlagsRestrict(command_name, "vcdiff"); + return 1; + } + return 0; +} diff --git a/src/vcdiffengine.cc b/src/vcdiffengine.cc new file mode 100644 index 0000000..1254e46 --- /dev/null +++ b/src/vcdiffengine.cc @@ -0,0 +1,252 @@ +// Copyright 2006, 2008 Google Inc. +// Authors: Chandra Chereddi, Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <config.h> +#include "vcdiffengine.h" +#include <stdint.h> // uint32_t +#include "blockhash.h" +#include "encodetable.h" +#include "logging.h" +#include "rolling_hash.h" + +namespace open_vcdiff { + +VCDiffEngine::VCDiffEngine(const char* dictionary, size_t dictionary_size) + // If dictionary_size == 0, then dictionary could be NULL. Guard against + // using a NULL value. + : dictionary_((dictionary_size > 0) ? 
new char[dictionary_size] : ""), + dictionary_size_(dictionary_size), + hashed_dictionary_(NULL) { + if (dictionary_size > 0) { + memcpy(const_cast<char*>(dictionary_), dictionary, dictionary_size); + } +} + +VCDiffEngine::~VCDiffEngine() { + delete hashed_dictionary_; + if (dictionary_size_ > 0) { + delete[] dictionary_; + } +} + +bool VCDiffEngine::Init() { + if (hashed_dictionary_) { + LOG(DFATAL) << "Init() called twice for same VCDiffEngine object" + << LOG_ENDL; + return false; + } + hashed_dictionary_ = BlockHash::CreateDictionaryHash(dictionary_, + dictionary_size()); + if (!hashed_dictionary_) { + LOG(DFATAL) << "Creation of dictionary hash failed" << LOG_ENDL; + return false; + } + if (!RollingHash<BlockHash::kBlockSize>::Init()) { + LOG(DFATAL) << "RollingHash initialization failed" << LOG_ENDL; + return false; + } + return true; +} + +// This helper function tries to find an appropriate match within +// hashed_dictionary_ for the block starting at the current target position. +// If target_hash is not NULL, this function will also look for a match +// within the previously encoded target data. +// +// If a match is found, this function will generate an ADD instruction +// for all unencoded data that precedes the match, +// and a COPY instruction for the match itself; then it returns +// the number of bytes processed by both instructions, +// which is guaranteed to be > 0. +// If no appropriate match is found, the function returns 0. +// +// The first four parameters are input parameters which are passed +// directly to BlockHash::FindBestMatch; please see that function +// for a description of their allowable values. 
+template<bool look_for_target_matches> +inline size_t VCDiffEngine::EncodeCopyForBestMatch( + uint32_t hash_value, + const char* target_candidate_start, + const char* unencoded_target_start, + size_t unencoded_target_size, + const BlockHash* target_hash, + VCDiffCodeTableWriter* coder) const { + // When FindBestMatch() comes up with a match for a candidate block, + // it will populate best_match with the size, source offset, + // and target offset of the match. + BlockHash::Match best_match; + + // First look for a match in the dictionary. + hashed_dictionary_->FindBestMatch(hash_value, + target_candidate_start, + unencoded_target_start, + unencoded_target_size, + &best_match); + // If target matching is enabled, then see if there is a better match + // within the target data that has been encoded so far. + if (look_for_target_matches) { + target_hash->FindBestMatch(hash_value, + target_candidate_start, + unencoded_target_start, + unencoded_target_size, + &best_match); + } + if (!ShouldGenerateCopyInstructionForMatchOfSize(best_match.size())) { + return 0; + } + if (best_match.target_offset() > 0) { + // Create an ADD instruction to encode all target bytes + // from the end of the last COPY match, if any, up to + // the beginning of this COPY match. + coder->Add(unencoded_target_start, best_match.target_offset()); + } + coder->Copy(best_match.source_offset(), best_match.size()); + return best_match.target_offset() // ADD size + + best_match.size(); // + COPY size +} + +// Once the encoder loop has finished checking for matches in the target data, +// this function creates an ADD instruction to encode all target bytes +// from the end of the last COPY match, if any, through the end of +// the target data. In the worst case, if no matches were found at all, +// this function will create one big ADD instruction +// for the entire buffer of target data. 
+inline void VCDiffEngine::AddUnmatchedRemainder( + const char* unencoded_target_start, + size_t unencoded_target_size, + VCDiffCodeTableWriter* coder) const { + if (unencoded_target_size > 0) { + coder->Add(unencoded_target_start, unencoded_target_size); + } +} + +// This helper function tells the coder to finish the encoding and write +// the results into the output string "diff". +inline void VCDiffEngine::FinishEncoding(size_t target_size, + OutputStringInterface* diff, + VCDiffCodeTableWriter* coder) const { + if (target_size != static_cast<size_t>(coder->target_length())) { + LOG(DFATAL) << "Internal error in VCDiffEngine::Encode: " + "original target size (" << target_size + << ") does not match number of bytes processed (" + << coder->target_length() << ")" << LOG_ENDL; + } + coder->Output(diff); +} + +template<bool look_for_target_matches> +void VCDiffEngine::EncodeInternal(const char* target_data, + size_t target_size, + OutputStringInterface* diff, + VCDiffCodeTableWriter* coder) const { + if (!hashed_dictionary_) { + LOG(DFATAL) << "Internal error: VCDiffEngine::Encode() " + "called before VCDiffEngine::Init()" << LOG_ENDL; + return; + } + if (target_size == 0) { + return; // Do nothing for empty target + } + if (!coder->Init(dictionary_size())) { + LOG(DFATAL) << "Internal error: " + "Initialization of VCDiffCodeTableWriter failed" << LOG_ENDL; + return; + } + // Special case for really small input + if (target_size < static_cast<size_t>(BlockHash::kBlockSize)) { + AddUnmatchedRemainder(target_data, target_size, coder); + FinishEncoding(target_size, diff, coder); + return; + } + RollingHash<BlockHash::kBlockSize> hasher; + BlockHash* target_hash = NULL; + if (look_for_target_matches) { + // Check matches against previously encoded target data + // in this same target window, as well as against the dictionary + target_hash = BlockHash::CreateTargetHash(target_data, + target_size, + dictionary_size()); + if (!target_hash) { + LOG(DFATAL) << 
"Instantiation of target hash failed" << LOG_ENDL; + return; + } + } + const char* const target_end = target_data + target_size; + const char* const start_of_last_block = target_end - BlockHash::kBlockSize; + // Offset of next bytes in string to ADD if NOT copied (i.e., not found in + // dictionary) + const char* next_encode = target_data; + // candidate_pos points to the start of the kBlockSize-byte block that may + // begin a match with the dictionary or previously encoded target data. + const char* candidate_pos = target_data; + uint32_t hash_value = hasher.Hash(candidate_pos); + while (1) { + const size_t bytes_encoded = + EncodeCopyForBestMatch<look_for_target_matches>( + hash_value, + candidate_pos, + next_encode, + (target_end - next_encode), + target_hash, + coder); + if (bytes_encoded > 0) { + next_encode += bytes_encoded; // Advance past COPYed data + candidate_pos = next_encode; + if (candidate_pos > start_of_last_block) { + break; // Reached end of target data + } + // candidate_pos has jumped ahead by bytes_encoded bytes, so UpdateHash + // can't be used to calculate the hash value at its new position. + hash_value = hasher.Hash(candidate_pos); + if (look_for_target_matches) { + // Update the target hash for the ADDed and COPYed data + target_hash->AddAllBlocksThroughIndex( + static_cast<int>(next_encode - target_data)); + } + } else { + // No match, or match is too small to be worth a COPY instruction. + // Move to the next position in the target data. 
+ if ((candidate_pos + 1) > start_of_last_block) { + break; // Reached end of target data + } + if (look_for_target_matches) { + target_hash->AddOneIndexHash( + static_cast<int>(candidate_pos - target_data), + hash_value); + } + hash_value = hasher.UpdateHash(hash_value, + candidate_pos[0], + candidate_pos[BlockHash::kBlockSize]); + ++candidate_pos; + } + } + AddUnmatchedRemainder(next_encode, target_end - next_encode, coder); + FinishEncoding(target_size, diff, coder); + delete target_hash; +} + +void VCDiffEngine::Encode(const char* target_data, + size_t target_size, + bool look_for_target_matches, + OutputStringInterface* diff, + VCDiffCodeTableWriter* coder) const { + if (look_for_target_matches) { + EncodeInternal<true>(target_data, target_size, diff, coder); + } else { + EncodeInternal<false>(target_data, target_size, diff, coder); + } +} + +} // namespace open_vcdiff diff --git a/src/vcdiffengine.h b/src/vcdiffengine.h new file mode 100644 index 0000000..96ca334 --- /dev/null +++ b/src/vcdiffengine.h @@ -0,0 +1,128 @@ +// Copyright 2006 Google Inc. +// Authors: Sanjay Ghemawat, Jeff Dean, Chandra Chereddi, Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef OPEN_VCDIFF_VCDIFFENGINE_H_ +#define OPEN_VCDIFF_VCDIFFENGINE_H_ + +#include <config.h> +#include <stdint.h> // uint32_t +#include <cstddef> // size_t + +namespace open_vcdiff { + +class BlockHash; +class OutputStringInterface; +class VCDiffCodeTableWriter; + +// The VCDiffEngine class is used to find the optimal encoding (in terms of +// COPY and ADD instructions) for a given dictionary and target window. +// To write the instructions for this encoding, it calls the Copy() +// and Add() methods of the VCDiffCodeTableWriter object which is passed +// as an argument to Encode(). +// +class VCDiffEngine { + public: + VCDiffEngine(const char* dictionary, size_t dictionary_size); + + ~VCDiffEngine(); + + // Initializes the object before use. + // This method must be called after constructing a VCDiffEngine object, + // and before any other method may be called. It should not be called + // twice on the same object. + // Returns true if initialization succeeded, or false if an error occurred, + // in which case no other method except the destructor may then be used + // on the object. + // The Init() method is the only one allowed to treat hashed_dictionary_ + // as non-const. + bool Init(); + + size_t dictionary_size() const { return dictionary_size_; } + + // Main worker function. Finds the best matches between the dictionary + // (source) and target data, and uses the coder to write a + // delta file window into *diff. + // Because it is a const function, many threads + // can call Encode() at once for the same VCDiffEngine object. + // All thread-specific data will be stored in the coder and diff arguments. + // + // look_for_target_matches determines whether to look for matches + // within the previously encoded target data, or just within the source + // (dictionary) data. Please see vcencoder.h for a full explanation + // of this parameter. 
+ void Encode(const char* target_data, + size_t target_size, + bool look_for_target_matches, + OutputStringInterface* diff, + VCDiffCodeTableWriter* coder) const; + + private: + // The minimum size of a string match that is worth putting into + // a COPY instruction. This value is taken from the RFC, which states: + // "The choice of the minimum size 4 for COPY instructions in the default + // code table was made from experiments that showed that excluding small + // matches (less than 4 bytes long) improved the compression rates." + static const size_t kMinimumMatchSize = 4; + + static bool ShouldGenerateCopyInstructionForMatchOfSize(size_t size) { + return size >= kMinimumMatchSize; + } + + // The following two functions use templates to produce two different + // versions of the code depending on the value of the option + // look_for_target_matches. This approach saves a test-and-branch instruction + // within the inner loop of EncodeCopyForBestMatch. + template<bool look_for_target_matches> + void EncodeInternal(const char* target_data, + size_t target_size, + OutputStringInterface* diff, + VCDiffCodeTableWriter* coder) const; + + // If look_for_target_matches is true, then target_hash must point to a valid + // BlockHash object, and cannot be NULL. If look_for_target_matches is + // false, then the value of target_hash is ignored. 
+ template<bool look_for_target_matches> + size_t EncodeCopyForBestMatch(uint32_t hash_value, + const char* target_candidate_start, + const char* unencoded_target_start, + size_t unencoded_target_size, + const BlockHash* target_hash, + VCDiffCodeTableWriter* coder) const; + + void AddUnmatchedRemainder(const char* unencoded_target_start, + size_t unencoded_target_size, + VCDiffCodeTableWriter* coder) const; + + void FinishEncoding(size_t target_size, + OutputStringInterface* diff, + VCDiffCodeTableWriter* coder) const; + + const char* dictionary_; // A copy of the dictionary contents + + const size_t dictionary_size_; + + // A hash that contains one element for every kBlockSize bytes of dictionary_. + // This can be reused to encode many different target strings using the + // same dictionary, without the need to compute the hash values each time. + const BlockHash* hashed_dictionary_; + + // Making these private avoids implicit copy constructor & assignment operator + VCDiffEngine(const VCDiffEngine&); + void operator=(const VCDiffEngine&); +}; + +} // namespace open_vcdiff + +#endif // OPEN_VCDIFF_VCDIFFENGINE_H_ diff --git a/src/vcdiffengine_test.cc b/src/vcdiffengine_test.cc new file mode 100644 index 0000000..a115adc --- /dev/null +++ b/src/vcdiffengine_test.cc @@ -0,0 +1,1002 @@ +// Copyright 2008 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <config.h> +#include "vcdiffengine.h" +#include <algorithm> +#include <string> +#include <vector> +#include "addrcache.h" +#include "blockhash.h" +#include "encodetable.h" +#include "google/output_string.h" +#include "rolling_hash.h" +#include "testing.h" +#include "varint_bigendian.h" +#include "vcdiff_defs.h" + +namespace open_vcdiff { + +namespace { + +using std::string; + +class VCDiffEngineTestBase : public testing::Test { + protected: + // Some common definitions and helper functions used in the various tests + // for VCDiffEngine. + static const int kBlockSize = BlockHash::kBlockSize; + + VCDiffEngineTestBase() : interleaved_(false), + diff_output_string_(&diff_), + verify_position_(0), + saved_total_size_position_(0), + saved_delta_encoding_position_(0), + saved_section_sizes_position_(0), + data_bytes_(0), + instruction_bytes_(0), + address_bytes_(0) { } + + virtual ~VCDiffEngineTestBase() { } + + virtual void TearDown() { + VerifyMatchCounts(); + } + + // Copy string_without_spaces into newly allocated result buffer, + // but pad its contents with space characters so that every character + // in string_without_spaces corresponds to (block_size - 1) + // spaces in the result, followed by that character. + // For example: + // If string_without_spaces begins "The only thing"... and block_size is 4, + // then 3 space characters will be inserted + // between each letter in the result, as follows: + // " T h e o n l y t h i n g"... + // This makes testing simpler, because finding a block_size-byte match + // between the dictionary and target only depends on the + // trailing letter in each block. + // If no_initial_padding is true, then the first letter will not have + // spaces added in front of it. 
+ static void MakeEachLetterABlock(const char* string_without_spaces, + const char** result, + int block_size, + bool no_initial_padding) { + const size_t length_without_spaces = strlen(string_without_spaces); + char* padded_text = new char[(block_size * length_without_spaces) + 1]; + memset(padded_text, ' ', block_size * length_without_spaces); + char* padded_text_ptr = padded_text; + if (!no_initial_padding) { + padded_text_ptr += block_size - 1; + } + for (size_t i = 0; i < length_without_spaces; ++i) { + *padded_text_ptr = string_without_spaces[i]; + padded_text_ptr += block_size; + } + *(padded_text_ptr - block_size + 1) = '\0'; + *result = padded_text; + } + + // These functions iterate through the decoded output and expect + // simple elements: bytes or variable-length integers. + void ExpectByte(char byte) { + EXPECT_GT(diff_.size(), verify_position_); + EXPECT_EQ(byte, diff_[verify_position_]); + ++verify_position_; + } + + size_t ExpectVarint(int32_t expected_value) { + EXPECT_GT(diff_.size(), verify_position_); + const char* const original_position = &diff_[verify_position_]; + const char* new_position = original_position; + const size_t expected_length = VarintBE<int32_t>::Length(expected_value); + int32_t parsed_value = VarintBE<int32_t>::Parse(diff_.data() + diff_.size(), + &new_position); + EXPECT_LE(0, parsed_value); + size_t parsed_length = new_position - original_position; + EXPECT_EQ(expected_value, parsed_value); + EXPECT_EQ(expected_length, parsed_length); + verify_position_ += parsed_length; + return parsed_length; + } + + size_t ExpectSize(size_t size) { + return ExpectVarint(static_cast<int32_t>(size)); + } + + size_t ExpectStringLength(const char* s) { + return ExpectSize(strlen(s)); + } + + void SkipVarint() { + EXPECT_GT(diff_.size(), verify_position_); + const char* const original_position = &diff_[verify_position_]; + const char* new_position = original_position; + VarintBE<int32_t>::Parse(diff_.data() + diff_.size(), &new_position); + 
size_t parsed_length = new_position - original_position; + verify_position_ += parsed_length; + } + + void ExpectDataByte(char byte) { + ExpectByte(byte); + if (interleaved_) { + ++instruction_bytes_; + } else { + ++data_bytes_; + } + } + + void ExpectDataString(const char *expected_string) { + const char* ptr = expected_string; + while (*ptr) { + ExpectDataByte(*ptr); + ++ptr; + } + } + + void ExpectDataStringWithBlockSpacing(const char *expected_string, + bool trailing_spaces) { + const char* ptr = expected_string; + while (*ptr) { + for (int i = 0; i < (kBlockSize - 1); ++i) { + ExpectDataByte(' '); + } + ExpectDataByte(*ptr); + ++ptr; + } + if (trailing_spaces) { + for (int i = 0; i < (kBlockSize - 1); ++i) { + ExpectDataByte(' '); + } + } + } + + void ExpectInstructionByte(char byte) { + ExpectByte(byte); + ++instruction_bytes_; + } + + void ExpectInstructionVarint(int32_t value) { + instruction_bytes_ += ExpectVarint(value); + } + + void ExpectAddressByte(char byte) { + ExpectByte(byte); + if (interleaved_) { + ++instruction_bytes_; + } else { + ++address_bytes_; + } + } + + void ExpectAddressVarint(int32_t value) { + if (interleaved_) { + instruction_bytes_ += ExpectVarint(value); + } else { + address_bytes_ += ExpectVarint(value); + } + } + + // The following functions leverage the fact that the encoder uses + // the default code table and cache sizes. They are able to search for + // instructions of a particular size. The logic for mapping from + // instruction type, mode, and size to opcode value is very different here + // from the logic used in encodetable.{h,cc}, so hopefully + // this version will help validate that the other is correct. + // This version uses conditional statements, while encodetable.h + // looks up values in a mapping table. 
+ void ExpectAddress(int32_t address, int copy_mode) { + if ((copy_mode >= default_cache_.FirstSameMode()) && + (copy_mode <= default_cache_.LastMode())) { + ExpectAddressByte(address); + } else { + ExpectAddressVarint(address); + } + } + + void ExpectAddInstruction(int size) { + if (size <= 18) { + ExpectInstructionByte(0x01 + size); + } else { + ExpectInstructionByte(0x01); + ExpectInstructionVarint(size); + } + } + + void ExpectCopyInstruction(int size, int mode) { + if ((size >= 4) && (size <= 16)) { + ExpectInstructionByte(0x10 + (0x10 * mode) + size); + } else { + ExpectInstructionByte(0x13 + (0x10 * mode)); + ExpectInstructionVarint(size); + } + ExpectMatch(size); + } + + bool ExpectAddCopyInstruction(int add_size, int copy_size, int copy_mode) { + if ((copy_mode < default_cache_.FirstSameMode()) && + (add_size <= 4) && + (copy_size >= 4) && + (copy_size <= 6)) { + ExpectInstructionByte(0x9C + + (0x0C * copy_mode) + + (0x03 * add_size) + + copy_size); + ExpectMatch(copy_size); + return true; + } else if ((copy_mode >= default_cache_.FirstSameMode()) && + (add_size <= 4) && + (copy_size == 4)) { + ExpectInstructionByte(0xD2 + (0x04 * copy_mode) + add_size); + ExpectMatch(copy_size); + return true; + } else { + ExpectAddInstruction(add_size); + return false; + } + } + + void ExpectAddInstructionForStringLength(const char* s) { + ExpectAddInstruction(static_cast<int>(strlen(s))); + } + + // Call this function before beginning to iterate through the diff string + // using the Expect... functions. + // text must be NULL-terminated. 
+ void VerifyHeaderForDictionaryAndTargetText(const char* dictionary, + const char* target_text) { + ExpectByte(0x01); // Win_Indicator: VCD_SOURCE (dictionary) + ExpectStringLength(dictionary); + ExpectByte(0x00); // Source segment position: start of dictionary + saved_total_size_position_ = verify_position_; + SkipVarint(); // Length of the delta encoding + saved_delta_encoding_position_ = verify_position_; + ExpectStringLength(target_text); + ExpectByte(0x00); // Delta_indicator (no compression) + saved_section_sizes_position_ = verify_position_; + SkipVarint(); // length of data for ADDs and RUNs + SkipVarint(); // length of instructions section + SkipVarint(); // length of addresses for COPYs + } + + // Call this function before beginning to iterating through the entire + // diff string using the Expect... functions. It makes sure that the + // size totals in the window header match the number of bytes that + // were parsed. + void VerifySizes() { + EXPECT_EQ(verify_position_, diff_.size()); + const size_t delta_encoding_size = verify_position_ - + saved_delta_encoding_position_; + verify_position_ = saved_total_size_position_; + ExpectSize(delta_encoding_size); + verify_position_ = saved_section_sizes_position_; + ExpectSize(data_bytes_); + ExpectSize(instruction_bytes_); + ExpectSize(address_bytes_); + } + + void ExpectMatch(size_t match_size) { + if (match_size >= expected_match_counts_.size()) { + // Be generous to avoid resizing again + expected_match_counts_.resize(match_size * 2, 0); + } + ++expected_match_counts_[match_size]; + } + + void VerifyMatchCounts() { + EXPECT_TRUE(std::equal(expected_match_counts_.begin(), + expected_match_counts_.end(), + actual_match_counts_.begin())); + } + + bool interleaved_; + string diff_; + OutputString<string> diff_output_string_; + size_t verify_position_; + size_t saved_total_size_position_; + size_t saved_delta_encoding_position_; + size_t saved_section_sizes_position_; + size_t data_bytes_; + size_t 
instruction_bytes_; + size_t address_bytes_; + VCDiffAddressCache default_cache_; // Used for finding mode values + std::vector<int> expected_match_counts_; + std::vector<int> actual_match_counts_; +}; + +class VCDiffEngineTest : public VCDiffEngineTestBase { + protected: + VCDiffEngineTest() : + engine_(dictionary_, strlen(dictionary_)) { + EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init()); + } + + virtual ~VCDiffEngineTest() { } + + + static void SetUpTestCase() { + MakeEachLetterABlock(dictionary_without_spaces_, &dictionary_, + kBlockSize, false); + MakeEachLetterABlock(target_without_spaces_, &target_, kBlockSize, false); + } + + static void TearDownTestCase() { + delete[] dictionary_; + delete[] target_; + } + + void EncodeNothing(bool interleaved, bool target_matching) { + interleaved_ = interleaved; + VCDiffCodeTableWriter coder(interleaved); + engine_.Encode("", 0, target_matching, &diff_output_string_, &coder); + EXPECT_TRUE(diff_.empty()); + actual_match_counts_ = coder.match_counts(); + } + + // text must be NULL-terminated + void EncodeText(const char* text, bool interleaved, bool target_matching) { + interleaved_ = interleaved; + VCDiffCodeTableWriter coder(interleaved); + engine_.Encode(text, + strlen(text), + target_matching, + &diff_output_string_, + &coder); + actual_match_counts_ = coder.match_counts(); + } + + void Encode(bool interleaved, bool target_matching) { + EncodeText(target_, interleaved, target_matching); + VerifyHeader(); + } + + void VerifyHeader() { + VerifyHeaderForDictionaryAndTargetText(dictionary_, target_); + } + + static const char dictionary_without_spaces_[]; + static const char target_without_spaces_[]; + + static const char* dictionary_; + static const char* target_; + + const VCDiffEngine engine_; +}; + +#ifdef GTEST_HAS_DEATH_TEST +typedef VCDiffEngineTest VCDiffEngineDeathTest; +#endif // GTEST_HAS_DEATH_TEST + +const char VCDiffEngineTest::dictionary_without_spaces_[] = + "The only thing we have to fear is fear 
itself"; + +const char VCDiffEngineTest::target_without_spaces_[] = + "What we hear is fearsome"; + +const char* VCDiffEngineTest::dictionary_ = NULL; +const char* VCDiffEngineTest::target_ = NULL; + +#ifdef GTEST_HAS_DEATH_TEST +TEST_F(VCDiffEngineDeathTest, InitCalledTwice) { + EXPECT_DEBUG_DEATH(EXPECT_FALSE(const_cast<VCDiffEngine*>(&engine_)->Init()), + "twice"); +} +#endif // GTEST_HAS_DEATH_TEST + +TEST_F(VCDiffEngineTest, EngineEncodeNothing) { + EncodeNothing(/* interleaved = */ false, /* target matching = */ false); +} + +TEST_F(VCDiffEngineTest, EngineEncodeNothingInterleaved) { + EncodeNothing(/* interleaved = */ true, /* target matching = */ false); +} + +TEST_F(VCDiffEngineTest, EngineEncodeNothingTarget) { + EncodeNothing(/* interleaved = */ false, /* target matching = */ true); +} + +TEST_F(VCDiffEngineTest, EngineEncodeNothingTargetInterleaved) { + EncodeNothing(/* interleaved = */ true, /* target matching = */ true); +} + +TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlock) { + const char* small_text = " "; + EncodeText(small_text, + /* interleaved = */ false, + /* target matching = */ false); + VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text); + // Data for ADDs + ExpectDataString(small_text); + // Instructions and sizes + ExpectAddInstructionForStringLength(small_text); +} + +TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlockInterleaved) { + const char* small_text = " "; + EncodeText(small_text, + /* interleaved = */ true, + /* target matching = */ false); + VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text); + // Interleaved section + ExpectAddInstructionForStringLength(small_text); + ExpectDataString(small_text); +} + +TEST_F(VCDiffEngineTest, EngineEncodeSampleText) { + Encode(/* interleaved = */ false, /* target matching = */ false); + // Data for ADDs + ExpectDataStringWithBlockSpacing("W", false); + ExpectDataByte('t'); + ExpectDataByte('s'); + if (kBlockSize < 4) { + 
ExpectDataStringWithBlockSpacing("ome", false); + } else { + ExpectDataByte('m'); + } + // Instructions and sizes + if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1, + VCD_SELF_MODE)) { + ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE); + } + ExpectAddInstruction(1); + ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE); + ExpectCopyInstruction(11 * kBlockSize, + default_cache_.FirstNearMode()); + if (kBlockSize < 4) { + // Copy instructions of size kBlockSize and (2 * kBlockSize) - 1 + // are too small to be selected + ExpectAddInstruction((3 * kBlockSize) + 1); + } else { + if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) { + ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE); + } + if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) { + ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE); + } + } + // Addresses for COPY + ExpectAddressVarint(18 * kBlockSize); // "ha" + ExpectAddressVarint(14 * kBlockSize); // " we h" + ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear" + if (kBlockSize >= 4) { + ExpectAddressVarint(4 * kBlockSize); // "o" from "The only" + ExpectAddressVarint(2 * kBlockSize); // "e" from "The only" + } + VerifySizes(); +} + +TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleaved) { + Encode(/* interleaved = */ true, /* target matching = */ false); + // Interleaved section + if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1, + VCD_SELF_MODE)) { + ExpectDataStringWithBlockSpacing("W", false); + ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE); + } else { + ExpectDataStringWithBlockSpacing("W", false); + } + ExpectAddressVarint(18 * kBlockSize); // "ha" + ExpectAddInstruction(1); + ExpectDataByte('t'); + ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE); + ExpectAddressVarint(14 * kBlockSize); // " we h" + ExpectCopyInstruction(11 * kBlockSize, + default_cache_.FirstNearMode()); + ExpectAddressVarint((9 * kBlockSize) + 
(kBlockSize - 1)); // "ear is fear" + if (kBlockSize < 4) { + // Copy instructions of size kBlockSize and (2 * kBlockSize) - 1 + // are too small to be selected + ExpectAddInstruction((3 * kBlockSize) + 1); + ExpectDataByte('s'); + ExpectDataStringWithBlockSpacing("ome", false); + } else { + if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) { + ExpectDataByte('s'); + ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE); + } else { + ExpectDataByte('s'); + } + ExpectAddressVarint(4 * kBlockSize); // "o" from "The only" + if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) { + ExpectDataByte('m'); + ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE); + } else { + ExpectDataByte('m'); + } + ExpectAddressVarint(2 * kBlockSize); // "e" from "The only" + } + VerifySizes(); +} + +TEST_F(VCDiffEngineTest, EngineEncodeSampleTextWithTargetMatching) { + Encode(/* interleaved = */ false, /* target matching = */ true); + // Data for ADDs + ExpectDataStringWithBlockSpacing("W", false); + ExpectDataByte('t'); + ExpectDataByte('s'); + if (kBlockSize < 4) { + ExpectDataStringWithBlockSpacing("ome", false); + } else { + ExpectDataByte('m'); + } + // Instructions and sizes + if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1, + VCD_SELF_MODE)) { + ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE); + } + ExpectAddInstruction(1); + ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE); + ExpectCopyInstruction(11 * kBlockSize, + default_cache_.FirstNearMode()); + if (kBlockSize < 4) { + // Copy instructions of size kBlockSize and (2 * kBlockSize) - 1 + // are too small to be selected + ExpectAddInstruction((3 * kBlockSize) + 1); + } else { + if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) { + ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE); + } + if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) { + ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE); + } + } + // Addresses for COPY + 
ExpectAddressVarint(18 * kBlockSize); // "ha" + ExpectAddressVarint(14 * kBlockSize); // " we h" + ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear" + if (kBlockSize >= 4) { + ExpectAddressVarint(4 * kBlockSize); // "o" from "The only" + ExpectAddressVarint(2 * kBlockSize); // "e" from "The only" + } + VerifySizes(); +} + +TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleavedWithTargetMatching) { + Encode(/* interleaved = */ true, /* target matching = */ false); + // Interleaved section + if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1, + VCD_SELF_MODE)) { + ExpectDataStringWithBlockSpacing("W", false); + ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE); + } else { + ExpectDataStringWithBlockSpacing("W", false); + } + ExpectAddressVarint(18 * kBlockSize); // "ha" + ExpectAddInstruction(1); + ExpectDataByte('t'); + ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE); + ExpectAddressVarint(14 * kBlockSize); // " we h" + ExpectCopyInstruction(11 * kBlockSize, + default_cache_.FirstNearMode()); + ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear" + if (kBlockSize < 4) { + // Copy instructions of size kBlockSize and (2 * kBlockSize) - 1 + // are too small to be selected + ExpectAddInstruction((3 * kBlockSize) + 1); + ExpectDataByte('s'); + ExpectDataStringWithBlockSpacing("ome", false); + } else { + if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) { + ExpectDataByte('s'); + ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE); + } else { + ExpectDataByte('s'); + } + ExpectAddressVarint(4 * kBlockSize); // "o" from "The only" + if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) { + ExpectDataByte('m'); + ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE); + } else { + ExpectDataByte('m'); + } + ExpectAddressVarint(2 * kBlockSize); // "e" from "The only" + } + VerifySizes(); +} + +// This test case takes a dictionary containing several instances of the 
string +// "weasel", and a target string which is identical to the dictionary +// except that all instances of "weasel" have been replaced with the string +// "moon-pie". It tests that COPY instructions are generated for all +// boilerplate text (that is, the text between the "moon-pie" instances in +// the target) and, if target matching is enabled, that each instance of +// "moon-pie" (except the first one) is encoded using a COPY instruction +// rather than an ADD. +class WeaselsToMoonpiesTest : public VCDiffEngineTestBase { + protected: + // kCompressibleTestBlockSize: + // The size of the block to create for each letter in the + // dictionary and search string for the "compressible text" test. + // See MakeEachLetterABlock, below. + // If we use kCompressibleTestBlockSize = kBlockSize, then the + // encoder will find one match per unique letter in the HTML text. + // There are too many examples of "<" in the text for the encoder + // to iterate through them all, and some matches are not found. + // If we use kCompressibleTextBlockSize = 1, then the boilerplate + // text between "weasel" strings in the dictionary and "moon-pie" + // strings in the target may not be long enough to be found by + // the encoder's block-hash algorithm. A good value, that will give + // reproducible results across all block sizes, will be somewhere + // in between these extremes. + static const int kCompressibleTestBlockSize = + (kBlockSize < 4) ? 
1 : kBlockSize / 4; + static const int kTrailingSpaces = kCompressibleTestBlockSize - 1; + + WeaselsToMoonpiesTest() : + engine_(dictionary_, strlen(dictionary_)), + match_index_(0), + search_dictionary_(dictionary_, strlen(dictionary_)), + copied_moonpie_address_(0) { + EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init()); + weasel_positions_[0] = 0; + after_weasel_[0] = 0; + moonpie_positions_[0] = 0; + after_moonpie_[0] = 0; + } + + virtual ~WeaselsToMoonpiesTest() { } + + static void SetUpTestCase() { + MakeEachLetterABlock(dictionary_without_spaces_, + &dictionary_, + kCompressibleTestBlockSize, + false); + MakeEachLetterABlock(target_without_spaces_, + &target_, + kCompressibleTestBlockSize, + false); + MakeEachLetterABlock(weasel_text_without_spaces_, + &weasel_text_, + kCompressibleTestBlockSize, + true); + MakeEachLetterABlock(moonpie_text_without_spaces_, + &moonpie_text_, + kCompressibleTestBlockSize, + true); + } + + static void TearDownTestCase() { + delete[] dictionary_; + delete[] target_; + delete[] weasel_text_; + delete[] moonpie_text_; + } + + // text must be NULL-terminated + void EncodeText(const char* text, bool interleaved, bool target_matching) { + interleaved_ = interleaved; + VCDiffCodeTableWriter coder(interleaved); + engine_.Encode(text, + strlen(text), + target_matching, + &diff_output_string_, + &coder); + actual_match_counts_ = coder.match_counts(); + } + + void Encode(bool interleaved, bool target_matching) { + EncodeText(target_, interleaved, target_matching); + VerifyHeader(); + } + + void VerifyHeader() { + VerifyHeaderForDictionaryAndTargetText(dictionary_, target_); + } + + void ExpectCopyForSize(size_t size, int mode) { + ExpectCopyInstruction(static_cast<int>(size), mode); + } + + void ExpectAddForSize(size_t size) { + ExpectAddInstruction(static_cast<int>(size)); + } + + void ExpectAddressVarintForSize(size_t value) { + ExpectAddressVarint(static_cast<int32_t>(value)); + } + + void FindNextMoonpie(bool 
include_trailing_spaces) { + ++match_index_; + SetCurrentWeaselPosition(search_dictionary_.find(weasel_text_, + AfterLastWeasel())); + if (CurrentWeaselPosition() == string::npos) { + SetCurrentMoonpiePosition(string::npos); + } else { + SetCurrentAfterWeaselPosition(CurrentWeaselPosition() + + strlen(weasel_text_) + + (include_trailing_spaces ? + kTrailingSpaces : 0)); + SetCurrentMoonpiePosition(AfterLastMoonpie() + + CurrentBoilerplateLength()); + SetCurrentAfterMoonpiePosition(CurrentMoonpiePosition() + + strlen(moonpie_text_) + + (include_trailing_spaces ? + kTrailingSpaces : 0)); + } + } + bool NoMoreMoonpies() const { + return CurrentMoonpiePosition() == string::npos; + } + size_t CurrentWeaselPosition() const { + return weasel_positions_[match_index_]; + } + size_t LastWeaselPosition() const { + return weasel_positions_[match_index_ - 1]; + } + size_t CurrentMoonpiePosition() const { + return moonpie_positions_[match_index_]; + } + size_t LastMoonpiePosition() const { + return moonpie_positions_[match_index_ - 1]; + } + size_t AfterLastWeasel() const { + CHECK_GE(match_index_, 1); + return after_weasel_[match_index_ - 1]; + } + size_t AfterPreviousWeasel() const { + CHECK_GE(match_index_, 2); + return after_weasel_[match_index_ - 2]; + } + size_t AfterLastMoonpie() const { + CHECK_GE(match_index_, 1); + return after_moonpie_[match_index_ - 1]; + } + size_t AfterPreviousMoonpie() const { + CHECK_GE(match_index_, 2); + return after_moonpie_[match_index_ - 2]; + } + + void SetCurrentWeaselPosition(size_t value) { + weasel_positions_[match_index_] = value; + } + void SetCurrentAfterWeaselPosition(size_t value) { + after_weasel_[match_index_] = value; + } + void SetCurrentMoonpiePosition(size_t value) { + moonpie_positions_[match_index_] = value; + } + void SetCurrentAfterMoonpiePosition(size_t value) { + after_moonpie_[match_index_] = value; + } + + // Find the length of the text in between the "weasel" strings in the + // compressible dictionary, which is the 
same as the text between + // the "moon-pie" strings in the compressible target. + size_t CurrentBoilerplateLength() const { + CHECK_GE(match_index_, 1); + return CurrentWeaselPosition() - AfterLastWeasel(); + } + size_t DistanceFromLastWeasel() const { + CHECK_GE(match_index_, 1); + return CurrentWeaselPosition() - LastWeaselPosition(); + } + size_t DistanceFromLastMoonpie() const { + CHECK_GE(match_index_, 1); + return CurrentMoonpiePosition() - LastMoonpiePosition(); + } + size_t DistanceBetweenLastTwoWeasels() const { + CHECK_GE(match_index_, 2); + return AfterLastWeasel() - AfterPreviousWeasel(); + } + size_t DistanceBetweenLastTwoMoonpies() const { + CHECK_GE(match_index_, 2); + return AfterLastMoonpie() - AfterPreviousMoonpie(); + } + + int32_t FindBoilerplateAddressForCopyMode(int copy_mode) const; + int UpdateCopyModeForMoonpie(int copy_mode) const; + int32_t FindMoonpieAddressForCopyMode(int copy_mode) const; + + void CopyBoilerplateAndAddMoonpie(int copy_mode); + void CopyBoilerplateAndCopyMoonpie(int copy_mode, int moonpie_copy_mode); + + static const char dictionary_without_spaces_[]; + static const char target_without_spaces_[]; + static const char weasel_text_without_spaces_[]; + static const char moonpie_text_without_spaces_[]; + + static const char* dictionary_; + static const char* target_; + static const char* weasel_text_; + static const char* moonpie_text_; + + const VCDiffEngine engine_; + size_t weasel_positions_[128]; + size_t after_weasel_[128]; + size_t moonpie_positions_[128]; + size_t after_moonpie_[128]; + int match_index_; + string search_dictionary_; + size_t copied_moonpie_address_; +}; + +// Care is taken in the formulation of the dictionary +// to ensure that the surrounding letters do not match; for example, +// there are not two instances of the string "weasels". Otherwise, +// the matching behavior would not be as predictable. 
+const char WeaselsToMoonpiesTest::dictionary_without_spaces_[] = + "<html>\n" + "<head>\n" + "<meta content=\"text/html; charset=ISO-8859-1\"\n" + "http-equiv=\"content-type\">\n" + "<title>All about weasels</title>\n" + "</head>\n" + "<!-- You will notice that the word \"weasel\" may be replaced" + " with something else -->\n" + "<body>\n" + "<h1>All about the weasel: highly compressible HTML text</h1>" + "<ul>\n" + "<li>Don\'t look a gift weasel in its mouth.</li>\n" + "<li>This item makes sure the next occurrence is found.</li>\n" + "<li>Don\'t count your weasel, before it\'s hatched.</li>\n" + "</ul>\n" + "<br>\n" + "</body>\n" + "</html>\n"; + +const char WeaselsToMoonpiesTest::target_without_spaces_[] = + "<html>\n" + "<head>\n" + "<meta content=\"text/html; charset=ISO-8859-1\"\n" + "http-equiv=\"content-type\">\n" + "<title>All about moon-pies</title>\n" + "</head>\n" + "<!-- You will notice that the word \"moon-pie\" may be replaced" + " with something else -->\n" + "<body>\n" + "<h1>All about the moon-pie: highly compressible HTML text</h1>" + "<ul>\n" + "<li>Don\'t look a gift moon-pie in its mouth.</li>\n" + "<li>This item makes sure the next occurrence is found.</li>\n" + "<li>Don\'t count your moon-pie, before it\'s hatched.</li>\n" + "</ul>\n" + "<br>\n" + "</body>\n" + "</html>\n"; + +const char WeaselsToMoonpiesTest::weasel_text_without_spaces_[] = "weasel"; +const char WeaselsToMoonpiesTest::moonpie_text_without_spaces_[] = "moon-pie"; + +const char* WeaselsToMoonpiesTest::dictionary_ = NULL; +const char* WeaselsToMoonpiesTest::target_ = NULL; +const char* WeaselsToMoonpiesTest::weasel_text_ = NULL; +const char* WeaselsToMoonpiesTest::moonpie_text_ = NULL; + +int32_t WeaselsToMoonpiesTest::FindBoilerplateAddressForCopyMode( + int copy_mode) const { + size_t copy_address = 0; + if (copy_mode == VCD_SELF_MODE) { + copy_address = AfterLastWeasel(); + } else if ((copy_mode >= default_cache_.FirstNearMode()) && + (copy_mode < 
default_cache_.FirstSameMode())) { + copy_address = DistanceBetweenLastTwoWeasels(); + } else if ((copy_mode >= default_cache_.FirstSameMode()) && + (copy_mode <= default_cache_.LastMode())) { + copy_address = AfterLastWeasel() % 256; + } else { + LOG(FATAL) << "Unexpected copy mode " << copy_mode; + } + return static_cast<int32_t>(copy_address); +} + +int WeaselsToMoonpiesTest::UpdateCopyModeForMoonpie(int copy_mode) const { + if (copy_mode == default_cache_.FirstSameMode()) { + return default_cache_.FirstSameMode() + + static_cast<int>((copied_moonpie_address_ / 256) % 3); + } else { + return copy_mode; + } +} + +int32_t WeaselsToMoonpiesTest::FindMoonpieAddressForCopyMode( + int copy_mode) const { + size_t copy_address = 0; + if (copy_mode == VCD_HERE_MODE) { + copy_address = DistanceFromLastMoonpie(); + } else if ((copy_mode >= default_cache_.FirstNearMode()) && + (copy_mode < default_cache_.FirstSameMode())) { + copy_address = DistanceBetweenLastTwoMoonpies() - kTrailingSpaces; + } else if ((copy_mode >= default_cache_.FirstSameMode()) && + (copy_mode <= default_cache_.LastMode())) { + copy_address = copied_moonpie_address_ % 256; + } else { + LOG(FATAL) << "Unexpected copy mode " << copy_mode; + } + return static_cast<int32_t>(copy_address); +} + +// Expect one dictionary instance of "weasel" to be replaced with "moon-pie" +// in the encoding. +void WeaselsToMoonpiesTest::CopyBoilerplateAndAddMoonpie(int copy_mode) { + EXPECT_FALSE(NoMoreMoonpies()); + ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode); + ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode); + ExpectAddInstructionForStringLength(moonpie_text_); + ExpectDataString(moonpie_text_); +} + +// Expect one dictionary instance of "weasel" to be replaced with "moon-pie" +// in the encoding. The "moon-pie" text will be copied from the previously +// encoded target. 
// Expects two COPY instructions: one for the boilerplate text preceding the
// current "weasel" (address mode copy_mode), then one for the "moon-pie"
// text plus kTrailingSpaces.  moonpie_copy_mode is first passed through
// UpdateCopyModeForMoonpie(), which may replace it with a different SAME
// mode.
void WeaselsToMoonpiesTest::CopyBoilerplateAndCopyMoonpie(
    int copy_mode,
    int moonpie_copy_mode) {
  EXPECT_FALSE(NoMoreMoonpies());
  ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode);
  ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode);
  moonpie_copy_mode = UpdateCopyModeForMoonpie(moonpie_copy_mode);
  ExpectCopyForSize(strlen(moonpie_text_) + kTrailingSpaces, moonpie_copy_mode);
  ExpectAddress(FindMoonpieAddressForCopyMode(moonpie_copy_mode),
                moonpie_copy_mode);
}

// Without target matching, every "moon-pie" must be encoded as an ADD and
// only the boilerplate between them as COPYs from the dictionary.
TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleNoTargetMatching) {
  Encode(/* interleaved = */ true, /* target matching = */ false);
  FindNextMoonpie(false);
  // Expect all five "weasel"s to be replaced with "moon-pie"s
  CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode());
  FindNextMoonpie(false);
  CopyBoilerplateAndAddMoonpie(VCD_SELF_MODE);
  FindNextMoonpie(false);
  CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 1);
  FindNextMoonpie(false);
  CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 2);
  FindNextMoonpie(false);
  CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 3);
  FindNextMoonpie(false);
  EXPECT_TRUE(NoMoreMoonpies());
  // The text after the final weasel is copied in a single instruction.
  ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(),
                    default_cache_.FirstNearMode());
  ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels());
  VerifySizes();
}

TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleWithTargetMatching) {
  Encode(/* interleaved = */ true, /* target matching = */ true);
  // Expect all five "weasel"s to be replaced with "moon-pie"s.
  // Every "moon-pie" after the first one should be copied from the
  // previously encoded target text.
  FindNextMoonpie(false);
  CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode());
  FindNextMoonpie(true);
  CopyBoilerplateAndCopyMoonpie(VCD_SELF_MODE, VCD_HERE_MODE);
  // The expected source address of the third moon-pie copy (and hence its
  // address mode) depends on the block size.
  if (kBlockSize <= 4) {
    copied_moonpie_address_ = strlen(dictionary_) + LastMoonpiePosition();
    FindNextMoonpie(true);
    CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1,
                                  default_cache_.FirstSameMode());
  } else {  // kBlockSize > 4
    copied_moonpie_address_ = strlen(dictionary_) + CurrentMoonpiePosition();
    FindNextMoonpie(true);
    CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1,
                                  default_cache_.FirstNearMode() + 2);
  }
  LOG(INFO) << "copied_moonpie_address_ : "
            << copied_moonpie_address_ << LOG_ENDL;
  FindNextMoonpie(true);
  CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 3,
                                default_cache_.FirstSameMode());
  FindNextMoonpie(true);
  CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1,
                                default_cache_.FirstSameMode());
  FindNextMoonpie(true);
  EXPECT_TRUE(NoMoreMoonpies());
  // The text after the final weasel is copied in a single instruction.
  ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(),
                    default_cache_.FirstNearMode() + 3);
  ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels());
  VerifySizes();
}

}  // anonymous namespace
}  // namespace open_vcdiff

diff --git a/src/vcencoder.cc b/src/vcencoder.cc new file mode 100644 index 0000000..94c883d --- /dev/null +++ b/src/vcencoder.cc @@ -0,0 +1,224 @@
// Copyright 2007 Google Inc.
// Author: Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// Classes to implement an Encoder for the format described in +// RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format. +// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html +// +// The RFC describes the possibility of using a secondary compressor +// to further reduce the size of each section of the VCDIFF output. +// That feature is not supported in this implementation of the encoder +// and decoder. +// No secondary compressor types have been publicly registered with +// the IANA at http://www.iana.org/assignments/vcdiff-comp-ids +// in the more than five years since the registry was created, so there +// is no standard set of compressor IDs which would be generated by other +// encoders or accepted by other decoders. + +#include <config.h> +#include "google/vcencoder.h" +#include <vector> +#include "checksum.h" +#include "encodetable.h" +#include "logging.h" +#include "google/output_string.h" +#include "vcdiffengine.h" + +namespace open_vcdiff { + +HashedDictionary::HashedDictionary(const char* dictionary_contents, + size_t dictionary_size) + : engine_(new VCDiffEngine(dictionary_contents, dictionary_size)) { } + +HashedDictionary::~HashedDictionary() { delete engine_; } + +bool HashedDictionary::Init() { + return const_cast<VCDiffEngine*>(engine_)->Init(); +} + +class VCDiffStreamingEncoderImpl { + public: + VCDiffStreamingEncoderImpl(const HashedDictionary* dictionary, + VCDiffFormatExtensionFlags format_extensions, + bool look_for_target_matches); + + // These functions are identical to their counterparts + // in VCDiffStreamingEncoder. 
+ bool StartEncoding(OutputStringInterface* out); + + bool EncodeChunk(const char* data, size_t len, OutputStringInterface* out); + + bool FinishEncoding(OutputStringInterface* out); + + const std::vector<int>& match_counts() const { + return coder_.match_counts(); + } + + private: + // Write the header (as defined in section 4.1 of the RFC) to *output. + // This includes information that can be gathered + // before the first chunk of input is available. + void WriteHeader(OutputStringInterface* output) const; + + const VCDiffEngine* engine_; + + // This implementation of the encoder uses the default + // code table. A VCDiffCodeTableWriter could also be constructed + // using a custom code table. + VCDiffCodeTableWriter coder_; + + const VCDiffFormatExtensionFlags format_extensions_; + + // Determines whether to look for matches within the previously encoded + // target data, or just within the source (dictionary) data. Please see + // vcencoder.h for a full explanation of this parameter. + const bool look_for_target_matches_; + + // This state variable is used to ensure that StartEncoding(), EncodeChunk(), + // and FinishEncoding() are called in the correct order. It will be true + // if StartEncoding() has been called, followed by zero or more calls to + // EncodeChunk(), but FinishEncoding() has not yet been called. It will + // be false initially, and also after FinishEncoding() has been called. 
+ bool encode_chunk_allowed_; + + // Making these private avoids implicit copy constructor & assignment operator + VCDiffStreamingEncoderImpl(const VCDiffStreamingEncoderImpl&); // NOLINT + void operator=(const VCDiffStreamingEncoderImpl&); +}; + +inline VCDiffStreamingEncoderImpl::VCDiffStreamingEncoderImpl( + const HashedDictionary* dictionary, + VCDiffFormatExtensionFlags format_extensions, + bool look_for_target_matches) + : engine_(dictionary->engine()), + coder_((format_extensions & VCD_FORMAT_INTERLEAVED) != 0), + format_extensions_(format_extensions), + look_for_target_matches_(look_for_target_matches), + encode_chunk_allowed_(false) { } + +inline void VCDiffStreamingEncoderImpl::WriteHeader( + OutputStringInterface* output) const { + DeltaFileHeader header_data = { + 0xD6, // Header1: "V" | 0x80 + 0xC3, // Header2: "C" | 0x80 + 0xC4, // Header3: "D" | 0x80 + 0x00, // Header4: Draft standard format + 0x00 }; // Hdr_Indicator: + // No compression, no custom code table + if (format_extensions_ != VCD_STANDARD_FORMAT) { + header_data.header4 = 'S'; // Header4: VCDIFF/SDCH, extensions used + } + output->append(reinterpret_cast<const char*>(&header_data), + sizeof(header_data)); + // If custom cache table sizes or a custom code table were used + // for encoding, here is where they would be appended to *output. + // This implementation of the encoder does not use those features, + // although the decoder can understand and interpret them. 
+} + +inline bool VCDiffStreamingEncoderImpl::StartEncoding( + OutputStringInterface* out) { + WriteHeader(out); + encode_chunk_allowed_ = true; + return true; +} + +inline bool VCDiffStreamingEncoderImpl::EncodeChunk( + const char* data, + size_t len, + OutputStringInterface* out) { + if (!encode_chunk_allowed_) { + LOG(ERROR) << "EncodeChunk called before StartEncoding" << LOG_ENDL; + return false; + } + if ((format_extensions_ & VCD_FORMAT_CHECKSUM) != 0) { + coder_.AddChecksum(ComputeAdler32(data, len)); + } + engine_->Encode(data, len, look_for_target_matches_, out, &coder_); + return true; +} + +inline bool VCDiffStreamingEncoderImpl::FinishEncoding( + OutputStringInterface* /*out*/) { + if (!encode_chunk_allowed_) { + LOG(ERROR) << "FinishEncoding called before StartEncoding" << LOG_ENDL; + return false; + } + encode_chunk_allowed_ = false; + // There should not be any need to output more data + // since EncodeChunk() encodes a complete target window + // and there is no end-of-delta-file marker. 
+ return true; +} + +VCDiffStreamingEncoder::VCDiffStreamingEncoder( + const HashedDictionary* dictionary, + VCDiffFormatExtensionFlags format_extensions, + bool look_for_target_matches) + : impl_(new VCDiffStreamingEncoderImpl(dictionary, + format_extensions, + look_for_target_matches)) { } + +VCDiffStreamingEncoder::~VCDiffStreamingEncoder() { delete impl_; } + +bool VCDiffStreamingEncoder::StartEncodingToInterface( + OutputStringInterface* out) { + return impl_->StartEncoding(out); +} + +bool VCDiffStreamingEncoder::EncodeChunkToInterface( + const char* data, + size_t len, + OutputStringInterface* out) { + return impl_->EncodeChunk(data, len, out); +} + +bool VCDiffStreamingEncoder::FinishEncodingToInterface( + OutputStringInterface* out) { + return impl_->FinishEncoding(out); +} + +void VCDiffStreamingEncoder::GetMatchCounts( + std::vector<int>* match_counts) const { + if (!match_counts) { + LOG(DFATAL) << "GetMatchCounts() called with NULL argument" << LOG_ENDL; + return; + } + *match_counts = impl_->match_counts(); +} + +bool VCDiffEncoder::EncodeToInterface(const char* target_data, + size_t target_len, + OutputStringInterface* out) { + out->clear(); + if (!encoder_) { + if (!dictionary_.Init()) { + LOG(ERROR) << "Error initializing HashedDictionary" << LOG_ENDL; + return false; + } + encoder_ = new VCDiffStreamingEncoder(&dictionary_, + flags_, + look_for_target_matches_); + } + if (!encoder_->StartEncodingToInterface(out)) { + return false; + } + if (!encoder_->EncodeChunkToInterface(target_data, target_len, out)) { + return false; + } + return encoder_->FinishEncodingToInterface(out); +} + +} // namespace open_vcdiff diff --git a/src/vcencoder_test.cc b/src/vcencoder_test.cc new file mode 100644 index 0000000..500e799 --- /dev/null +++ b/src/vcencoder_test.cc @@ -0,0 +1,948 @@ +// Copyright 2008 Google Inc. 
+// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <config.h> +#include "google/vcencoder.h" +#include <algorithm> +#include <cstdlib> // free, posix_memalign +#include <cstring> // memcpy +#include <string> +#include <vector> +#include "blockhash.h" +#include "checksum.h" +#include "logging.h" +#include "google/output_string.h" +#include "testing.h" +#include "varint_bigendian.h" +#include "google/vcdecoder.h" +#include "vcdiff_defs.h" + +#ifdef HAVE_EXT_ROPE +#include <ext/rope> +#include "output_string_crope.h" +using __gnu_cxx::crope; +#endif // HAVE_EXT_ROPE + +#ifdef HAVE_MALLOC_H +#include <malloc.h> +#endif // HAVE_MALLOC_H + +#ifdef HAVE_SYS_MMAN_H +#define _XOPEN_SOURCE 600 // posix_memalign +#include <sys/mman.h> // mprotect +#endif // HAVE_SYS_MMAN_H + +#ifdef HAVE_UNISTD_H +#include <unistd.h> // getpagesize +#endif // HAVE_UNISTD_H + +namespace open_vcdiff { +namespace { + +using std::string; + +static const size_t kFileHeaderSize = sizeof(DeltaFileHeader); + +// This is to check the maximum possible encoding size +// if using a single ADD instruction, so assume that the +// dictionary size, the length of the ADD data, the size +// of the target window, and the length of the delta window +// are all two-byte Varints, that is, 128 <= length < 4096. +// This figure includes three extra bytes for a zero-sized +// ADD instruction with a two-byte Varint explicit size. 
+// Any additional COPY & ADD instructions must reduce +// the length of the encoding from this maximum. +static const size_t kWindowHeaderSize = 21; + +class VerifyEncodedBytesTest : public testing::Test { + public: + VerifyEncodedBytesTest() : delta_index_(0) { } + virtual ~VerifyEncodedBytesTest() { } + + void ExpectByte(unsigned char b) { + EXPECT_EQ(b, static_cast<unsigned char>(delta_[delta_index_])); + ++delta_index_; + } + + void ExpectString(const char* s) { + const size_t size = strlen(s); // don't include terminating NULL char + EXPECT_EQ(string(s, size), + string(delta_data() + delta_index_, size)); + delta_index_ += size; + } + + void ExpectNoMoreBytes() { + EXPECT_EQ(delta_index_, delta_size()); + } + + void ExpectSize(size_t size) { + const char* delta_size_pos = &delta_[delta_index_]; + EXPECT_EQ(size, + static_cast<size_t>( + VarintBE<int32_t>::Parse(delta_data() + delta_size(), + &delta_size_pos))); + delta_index_ = delta_size_pos - delta_data(); + } + + void ExpectChecksum(VCDChecksum checksum) { + const char* delta_checksum_pos = &delta_[delta_index_]; + EXPECT_EQ(checksum, + static_cast<VCDChecksum>( + VarintBE<int64_t>::Parse(delta_data() + delta_size(), + &delta_checksum_pos))); + delta_index_ = delta_checksum_pos - delta_data(); + } + + const string& delta_as_const() const { return delta_; } + string* delta() { return &delta_; } + + const char* delta_data() const { return delta_as_const().data(); } + size_t delta_size() const { return delta_as_const().size(); } + + private: + string delta_; + size_t delta_index_; +}; + +class VCDiffEncoderTest : public VerifyEncodedBytesTest { + protected: + static const char kDictionary[]; + static const char kTarget[]; + + VCDiffEncoderTest(); + virtual ~VCDiffEncoderTest() { } + + void TestWithFixedChunkSize(size_t chunk_size); + void TestWithEncodedChunkVector(size_t chunk_size); + + HashedDictionary hashed_dictionary_; + VCDiffStreamingEncoder encoder_; + VCDiffStreamingDecoder decoder_; + VCDiffEncoder 
simple_encoder_; + VCDiffDecoder simple_decoder_; + + string result_target_; +}; + +const char VCDiffEncoderTest::kDictionary[] = + "\"Just the place for a Snark!\" the Bellman cried,\n" + "As he landed his crew with care;\n" + "Supporting each man on the top of the tide\n" + "By a finger entwined in his hair.\n"; + +const char VCDiffEncoderTest::kTarget[] = + "\"Just the place for a Snark! I have said it twice:\n" + "That alone should encourage the crew.\n" + "Just the place for a Snark! I have said it thrice:\n" + "What I tell you three times is true.\"\n"; + +VCDiffEncoderTest::VCDiffEncoderTest() + : hashed_dictionary_(kDictionary, sizeof(kDictionary)), + encoder_(&hashed_dictionary_, + VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM, + /* look_for_target_matches = */ true), + simple_encoder_(kDictionary, sizeof(kDictionary)) { + EXPECT_TRUE(hashed_dictionary_.Init()); +} + +TEST_F(VCDiffEncoderTest, EncodeBeforeStartEncoding) { + EXPECT_FALSE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta())); +} + +TEST_F(VCDiffEncoderTest, FinishBeforeStartEncoding) { + EXPECT_FALSE(encoder_.FinishEncoding(delta())); +} + +TEST_F(VCDiffEncoderTest, EncodeDecodeNothing) { + HashedDictionary nothing_dictionary("", 0); + EXPECT_TRUE(nothing_dictionary.Init()); + VCDiffStreamingEncoder nothing_encoder(¬hing_dictionary, + VCD_STANDARD_FORMAT, + false); + EXPECT_TRUE(nothing_encoder.StartEncoding(delta())); + EXPECT_TRUE(nothing_encoder.FinishEncoding(delta())); + decoder_.StartDecoding("", 0); + EXPECT_TRUE(decoder_.DecodeChunk(delta_data(), + delta_size(), + &result_target_)); + EXPECT_TRUE(decoder_.FinishDecoding()); + EXPECT_TRUE(result_target_.empty()); +} + +// A NULL dictionary pointer is legal as long as the dictionary size is 0. 
TEST_F(VCDiffEncoderTest, EncodeDecodeNullDictionaryPtr) {
  HashedDictionary null_dictionary(NULL, 0);
  EXPECT_TRUE(null_dictionary.Init());
  VCDiffStreamingEncoder null_encoder(&null_dictionary,
                                      VCD_STANDARD_FORMAT,
                                      false);
  EXPECT_TRUE(null_encoder.StartEncoding(delta()));
  EXPECT_TRUE(null_encoder.EncodeChunk(kTarget, strlen(kTarget), delta()));
  EXPECT_TRUE(null_encoder.FinishEncoding(delta()));
  // The delta may be no larger than the target plus the file header and
  // one window header.
  EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
            delta_size());
  decoder_.StartDecoding(NULL, 0);
  EXPECT_TRUE(decoder_.DecodeChunk(delta_data(),
                                   delta_size(),
                                   &result_target_));
  EXPECT_TRUE(decoder_.FinishDecoding());
  EXPECT_EQ(kTarget, result_target_);
}

// Round trip through the one-shot encoder and decoder with their default
// settings.
TEST_F(VCDiffEncoderTest, EncodeDecodeSimple) {
  EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta()));
  EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
            delta_size());
  EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
                                     sizeof(kDictionary),
                                     delta_as_const(),
                                     &result_target_));
  EXPECT_EQ(kTarget, result_target_);
}

// Same round trip with the interleaved format extension enabled.
TEST_F(VCDiffEncoderTest, EncodeDecodeInterleaved) {
  simple_encoder_.SetFormatFlags(VCD_FORMAT_INTERLEAVED);
  EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta()));
  EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
            delta_size());
  EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
                                     sizeof(kDictionary),
                                     delta_as_const(),
                                     &result_target_));
  EXPECT_EQ(kTarget, result_target_);
}

// Same round trip with both the interleaved and checksum extensions enabled.
TEST_F(VCDiffEncoderTest, EncodeDecodeInterleavedChecksum) {
  simple_encoder_.SetFormatFlags(VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM);
  EXPECT_TRUE(simple_encoder_.Encode(kTarget,
                                     strlen(kTarget),
                                     delta()));
  EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
            delta_size());
  EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
                                     sizeof(kDictionary),
                                     delta_as_const(),
                                     &result_target_));
  EXPECT_EQ(kTarget, result_target_);
}

// Streaming round trip in which the whole target is passed as one chunk
// and the whole delta is decoded as one chunk.
TEST_F(VCDiffEncoderTest, EncodeDecodeSingleChunk) {
  EXPECT_TRUE(encoder_.StartEncoding(delta()));
  EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta()));
  EXPECT_TRUE(encoder_.FinishEncoding(delta()));
  EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
            delta_size());
  decoder_.StartDecoding(kDictionary, sizeof(kDictionary));
  EXPECT_TRUE(decoder_.DecodeChunk(delta_data(),
                                   delta_size(),
                                   &result_target_));
  EXPECT_TRUE(decoder_.FinishDecoding());
  EXPECT_EQ(kTarget, result_target_);
}

// Streaming round trip in which the output of StartEncoding, EncodeChunk
// and FinishEncoding goes into three separate strings, which are then fed
// to the decoder one after another.
TEST_F(VCDiffEncoderTest, EncodeDecodeSeparate) {
  string delta_start, delta_encode, delta_finish;
  EXPECT_TRUE(encoder_.StartEncoding(&delta_start));
  EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), &delta_encode));
  EXPECT_TRUE(encoder_.FinishEncoding(&delta_finish));
  EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
            delta_start.size() + delta_encode.size() + delta_finish.size());
  decoder_.StartDecoding(kDictionary, sizeof(kDictionary));
  EXPECT_TRUE(decoder_.DecodeChunk(delta_start.data(),
                                   delta_start.size(),
                                   &result_target_));
  EXPECT_TRUE(decoder_.DecodeChunk(delta_encode.data(),
                                   delta_encode.size(),
                                   &result_target_));
  EXPECT_TRUE(decoder_.DecodeChunk(delta_finish.data(),
                                   delta_finish.size(),
                                   &result_target_));
  EXPECT_TRUE(decoder_.FinishDecoding());
  EXPECT_EQ(kTarget, result_target_);
}

#ifdef HAVE_EXT_ROPE
// Test that the crope class can be used in place of a string for encoding
// and decoding.
+TEST_F(VCDiffEncoderTest, EncodeDecodeCrope) { + crope delta_crope, result_crope; + EXPECT_TRUE(encoder_.StartEncoding(&delta_crope)); + EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), &delta_crope)); + EXPECT_TRUE(encoder_.FinishEncoding(&delta_crope)); + EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, + delta_crope.size()); + decoder_.StartDecoding(kDictionary, sizeof(kDictionary)); + // crope can't guarantee that its characters are contiguous, so the decoding + // has to be done byte-by-byte. + for (crope::const_iterator it = delta_crope.begin(); + it != delta_crope.end(); it++) { + const char this_char = *it; + EXPECT_TRUE(decoder_.DecodeChunk(&this_char, 1, &result_crope)); + } + EXPECT_TRUE(decoder_.FinishDecoding()); + crope expected_target(kTarget); + EXPECT_EQ(expected_target, result_crope); +} +#endif // HAVE_EXT_ROPE + +void VCDiffEncoderTest::TestWithFixedChunkSize(size_t chunk_size) { + delta()->clear(); + EXPECT_TRUE(encoder_.StartEncoding(delta())); + for (size_t chunk_start_index = 0; + chunk_start_index < strlen(kTarget); + chunk_start_index += chunk_size) { + size_t this_chunk_size = chunk_size; + const size_t bytes_available = strlen(kTarget) - chunk_start_index; + if (this_chunk_size > bytes_available) { + this_chunk_size = bytes_available; + } + EXPECT_TRUE(encoder_.EncodeChunk(&kTarget[chunk_start_index], + this_chunk_size, + delta())); + } + EXPECT_TRUE(encoder_.FinishEncoding(delta())); + const size_t num_windows = (strlen(kTarget) / chunk_size) + 1; + const size_t size_of_windows = + strlen(kTarget) + (kWindowHeaderSize * num_windows); + EXPECT_GE(kFileHeaderSize + size_of_windows, delta_size()); + result_target_.clear(); + decoder_.StartDecoding(kDictionary, sizeof(kDictionary)); + for (size_t chunk_start_index = 0; + chunk_start_index < delta_size(); + chunk_start_index += chunk_size) { + size_t this_chunk_size = chunk_size; + const size_t bytes_available = delta_size() - chunk_start_index; + if 
(this_chunk_size > bytes_available) { + this_chunk_size = bytes_available; + } + EXPECT_TRUE(decoder_.DecodeChunk(delta_data() + chunk_start_index, + this_chunk_size, + &result_target_)); + } + EXPECT_TRUE(decoder_.FinishDecoding()); + EXPECT_EQ(kTarget, result_target_); + LOG(INFO) << "Finished testing chunk_size = " << chunk_size << LOG_ENDL; +} + +TEST_F(VCDiffEncoderTest, EncodeDecodeFixedChunkSizes) { + // These specific chunk sizes have failed in the past + TestWithFixedChunkSize(6); + TestWithFixedChunkSize(45); + TestWithFixedChunkSize(60); + + // Now loop through all possible chunk sizes + for (size_t chunk_size = 1; chunk_size < strlen(kTarget); ++chunk_size) { + TestWithFixedChunkSize(chunk_size); + } +} + +// Splits the text to be encoded into fixed-size chunks. Encodes each +// chunk and puts it into a vector of strings. Then decodes each string +// in the vector and appends the result into result_target_. +void VCDiffEncoderTest::TestWithEncodedChunkVector(size_t chunk_size) { + std::vector<string> encoded_chunks; + string this_encoded_chunk; + size_t total_chunk_size = 0; + EXPECT_TRUE(encoder_.StartEncoding(&this_encoded_chunk)); + encoded_chunks.push_back(this_encoded_chunk); + total_chunk_size += this_encoded_chunk.size(); + for (size_t chunk_start_index = 0; + chunk_start_index < strlen(kTarget); + chunk_start_index += chunk_size) { + size_t this_chunk_size = chunk_size; + const size_t bytes_available = strlen(kTarget) - chunk_start_index; + if (this_chunk_size > bytes_available) { + this_chunk_size = bytes_available; + } + this_encoded_chunk.clear(); + EXPECT_TRUE(encoder_.EncodeChunk(&kTarget[chunk_start_index], + this_chunk_size, + &this_encoded_chunk)); + encoded_chunks.push_back(this_encoded_chunk); + total_chunk_size += this_encoded_chunk.size(); + } + this_encoded_chunk.clear(); + EXPECT_TRUE(encoder_.FinishEncoding(&this_encoded_chunk)); + encoded_chunks.push_back(this_encoded_chunk); + total_chunk_size += this_encoded_chunk.size(); + 
const size_t num_windows = (strlen(kTarget) / chunk_size) + 1; + const size_t size_of_windows = + strlen(kTarget) + (kWindowHeaderSize * num_windows); + EXPECT_GE(kFileHeaderSize + size_of_windows, total_chunk_size); + result_target_.clear(); + decoder_.StartDecoding(kDictionary, sizeof(kDictionary)); + for (std::vector<string>::iterator it = encoded_chunks.begin(); + it != encoded_chunks.end(); ++it) { + EXPECT_TRUE(decoder_.DecodeChunk(it->data(), it->size(), &result_target_)); + } + EXPECT_TRUE(decoder_.FinishDecoding()); + EXPECT_EQ(kTarget, result_target_); + LOG(INFO) << "Finished testing chunk_size = " << chunk_size << LOG_ENDL; +} + +TEST_F(VCDiffEncoderTest, EncodeDecodeStreamOfChunks) { + // Loop through all possible chunk sizes + for (size_t chunk_size = 1; chunk_size < strlen(kTarget); ++chunk_size) { + TestWithEncodedChunkVector(chunk_size); + } +} + +// Verify that HashedDictionary stores a copy of the dictionary text, +// rather than just storing a pointer to it. If the dictionary buffer +// is overwritten after creating a HashedDictionary from it, it shouldn't +// affect an encoder that uses that HashedDictionary. +TEST_F(VCDiffEncoderTest, DictionaryBufferOverwritten) { + string dictionary_copy(kDictionary, sizeof(kDictionary)); + HashedDictionary hd_copy(dictionary_copy.data(), dictionary_copy.size()); + EXPECT_TRUE(hd_copy.Init()); + VCDiffStreamingEncoder copy_encoder(&hd_copy, + VCD_FORMAT_INTERLEAVED + | VCD_FORMAT_CHECKSUM, + /* look_for_target_matches = */ true); + // Produce a reference version of the encoded text. + string delta_before; + EXPECT_TRUE(copy_encoder.StartEncoding(&delta_before)); + EXPECT_TRUE(copy_encoder.EncodeChunk(kTarget, + strlen(kTarget), + &delta_before)); + EXPECT_TRUE(copy_encoder.FinishEncoding(&delta_before)); + EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, + delta_before.size()); + + // Overwrite the dictionary text with all 'Q' characters. 
+ dictionary_copy.replace(0, + dictionary_copy.size(), + dictionary_copy.size(), + 'Q'); + // When the encoder is used on the same target text after overwriting + // the dictionary, it should produce the same encoded output. + string delta_after; + EXPECT_TRUE(copy_encoder.StartEncoding(&delta_after)); + EXPECT_TRUE(copy_encoder.EncodeChunk(kTarget, strlen(kTarget), &delta_after)); + EXPECT_TRUE(copy_encoder.FinishEncoding(&delta_after)); + EXPECT_EQ(delta_before, delta_after); +} + +// Binary data test part 1: The dictionary and target data should not +// be treated as NULL-terminated. An embedded NULL should be handled like +// any other byte of data. +// +// Encodes an 8-byte target against a 10-byte dictionary, both containing +// zero bytes, then decodes the delta and expects the exact target bytes back. +TEST_F(VCDiffEncoderTest, DictionaryHasEmbeddedNULLs) { + // Bytes above 0x7F are spelled as '\xNN' character literals. A bare int + // constant such as 0xFF does not fit in a signed char, so using it inside a + // braced initializer is a narrowing conversion, which C++11 and later reject. + const char embedded_null_dictionary_text[] = + { 0x00, '\xFF', '\xFE', '\xFD', 0x00, '\xFD', '\xFE', '\xFF', 0x00, 0x03 }; + const char embedded_null_target[] = + { '\xFD', 0x00, '\xFD', '\xFE', 0x03, 0x00, 0x01, 0x00 }; + // Sanity-check the array lengths that are passed as byte counts below. + CHECK_EQ(10, sizeof(embedded_null_dictionary_text)); + CHECK_EQ(8, sizeof(embedded_null_target)); + HashedDictionary embedded_null_dictionary(embedded_null_dictionary_text, + sizeof(embedded_null_dictionary_text)); + EXPECT_TRUE(embedded_null_dictionary.Init()); + VCDiffStreamingEncoder embedded_null_encoder(&embedded_null_dictionary, + VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM, + /* look_for_target_matches = */ true); + EXPECT_TRUE(embedded_null_encoder.StartEncoding(delta())); + EXPECT_TRUE(embedded_null_encoder.EncodeChunk(embedded_null_target, + sizeof(embedded_null_target), + delta())); + EXPECT_TRUE(embedded_null_encoder.FinishEncoding(delta())); + decoder_.StartDecoding(embedded_null_dictionary_text, + sizeof(embedded_null_dictionary_text)); + EXPECT_TRUE(decoder_.DecodeChunk(delta_data(), + delta_size(), + &result_target_)); + EXPECT_TRUE(decoder_.FinishDecoding()); + // Compare by explicit length so the embedded zero bytes take part in the check. + EXPECT_EQ(sizeof(embedded_null_target), result_target_.size()); + EXPECT_EQ(string(embedded_null_target, + sizeof(embedded_null_target)), + result_target_); +} + +// Binary data
test part 2: An embedded CR or LF should be handled like +// any other byte of data. No text-processing of the data should occur. +// +// Encodes an 8-byte target against a 10-byte dictionary, then decodes the +// delta and expects the exact target bytes back. +TEST_F(VCDiffEncoderTest, DictionaryHasEmbeddedNewlines) { + // Bytes above 0x7F are spelled as '\xNN' character literals. A bare int + // constant such as 0xFF does not fit in a signed char, so using it inside a + // braced initializer is a narrowing conversion, which C++11 and later reject. + // NOTE(review): 0x0C is form feed; carriage return would be 0x0D. The comment + // above mentions CR, so confirm which byte was intended. + const char embedded_null_dictionary_text[] = + { 0x0C, '\xFF', '\xFE', 0x0C, 0x00, 0x0A, '\xFE', '\xFF', 0x00, 0x0A }; + const char embedded_null_target[] = + { 0x0C, 0x00, 0x0A, '\xFE', 0x03, 0x00, 0x0A, 0x00 }; + // Sanity-check the array lengths that are passed as byte counts below. + CHECK_EQ(10, sizeof(embedded_null_dictionary_text)); + CHECK_EQ(8, sizeof(embedded_null_target)); + HashedDictionary embedded_null_dictionary(embedded_null_dictionary_text, + sizeof(embedded_null_dictionary_text)); + EXPECT_TRUE(embedded_null_dictionary.Init()); + VCDiffStreamingEncoder embedded_null_encoder(&embedded_null_dictionary, + VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM, + /* look_for_target_matches = */ true); + EXPECT_TRUE(embedded_null_encoder.StartEncoding(delta())); + EXPECT_TRUE(embedded_null_encoder.EncodeChunk(embedded_null_target, + sizeof(embedded_null_target), + delta())); + EXPECT_TRUE(embedded_null_encoder.FinishEncoding(delta())); + decoder_.StartDecoding(embedded_null_dictionary_text, + sizeof(embedded_null_dictionary_text)); + EXPECT_TRUE(decoder_.DecodeChunk(delta_data(), + delta_size(), + &result_target_)); + EXPECT_TRUE(decoder_.FinishDecoding()); + // Compare by explicit length so every byte of the target takes part in the check. + EXPECT_EQ(sizeof(embedded_null_target), result_target_.size()); + EXPECT_EQ(string(embedded_null_target, + sizeof(embedded_null_target)), + result_target_); +} + +TEST_F(VCDiffEncoderTest, UsingWideCharacters) { + const wchar_t wchar_dictionary_text[] = + L"\"Just the place for a Snark!\" the Bellman cried,\n" + L"As he landed his crew with care;\n" + L"Supporting each man on the top of the tide\n" + L"By a finger entwined in his hair.\n"; + + const wchar_t wchar_target[] = + L"\"Just the place for a Snark! I have said it twice:\n" + L"That alone should encourage the crew.\n" + L"Just the place for a Snark! 
I have said it thrice:\n" + L"What I tell you three times is true.\"\n"; + + HashedDictionary wchar_dictionary((const char*) wchar_dictionary_text, + sizeof(wchar_dictionary_text)); + EXPECT_TRUE(wchar_dictionary.Init()); + VCDiffStreamingEncoder wchar_encoder(&wchar_dictionary, + VCD_FORMAT_INTERLEAVED + | VCD_FORMAT_CHECKSUM, + /* look_for_target_matches = */ false); + EXPECT_TRUE(wchar_encoder.StartEncoding(delta())); + EXPECT_TRUE(wchar_encoder.EncodeChunk((const char*) wchar_target, + sizeof(wchar_target), + delta())); + EXPECT_TRUE(wchar_encoder.FinishEncoding(delta())); + decoder_.StartDecoding((const char*) wchar_dictionary_text, + sizeof(wchar_dictionary_text)); + EXPECT_TRUE(decoder_.DecodeChunk(delta_data(), + delta_size(), + &result_target_)); + EXPECT_TRUE(decoder_.FinishDecoding()); + const wchar_t* result_as_wchar = (const wchar_t*) result_target_.data(); + EXPECT_EQ(wcslen(wchar_target), wcslen(result_as_wchar)); + EXPECT_EQ(0, wcscmp(wchar_target, result_as_wchar)); +} + +#if defined(HAVE_MPROTECT) && \ + (defined(HAVE_MEMALIGN) || defined(HAVE_POSIX_MEMALIGN)) +// Bug 1220602: Make sure the encoder doesn't read past the end of the input +// buffer. +TEST_F(VCDiffEncoderTest, ShouldNotReadPastEndOfBuffer) { + const size_t target_size = strlen(kTarget); + + // Allocate two memory pages. + const int page_size = getpagesize(); + void* two_pages = NULL; +#ifdef HAVE_POSIX_MEMALIGN + posix_memalign(&two_pages, page_size, 2 * page_size); +#else // !HAVE_POSIX_MEMALIGN + two_pages = memalign(page_size, 2 * page_size); +#endif // HAVE_POSIX_MEMALIGN + char* const first_page = reinterpret_cast<char*>(two_pages); + char* const second_page = first_page + page_size; + + // Place the target string at the end of the first page. + char* const target_with_guard = second_page - target_size; + memcpy(target_with_guard, kTarget, target_size); + + // Make the second page unreadable. 
+ // If the guard page cannot be protected, this test would pass without + // checking anything, so report a failed mprotect(). + EXPECT_EQ(0, mprotect(second_page, page_size, PROT_NONE)); + + // Now perform the encode operation, which will cause a segmentation fault + // if it reads past the end of the buffer. + EXPECT_TRUE(encoder_.StartEncoding(delta())); + EXPECT_TRUE(encoder_.EncodeChunk(target_with_guard, target_size, delta())); + EXPECT_TRUE(encoder_.FinishEncoding(delta())); + + // Undo the mprotect. + EXPECT_EQ(0, mprotect(second_page, page_size, PROT_READ|PROT_WRITE)); + free(two_pages); +} + +// Mirror image of the test above: the unreadable guard page sits directly +// before the target buffer instead of directly after it. +TEST_F(VCDiffEncoderTest, ShouldNotReadPastBeginningOfBuffer) { + const size_t target_size = strlen(kTarget); + + // Allocate two memory pages. + const int page_size = getpagesize(); + void* two_pages = NULL; +#ifdef HAVE_POSIX_MEMALIGN + // posix_memalign() reports failure through its return value. + ASSERT_EQ(0, posix_memalign(&two_pages, page_size, 2 * page_size)); +#else // !HAVE_POSIX_MEMALIGN + two_pages = memalign(page_size, 2 * page_size); +#endif // HAVE_POSIX_MEMALIGN + // Stop here rather than doing pointer arithmetic on a failed allocation. + ASSERT_TRUE(two_pages != NULL); + char* const first_page = reinterpret_cast<char*>(two_pages); + char* const second_page = first_page + page_size; + + // Make the first page unreadable. If this fails, the test would pass + // without checking anything, so report a failed mprotect(). + EXPECT_EQ(0, mprotect(first_page, page_size, PROT_NONE)); + + // Place the target string at the beginning of the second page. + char* const target_with_guard = second_page; + memcpy(target_with_guard, kTarget, target_size); + + // Now perform the encode operation, which will cause a segmentation fault + // if it reads past the beginning of the buffer. + EXPECT_TRUE(encoder_.StartEncoding(delta())); + EXPECT_TRUE(encoder_.EncodeChunk(target_with_guard, target_size, delta())); + EXPECT_TRUE(encoder_.FinishEncoding(delta())); + + // Undo the mprotect. 
+ mprotect(first_page, page_size, PROT_READ|PROT_WRITE); + free(two_pages); +} +#endif // HAVE_MPROTECT && (HAVE_MEMALIGN || HAVE_POSIX_MEMALIGN) + +// Fixture that tallies how many matches of each length a test expects and +// compares that tally with the counts stored in actual_match_counts_. +class VCDiffMatchCountTest : public VerifyEncodedBytesTest { + protected: + virtual ~VCDiffMatchCountTest() { } + + // Records that one match of length match_size is expected. + void ExpectMatch(size_t match_size) { + if (match_size >= expected_match_counts_.size()) { + // Be generous to avoid resizing again. A match_size of 0 still needs + // one slot, otherwise the increment below would index an empty vector. + expected_match_counts_.resize((match_size == 0) ? 1 : match_size * 2, 0); + } + ++expected_match_counts_[match_size]; + } + + // Checks the expected counts against actual_match_counts_. Entries that + // actual_match_counts_ does not have are treated as zero, so std::equal + // never reads past the end of the shorter vector. + void VerifyMatchCounts() { + const size_t common_size = std::min(expected_match_counts_.size(), + actual_match_counts_.size()); + EXPECT_TRUE(std::equal(expected_match_counts_.begin(), + expected_match_counts_.begin() + common_size, + actual_match_counts_.begin())); + for (size_t i = common_size; i < expected_match_counts_.size(); ++i) { + EXPECT_EQ(0, expected_match_counts_[i]); + } + } + + // expected_match_counts_ is indexed by match length and filled by ExpectMatch(). + std::vector<int> expected_match_counts_; + std::vector<int> actual_match_counts_; +}; + +class VCDiffHTML1Test : public VCDiffMatchCountTest { + protected: + static const char kDictionary[]; + static const char kTarget[]; + + VCDiffHTML1Test(); + virtual ~VCDiffHTML1Test() { } + + void SimpleEncode(); + void StreamingEncode(); + + HashedDictionary hashed_dictionary_; + VCDiffStreamingEncoder encoder_; + VCDiffStreamingDecoder decoder_; + VCDiffEncoder simple_encoder_; + VCDiffDecoder simple_decoder_; + + string result_target_; +}; + +const char VCDiffHTML1Test::kDictionary[] = + "<html><font color=red>This part from the dict</font><br>"; + +const char VCDiffHTML1Test::kTarget[] = + "<html><font color=red>This part from the dict</font><br>\n" + "And this part is not...</html>"; + +VCDiffHTML1Test::VCDiffHTML1Test() + : hashed_dictionary_(kDictionary, sizeof(kDictionary)), + encoder_(&hashed_dictionary_, + VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM, + /* look_for_target_matches = */ true), + simple_encoder_(kDictionary, sizeof(kDictionary)) { + EXPECT_TRUE(hashed_dictionary_.Init()); +} + +void VCDiffHTML1Test::SimpleEncode() { + EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta())); + EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, + delta_size()); + 
EXPECT_TRUE(simple_decoder_.Decode(kDictionary, + sizeof(kDictionary), + delta_as_const(), + &result_target_)); + EXPECT_EQ(kTarget, result_target_); +} + +void VCDiffHTML1Test::StreamingEncode() { + EXPECT_TRUE(encoder_.StartEncoding(delta())); + EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta())); + EXPECT_TRUE(encoder_.FinishEncoding(delta())); +} + +TEST_F(VCDiffHTML1Test, CheckOutputOfSimpleEncoder) { + SimpleEncode(); + // These values do not depend on the block size used for encoding + ExpectByte(0xD6); // 'V' | 0x80 + ExpectByte(0xC3); // 'C' | 0x80 + ExpectByte(0xC4); // 'D' | 0x80 + ExpectByte(0x00); // Simple encoder never uses interleaved format + ExpectByte(0x00); // Hdr_Indicator + ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary) + ExpectByte(sizeof(kDictionary)); // Dictionary length + ExpectByte(0x00); // Source segment position: start of dictionary + if (BlockHash::kBlockSize == 2) { + // A very small block size will catch the "html>" match. 
+ ExpectByte(0x1F); // Length of the delta encoding + ExpectSize(strlen(kTarget)); // Size of the target window + ExpectByte(0x00); // Delta_indicator (no compression) + ExpectByte(0x11); // Length of the data section + ExpectByte(0x06); // Length of the instructions section + ExpectByte(0x03); // Length of the address section + // Data section + ExpectString("\nAnd t"); // Data for 1st ADD + ExpectString("is not...</"); // Data for 2nd ADD + // Instructions section + ExpectByte(0x73); // COPY size 0 mode VCD_SAME(0) + ExpectByte(0x38); // COPY size (56) + ExpectByte(0x07); // ADD size 6 + ExpectByte(0x19); // COPY size 9 mode VCD_SELF + ExpectByte(0x0C); // ADD size 11 + ExpectByte(0x15); // COPY size 5 mode VCD_SELF + // Address section + ExpectByte(0x00); // COPY address (0) mode VCD_SAME(0) + ExpectByte(0x17); // COPY address (23) mode VCD_SELF + ExpectByte(0x01); // COPY address (1) mode VCD_SELF + } else if (BlockHash::kBlockSize < 16) { + // A medium block size will catch the "his part " match. + ExpectByte(0x22); // Length of the delta encoding + ExpectSize(strlen(kTarget)); // Size of the target window + ExpectByte(0x00); // Delta_indicator (no compression) + ExpectByte(0x16); // Length of the data section + ExpectByte(0x05); // Length of the instructions section + ExpectByte(0x02); // Length of the address section + // Data section + ExpectString("\nAnd t"); // Data for 1st ADD + ExpectString("is not...</html>"); // Data for 2nd ADD + // Instructions section + ExpectByte(0x73); // COPY size 0 mode VCD_SAME(0) + ExpectByte(0x38); // COPY size (56) + ExpectByte(0x07); // ADD size 6 + ExpectByte(0x19); // COPY size 9 mode VCD_SELF + ExpectByte(0x11); // ADD size 16 + // Address section + ExpectByte(0x00); // COPY address (0) mode VCD_SAME(0) + ExpectByte(0x17); // COPY address (23) mode VCD_SELF + } else if (BlockHash::kBlockSize <= 56) { + // Any block size up to 56 will catch the matching prefix string. 
+ ExpectByte(0x29); // Length of the delta encoding + ExpectSize(strlen(kTarget)); // Size of the target window + ExpectByte(0x00); // Delta_indicator (no compression) + ExpectByte(0x1F); // Length of the data section + ExpectByte(0x04); // Length of the instructions section + ExpectByte(0x01); // Length of the address section + ExpectString("\nAnd this part is not...</html>"); // Data for ADD + // Instructions section + ExpectByte(0x73); // COPY size 0 mode VCD_SAME(0) + ExpectByte(0x38); // COPY size (56) + ExpectByte(0x01); // ADD size 0 + ExpectByte(0x1F); // Size of ADD (31) + // Address section + ExpectByte(0x00); // COPY address (0) mode VCD_SAME(0) + } else { + // The matching string is 56 characters long, and the block size is + // 64 or greater, so no match should be found. + ExpectSize(strlen(kTarget) + 7); // Delta encoding len + ExpectSize(strlen(kTarget)); // Size of the target window + ExpectByte(0x00); // Delta_indicator (no compression) + ExpectSize(strlen(kTarget)); // Length of the data section + ExpectByte(0x02); // Length of the instructions section + ExpectByte(0x00); // Length of the address section + // Data section + ExpectString(kTarget); + ExpectByte(0x01); // ADD size 0 + ExpectSize(strlen(kTarget)); + } + ExpectNoMoreBytes(); +} + +TEST_F(VCDiffHTML1Test, MatchCounts) { + StreamingEncode(); + encoder_.GetMatchCounts(&actual_match_counts_); + if (BlockHash::kBlockSize == 2) { + // A very small block size will catch the "html>" match. + ExpectMatch(56); + ExpectMatch(9); + ExpectMatch(5); + } else if (BlockHash::kBlockSize < 16) { + // A medium block size will catch the "his part " match. + ExpectMatch(56); + ExpectMatch(9); + } else if (BlockHash::kBlockSize <= 56) { + // Any block size up to 56 will catch the matching prefix string. 
+ ExpectMatch(56); + } + VerifyMatchCounts(); +} + +#ifdef GTEST_HAS_DEATH_TEST +typedef VCDiffHTML1Test VCDiffHTML1DeathTest; + +TEST_F(VCDiffHTML1DeathTest, NullMatchCounts) { + EXPECT_DEBUG_DEATH(encoder_.GetMatchCounts(NULL), "GetMatchCounts"); +} +#endif // GTEST_HAS_DEATH_TEST + +class VCDiffHTML2Test : public VCDiffMatchCountTest { + protected: + static const char kDictionary[]; + static const char kTarget[]; + + VCDiffHTML2Test(); + virtual ~VCDiffHTML2Test() { } + + void SimpleEncode(); + void StreamingEncode(); + + HashedDictionary hashed_dictionary_; + VCDiffStreamingEncoder encoder_; + VCDiffStreamingDecoder decoder_; + VCDiffEncoder simple_encoder_; + VCDiffDecoder simple_decoder_; + + string result_target_; +}; + +const char VCDiffHTML2Test::kDictionary[] = "10\nThis is a test"; + +const char VCDiffHTML2Test::kTarget[] = "This is a test!!!\n"; + +VCDiffHTML2Test::VCDiffHTML2Test() + : hashed_dictionary_(kDictionary, sizeof(kDictionary)), + encoder_(&hashed_dictionary_, + VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM, + /* look_for_target_matches = */ true), + simple_encoder_(kDictionary, sizeof(kDictionary)) { + EXPECT_TRUE(hashed_dictionary_.Init()); +} + +void VCDiffHTML2Test::SimpleEncode() { + EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta())); + EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, + delta_size()); + EXPECT_TRUE(simple_decoder_.Decode(kDictionary, + sizeof(kDictionary), + delta_as_const(), + &result_target_)); + EXPECT_EQ(kTarget, result_target_); +} + +void VCDiffHTML2Test::StreamingEncode() { + EXPECT_TRUE(encoder_.StartEncoding(delta())); + EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta())); + EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, + delta_size()); + EXPECT_TRUE(simple_decoder_.Decode(kDictionary, + sizeof(kDictionary), + delta_as_const(), + &result_target_)); + EXPECT_EQ(kTarget, result_target_); +} + +TEST_F(VCDiffHTML2Test, 
VerifyOutputOfSimpleEncoder) { + SimpleEncode(); + // These values do not depend on the block size used for encoding + ExpectByte(0xD6); // 'V' | 0x80 + ExpectByte(0xC3); // 'C' | 0x80 + ExpectByte(0xC4); // 'D' | 0x80 + ExpectByte(0x00); // Simple encoder never uses interleaved format + ExpectByte(0x00); // Hdr_Indicator + ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary) + ExpectByte(sizeof(kDictionary)); // Dictionary length + ExpectByte(0x00); // Source segment position: start of dictionary + if (BlockHash::kBlockSize <= 8) { + ExpectByte(12); // Length of the delta encoding + ExpectSize(strlen(kTarget)); // Size of the target window + ExpectByte(0x00); // Delta_indicator (no compression) + ExpectByte(0x04); // Length of the data section + ExpectByte(0x02); // Length of the instructions section + ExpectByte(0x01); // Length of the address section + ExpectByte('!'); + ExpectByte('!'); + ExpectByte('!'); + ExpectByte('\n'); + ExpectByte(0x1E); // COPY size 14 mode VCD_SELF + ExpectByte(0x05); // ADD size 4 + ExpectByte(0x03); // COPY address (3) mode VCD_SELF + } else { + // Larger block sizes will not catch any matches. 
+ ExpectSize(strlen(kTarget) + 7); // Delta encoding len + ExpectSize(strlen(kTarget)); // Size of the target window + ExpectByte(0x00); // Delta_indicator (no compression) + ExpectSize(strlen(kTarget)); // Length of the data section + ExpectByte(0x02); // Length of the instructions section + ExpectByte(0x00); // Length of the address section + // Data section + ExpectString(kTarget); + ExpectByte(0x01); // ADD size 0 + ExpectSize(strlen(kTarget)); + } + ExpectNoMoreBytes(); +} + +TEST_F(VCDiffHTML2Test, VerifyOutputWithChecksum) { + StreamingEncode(); + const VCDChecksum html2_checksum = ComputeAdler32(kTarget, strlen(kTarget)); + CHECK_EQ(5, VarintBE<int64_t>::Length(html2_checksum)); + // These values do not depend on the block size used for encoding + ExpectByte(0xD6); // 'V' | 0x80 + ExpectByte(0xC3); // 'C' | 0x80 + ExpectByte(0xC4); // 'D' | 0x80 + ExpectByte('S'); // Format extensions + ExpectByte(0x00); // Hdr_Indicator + ExpectByte(VCD_SOURCE | VCD_CHECKSUM); // Win_Indicator + ExpectByte(sizeof(kDictionary)); // Dictionary length + ExpectByte(0x00); // Source segment position: start of dictionary + if (BlockHash::kBlockSize <= 8) { + ExpectByte(17); // Length of the delta encoding + ExpectSize(strlen(kTarget)); // Size of the target window + ExpectByte(0x00); // Delta_indicator (no compression) + ExpectByte(0x00); // Length of the data section + ExpectByte(0x07); // Length of the instructions section + ExpectByte(0x00); // Length of the address section + ExpectChecksum(html2_checksum); + ExpectByte(0x1E); // COPY size 14 mode VCD_SELF + ExpectByte(0x03); // COPY address (3) mode VCD_SELF + ExpectByte(0x05); // ADD size 4 + ExpectByte('!'); + ExpectByte('!'); + ExpectByte('!'); + ExpectByte('\n'); + } else { + // Larger block sizes will not catch any matches. 
+ ExpectSize(strlen(kTarget) + 12); // Delta encoding len + ExpectSize(strlen(kTarget)); // Size of the target window + ExpectByte(0x00); // Delta_indicator (no compression) + ExpectByte(0x00); // Length of the data section + ExpectSize(0x02 + strlen(kTarget)); // Interleaved + ExpectByte(0x00); // Length of the address section + ExpectChecksum(html2_checksum); + // Data section + ExpectByte(0x01); // ADD size 0 + ExpectSize(strlen(kTarget)); + ExpectString(kTarget); + } + ExpectNoMoreBytes(); +} + +TEST_F(VCDiffHTML2Test, MatchCounts) { + StreamingEncode(); + encoder_.GetMatchCounts(&actual_match_counts_); + if (BlockHash::kBlockSize <= 8) { + ExpectMatch(14); + } + VerifyMatchCounts(); +} + +} // anonymous namespace +} // namespace open_vcdiff diff --git a/src/zconf.h b/src/zconf.h new file mode 100644 index 0000000..a4a2115 --- /dev/null +++ b/src/zconf.h @@ -0,0 +1,335 @@ +/* zconf.h -- configuration of the zlib compression library + * Copyright (C) 1995-2005 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#ifndef ZCONF_H +#define ZCONF_H + +/* + * If you *really* need a unique prefix for all types and library functions, + * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it. 
+ */ +#ifdef Z_PREFIX +# define deflateInit_ z_deflateInit_ +# define deflate z_deflate +# define deflateEnd z_deflateEnd +# define inflateInit_ z_inflateInit_ +# define inflate z_inflate +# define inflateEnd z_inflateEnd +# define deflateInit2_ z_deflateInit2_ +# define deflateSetDictionary z_deflateSetDictionary +# define deflateCopy z_deflateCopy +# define deflateReset z_deflateReset +# define deflateParams z_deflateParams +# define deflateBound z_deflateBound +# define deflatePrime z_deflatePrime +# define inflateInit2_ z_inflateInit2_ +# define inflateSetDictionary z_inflateSetDictionary +# define inflateSync z_inflateSync +# define inflateSyncPoint z_inflateSyncPoint +# define inflateCopy z_inflateCopy +# define inflateReset z_inflateReset +# define inflateBack z_inflateBack +# define inflateBackEnd z_inflateBackEnd +# define compress z_compress +# define compress2 z_compress2 +# define compressBound z_compressBound +# define uncompress z_uncompress +# define adler32 z_adler32 +# define crc32 z_crc32 +# define get_crc_table z_get_crc_table +# define zError z_zError + +# define alloc_func z_alloc_func +# define free_func z_free_func +# define in_func z_in_func +# define out_func z_out_func +# define Byte z_Byte +# define uInt z_uInt +# define uLong z_uLong +# define Bytef z_Bytef +# define charf z_charf +# define intf z_intf +# define uIntf z_uIntf +# define uLongf z_uLongf +# define voidpf z_voidpf +# define voidp z_voidp +#endif + +#if defined(__MSDOS__) && !defined(MSDOS) +# define MSDOS +#endif +#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2) +# define OS2 +#endif +#if defined(_WINDOWS) && !defined(WINDOWS) +# define WINDOWS +#endif +#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__) +# ifndef WIN32 +# define WIN32 +# endif +#endif +#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32) +# if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__) +# ifndef SYS16BIT +# define SYS16BIT +# endif +# endif 
+#endif + +/* + * Compile with -DMAXSEG_64K if the alloc function cannot allocate more + * than 64k bytes at a time (needed on systems with 16-bit int). + */ +#ifdef SYS16BIT +# define MAXSEG_64K +#endif +#ifdef MSDOS +# define UNALIGNED_OK +#endif + +#ifdef __STDC_VERSION__ +# ifndef STDC +# define STDC +# endif +# if __STDC_VERSION__ >= 199901L +# ifndef STDC99 +# define STDC99 +# endif +# endif +#endif +#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus)) +# define STDC +#endif +#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__)) +# define STDC +#endif +#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32)) +# define STDC +#endif +#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__)) +# define STDC +#endif + +#if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). */ +# define STDC +#endif + +#ifndef STDC +# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */ +# define const /* note: need a more gentle solution here */ +# endif +#endif + +/* Some Mac compilers merge all .h files incorrectly: */ +#if defined(__MWERKS__)||defined(applec)||defined(THINK_C)||defined(__SC__) +# define NO_DUMMY_DECL +#endif + +/* Maximum value for memLevel in deflateInit2 */ +#ifndef MAX_MEM_LEVEL +# ifdef MAXSEG_64K +# define MAX_MEM_LEVEL 8 +# else +# define MAX_MEM_LEVEL 9 +# endif +#endif + +/* Maximum value for windowBits in deflateInit2 and inflateInit2. + * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files + * created by gzip. (Files created by minigzip can still be extracted by + * gzip.) + */ +#ifndef MAX_WBITS +# define MAX_WBITS 15 /* 32K LZ77 window */ +#endif + +/* The memory requirements for deflate are (in bytes): + (1 << (windowBits+2)) + (1 << (memLevel+9)) + that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) + plus a few kilobytes for small objects. 
For example, if you want to reduce + the default memory requirements from 256K to 128K, compile with + make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" + Of course this will generally degrade compression (there's no free lunch). + + The memory requirements for inflate are (in bytes) 1 << windowBits + that is, 32K for windowBits=15 (default value) plus a few kilobytes + for small objects. +*/ + + /* Type declarations */ + +#ifndef OF /* function prototypes */ +# ifdef STDC +# define OF(args) args +# else +# define OF(args) () +# endif +#endif + +/* The following definitions for FAR are needed only for MSDOS mixed + * model programming (small or medium model with some far allocations). + * This was tested only with MSC; for other MSDOS compilers you may have + * to define NO_MEMCPY in zutil.h. If you don't need the mixed model, + * just define FAR to be empty. + */ +#ifdef SYS16BIT +# if defined(M_I86SM) || defined(M_I86MM) + /* MSC small or medium model */ +# define SMALL_MEDIUM +# ifdef _MSC_VER +# define FAR _far +# else +# define FAR far +# endif +# endif +# if (defined(__SMALL__) || defined(__MEDIUM__)) + /* Turbo C small or medium model */ +# define SMALL_MEDIUM +# ifdef __BORLANDC__ +# define FAR _far +# else +# define FAR far +# endif +# endif +#endif + +#if defined(WINDOWS) || defined(WIN32) + /* If building or using zlib as a DLL, define ZLIB_DLL. + * This is not mandatory, but it offers a little performance increase. + */ +# ifdef ZLIB_DLL +# if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500)) +# ifdef ZLIB_INTERNAL +# define ZEXTERN extern __declspec(dllexport) +# else +# define ZEXTERN extern __declspec(dllimport) +# endif +# endif +# endif /* ZLIB_DLL */ + /* If building or using zlib with the WINAPI/WINAPIV calling convention, + * define ZLIB_WINAPI. + * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI. 
+ */ +# ifdef ZLIB_WINAPI +# ifdef FAR +# undef FAR +# endif +# include <windows.h> + /* No need for _export, use ZLIB.DEF instead. */ + /* For complete Windows compatibility, use WINAPI, not __stdcall. */ +# define ZEXPORT WINAPI +# ifdef WIN32 +# define ZEXPORTVA WINAPIV +# else +# define ZEXPORTVA FAR CDECL +# endif +# endif +#endif + +#if defined (__BEOS__) +# ifdef ZLIB_DLL +# ifdef ZLIB_INTERNAL +# define ZEXPORT __declspec(dllexport) +# define ZEXPORTVA __declspec(dllexport) +# else +# define ZEXPORT __declspec(dllimport) +# define ZEXPORTVA __declspec(dllimport) +# endif +# endif +#endif + +#ifndef ZEXTERN +# define ZEXTERN extern +#endif +#ifndef ZEXPORT +# define ZEXPORT +#endif +#ifndef ZEXPORTVA +# define ZEXPORTVA +#endif + +#ifndef FAR +# define FAR +#endif + +#if !defined(__MACTYPES__) +typedef unsigned char Byte; /* 8 bits */ +#endif +typedef unsigned int uInt; /* 16 bits or more */ +typedef unsigned long uLong; /* 32 bits or more */ + +#ifdef SMALL_MEDIUM + /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */ +# define Bytef Byte FAR +#else + typedef Byte FAR Bytef; +#endif +typedef char FAR charf; +typedef int FAR intf; +typedef uInt FAR uIntf; +typedef uLong FAR uLongf; + +#ifdef STDC + typedef void const *voidpc; + typedef void FAR *voidpf; + typedef void *voidp; +#else + typedef Byte const *voidpc; + typedef Byte FAR *voidpf; + typedef Byte *voidp; +#endif + +#ifndef WIN32 +#if 1 /* HAVE_UNISTD_H -- this line is updated by ./configure */ +# include <sys/types.h> /* for off_t */ +# include <unistd.h> /* for SEEK_* and off_t */ +# ifdef VMS +# include <unixio.h> /* for off_t */ +# endif +# define z_off_t off_t +#endif +#endif + +#ifndef SEEK_SET +# define SEEK_SET 0 /* Seek from beginning of file. */ +# define SEEK_CUR 1 /* Seek from current position. 
*/ +# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ +#endif +#ifndef z_off_t +# define z_off_t long +#endif + +#if defined(__OS400__) +# define NO_vsnprintf +#endif + +#if defined(__MVS__) +# define NO_vsnprintf +# ifdef FAR +# undef FAR +# endif +#endif + +/* MVS linker does not support external names larger than 8 bytes */ +#if defined(__MVS__) +# pragma map(deflateInit_,"DEIN") +# pragma map(deflateInit2_,"DEIN2") +# pragma map(deflateEnd,"DEEND") +# pragma map(deflateBound,"DEBND") +# pragma map(inflateInit_,"ININ") +# pragma map(inflateInit2_,"ININ2") +# pragma map(inflateEnd,"INEND") +# pragma map(inflateSync,"INSY") +# pragma map(inflateSetDictionary,"INSEDI") +# pragma map(compressBound,"CMBND") +# pragma map(inflate_table,"INTABL") +# pragma map(inflate_fast,"INFA") +# pragma map(inflate_copyright,"INCOPY") +#endif + +#endif /* ZCONF_H */ diff --git a/src/zlib.h b/src/zlib.h new file mode 100644 index 0000000..6f64b98 --- /dev/null +++ b/src/zlib.h @@ -0,0 +1,1373 @@ +/* zlib.h -- interface of the 'zlib' general purpose compression library + version 1.2.3, July 18th, 2005 + + Copyright (C) 1995-2005 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. 
This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + + + The data format used by the zlib library is described by RFCs (Request for + Comments) 1950 to 1952 in the files http://www.ietf.org/rfc/rfc1950.txt + (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format). +*/ + +#ifndef ZLIB_H +#define ZLIB_H + +#include "zconf.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ZLIB_VERSION "1.2.3" +#define ZLIB_VERNUM 0x1230 + +/* + The 'zlib' compression library provides in-memory compression and + decompression functions, including integrity checks of the uncompressed + data. This version of the library supports only one compression method + (deflation) but other algorithms will be added later and will have the same + stream interface. + + Compression can be done in a single step if the buffers are large + enough (for example if an input file is mmap'ed), or can be done by + repeated calls of the compression function. In the latter case, the + application must provide more input and/or consume the output + (providing more output space) before each call. + + The compressed data format used by default by the in-memory functions is + the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped + around a deflate stream, which is itself documented in RFC 1951. + + The library also supports reading and writing files in gzip (.gz) format + with an interface similar to that of stdio using the functions that start + with "gz". The gzip format is different from the zlib format. gzip is a + gzip wrapper, documented in RFC 1952, wrapped around a deflate stream. + + This library can optionally read and write gzip streams in memory as well. + + The zlib format was designed to be compact and fast for use in memory + and on communications channels. 
The gzip format was designed for single- + file compression on file systems, has a larger header than zlib to maintain + directory information, and uses a different, slower check method than zlib. + + The library does not install any signal handler. The decoder checks + the consistency of the compressed data, so the library should never + crash even in case of corrupted input. +*/ + +typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size)); +typedef void (*free_func) OF((voidpf opaque, voidpf address)); + +struct internal_state; + +typedef struct z_stream_s { + Bytef *next_in; /* next input byte */ + uInt avail_in; /* number of bytes available at next_in */ + uLong total_in; /* total nb of input bytes read so far */ + + Bytef *next_out; /* next output byte should be put there */ + uInt avail_out; /* remaining free space at next_out */ + uLong total_out; /* total nb of bytes output so far */ + + char *msg; /* last error message, NULL if no error */ + struct internal_state FAR *state; /* not visible by applications */ + + alloc_func zalloc; /* used to allocate the internal state */ + free_func zfree; /* used to free the internal state */ + voidpf opaque; /* private data object passed to zalloc and zfree */ + + int data_type; /* best guess about the data type: binary or text */ + uLong adler; /* adler32 value of the uncompressed data */ + uLong reserved; /* reserved for future use */ +} z_stream; + +typedef z_stream FAR *z_streamp; + +/* + gzip header information passed to and from zlib routines. See RFC 1952 + for more details on the meanings of these fields. 
+*/ +typedef struct gz_header_s { + int text; /* true if compressed data believed to be text */ + uLong time; /* modification time */ + int xflags; /* extra flags (not used when writing a gzip file) */ + int os; /* operating system */ + Bytef *extra; /* pointer to extra field or Z_NULL if none */ + uInt extra_len; /* extra field length (valid if extra != Z_NULL) */ + uInt extra_max; /* space at extra (only when reading header) */ + Bytef *name; /* pointer to zero-terminated file name or Z_NULL */ + uInt name_max; /* space at name (only when reading header) */ + Bytef *comment; /* pointer to zero-terminated comment or Z_NULL */ + uInt comm_max; /* space at comment (only when reading header) */ + int hcrc; /* true if there was or will be a header crc */ + int done; /* true when done reading gzip header (not used + when writing a gzip file) */ +} gz_header; + +typedef gz_header FAR *gz_headerp; + +/* + The application must update next_in and avail_in when avail_in has + dropped to zero. It must update next_out and avail_out when avail_out + has dropped to zero. The application must initialize zalloc, zfree and + opaque before calling the init function. All other fields are set by the + compression library and must not be updated by the application. + + The opaque value provided by the application will be passed as the first + parameter for calls of zalloc and zfree. This can be useful for custom + memory management. The compression library attaches no meaning to the + opaque value. + + zalloc must return Z_NULL if there is not enough memory for the object. + If zlib is used in a multi-threaded application, zalloc and zfree must be + thread safe. + + On 16-bit systems, the functions zalloc and zfree must be able to allocate + exactly 65536 bytes, but will not be required to allocate more than this + if the symbol MAXSEG_64K is defined (see zconf.h). 
WARNING: On MSDOS, + pointers returned by zalloc for objects of exactly 65536 bytes *must* + have their offset normalized to zero. The default allocation function + provided by this library ensures this (see zutil.c). To reduce memory + requirements and avoid any allocation of 64K objects, at the expense of + compression ratio, compile the library with -DMAX_WBITS=14 (see zconf.h). + + The fields total_in and total_out can be used for statistics or + progress reports. After compression, total_in holds the total size of + the uncompressed data and may be saved for use in the decompressor + (particularly if the decompressor wants to decompress everything in + a single step). +*/ + + /* constants */ + +#define Z_NO_FLUSH 0 +#define Z_PARTIAL_FLUSH 1 /* will be removed, use Z_SYNC_FLUSH instead */ +#define Z_SYNC_FLUSH 2 +#define Z_FULL_FLUSH 3 +#define Z_FINISH 4 +#define Z_BLOCK 5 +/* Allowed flush values; see deflate() and inflate() below for details */ + +#define Z_OK 0 +#define Z_STREAM_END 1 +#define Z_NEED_DICT 2 +#define Z_ERRNO (-1) +#define Z_STREAM_ERROR (-2) +#define Z_DATA_ERROR (-3) +#define Z_MEM_ERROR (-4) +#define Z_BUF_ERROR (-5) +#define Z_VERSION_ERROR (-6) +/* Return codes for the compression/decompression functions. Negative + * values are errors, positive values are used for special but normal events. 
+ */ + +#define Z_NO_COMPRESSION 0 +#define Z_BEST_SPEED 1 +#define Z_BEST_COMPRESSION 9 +#define Z_DEFAULT_COMPRESSION (-1) +/* compression levels */ + +#define Z_FILTERED 1 +#define Z_HUFFMAN_ONLY 2 +#define Z_RLE 3 +#define Z_FIXED 4 +#define Z_DEFAULT_STRATEGY 0 +/* compression strategy; see deflateInit2() below for details */ + +#define Z_BINARY 0 +#define Z_TEXT 1 +#define Z_ASCII Z_TEXT /* for compatibility with 1.2.2 and earlier */ +#define Z_UNKNOWN 2 +/* Possible values of the data_type field (though see inflate()) */ + +#define Z_DEFLATED 8 +/* The deflate compression method (the only one supported in this version) */ + +#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */ + +#define zlib_version zlibVersion() +/* for compatibility with versions < 1.0.2 */ + + /* basic functions */ + +ZEXTERN const char * ZEXPORT zlibVersion OF((void)); +/* The application can compare zlibVersion and ZLIB_VERSION for consistency. + If the first character differs, the library code actually used is + not compatible with the zlib.h header file used by the application. + This check is automatically made by deflateInit and inflateInit. + */ + +/* +ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level)); + + Initializes the internal stream state for compression. The fields + zalloc, zfree and opaque must be initialized before by the caller. + If zalloc and zfree are set to Z_NULL, deflateInit updates them to + use default allocation functions. + + The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9: + 1 gives best speed, 9 gives best compression, 0 gives no compression at + all (the input data is simply copied a block at a time). + Z_DEFAULT_COMPRESSION requests a default compromise between speed and + compression (currently equivalent to level 6). 
+ + deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if level is not a valid compression level, + Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible + with the version assumed by the caller (ZLIB_VERSION). + msg is set to null if there is no error message. deflateInit does not + perform any compression: this will be done by deflate(). +*/ + + +ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush)); +/* + deflate compresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce some + output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. deflate performs one or both of the + following actions: + + - Compress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in and avail_in are updated and + processing will resume at this point for the next call of deflate(). + + - Provide more output starting at next_out and update next_out and avail_out + accordingly. This action is forced if the parameter flush is non zero. + Forcing flush frequently degrades the compression ratio, so this parameter + should be set only when necessary (in interactive applications). + Some output may be provided even if flush is not set. + + Before the call of deflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming + more output, and updating avail_in or avail_out accordingly; avail_out + should never be zero before the call. The application can consume the + compressed output when it wants, for example when the output buffer is full + (avail_out == 0), or after each call of deflate(). 
If deflate returns Z_OK + and with zero avail_out, it must be called again after making room in the + output buffer because there might be more output pending. + + Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to + decide how much data to accumulate before producing output, in order to + maximize compression. + + If the parameter flush is set to Z_SYNC_FLUSH, all pending output is + flushed to the output buffer and the output is aligned on a byte boundary, so + that the decompressor can get all input data available so far. (In particular + avail_in is zero after the call if enough output space has been provided + before the call.) Flushing may degrade compression for some compression + algorithms and so it should be used only when necessary. + + If flush is set to Z_FULL_FLUSH, all output is flushed as with + Z_SYNC_FLUSH, and the compression state is reset so that decompression can + restart from this point if previous compressed data has been damaged or if + random access is desired. Using Z_FULL_FLUSH too often can seriously degrade + compression. + + If deflate returns with avail_out == 0, this function must be called again + with the same value of the flush parameter and more output space (updated + avail_out), until the flush is complete (deflate returns with non-zero + avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that + avail_out is greater than six to avoid repeated flush markers due to + avail_out == 0 on return. + + If the parameter flush is set to Z_FINISH, pending input is processed, + pending output is flushed and deflate returns with Z_STREAM_END if there + was enough output space; if deflate returns with Z_OK, this function must be + called again with Z_FINISH and more output space (updated avail_out) but no + more input data, until it returns with Z_STREAM_END or an error. After + deflate has returned Z_STREAM_END, the only possible operations on the + stream are deflateReset or deflateEnd.
+ + Z_FINISH can be used immediately after deflateInit if all the compression + is to be done in a single step. In this case, avail_out must be at least + the value returned by deflateBound (see below). If deflate does not return + Z_STREAM_END, then it must be called again as described above. + + deflate() sets strm->adler to the adler32 checksum of all input read + so far (that is, total_in bytes). + + deflate() may update strm->data_type if it can make a good guess about + the input data type (Z_BINARY or Z_TEXT). In doubt, the data is considered + binary. This field is only for information purposes and does not affect + the compression algorithm in any manner. + + deflate() returns Z_OK if some progress has been made (more input + processed or more output produced), Z_STREAM_END if all input has been + consumed and all output has been produced (only when flush is set to + Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example + if next_in or next_out was NULL), Z_BUF_ERROR if no progress is possible + (for example avail_in or avail_out was zero). Note that Z_BUF_ERROR is not + fatal, and deflate() can be called again with more input and more output + space to continue compressing. +*/ + + +ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm)); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any + pending output. + + deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the + stream state was inconsistent, Z_DATA_ERROR if the stream was freed + prematurely (some input or output was discarded). In the error case, + msg may be set but then points to a static string (which must not be + deallocated). +*/ + + +/* +ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm)); + + Initializes the internal stream state for decompression. The fields + next_in, avail_in, zalloc, zfree and opaque must be initialized before by + the caller. 
If next_in is not Z_NULL and avail_in is large enough (the exact + value depends on the compression method), inflateInit determines the + compression method from the zlib header and allocates all data structures + accordingly; otherwise the allocation will be deferred to the first call of + inflate. If zalloc and zfree are set to Z_NULL, inflateInit updates them to + use default allocation functions. + + inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller. msg is set to null if there is no error + message. inflateInit does not perform any decompression apart from reading + the zlib header if present: this will be done by inflate(). (So next_in and + avail_in may be modified, but next_out and avail_out are unchanged.) +*/ + + +ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush)); +/* + inflate decompresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. inflate performs one or both of the + following actions: + + - Decompress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in is updated and processing + will resume at this point for the next call of inflate(). + + - Provide more output starting at next_out and update next_out and avail_out + accordingly. inflate() provides as much output as possible, until there + is no more input data or no more space in the output buffer (see below + about the flush parameter). 
+ + Before the call of inflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming + more output, and updating the next_* and avail_* values accordingly. + The application can consume the uncompressed output when it wants, for + example when the output buffer is full (avail_out == 0), or after each + call of inflate(). If inflate returns Z_OK and with zero avail_out, it + must be called again after making room in the output buffer because there + might be more output pending. + + The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, + Z_FINISH, or Z_BLOCK. Z_SYNC_FLUSH requests that inflate() flush as much + output as possible to the output buffer. Z_BLOCK requests that inflate() stop + if and when it gets to the next deflate block boundary. When decoding the + zlib or gzip format, this will cause inflate() to return immediately after + the header and before the first block. When doing a raw inflate, inflate() + will go ahead and process the first block, and will return when it gets to + the end of that block, or when it runs out of data. + + The Z_BLOCK option assists in appending to or combining deflate streams. + Also to assist in this, on return inflate() will set strm->data_type to the + number of unused bits in the last byte taken from strm->next_in, plus 64 + if inflate() is currently decoding the last block in the deflate stream, + plus 128 if inflate() returned immediately after decoding an end-of-block + code or decoding the complete header up to just before the first byte of the + deflate stream. The end-of-block will not be indicated until all of the + uncompressed data from that block has been written to strm->next_out. The + number of unused bits may in general be greater than seven, except when + bit 7 of data_type is set, in which case the number of unused bits will be + less than eight. 
+ + inflate() should normally be called until it returns Z_STREAM_END or an + error. However if all decompression is to be performed in a single step + (a single call of inflate), the parameter flush should be set to + Z_FINISH. In this case all pending input is processed and all pending + output is flushed; avail_out must be large enough to hold all the + uncompressed data. (The size of the uncompressed data may have been saved + by the compressor for this purpose.) The next operation on this stream must + be inflateEnd to deallocate the decompression state. The use of Z_FINISH + is never required, but can be used to inform inflate that a faster approach + may be used for the single inflate() call. + + In this implementation, inflate() always flushes as much output as + possible to the output buffer, and always uses the faster approach on the + first call. So the only effect of the flush parameter in this implementation + is on the return value of inflate(), as noted below, or when it returns early + because Z_BLOCK is used. + + If a preset dictionary is needed after this call (see inflateSetDictionary + below), inflate sets strm->adler to the adler32 checksum of the dictionary + chosen by the compressor and returns Z_NEED_DICT; otherwise it sets + strm->adler to the adler32 checksum of all output produced so far (that is, + total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described + below. At the end of the stream, inflate() checks that its computed adler32 + checksum is equal to that saved by the compressor and returns Z_STREAM_END + only if the checksum is correct. + + inflate() will decompress and check either zlib-wrapped or gzip-wrapped + deflate data. The header type is detected automatically. 
Any information + contained in the gzip header is not retained, so applications that need that + information should instead use raw inflate, see inflateInit2() below, or + inflateBack() and perform their own processing of the gzip header and + trailer. + + inflate() returns Z_OK if some progress has been made (more input processed + or more output produced), Z_STREAM_END if the end of the compressed data has + been reached and all uncompressed output has been produced, Z_NEED_DICT if a + preset dictionary is needed at this point, Z_DATA_ERROR if the input data was + corrupted (input stream not conforming to the zlib format or incorrect check + value), Z_STREAM_ERROR if the stream structure was inconsistent (for example + if next_in or next_out was NULL), Z_MEM_ERROR if there was not enough memory, + Z_BUF_ERROR if no progress is possible or if there was not enough room in the + output buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and + inflate() can be called again with more input and more output space to + continue decompressing. If Z_DATA_ERROR is returned, the application may then + call inflateSync() to look for a good compression block if a partial recovery + of the data is desired. +*/ + + +ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm)); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any + pending output. + + inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state + was inconsistent. In the error case, msg may be set but then points to a + static string (which must not be deallocated). +*/ + + /* Advanced functions */ + +/* + The following functions are needed only in some special applications. +*/ + +/* +ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm, + int level, + int method, + int windowBits, + int memLevel, + int strategy)); + + This is another version of deflateInit with more compression options. 
The + fields next_in, zalloc, zfree and opaque must be initialized before by + the caller. + + The method parameter is the compression method. It must be Z_DEFLATED in + this version of the library. + + The windowBits parameter is the base two logarithm of the window size + (the size of the history buffer). It should be in the range 8..15 for this + version of the library. Larger values of this parameter result in better + compression at the expense of memory usage. The default value is 15 if + deflateInit is used instead. + + windowBits can also be -8..-15 for raw deflate. In this case, -windowBits + determines the window size. deflate() will then generate raw deflate data + with no zlib header or trailer, and will not compute an adler32 check value. + + windowBits can also be greater than 15 for optional gzip encoding. Add + 16 to windowBits to write a simple gzip header and trailer around the + compressed data instead of a zlib wrapper. The gzip header will have no + file name, no extra data, no comment, no modification time (set to zero), + no header crc, and the operating system will be set to 255 (unknown). If a + gzip stream is being written, strm->adler is a crc32 instead of an adler32. + + The memLevel parameter specifies how much memory should be allocated + for the internal compression state. memLevel=1 uses minimum memory but + is slow and reduces compression ratio; memLevel=9 uses maximum memory + for optimal speed. The default value is 8. See zconf.h for total memory + usage as a function of windowBits and memLevel. + + The strategy parameter is used to tune the compression algorithm. Use the + value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a + filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no + string match), or Z_RLE to limit match distances to one (run-length + encoding). Filtered data consists mostly of small values with a somewhat + random distribution. 
In this case, the compression algorithm is tuned to + compress them better. The effect of Z_FILTERED is to force more Huffman + coding and less string matching; it is somewhat intermediate between + Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as fast as + Z_HUFFMAN_ONLY, but give better compression for PNG image data. The strategy + parameter only affects the compression ratio but not the correctness of the + compressed output even if it is not set appropriately. Z_FIXED prevents the + use of dynamic Huffman codes, allowing for a simpler decoder for special + applications. + + deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if a parameter is invalid (such as an invalid + method). msg is set to null if there is no error message. deflateInit2 does + not perform any compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm, + const Bytef *dictionary, + uInt dictLength)); +/* + Initializes the compression dictionary from the given byte sequence + without producing any compressed output. This function must be called + immediately after deflateInit, deflateInit2 or deflateReset, before any + call of deflate. The compressor and decompressor must use exactly the same + dictionary (see inflateSetDictionary). + + The dictionary should consist of strings (byte sequences) that are likely + to be encountered later in the data to be compressed, with the most commonly + used strings preferably put towards the end of the dictionary. Using a + dictionary is most useful when the data to be compressed is short and can be + predicted with good accuracy; the data can then be compressed better than + with the default empty dictionary.
+ + Depending on the size of the compression data structures selected by + deflateInit or deflateInit2, a part of the dictionary may in effect be + discarded, for example if the dictionary is larger than the window size + provided in deflateInit or deflateInit2. Thus the strings most likely to be useful should be + put at the end of the dictionary, not at the front. In addition, the + current implementation of deflate will use at most the window size minus + 262 bytes of the provided dictionary. + + Upon return of this function, strm->adler is set to the adler32 value + of the dictionary; the decompressor may later use this value to determine + which dictionary has been used by the compressor. (The adler32 value + applies to the whole dictionary even if only a subset of the dictionary is + actually used by the compressor.) If a raw deflate was requested, then the + adler32 value is not computed and strm->adler is not set. + + deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a + parameter is invalid (such as NULL dictionary) or the stream state is + inconsistent (for example if deflate has already been called for this stream + or if the compression method is bsort). deflateSetDictionary does not + perform any compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest, + z_streamp source)); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when several compression strategies will be + tried, for example when there are several ways of pre-processing the input + data with a filter. The streams that will be discarded should then be freed + by calling deflateEnd. Note that deflateCopy duplicates the internal + compression state which can be quite large, so this strategy is slow and + can consume lots of memory.
+ + deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm)); +/* + This function is equivalent to deflateEnd followed by deflateInit, + but does not free and reallocate all the internal compression state. + The stream will keep the same compression level and any other attributes + that may have been set by deflateInit2. + + deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being NULL). +*/ + +ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm, + int level, + int strategy)); +/* + Dynamically update the compression level and compression strategy. The + interpretation of level and strategy is as in deflateInit2. This can be + used to switch between compression and straight copy of the input data, or + to switch to a different kind of input data requiring a different + strategy. If the compression level is changed, the input available so far + is compressed with the old level (and may be flushed); the new level will + take effect only at the next call of deflate(). + + Before the call of deflateParams, the stream state must be set as for + a call of deflate(), since the currently available input may have to + be compressed and flushed. In particular, strm->avail_out must be non-zero. + + deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source + stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR + if strm->avail_out was zero. +*/ + +ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm, + int good_length, + int max_lazy, + int nice_length, + int max_chain)); +/* + Fine tune deflate's internal compression parameters. 
This should only be + used by someone who understands the algorithm used by zlib's deflate for + searching for the best matching string, and even then only by the most + fanatic optimizer trying to squeeze out the last compressed bit for their + specific input data. Read the deflate.c source code for the meaning of the + max_lazy, good_length, nice_length, and max_chain parameters. + + deflateTune() can be called after deflateInit() or deflateInit2(), and + returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream. + */ + +ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm, + uLong sourceLen)); +/* + deflateBound() returns an upper bound on the compressed size after + deflation of sourceLen bytes. It must be called after deflateInit() + or deflateInit2(). This would be used to allocate an output buffer + for deflation in a single pass, and so would be called before deflate(). +*/ + +ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm, + int bits, + int value)); +/* + deflatePrime() inserts bits in the deflate output stream. The intent + is that this function is used to start off the deflate output with the + bits leftover from a previous deflate stream when appending to it. As such, + this function can only be used for raw deflate, and must be used before the + first deflate() call after a deflateInit2() or deflateReset(). bits must be + less than or equal to 16, and that many of the least significant bits of + value will be inserted in the output. + + deflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm, + gz_headerp head)); +/* + deflateSetHeader() provides gzip header information for when a gzip + stream is requested by deflateInit2(). deflateSetHeader() may be called + after deflateInit2() or deflateReset() and before the first call of + deflate(). 
The text, time, os, extra field, name, and comment information + in the provided gz_header structure are written to the gzip header (xflags is + ignored -- the extra flags are set according to the compression level). The + caller must ensure that, if not Z_NULL, name and comment are terminated with + a zero byte, and that if extra is not Z_NULL, that extra_len bytes are + available there. If hcrc is true, a gzip header crc is included. Note that + the current versions of the command-line version of gzip (up through version + 1.3.x) do not support header crc's, and will report that it is a "multi-part + gzip file" and give up. + + If deflateSetHeader is not used, the default gzip header has text false, + the time set to zero, and os set to 255, with no extra, name, or comment + fields. The gzip header is returned to the default state by deflateReset(). + + deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm, + int windowBits)); + + This is another version of inflateInit with an extra parameter. The + fields next_in, avail_in, zalloc, zfree and opaque must be initialized + before by the caller. + + The windowBits parameter is the base two logarithm of the maximum window + size (the size of the history buffer). It should be in the range 8..15 for + this version of the library. The default value is 15 if inflateInit is used + instead. windowBits must be greater than or equal to the windowBits value + provided to deflateInit2() while compressing, or it must be equal to 15 if + deflateInit2() was not used. If a compressed stream with a larger window + size is given as input, inflate() will return with the error code + Z_DATA_ERROR instead of trying to allocate a larger window. + + windowBits can also be -8..-15 for raw inflate. In this case, -windowBits + determines the window size.
inflate() will then process raw deflate data, + not looking for a zlib or gzip header, not generating a check value, and not + looking for any check values for comparison at the end of the stream. This + is for use with other formats that use the deflate compressed data format + such as zip. Those formats provide their own check values. If a custom + format is developed using the raw deflate format for compressed data, it is + recommended that a check value such as an adler32 or a crc32 be applied to + the uncompressed data as is done in the zlib, gzip, and zip formats. For + most applications, the zlib format should be used as is. Note that comments + above on the use in deflateInit2() applies to the magnitude of windowBits. + + windowBits can also be greater than 15 for optional gzip decoding. Add + 32 to windowBits to enable zlib and gzip decoding with automatic header + detection, or add 16 to decode only the gzip format (the zlib format will + return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is + a crc32 instead of an adler32. + + inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if a parameter is invalid (such as a null strm). msg + is set to null if there is no error message. inflateInit2 does not perform + any decompression apart from reading the zlib header if present: this will + be done by inflate(). (So next_in and avail_in may be modified, but next_out + and avail_out are unchanged.) +*/ + +ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm, + const Bytef *dictionary, + uInt dictLength)); +/* + Initializes the decompression dictionary from the given uncompressed byte + sequence. This function must be called immediately after a call of inflate, + if that call returned Z_NEED_DICT. The dictionary chosen by the compressor + can be determined from the adler32 value returned by that call of inflate. 
+ The compressor and decompressor must use exactly the same dictionary (see + deflateSetDictionary). For raw inflate, this function can be called + immediately after inflateInit2() or inflateReset() and before any call of + inflate() to set the dictionary. The application must ensure that the + dictionary that was used for compression is provided. + + inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a + parameter is invalid (such as NULL dictionary) or the stream state is + inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the + expected one (incorrect adler32 value). inflateSetDictionary does not + perform any decompression: this will be done by subsequent calls of + inflate(). +*/ + +ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm)); +/* + Skips invalid compressed data until a full flush point (see above the + description of deflate with Z_FULL_FLUSH) can be found, or until all + available input is skipped. No output is provided. + + inflateSync returns Z_OK if a full flush point has been found, Z_BUF_ERROR + if no more input was provided, Z_DATA_ERROR if no flush point has been found, + or Z_STREAM_ERROR if the stream structure was inconsistent. In the success + case, the application may save the current value of total_in which + indicates where valid compressed data was found. In the error case, the + application may repeatedly call inflateSync, providing more input each time, + until success or end of the input data. +*/ + +ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest, + z_streamp source)); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when randomly accessing a large stream. The + first pass through the stream can periodically record the inflate state, + allowing restarting inflate at those points when randomly accessing the + stream.
+ + inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm)); +/* + This function is equivalent to inflateEnd followed by inflateInit, + but does not free and reallocate all the internal decompression state. + The stream will keep attributes that may have been set by inflateInit2. + + inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being NULL). +*/ + +ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm, + int bits, + int value)); +/* + This function inserts bits in the inflate input stream. The intent is + that this function is used to start inflating at a bit position in the + middle of a byte. The provided bits will be used before any bytes are used + from next_in. This function should only be used with raw inflate, and + should be used before the first inflate() call after inflateInit2() or + inflateReset(). bits must be less than or equal to 16, and that many of the + least significant bits of value will be inserted in the input. + + inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm, + gz_headerp head)); +/* + inflateGetHeader() requests that gzip header information be stored in the + provided gz_header structure. inflateGetHeader() may be called after + inflateInit2() or inflateReset(), and before the first call of inflate(). + As inflate() processes the gzip stream, head->done is zero until the header + is completed, at which time head->done is set to one. If a zlib stream is + being decoded, then head->done is set to -1 to indicate that there will be + no gzip header information forthcoming. 
Note that Z_BLOCK can be used to + force inflate() to return immediately after header processing is complete + and before any actual data is decompressed. + + The text, time, xflags, and os fields are filled in with the gzip header + contents. hcrc is set to true if there is a header CRC. (The header CRC + was valid if done is set to one.) If extra is not Z_NULL, then extra_max + contains the maximum number of bytes to write to extra. Once done is true, + extra_len contains the actual extra field length, and extra contains the + extra field, or that field truncated if extra_max is less than extra_len. + If name is not Z_NULL, then up to name_max characters are written there, + terminated with a zero unless the length is greater than name_max. If + comment is not Z_NULL, then up to comm_max characters are written there, + terminated with a zero unless the length is greater than comm_max. When + any of extra, name, or comment are not Z_NULL and the respective field is + not present in the header, then that field is set to Z_NULL to signal its + absence. This allows the use of deflateSetHeader() with the returned + structure to duplicate the header. However if those fields are set to + allocated memory, then the application will need to save those pointers + elsewhere so that they can be eventually freed. + + If inflateGetHeader is not used, then the header information is simply + discarded. The header is always checked for validity, including the header + CRC if present. inflateReset() will reset the process to discard the header + information. The application would need to call inflateGetHeader() again to + retrieve the header from the next gzip stream. + + inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits, + unsigned char FAR *window)); + + Initialize the internal stream state for decompression using inflateBack() + calls. 
The fields zalloc, zfree and opaque in strm must be initialized + before the call. If zalloc and zfree are Z_NULL, then the default library- + derived memory allocation routines are used. windowBits is the base two + logarithm of the window size, in the range 8..15. window is a caller + supplied buffer of that size. Except for special applications where it is + assured that deflate was used with small window sizes, windowBits must be 15 + and a 32K byte window must be supplied to be able to decompress general + deflate streams. + + See inflateBack() for the usage of these routines. + + inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of + the parameters are invalid, Z_MEM_ERROR if the internal state could not + be allocated, or Z_VERSION_ERROR if the version of the library does not + match the version of the header file. +*/ + +typedef unsigned (*in_func) OF((void FAR *, unsigned char FAR * FAR *)); +typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned)); + +ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm, + in_func in, void FAR *in_desc, + out_func out, void FAR *out_desc)); +/* + inflateBack() does a raw inflate with a single call using a call-back + interface for input and output. This is more efficient than inflate() for + file i/o applications in that it avoids copying between the output and the + sliding window by simply making the window itself the output buffer. This + function trusts the application to not change the output buffer passed by + the output function, at least until inflateBack() returns. + + inflateBackInit() must be called first to allocate the internal state + and to initialize the state with the user-provided window buffer. + inflateBack() may then be used multiple times to inflate a complete, raw + deflate stream with each call. inflateBackEnd() is then called to free + the allocated state. + + A raw deflate stream is one with no zlib or gzip header or trailer.
+ This routine would normally be used in a utility that reads zip or gzip + files and writes out uncompressed files. The utility would decode the + header and process the trailer on its own, hence this routine expects + only the raw deflate stream to decompress. This is different from the + normal behavior of inflate(), which expects either a zlib or gzip header and + trailer around the deflate stream. + + inflateBack() uses two subroutines supplied by the caller that are then + called by inflateBack() for input and output. inflateBack() calls those + routines until it reads a complete deflate stream and writes out all of the + uncompressed data, or until it encounters an error. The function's + parameters and return types are defined above in the in_func and out_func + typedefs. inflateBack() will call in(in_desc, &buf) which should return the + number of bytes of provided input, and a pointer to that input in buf. If + there is no input available, in() must return zero--buf is ignored in that + case--and inflateBack() will return a buffer error. inflateBack() will call + out(out_desc, buf, len) to write the uncompressed data buf[0..len-1]. out() + should return zero on success, or non-zero on failure. If out() returns + non-zero, inflateBack() will return with an error. Neither in() nor out() + are permitted to change the contents of the window provided to + inflateBackInit(), which is also the buffer that out() uses to write from. + The length written by out() will be at most the window size. Any non-zero + amount of input may be provided by in(). + + For convenience, inflateBack() can be provided input on the first call by + setting strm->next_in and strm->avail_in. If that input is exhausted, then + in() will be called. Therefore strm->next_in must be initialized before + calling inflateBack(). If strm->next_in is Z_NULL, then in() will be called + immediately for input. 
If strm->next_in is not Z_NULL, then strm->avail_in + must also be initialized, and then if strm->avail_in is not zero, input will + initially be taken from strm->next_in[0 .. strm->avail_in - 1]. + + The in_desc and out_desc parameters of inflateBack() are passed as the + first parameter of in() and out() respectively when they are called. These + descriptors can be optionally used to pass any information that the caller- + supplied in() and out() functions need to do their job. + + On return, inflateBack() will set strm->next_in and strm->avail_in to + pass back any unused input that was provided by the last in() call. The + return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR + if in() or out() returned an error, Z_DATA_ERROR if there was a format + error in the deflate stream (in which case strm->msg is set to indicate the + nature of the error), or Z_STREAM_ERROR if the stream was not properly + initialized. In the case of Z_BUF_ERROR, an input or output error can be + distinguished using strm->next_in which will be Z_NULL only if in() returned + an error. If strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to + out() returning non-zero. (in() will always be called before out(), so + strm->next_in is assured to be defined if out() returns non-zero.) Note + that inflateBack() cannot return Z_OK. +*/ + +ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm)); +/* + All memory allocated by inflateBackInit() is freed. + + inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream + state was inconsistent. +*/ + +ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void)); +/* Return flags indicating compile-time options.
+ + Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other: + 1.0: size of uInt + 3.2: size of uLong + 5.4: size of voidpf (pointer) + 7.6: size of z_off_t + + Compiler, assembler, and debug options: + 8: DEBUG + 9: ASMV or ASMINF -- use ASM code + 10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention + 11: 0 (reserved) + + One-time table building (smaller code, but not thread-safe if true): + 12: BUILDFIXED -- build static block decoding tables when needed + 13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed + 14,15: 0 (reserved) + + Library content (indicates missing functionality): + 16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking + deflate code when not needed) + 17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect + and decode gzip streams (to avoid linking crc code) + 18-19: 0 (reserved) + + Operation variations (changes in library functionality): + 20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate + 21: FASTEST -- deflate algorithm with only one, lowest compression level + 22,23: 0 (reserved) + + The sprintf variant used by gzprintf (zero is best): + 24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format + 25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure! + 26: 0 = returns value, 1 = void -- 1 means inferred string length returned + + Remainder: + 27-31: 0 (reserved) + */ + + + /* utility functions */ + +/* + The following utility functions are implemented on top of the + basic stream-oriented functions. To simplify the interface, some + default options are assumed (compression level and memory usage, + standard memory allocation functions). The source code of these + utility functions can easily be modified if you need special options. +*/ + +ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); +/* + Compresses the source buffer into the destination buffer. 
sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be at least the value returned + by compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed buffer. + This function can be used to compress a whole file at once if the + input file is mmap'ed. + compress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer. +*/ + +ZEXTERN int ZEXPORT compress2 OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen, + int level)); +/* + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed buffer. + + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. +*/ + +ZEXTERN uLong ZEXPORT compressBound OF((uLong sourceLen)); +/* + compressBound() returns an upper bound on the compressed size after + compress() or compress2() on sourceLen bytes. It would be used before + a compress() or compress2() call to allocate the destination buffer. +*/ + +ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); +/* + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be large enough to hold the + entire uncompressed data. 
(The size of the uncompressed data must have + been saved previously by the compressor and transmitted to the decompressor + by some mechanism outside the scope of this compression library.) + Upon exit, destLen is the actual size of the uncompressed data. + This function can be used to decompress a whole file at once if the + input file is mmap'ed. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete. +*/ + + +typedef voidp gzFile; + +ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode)); +/* + Opens a gzip (.gz) file for reading or writing. The mode parameter + is as in fopen ("rb" or "wb") but can also include a compression level + ("wb9") or a strategy: 'f' for filtered data as in "wb6f", 'h' for + Huffman only compression as in "wb1h", or 'R' for run-length encoding + as in "wb1R". (See the description of deflateInit2 for more information + about the strategy parameter.) + + gzopen can be used to read a file which is not in gzip format; in this + case gzread will directly read from the file without decompression. + + gzopen returns NULL if the file could not be opened or if there was + insufficient memory to allocate the (de)compression state; errno + can be checked to distinguish the two cases (if errno is zero, the + zlib error is Z_MEM_ERROR). */ + +long long gzgetMtime(gzFile file); +/* Return modify time */ + +ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode)); +/* + gzdopen() associates a gzFile with the file descriptor fd. File + descriptors are obtained from calls like open, dup, creat, pipe or + fileno (if the file has been previously opened with fopen). + The mode parameter is as in gzopen. + The next call of gzclose on the returned gzFile will also close the + file descriptor fd, just like fclose(fdopen(fd, mode)) closes the file + descriptor fd.
If you want to keep fd open, use gzdopen(dup(fd), mode). + gzdopen returns NULL if there was insufficient memory to allocate + the (de)compression state. +*/ + +ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy)); +/* + Dynamically update the compression level or strategy. See the description + of deflateInit2 for the meaning of these parameters. + gzsetparams returns Z_OK if success, or Z_STREAM_ERROR if the file was not + opened for writing. +*/ + +ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len)); +/* + Reads the given number of uncompressed bytes from the compressed file. + If the input file was not in gzip format, gzread copies the given number + of bytes into the buffer. + gzread returns the number of uncompressed bytes actually read (0 for + end of file, -1 for error). */ + +ZEXTERN int ZEXPORT gzwrite OF((gzFile file, + voidpc buf, unsigned len)); +/* + Writes the given number of uncompressed bytes into the compressed file. + gzwrite returns the number of uncompressed bytes actually written + (0 in case of error). +*/ + +ZEXTERN int ZEXPORTVA gzprintf OF((gzFile file, const char *format, ...)); +/* + Converts, formats, and writes the args to the compressed file under + control of the format string, as in fprintf. gzprintf returns the number of + uncompressed bytes actually written (0 in case of error). The number of + uncompressed bytes written is limited to 4095. The caller should assure that + this limit is not exceeded. If it is exceeded, then gzprintf() will return + an error (0) with nothing written. In this case, there may also be a + buffer overflow with unpredictable consequences, which is possible only if + zlib was compiled with the insecure functions sprintf() or vsprintf() + because the secure snprintf() or vsnprintf() functions were not available.
+*/ + +ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s)); +/* + Writes the given null-terminated string to the compressed file, excluding + the terminating null character. + gzputs returns the number of characters written, or -1 in case of error. +*/ + +ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len)); +/* + Reads bytes from the compressed file until len-1 characters are read, or + a newline character is read and transferred to buf, or an end-of-file + condition is encountered. The string is then terminated with a null + character. + gzgets returns buf, or Z_NULL in case of error. +*/ + +ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c)); +/* + Writes c, converted to an unsigned char, into the compressed file. + gzputc returns the value that was written, or -1 in case of error. +*/ + +ZEXTERN int ZEXPORT gzgetc OF((gzFile file)); +/* + Reads one byte from the compressed file. gzgetc returns this byte + or -1 in case of end of file or error. +*/ + +ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file)); +/* + Push one character back onto the stream to be read again later. + Only one character of push-back is allowed. gzungetc() returns the + character pushed, or -1 on failure. gzungetc() will fail if a + character has been pushed but not read yet, or if c is -1. The pushed + character will be discarded if the stream is repositioned with gzseek() + or gzrewind(). +*/ + +ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush)); +/* + Flushes all pending output into the compressed file. The parameter + flush is as in the deflate() function. The return value is the zlib + error number (see function gzerror below). gzflush returns Z_OK if + the flush parameter is Z_FINISH and all output could be flushed. + gzflush should be called only when strictly necessary because it can + degrade compression. 
+*/ + +ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file, + z_off_t offset, int whence)); +/* + Sets the starting position for the next gzread or gzwrite on the + given compressed file. The offset represents a number of bytes in the + uncompressed data stream. The whence parameter is defined as in lseek(2); + the value SEEK_END is not supported. + If the file is opened for reading, this function is emulated but can be + extremely slow. If the file is opened for writing, only forward seeks are + supported; gzseek then compresses a sequence of zeroes up to the new + starting position. + + gzseek returns the resulting offset location as measured in bytes from + the beginning of the uncompressed stream, or -1 in case of error, in + particular if the file is opened for writing and the new starting position + would be before the current position. +*/ + +ZEXTERN int ZEXPORT gzrewind OF((gzFile file)); +/* + Rewinds the given file. This function is supported only for reading. + + gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET) +*/ + +ZEXTERN z_off_t ZEXPORT gztell OF((gzFile file)); +/* + Returns the starting position for the next gzread or gzwrite on the + given compressed file. This position represents a number of bytes in the + uncompressed data stream. + + gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR) +*/ + +ZEXTERN int ZEXPORT gzeof OF((gzFile file)); +/* + Returns 1 when EOF has previously been detected reading the given + input stream, otherwise zero. +*/ + +ZEXTERN int ZEXPORT gzdirect OF((gzFile file)); +/* + Returns 1 if file is being read directly without decompression, otherwise + zero. +*/ + +ZEXTERN int ZEXPORT gzclose OF((gzFile file)); +/* + Flushes all pending output if necessary, closes the compressed file + and deallocates all the (de)compression state. The return value is the zlib + error number (see function gzerror below). 
+*/ + +ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum)); +/* + Returns the error message for the last error which occurred on the + given compressed file. errnum is set to zlib error number. If an + error occurred in the file system and not in the compression library, + errnum is set to Z_ERRNO and the application may consult errno + to get the exact error code. +*/ + +ZEXTERN void ZEXPORT gzclearerr OF((gzFile file)); +/* + Clears the error and end-of-file flags for file. This is analogous to the + clearerr() function in stdio. This is useful for continuing to read a gzip + file that is being written concurrently. +*/ + + /* checksum functions */ + +/* + These functions are not related to compression but are exported + anyway because they might be useful in applications using the + compression library. +*/ + +ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len)); +/* + Update a running Adler-32 checksum with the bytes buf[0..len-1] and + return the updated checksum. If buf is NULL, this function returns + the required initial value for the checksum. + An Adler-32 checksum is almost as reliable as a CRC32 but can be computed + much faster. Usage example: + + uLong adler = adler32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + adler = adler32(adler, buffer, length); + } + if (adler != original_adler) error(); +*/ + +ZEXTERN void ZEXPORT adler32_range OF((uLong* min, uLong* max)); +/* + Set *min and *max (both of which must not be null) to the minimum and + maximum possible checksum values that adler32 can produce. + + This function is not part of original software distribution. It is + added at Google (2003) in accordance with the copyright notice above, + which permits alteration and redistribution of the original software + provided, among other things, that altered source versions must be + plainly marked as such and not misrepresented as being the original + software. 
+*/ + +ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2, + z_off_t len2)); +/* + Combine two Adler-32 checksums into one. For two sequences of bytes, seq1 + and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for + each, adler1 and adler2. adler32_combine() returns the Adler-32 checksum of + seq1 and seq2 concatenated, requiring only adler1, adler2, and len2. +*/ + +ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len)); +/* + Update a running CRC-32 with the bytes buf[0..len-1] and return the + updated CRC-32. If buf is NULL, this function returns the required initial + value for the crc. Pre- and post-conditioning (one's complement) is + performed within this function so it shouldn't be done by the application. + Usage example: + + uLong crc = crc32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + crc = crc32(crc, buffer, length); + } + if (crc != original_crc) error(); +*/ + +ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2)); + +/* + Combine two CRC-32 check values into one. For two sequences of bytes, + seq1 and seq2 with lengths len1 and len2, CRC-32 check values were + calculated for each, crc1 and crc2. crc32_combine() returns the CRC-32 + check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and + len2.
+*/ + + + /* various hacks, don't look :) */ + +/* deflateInit, inflateInit, deflateInit2, inflateInit2 and inflateBackInit are + * macros to allow checking the zlib version and the compiler's view of + * z_stream: each macro forwards its arguments to the underscore-suffixed + * function declared below and appends ZLIB_VERSION and sizeof(z_stream) as + * the version and stream_size arguments. + */ +ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int level, int method, + int windowBits, int memLevel, + int strategy, const char *version, + int stream_size)); +ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int windowBits, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits, + unsigned char FAR *window, + const char *version, + int stream_size)); +#define deflateInit(strm, level) \ + deflateInit_((strm), (level), ZLIB_VERSION, sizeof(z_stream)) +#define inflateInit(strm) \ + inflateInit_((strm), ZLIB_VERSION, sizeof(z_stream)) +#define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ + deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ + (strategy), ZLIB_VERSION, sizeof(z_stream)) +#define inflateInit2(strm, windowBits) \ + inflateInit2_((strm), (windowBits), ZLIB_VERSION, sizeof(z_stream)) +#define inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + ZLIB_VERSION, sizeof(z_stream)) + + +#if !defined(ZUTIL_H) && !defined(NO_DUMMY_DECL) + struct internal_state {int dummy;}; /* hack for buggy compilers: dummy definition of struct internal_state, provided only when neither ZUTIL_H nor NO_DUMMY_DECL is defined */ +#endif + +ZEXTERN const char * ZEXPORT zError OF((int)); +ZEXTERN int ZEXPORT inflateSyncPoint OF((z_streamp z)); +ZEXTERN const uLongf * ZEXPORT get_crc_table OF((void)); + +#ifdef __cplusplus +} +#endif + +#endif /* ZLIB_H */ |