// Copyright 2015 Google Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // //////////////////////////////////////////////////////////////////////////////// #include "src/binary_parse/range_checked_byte_ptr.h" #include #include #include namespace piex { namespace binary_parse { #ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE #define BREAK_IF_DEBUGGING() assert(false) #else #define BREAK_IF_DEBUGGING() assert(true) #endif namespace { class MemoryPagedByteArray : public PagedByteArray { public: MemoryPagedByteArray(const unsigned char *buffer, const size_t len); virtual size_t length() const; virtual size_t pageSize() const; virtual void getPage(size_t page_index, const unsigned char **begin, const unsigned char **end, PagePtr *page) const; private: const unsigned char *buffer_; const size_t len_; }; MemoryPagedByteArray::MemoryPagedByteArray(const unsigned char *buffer, const size_t len) : buffer_(buffer), len_(len) {} size_t MemoryPagedByteArray::length() const { return len_; } size_t MemoryPagedByteArray::pageSize() const { return len_; } void MemoryPagedByteArray::getPage(size_t /* page_index */, const unsigned char **begin, const unsigned char **end, PagePtr *page) const { *begin = buffer_; *end = buffer_ + len_; *page = PagePtr(); } // A functor that does nothing. This is used as a no-op shared pointer // deallocator below. class NullFunctor { public: void operator()() {} void operator()(PagedByteArray * /* p */) const {} }; } // namespace PagedByteArray::~PagedByteArray() {} RangeCheckedBytePtr::RangeCheckedBytePtr() : array_(), page_data_(NULL), current_pos_(0), sub_array_begin_(0), sub_array_end_(0), page_begin_offset_(0), current_page_len_(0), error_flag_(RANGE_CHECKED_BYTE_ERROR) {} RangeCheckedBytePtr::RangeCheckedBytePtr(const unsigned char *array, const size_t len) : array_(new MemoryPagedByteArray(array, len)), page_data_(NULL), current_pos_(0), sub_array_begin_(0), sub_array_end_(len), page_begin_offset_(0), current_page_len_(0), error_flag_(RANGE_CHECKED_BYTE_SUCCESS) { assert(array); if (array == NULL) { error_flag_ = RANGE_CHECKED_BYTE_ERROR; } } RangeCheckedBytePtr::RangeCheckedBytePtr(PagedByteArray *array) : array_(array, NullFunctor()), page_data_(NULL), current_pos_(0), sub_array_begin_(0), sub_array_end_(array->length()), page_begin_offset_(0), current_page_len_(0), error_flag_(RANGE_CHECKED_BYTE_SUCCESS) {} RangeCheckedBytePtr RangeCheckedBytePtr::invalidPointer() { return RangeCheckedBytePtr(); } RangeCheckedBytePtr RangeCheckedBytePtr::pointerToSubArray( size_t pos, size_t length) const { RangeCheckedBytePtr sub_result = (*this) + pos; if (!sub_result.errorOccurred() && length <= sub_result.remainingLength()) { sub_result.sub_array_begin_ = sub_result.current_pos_; sub_result.sub_array_end_ = sub_result.sub_array_begin_ + length; // Restrict the boundaries of the current page to the newly set sub-array. sub_result.restrictPageToSubArray(); return sub_result; } else { error_flag_ = RANGE_CHECKED_BYTE_ERROR; return invalidPointer(); } } size_t RangeCheckedBytePtr::offsetInArray() const { // sub_array_begin_ <= current_pos_ is a class invariant, but protect // against violations of this invariant. if (sub_array_begin_ <= current_pos_) { return current_pos_ - sub_array_begin_; } else { assert(false); return 0; } } std::string RangeCheckedBytePtr::substr(size_t pos, size_t length) const { std::vector bytes = extractBytes(pos, length); std::string result; result.reserve(bytes.size()); for (size_t i = 0; i < bytes.size(); ++i) { result.push_back(static_cast(bytes[i])); } return result; } std::vector RangeCheckedBytePtr::extractBytes( size_t pos, size_t length) const { std::vector result; if (pos + length < pos /* overflow */ || remainingLength() < pos + length) { BREAK_IF_DEBUGGING(); error_flag_ = RANGE_CHECKED_BYTE_ERROR_OVERFLOW; return result; } result.reserve(length); for (size_t i = 0; i < length; ++i) { result.push_back((*this)[pos + i]); } return result; } bool operator==(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y) { if (x.array_ != y.array_) { assert(false); return false; } return x.current_pos_ == y.current_pos_; } bool operator!=(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y) { return !(x == y); } void RangeCheckedBytePtr::loadPageForOffset(size_t offset) const { // The offset should always lie within the bounds of the sub-array (this // condition is enforced at the callsite). However, even if the offset lies // outside the sub-array, the restrictPageToSubArray() call at the end // ensures that the object is left in a consistent state that maintains the // class invariants. assert(offset >= sub_array_begin_ && offset < sub_array_end_); // Ensure that offset lies within the array. if (offset >= array_->length()) { assert(false); return; } // Determine the index of the page to request. size_t page_index = offset / array_->pageSize(); // Get the page. const unsigned char *page_begin; const unsigned char *page_end; array_->getPage(page_index, &page_begin, &page_end, &page_); // Ensure that the page has the expected length (as specified in the // PagedByteArray interface). size_t expected_page_size = array_->pageSize(); if (page_index == (array_->length() - 1) / array_->pageSize()) { expected_page_size = array_->length() - array_->pageSize() * page_index; } if ((page_end < page_begin) || (static_cast(page_end - page_begin) != expected_page_size)) { assert(false); return; } // Remember information about page. page_data_ = page_begin; page_begin_offset_ = page_index * array_->pageSize(); current_page_len_ = static_cast(page_end - page_begin); // Restrict the boundaries of the page to lie within the sub-array. restrictPageToSubArray(); } void RangeCheckedBytePtr::restrictPageToSubArray() const { // Restrict the current page's boundaries so that it is always contained // completely within the extent of the sub-array. // This function is purposely designed to work correctly in the following // two special cases: // a) The current page lies entirely outside the sub-array. In this case, // current_page_len_ will be set to zero. page_data_ may either remain // unchanged or may be changed to point one element beyond the end of the // page, depending on whether the current page lies before or after the // sub-array. // b) The current page is in the state as initialized by the constructor // (i.e. page_data_ is NULL and current_page_len_ is zero). In this case, // page_data_ and current_page_len_ will remain unchanged. // Does the beginning of the page lie before the beginning of the sub-array? if (page_begin_offset_ < sub_array_begin_) { // Compute amount by which to shorten page. size_t amount_to_shorten = sub_array_begin_ - page_begin_offset_; if (amount_to_shorten > current_page_len_) { amount_to_shorten = current_page_len_; } // Adjust beginning of page accordingly. page_begin_offset_ += amount_to_shorten; page_data_ += amount_to_shorten; current_page_len_ -= amount_to_shorten; } // Does the end of the page lie beyond the end of the sub-array? if (page_begin_offset_ + current_page_len_ > sub_array_end_) { // Reduce length of page accordingly. size_t new_len = sub_array_end_ - page_begin_offset_; if (new_len > current_page_len_) { new_len = current_page_len_; } current_page_len_ = new_len; } } int memcmp(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y, size_t num) { std::vector x_vec = x.extractBytes(0, num); std::vector y_vec = y.extractBytes(0, num); if (!x.errorOccurred() && !y.errorOccurred()) { return ::memcmp(&x_vec[0], &y_vec[0], num); } else { // return an arbitrary value return -1; } } int strcmp(const RangeCheckedBytePtr &x, const std::string &y) { std::vector x_vec = x.extractBytes(0, y.length()); if (!x.errorOccurred()) { return ::memcmp(&x_vec[0], y.c_str(), y.length()); } else { // return an arbitrary value return -1; } } size_t strlen(const RangeCheckedBytePtr &src) { size_t len = 0; RangeCheckedBytePtr str = src; while (!str.errorOccurred() && (str[0] != '\0')) { str++; len++; } return len; } int16 Get16s(const RangeCheckedBytePtr &input, const bool big_endian, MemoryStatus *status) { const uint16 unsigned_value = Get16u(input, big_endian, status); if (*status != RANGE_CHECKED_BYTE_SUCCESS) { // Return an arbitrary value. return 0; } // Convert the two's-complement signed integer encoded in 'unsigned_value' // into a signed representation in the implementation's native representation // for signed integers. An optimized Blaze build (x64) compiles all of the // following code to a no-op (as of this writing). // For further details, see the corresponding comment in Get32s(). if (unsigned_value == 0x8000u) { return static_cast(-0x8000); } else if (unsigned_value > 0x8000u) { return -static_cast(0x10000u - unsigned_value); } else { return static_cast(unsigned_value); } } uint16 Get16u(const RangeCheckedBytePtr &input, const bool big_endian, MemoryStatus *status) { if (input.remainingLength() < 2) { if (status && *status == RANGE_CHECKED_BYTE_SUCCESS) { *status = RANGE_CHECKED_BYTE_ERROR; } // Return an arbitrary value. return 0; } if (big_endian) { return (static_cast(input[0]) << 8) | static_cast(input[1]); } else { return (static_cast(input[1]) << 8) | static_cast(input[0]); } } int32 Get32s(const RangeCheckedBytePtr &input, const bool big_endian, MemoryStatus *status) { const uint32 unsigned_value = Get32u(input, big_endian, status); if (*status != RANGE_CHECKED_BYTE_SUCCESS) { // Return an arbitrary value. return 0; } // Convert the two's-complement signed integer encoded in 'unsigned_value' // into a signed representation in the implementation's native representation // for signed integers. // For all practical purposes, the same result could be obtained simply by // casting unsigned_value to int32; the result of this is // implementation-defined, but on all of the platforms we care about, it does // what we want. // The code below, however, arguably has the aesthetic advantage of being // independent of the representation for signed integers chosen by the // implementation, as long as 'int' and 'unsigned' have the required range to // represent all of the required values. // An optimized Blaze build (x64) compiles all of the following code to a // no-op (as of this writing); i.e. the value that Get32u() returned in %eax // is left unchanged. if (unsigned_value == 0x80000000u) { // Read here on why the constant expression is written this way: // http://stackoverflow.com/questions/14695118 return -0x7fffffff - 1; } else if (unsigned_value > 0x80000000u) { // The expression // 0xffffffffu - unsigned_value + 1 // is a portable way of flipping the sign of a twos-complement signed // integer whose binary representation is stored in an unsigned integer. // '0xffffffffu + 1' is used in preference to simply '0' because it makes // it clearer that the correct result will be obtained even if an int is // greater than 32 bits. The '0xffffffffu + 1' is "spread out" around // 'unsigned_value' to prevent the compiler from warning about an // integral constant overflow. ('0' would produce the correct result in // this case too but would rely in a more subtle way on the rules for // unsigned wraparound.) return -static_cast(0xffffffffu - unsigned_value + 1); } else { return static_cast(unsigned_value); } } uint32 Get32u(const RangeCheckedBytePtr &input, const bool big_endian, MemoryStatus *status) { if (input.remainingLength() < 4) { if (status && *status == RANGE_CHECKED_BYTE_SUCCESS) { *status = RANGE_CHECKED_BYTE_ERROR; } // Return an arbitrary value. return 0; } if (big_endian) { return (static_cast(input[0]) << 24) | (static_cast(input[1]) << 16) | (static_cast(input[2]) << 8) | (static_cast(input[3]) << 0); } else { return (static_cast(input[3]) << 24) | (static_cast(input[2]) << 16) | (static_cast(input[1]) << 8) | (static_cast(input[0]) << 0); } } } // namespace binary_parse } // namespace piex