aboutsummaryrefslogtreecommitdiff
path: root/src/binary_parse/range_checked_byte_ptr.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/binary_parse/range_checked_byte_ptr.cc')
-rwxr-xr-xsrc/binary_parse/range_checked_byte_ptr.cc402
1 files changed, 402 insertions, 0 deletions
diff --git a/src/binary_parse/range_checked_byte_ptr.cc b/src/binary_parse/range_checked_byte_ptr.cc
new file mode 100755
index 0000000..bbfdee2
--- /dev/null
+++ b/src/binary_parse/range_checked_byte_ptr.cc
@@ -0,0 +1,402 @@
+// Copyright 2015 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include "src/binary_parse/range_checked_byte_ptr.h"
+
+#include <assert.h>
+#include <cstddef>
+#include <cstring>
+
+namespace piex {
+namespace binary_parse {
+
+#ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE  // Opt-in: trap out-of-range accesses.
+#define BREAK_IF_DEBUGGING() assert(false)  // Fails the assertion at the call site.
+#else
+#define BREAK_IF_DEBUGGING() assert(true)  // Assertion that always holds: a no-op.
+#endif
+
+namespace {
+// PagedByteArray implementation backed by one caller-supplied memory buffer.
+// The whole buffer is exposed as a single page, so pageSize() equals
+// length(). The buffer is only referenced; this class never copies or frees
+// it.
+class MemoryPagedByteArray : public PagedByteArray {
+ public:
+  // Wraps the 'len' bytes starting at 'buffer'.
+  MemoryPagedByteArray(const unsigned char *buffer, const size_t len)
+      : buffer_(buffer), len_(len) {}
+
+  // Total number of bytes in the wrapped buffer.
+  virtual size_t length() const { return len_; }
+
+  // There is exactly one page and it spans the whole buffer.
+  virtual size_t pageSize() const { return len_; }
+
+  // Reports the whole buffer as the page, whatever index is requested, and
+  // resets '*page' to a default-constructed PagePtr.
+  virtual void getPage(size_t /* page_index */, const unsigned char **begin,
+                       const unsigned char **end, PagePtr *page) const {
+    *page = PagePtr();
+    *begin = buffer_;
+    *end = buffer_ + len_;
+  }
+
+ private:
+  const unsigned char *buffer_;  // First byte of the wrapped buffer.
+  const size_t len_;             // Number of bytes at buffer_.
+};
+
+// Callable whose invocations have no effect. It is used below as the
+// deallocator of a shared pointer that must not free the object it refers to.
+struct NullFunctor {
+  // Deallocator form: leaves the pointee untouched.
+  void operator()(PagedByteArray * /* p */) const {}
+  // Argument-less form: does nothing either.
+  void operator()() {}
+};
+} // namespace
+
+PagedByteArray::~PagedByteArray() {}  // Out-of-line destructor with an empty body.
+
+// Default constructor: the pointer refers to no array, has an empty sub-array
+// and no loaded page, and starts out with the error flag set.
+RangeCheckedBytePtr::RangeCheckedBytePtr()
+    : array_(), page_data_(NULL), current_pos_(0),
+      sub_array_begin_(0), sub_array_end_(0),
+      page_begin_offset_(0), current_page_len_(0),
+      error_flag_(RANGE_CHECKED_BYTE_ERROR) {}
+
+// Creates a pointer over the 'len' bytes starting at 'array'. The bytes are
+// wrapped in a MemoryPagedByteArray; the sub-array initially spans offsets
+// [0, len) and no page is loaded yet. Passing a null 'array' is treated as an
+// error: it trips the assertion and sets the error flag.
+RangeCheckedBytePtr::RangeCheckedBytePtr(const unsigned char *array,
+                                         const size_t len)
+    : array_(new MemoryPagedByteArray(array, len)), page_data_(NULL),
+      current_pos_(0), sub_array_begin_(0), sub_array_end_(len),
+      page_begin_offset_(0), current_page_len_(0),
+      error_flag_(RANGE_CHECKED_BYTE_SUCCESS) {
+  assert(array != NULL);
+  if (!array) {
+    // Reached only when the assertion above is compiled out.
+    error_flag_ = RANGE_CHECKED_BYTE_ERROR;
+  }
+}
+
+// Creates a pointer over the caller-supplied paged array. The array is held
+// with a no-op deallocator (NullFunctor), so this object never deletes it.
+// The sub-array initially spans offsets [0, array->length()) and no page is
+// loaded yet.
+//
+// A null 'array' is handled like a null buffer in the constructor taking
+// 'const unsigned char *': it trips the assertion and, when assertions are
+// compiled out, produces a pointer with an empty sub-array and the error flag
+// set instead of dereferencing the null pointer.
+RangeCheckedBytePtr::RangeCheckedBytePtr(PagedByteArray *array)
+    : array_(array, NullFunctor()),
+      page_data_(NULL),
+      current_pos_(0),
+      sub_array_begin_(0),
+      sub_array_end_(array != NULL ? array->length() : 0),
+      page_begin_offset_(0),
+      current_page_len_(0),
+      error_flag_(array != NULL ? RANGE_CHECKED_BYTE_SUCCESS
+                                : RANGE_CHECKED_BYTE_ERROR) {
+  assert(array != NULL);
+}
+
+RangeCheckedBytePtr RangeCheckedBytePtr::invalidPointer() {
+ return RangeCheckedBytePtr();
+}
+
+// Returns a pointer whose sub-array consists of the 'length' bytes that start
+// 'pos' bytes after the current position.
+//
+// If advancing by 'pos' fails, or fewer than 'length' bytes remain from
+// there, the error flag of *this* object is set and an invalid pointer is
+// returned.
+RangeCheckedBytePtr RangeCheckedBytePtr::pointerToSubArray(
+    size_t pos, size_t length) const {
+  RangeCheckedBytePtr view = (*this) + pos;
+
+  const bool fits = !view.errorOccurred() && length <= view.remainingLength();
+  if (!fits) {
+    error_flag_ = RANGE_CHECKED_BYTE_ERROR;
+    return invalidPointer();
+  }
+
+  // The new sub-array starts at the advanced position.
+  view.sub_array_begin_ = view.current_pos_;
+  view.sub_array_end_ = view.current_pos_ + length;
+
+  // Shrink the cached page so that it does not reach outside the new
+  // sub-array.
+  view.restrictPageToSubArray();
+  return view;
+}
+
+// Returns the distance of the current position from the start of the
+// sub-array.
+size_t RangeCheckedBytePtr::offsetInArray() const {
+  // The class invariant says sub_array_begin_ <= current_pos_. Should it ever
+  // be broken, trap in debug builds and report offset 0 rather than letting
+  // the subtraction wrap around.
+  if (current_pos_ < sub_array_begin_) {
+    assert(false);
+    return 0;
+  }
+  return current_pos_ - sub_array_begin_;
+}
+
+// Returns the bytes in [pos, pos + length), relative to the current position,
+// as a std::string. Range checking is delegated to extractBytes(); when that
+// returns no bytes the result is an empty string.
+std::string RangeCheckedBytePtr::substr(size_t pos, size_t length) const {
+  const std::vector<unsigned char> raw = extractBytes(pos, length);
+  // Each unsigned char is converted to char while the string is built.
+  return std::string(raw.begin(), raw.end());
+}
+
+// Copies the bytes in [pos, pos + length), relative to the current position,
+// into a vector.
+//
+// If 'pos + length' wraps around or exceeds remainingLength(), the error flag
+// is set to RANGE_CHECKED_BYTE_ERROR_OVERFLOW and an empty vector is
+// returned.
+std::vector<unsigned char> RangeCheckedBytePtr::extractBytes(
+    size_t pos, size_t length) const {
+  std::vector<unsigned char> bytes;
+
+  const size_t end = pos + length;
+  const bool wrapped = end < pos;
+  if (wrapped || remainingLength() < end) {
+    BREAK_IF_DEBUGGING();
+    error_flag_ = RANGE_CHECKED_BYTE_ERROR_OVERFLOW;
+    return bytes;
+  }
+
+  bytes.reserve(length);
+  for (size_t offset = pos; offset != end; ++offset) {
+    bytes.push_back((*this)[offset]);
+  }
+  return bytes;
+}
+
+// Two pointers are equal when they are at the same position of the same
+// array. Comparing pointers into different arrays trips the assertion and
+// yields false.
+bool operator==(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y) {
+  const bool different_arrays = x.array_ != y.array_;
+  assert(!different_arrays);
+  return !different_arrays && x.current_pos_ == y.current_pos_;
+}
+
+// Exact negation of operator== for the same operands.
+bool operator!=(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y) {
+  const bool equal = (x == y);
+  return !equal;
+}
+
+// Makes the page containing absolute array offset 'offset' the current page
+// and then clips it to the sub-array.
+//
+// The call leaves the current page untouched if 'offset' is not inside the
+// array or if the array reports a page size of zero. If the array hands back
+// a page of unexpected size, the cached page description is reset to the
+// "no page loaded" state used by the constructors.
+void RangeCheckedBytePtr::loadPageForOffset(size_t offset) const {
+  // The offset should always lie within the bounds of the sub-array (this
+  // condition is enforced at the callsite). However, even if the offset lies
+  // outside the sub-array, the restrictPageToSubArray() call at the end
+  // ensures that the object is left in a consistent state that maintains the
+  // class invariants.
+  assert(offset >= sub_array_begin_ && offset < sub_array_end_);
+
+  // Ensure that offset lies within the array.
+  const size_t array_len = array_->length();
+  if (offset >= array_len) {
+    assert(false);
+    return;
+  }
+
+  // A page size of zero would make the divisions below undefined.
+  const size_t page_size = array_->pageSize();
+  if (page_size == 0) {
+    assert(false);
+    return;
+  }
+
+  // Determine the index of the page to request.
+  const size_t page_index = offset / page_size;
+
+  // Get the page.
+  const unsigned char *page_begin = NULL;
+  const unsigned char *page_end = NULL;
+  array_->getPage(page_index, &page_begin, &page_end, &page_);
+
+  // Ensure that the page has the expected length (as specified in the
+  // PagedByteArray interface): every page is page_size bytes long except the
+  // last one, which holds whatever remains.
+  size_t expected_page_size = page_size;
+  if (page_index == (array_len - 1) / page_size) {
+    expected_page_size = array_len - page_size * page_index;
+  }
+  if ((page_end < page_begin) ||
+      (static_cast<size_t>(page_end - page_begin) != expected_page_size)) {
+    assert(false);
+    // getPage() has already overwritten page_, so the cached page_data_ may
+    // no longer be backed by the page it was taken from. Fall back to the
+    // state set up by the constructors (no page loaded) instead of keeping a
+    // possibly stale description around.
+    page_data_ = NULL;
+    page_begin_offset_ = 0;
+    current_page_len_ = 0;
+    return;
+  }
+
+  // Remember information about page.
+  page_data_ = page_begin;
+  page_begin_offset_ = page_index * page_size;
+  current_page_len_ = expected_page_size;
+
+  // Restrict the boundaries of the page to lie within the sub-array.
+  restrictPageToSubArray();
+}
+
+// Clips the current page so that it is always contained completely within
+// the extent of the sub-array [sub_array_begin_, sub_array_end_).
+//
+// This function is purposely designed to work correctly in the following
+// two special cases:
+// a) The current page lies entirely outside the sub-array. In this case,
+//    current_page_len_ will be set to zero. page_data_ may either remain
+//    unchanged or may be changed to point one element beyond the end of the
+//    page, depending on whether the current page lies before or after the
+//    sub-array.
+// b) The current page is in the state as initialized by the constructor
+//    (i.e. page_data_ is NULL and current_page_len_ is zero). In this case,
+//    page_data_ and current_page_len_ will remain unchanged.
+void RangeCheckedBytePtr::restrictPageToSubArray() const {
+  // Does the beginning of the page lie before the beginning of the sub-array?
+  if (page_begin_offset_ < sub_array_begin_) {
+    // Compute amount by which to shorten page; never more than the page
+    // actually holds.
+    size_t amount_to_shorten = sub_array_begin_ - page_begin_offset_;
+    if (amount_to_shorten > current_page_len_) {
+      amount_to_shorten = current_page_len_;
+    }
+
+    // Adjust beginning of page accordingly.
+    page_begin_offset_ += amount_to_shorten;
+    page_data_ += amount_to_shorten;
+    current_page_len_ -= amount_to_shorten;
+  }
+
+  // Does the end of the page lie beyond the end of the sub-array?
+  if (page_begin_offset_ + current_page_len_ > sub_array_end_) {
+    if (page_begin_offset_ >= sub_array_end_) {
+      // The page starts at or after the end of the sub-array, so none of it
+      // is usable. This has to be handled separately: computing
+      // 'sub_array_end_ - page_begin_offset_' would wrap around here and the
+      // page would wrongly keep its full length.
+      current_page_len_ = 0;
+    } else {
+      // Reduce length of page so that it ends where the sub-array ends.
+      const size_t new_len = sub_array_end_ - page_begin_offset_;
+      if (new_len < current_page_len_) {
+        current_page_len_ = new_len;
+      }
+    }
+  }
+}
+
+// Compares the 'num' bytes starting at 'x' with the 'num' bytes starting at
+// 'y'.
+//
+// Returns the result of ::memcmp() on those bytes, or -1 (an arbitrary value)
+// if either pointer reports an error once the bytes have been extracted, for
+// example because fewer than 'num' bytes remain. Comparing zero bytes without
+// an error yields 0.
+int memcmp(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y,
+           size_t num) {
+  const std::vector<unsigned char> x_vec = x.extractBytes(0, num);
+  const std::vector<unsigned char> y_vec = y.extractBytes(0, num);
+
+  if (x.errorOccurred() || y.errorOccurred()) {
+    // return an arbitrary value
+    return -1;
+  }
+  if (num == 0) {
+    // Both vectors are empty, so '&x_vec[0]' must not be evaluated. Two
+    // empty ranges compare equal.
+    return 0;
+  }
+  return ::memcmp(&x_vec[0], &y_vec[0], num);
+}
+
+// Compares the first y.length() bytes at 'x' with the contents of 'y'.
+// Bytes of 'x' beyond y.length() are not looked at, so this is a prefix
+// comparison rather than a full C-string comparison.
+//
+// Returns the result of ::memcmp() on those bytes, or -1 (an arbitrary value)
+// if 'x' reports an error once the bytes have been extracted. An empty 'y'
+// compares equal (0) as long as 'x' is not in the error state.
+int strcmp(const RangeCheckedBytePtr &x, const std::string &y) {
+  const size_t len = y.length();
+  const std::vector<unsigned char> x_vec = x.extractBytes(0, len);
+
+  if (x.errorOccurred()) {
+    // return an arbitrary value
+    return -1;
+  }
+  if (len == 0) {
+    // x_vec is empty, so '&x_vec[0]' must not be evaluated.
+    return 0;
+  }
+  return ::memcmp(&x_vec[0], y.c_str(), len);
+}
+
+// Counts the bytes from 'src' up to, but not including, the first '\0' byte.
+// Counting also stops as soon as the working copy of the pointer reports an
+// error. 'src' itself is never advanced.
+size_t strlen(const RangeCheckedBytePtr &src) {
+  size_t count = 0;
+  for (RangeCheckedBytePtr cursor = src;
+       !cursor.errorOccurred() && (cursor[0] != '\0'); cursor++) {
+    ++count;
+  }
+  return count;
+}
+
+// Reads a signed 16-bit two's-complement integer from the two bytes at
+// 'input', most significant byte first if 'big_endian' is true and least
+// significant byte first otherwise.
+//
+// 'status' may be NULL, as it may be for Get16u(). When it is non-NULL and
+// does not hold RANGE_CHECKED_BYTE_SUCCESS after the read (either because the
+// read failed or because it already held an error on entry), 0 is returned as
+// an arbitrary value.
+int16 Get16s(const RangeCheckedBytePtr &input, const bool big_endian,
+             MemoryStatus *status) {
+  const uint16 unsigned_value = Get16u(input, big_endian, status);
+  // Only consult 'status' when the caller supplied one; Get16u() accepts a
+  // null pointer, so this function must not dereference it blindly.
+  if (status != NULL && *status != RANGE_CHECKED_BYTE_SUCCESS) {
+    // Return an arbitrary value.
+    return 0;
+  }
+
+  // Convert the two's-complement signed integer encoded in 'unsigned_value'
+  // into a signed representation in the implementation's native representation
+  // for signed integers. An optimized Blaze build (x64) compiles all of the
+  // following code to a no-op (as of this writing).
+  // For further details, see the corresponding comment in Get32s().
+  if (unsigned_value == 0x8000u) {
+    return static_cast<int16>(-0x8000);
+  } else if (unsigned_value > 0x8000u) {
+    return -static_cast<int16>(0x10000u - unsigned_value);
+  } else {
+    return static_cast<int16>(unsigned_value);
+  }
+}
+
+// Reads an unsigned 16-bit integer from the two bytes at 'input', most
+// significant byte first if 'big_endian' is true and least significant byte
+// first otherwise.
+//
+// If fewer than two bytes remain, 0 is returned as an arbitrary value and,
+// provided 'status' is non-NULL and currently holds
+// RANGE_CHECKED_BYTE_SUCCESS, '*status' is set to RANGE_CHECKED_BYTE_ERROR.
+uint16 Get16u(const RangeCheckedBytePtr &input, const bool big_endian,
+              MemoryStatus *status) {
+  const bool enough_bytes = input.remainingLength() >= 2;
+  if (!enough_bytes) {
+    // An error that was recorded earlier is never overwritten.
+    const bool may_report = status && *status == RANGE_CHECKED_BYTE_SUCCESS;
+    if (may_report) {
+      *status = RANGE_CHECKED_BYTE_ERROR;
+    }
+    // Return an arbitrary value.
+    return 0;
+  }
+
+  const uint16 first = static_cast<uint16>(input[0]);
+  const uint16 second = static_cast<uint16>(input[1]);
+  const uint16 high = big_endian ? first : second;
+  const uint16 low = big_endian ? second : first;
+  return (high << 8) | low;
+}
+
+// Reads a signed 32-bit two's-complement integer from the four bytes at
+// 'input', most significant byte first if 'big_endian' is true and least
+// significant byte first otherwise.
+//
+// 'status' may be NULL, as it may be for Get32u(). When it is non-NULL and
+// does not hold RANGE_CHECKED_BYTE_SUCCESS after the read (either because the
+// read failed or because it already held an error on entry), 0 is returned as
+// an arbitrary value.
+int32 Get32s(const RangeCheckedBytePtr &input, const bool big_endian,
+             MemoryStatus *status) {
+  const uint32 unsigned_value = Get32u(input, big_endian, status);
+  // Only consult 'status' when the caller supplied one; Get32u() accepts a
+  // null pointer, so this function must not dereference it blindly.
+  if (status != NULL && *status != RANGE_CHECKED_BYTE_SUCCESS) {
+    // Return an arbitrary value.
+    return 0;
+  }
+
+  // Convert the two's-complement signed integer encoded in 'unsigned_value'
+  // into a signed representation in the implementation's native representation
+  // for signed integers.
+  // For all practical purposes, the same result could be obtained simply by
+  // casting unsigned_value to int32; the result of this is
+  // implementation-defined, but on all of the platforms we care about, it does
+  // what we want.
+  // The code below, however, arguably has the aesthetic advantage of being
+  // independent of the representation for signed integers chosen by the
+  // implementation, as long as 'int' and 'unsigned' have the required range to
+  // represent all of the required values.
+  // An optimized Blaze build (x64) compiles all of the following code to a
+  // no-op (as of this writing); i.e. the value that Get32u() returned in %eax
+  // is left unchanged.
+  if (unsigned_value == 0x80000000u) {
+    // Read here on why the constant expression is written this way:
+    // http://stackoverflow.com/questions/14695118
+    return -0x7fffffff - 1;
+  } else if (unsigned_value > 0x80000000u) {
+    // The expression
+    //   0xffffffffu - unsigned_value + 1
+    // is a portable way of flipping the sign of a twos-complement signed
+    // integer whose binary representation is stored in an unsigned integer.
+    // '0xffffffffu + 1' is used in preference to simply '0' because it makes
+    // it clearer that the correct result will be obtained even if an int is
+    // greater than 32 bits. The '0xffffffffu + 1' is "spread out" around
+    // 'unsigned_value' to prevent the compiler from warning about an
+    // integral constant overflow. ('0' would produce the correct result in
+    // this case too but would rely in a more subtle way on the rules for
+    // unsigned wraparound.)
+    return -static_cast<int32>(0xffffffffu - unsigned_value + 1);
+  } else {
+    return static_cast<int32>(unsigned_value);
+  }
+}
+
+// Reads an unsigned 32-bit integer from the four bytes at 'input', most
+// significant byte first if 'big_endian' is true and least significant byte
+// first otherwise.
+//
+// If fewer than four bytes remain, 0 is returned as an arbitrary value and,
+// provided 'status' is non-NULL and currently holds
+// RANGE_CHECKED_BYTE_SUCCESS, '*status' is set to RANGE_CHECKED_BYTE_ERROR.
+uint32 Get32u(const RangeCheckedBytePtr &input, const bool big_endian,
+              MemoryStatus *status) {
+  const bool enough_bytes = input.remainingLength() >= 4;
+  if (!enough_bytes) {
+    // An error that was recorded earlier is never overwritten.
+    const bool may_report = status && *status == RANGE_CHECKED_BYTE_SUCCESS;
+    if (may_report) {
+      *status = RANGE_CHECKED_BYTE_ERROR;
+    }
+    // Return an arbitrary value.
+    return 0;
+  }
+
+  const uint32 b0 = static_cast<uint32>(input[0]);
+  const uint32 b1 = static_cast<uint32>(input[1]);
+  const uint32 b2 = static_cast<uint32>(input[2]);
+  const uint32 b3 = static_cast<uint32>(input[3]);
+
+  if (big_endian) {
+    return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
+  }
+  return (b3 << 24) | (b2 << 16) | (b1 << 8) | b0;
+}
+
+} // namespace binary_parse
+} // namespace piex