aboutsummaryrefslogtreecommitdiff
path: root/src/binary_parse
diff options
context:
space:
mode:
Diffstat (limited to 'src/binary_parse')
-rwxr-xr-xsrc/binary_parse/cached_paged_byte_array.cc79
-rwxr-xr-xsrc/binary_parse/cached_paged_byte_array.h73
-rwxr-xr-xsrc/binary_parse/range_checked_byte_ptr.cc402
-rwxr-xr-xsrc/binary_parse/range_checked_byte_ptr.h609
4 files changed, 1163 insertions, 0 deletions
diff --git a/src/binary_parse/cached_paged_byte_array.cc b/src/binary_parse/cached_paged_byte_array.cc
new file mode 100755
index 0000000..83b6c03
--- /dev/null
+++ b/src/binary_parse/cached_paged_byte_array.cc
@@ -0,0 +1,79 @@
+// Copyright 2015 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// The cache layer works as follows:
+// The cache is implemented as a vector (of size 'cache_size') of shared
+// pointers to pages recently used. The least recently used page is stored
+// at the begining of the vector, the most recent at the end.
+
+#include "src/binary_parse/cached_paged_byte_array.h"
+
+#include <cstddef>
+
+namespace piex {
+namespace binary_parse {
+
+CachedPagedByteArray::CachedPagedByteArray(
+ const PagedByteArray* paged_byte_array, size_t cache_size)
+ : paged_byte_array_(paged_byte_array), cache_size_(cache_size) {}
+
+void CachedPagedByteArray::getPage(size_t page_index,
+ const unsigned char** begin,
+ const unsigned char** end,
+ PagedByteArray::PagePtr* page) const {
+ std::lock_guard<std::mutex> lock(mutex_);
+ size_t cache_index;
+ if (getFromCache(page_index, &cache_index)) {
+ // Cache hit, retrieve the page from the cache.
+ *begin = cached_pages_[cache_index].begin;
+ *end = cached_pages_[cache_index].end;
+ *page = cached_pages_[cache_index].page;
+
+ // Remove the page to insert it at the end of the cache later.
+ cached_pages_.erase(cached_pages_.begin() +
+ static_cast<std::ptrdiff_t>(cache_index));
+ } else {
+ // Cache miss, ask PagedByteArray to load the page.
+ paged_byte_array_->getPage(page_index, begin, end, page);
+
+ // If the cache is full, remove the first (least recently used) page.
+ if (cached_pages_.size() >= cache_size_) {
+ cached_pages_.erase(cached_pages_.begin());
+ }
+ }
+
+ // Cache the most recently used page to the end of the vector.
+ CachedPage cache_page;
+ cache_page.index = page_index;
+ cache_page.page = *page;
+ cache_page.begin = *begin;
+ cache_page.end = *end;
+ cached_pages_.push_back(cache_page);
+}
+
+bool CachedPagedByteArray::getFromCache(size_t page_index,
+ size_t* cache_index) const {
+ for (size_t i = 0; i < cached_pages_.size(); ++i) {
+ if (cached_pages_[i].index == page_index) {
+ *cache_index = i;
+ return true;
+ }
+ }
+ return false;
+}
+
+} // namespace binary_parse
+} // namespace piex
diff --git a/src/binary_parse/cached_paged_byte_array.h b/src/binary_parse/cached_paged_byte_array.h
new file mode 100755
index 0000000..26f0eae
--- /dev/null
+++ b/src/binary_parse/cached_paged_byte_array.h
@@ -0,0 +1,73 @@
+// Copyright 2015 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// LRU cache decorator for binary_parse::PagedByteArray subclasses.
+
+#ifndef PIEX_BINARY_PARSE_CACHED_PAGED_BYTE_ARRAY_H_
+#define PIEX_BINARY_PARSE_CACHED_PAGED_BYTE_ARRAY_H_
+
+#include <mutex>
+#include <vector>
+
+#if !defined(WIN32_LEAN_AND_MEAN)
+#define WIN32_LEAN_AND_MEAN
+#endif
+#include "src/binary_parse/range_checked_byte_ptr.h"
+
+namespace piex {
+namespace binary_parse {
+
+class CachedPagedByteArray : public PagedByteArray {
+ public:
+ // Decorates 'paged_byte_array' with a LRU cache layer of the size
+ // 'cache_size'.
+ explicit CachedPagedByteArray(const PagedByteArray* paged_byte_array,
+ size_t cache_size);
+
+ virtual size_t length() const { return paged_byte_array_->length(); }
+
+ virtual size_t pageSize() const { return paged_byte_array_->pageSize(); }
+
+ virtual void getPage(size_t page_index, const unsigned char** begin,
+ const unsigned char** end,
+ PagedByteArray::PagePtr* page) const;
+
+ private:
+ struct CachedPage {
+ size_t index;
+ PagedByteArray::PagePtr page;
+ const unsigned char* begin;
+ const unsigned char* end;
+ };
+
+ // Disallow copy construction and assignment.
+ CachedPagedByteArray(const CachedPagedByteArray&);
+ void operator=(const CachedPagedByteArray&);
+
+ // Gets the index of the page if it is in the cache and returns true, else
+ // returns false.
+ bool getFromCache(size_t page_index, size_t* cache_index) const;
+
+ mutable std::mutex mutex_;
+ const PagedByteArray* paged_byte_array_;
+ const size_t cache_size_;
+ mutable std::vector<CachedPage> cached_pages_;
+};
+
+} // namespace binary_parse
+} // namespace piex
+
+#endif // PIEX_BINARY_PARSE_CACHED_PAGED_BYTE_ARRAY_H_
diff --git a/src/binary_parse/range_checked_byte_ptr.cc b/src/binary_parse/range_checked_byte_ptr.cc
new file mode 100755
index 0000000..bbfdee2
--- /dev/null
+++ b/src/binary_parse/range_checked_byte_ptr.cc
@@ -0,0 +1,402 @@
+// Copyright 2015 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include "src/binary_parse/range_checked_byte_ptr.h"
+
+#include <assert.h>
+#include <cstddef>
+#include <cstring>
+
+namespace piex {
+namespace binary_parse {
+
+#ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE
+#define BREAK_IF_DEBUGGING() assert(false)
+#else
+#define BREAK_IF_DEBUGGING() assert(true)
+#endif
+
+namespace {
+class MemoryPagedByteArray : public PagedByteArray {
+ public:
+ MemoryPagedByteArray(const unsigned char *buffer, const size_t len);
+
+ virtual size_t length() const;
+ virtual size_t pageSize() const;
+ virtual void getPage(size_t page_index, const unsigned char **begin,
+ const unsigned char **end, PagePtr *page) const;
+
+ private:
+ const unsigned char *buffer_;
+ const size_t len_;
+};
+
+MemoryPagedByteArray::MemoryPagedByteArray(const unsigned char *buffer,
+ const size_t len)
+ : buffer_(buffer), len_(len) {}
+
+size_t MemoryPagedByteArray::length() const { return len_; }
+
+size_t MemoryPagedByteArray::pageSize() const { return len_; }
+
+void MemoryPagedByteArray::getPage(size_t /* page_index */,
+ const unsigned char **begin,
+ const unsigned char **end,
+ PagePtr *page) const {
+ *begin = buffer_;
+ *end = buffer_ + len_;
+ *page = PagePtr();
+}
+
+// A functor that does nothing. This is used as a no-op shared pointer
+// deallocator below.
+class NullFunctor {
+ public:
+ void operator()() {}
+ void operator()(PagedByteArray * /* p */) const {}
+};
+} // namespace
+
+PagedByteArray::~PagedByteArray() {}
+
+RangeCheckedBytePtr::RangeCheckedBytePtr()
+ : array_(),
+ page_data_(NULL),
+ current_pos_(0),
+ sub_array_begin_(0),
+ sub_array_end_(0),
+ page_begin_offset_(0),
+ current_page_len_(0),
+ error_flag_(RANGE_CHECKED_BYTE_ERROR) {}
+
+RangeCheckedBytePtr::RangeCheckedBytePtr(const unsigned char *array,
+ const size_t len)
+ : array_(new MemoryPagedByteArray(array, len)),
+ page_data_(NULL),
+ current_pos_(0),
+ sub_array_begin_(0),
+ sub_array_end_(len),
+ page_begin_offset_(0),
+ current_page_len_(0),
+ error_flag_(RANGE_CHECKED_BYTE_SUCCESS) {
+ assert(array);
+ if (array == NULL) {
+ error_flag_ = RANGE_CHECKED_BYTE_ERROR;
+ }
+}
+
+RangeCheckedBytePtr::RangeCheckedBytePtr(PagedByteArray *array)
+ : array_(array, NullFunctor()),
+ page_data_(NULL),
+ current_pos_(0),
+ sub_array_begin_(0),
+ sub_array_end_(array->length()),
+ page_begin_offset_(0),
+ current_page_len_(0),
+ error_flag_(RANGE_CHECKED_BYTE_SUCCESS) {}
+
+RangeCheckedBytePtr RangeCheckedBytePtr::invalidPointer() {
+ return RangeCheckedBytePtr();
+}
+
+RangeCheckedBytePtr RangeCheckedBytePtr::pointerToSubArray(
+ size_t pos, size_t length) const {
+ RangeCheckedBytePtr sub_result = (*this) + pos;
+ if (!sub_result.errorOccurred() && length <= sub_result.remainingLength()) {
+ sub_result.sub_array_begin_ = sub_result.current_pos_;
+ sub_result.sub_array_end_ = sub_result.sub_array_begin_ + length;
+
+ // Restrict the boundaries of the current page to the newly set sub-array.
+ sub_result.restrictPageToSubArray();
+
+ return sub_result;
+ } else {
+ error_flag_ = RANGE_CHECKED_BYTE_ERROR;
+ return invalidPointer();
+ }
+}
+
+size_t RangeCheckedBytePtr::offsetInArray() const {
+ // sub_array_begin_ <= current_pos_ is a class invariant, but protect
+ // against violations of this invariant.
+ if (sub_array_begin_ <= current_pos_) {
+ return current_pos_ - sub_array_begin_;
+ } else {
+ assert(false);
+ return 0;
+ }
+}
+
+std::string RangeCheckedBytePtr::substr(size_t pos, size_t length) const {
+ std::vector<unsigned char> bytes = extractBytes(pos, length);
+ std::string result;
+ result.reserve(bytes.size());
+ for (size_t i = 0; i < bytes.size(); ++i) {
+ result.push_back(static_cast<char>(bytes[i]));
+ }
+ return result;
+}
+
+std::vector<unsigned char> RangeCheckedBytePtr::extractBytes(
+ size_t pos, size_t length) const {
+ std::vector<unsigned char> result;
+ if (pos + length < pos /* overflow */ || remainingLength() < pos + length) {
+ BREAK_IF_DEBUGGING();
+ error_flag_ = RANGE_CHECKED_BYTE_ERROR_OVERFLOW;
+ return result;
+ }
+ result.reserve(length);
+ for (size_t i = 0; i < length; ++i) {
+ result.push_back((*this)[pos + i]);
+ }
+ return result;
+}
+
+bool operator==(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y) {
+ if (x.array_ != y.array_) {
+ assert(false);
+ return false;
+ }
+
+ return x.current_pos_ == y.current_pos_;
+}
+
+bool operator!=(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y) {
+ return !(x == y);
+}
+
+void RangeCheckedBytePtr::loadPageForOffset(size_t offset) const {
+ // The offset should always lie within the bounds of the sub-array (this
+ // condition is enforced at the callsite). However, even if the offset lies
+ // outside the sub-array, the restrictPageToSubArray() call at the end
+ // ensures that the object is left in a consistent state that maintains the
+ // class invariants.
+ assert(offset >= sub_array_begin_ && offset < sub_array_end_);
+
+ // Ensure that offset lies within the array.
+ if (offset >= array_->length()) {
+ assert(false);
+ return;
+ }
+
+ // Determine the index of the page to request.
+ size_t page_index = offset / array_->pageSize();
+
+ // Get the page.
+ const unsigned char *page_begin;
+ const unsigned char *page_end;
+ array_->getPage(page_index, &page_begin, &page_end, &page_);
+
+ // Ensure that the page has the expected length (as specified in the
+ // PagedByteArray interface).
+ size_t expected_page_size = array_->pageSize();
+ if (page_index == (array_->length() - 1) / array_->pageSize()) {
+ expected_page_size = array_->length() - array_->pageSize() * page_index;
+ }
+ if ((page_end < page_begin) ||
+ (static_cast<size_t>(page_end - page_begin) != expected_page_size)) {
+ assert(false);
+ return;
+ }
+
+ // Remember information about page.
+ page_data_ = page_begin;
+ page_begin_offset_ = page_index * array_->pageSize();
+ current_page_len_ = static_cast<size_t>(page_end - page_begin);
+
+ // Restrict the boundaries of the page to lie within the sub-array.
+ restrictPageToSubArray();
+}
+
+void RangeCheckedBytePtr::restrictPageToSubArray() const {
+ // Restrict the current page's boundaries so that it is always contained
+ // completely within the extent of the sub-array.
+ // This function is purposely designed to work correctly in the following
+ // two special cases:
+ // a) The current page lies entirely outside the sub-array. In this case,
+ // current_page_len_ will be set to zero. page_data_ may either remain
+ // unchanged or may be changed to point one element beyond the end of the
+ // page, depending on whether the current page lies before or after the
+ // sub-array.
+ // b) The current page is in the state as initialized by the constructor
+ // (i.e. page_data_ is NULL and current_page_len_ is zero). In this case,
+ // page_data_ and current_page_len_ will remain unchanged.
+
+ // Does the beginning of the page lie before the beginning of the sub-array?
+ if (page_begin_offset_ < sub_array_begin_) {
+ // Compute amount by which to shorten page.
+ size_t amount_to_shorten = sub_array_begin_ - page_begin_offset_;
+ if (amount_to_shorten > current_page_len_) {
+ amount_to_shorten = current_page_len_;
+ }
+
+ // Adjust beginning of page accordingly.
+ page_begin_offset_ += amount_to_shorten;
+ page_data_ += amount_to_shorten;
+ current_page_len_ -= amount_to_shorten;
+ }
+
+ // Does the end of the page lie beyond the end of the sub-array?
+ if (page_begin_offset_ + current_page_len_ > sub_array_end_) {
+ // Reduce length of page accordingly.
+ size_t new_len = sub_array_end_ - page_begin_offset_;
+ if (new_len > current_page_len_) {
+ new_len = current_page_len_;
+ }
+ current_page_len_ = new_len;
+ }
+}
+
+int memcmp(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y,
+ size_t num) {
+ std::vector<unsigned char> x_vec = x.extractBytes(0, num);
+ std::vector<unsigned char> y_vec = y.extractBytes(0, num);
+
+ if (!x.errorOccurred() && !y.errorOccurred()) {
+ return ::memcmp(&x_vec[0], &y_vec[0], num);
+ } else {
+ // return an arbitrary value
+ return -1;
+ }
+}
+
+int strcmp(const RangeCheckedBytePtr &x, const std::string &y) {
+ std::vector<unsigned char> x_vec = x.extractBytes(0, y.length());
+
+ if (!x.errorOccurred()) {
+ return ::memcmp(&x_vec[0], y.c_str(), y.length());
+ } else {
+ // return an arbitrary value
+ return -1;
+ }
+}
+
+size_t strlen(const RangeCheckedBytePtr &src) {
+ size_t len = 0;
+ RangeCheckedBytePtr str = src;
+ while (!str.errorOccurred() && (str[0] != '\0')) {
+ str++;
+ len++;
+ }
+ return len;
+}
+
+int16 Get16s(const RangeCheckedBytePtr &input, const bool big_endian,
+ MemoryStatus *status) {
+ const uint16 unsigned_value = Get16u(input, big_endian, status);
+ if (*status != RANGE_CHECKED_BYTE_SUCCESS) {
+ // Return an arbitrary value.
+ return 0;
+ }
+
+ // Convert the two's-complement signed integer encoded in 'unsigned_value'
+ // into a signed representation in the implementation's native representation
+ // for signed integers. An optimized Blaze build (x64) compiles all of the
+ // following code to a no-op (as of this writing).
+ // For further details, see the corresponding comment in Get32s().
+ if (unsigned_value == 0x8000u) {
+ return static_cast<int16>(-0x8000);
+ } else if (unsigned_value > 0x8000u) {
+ return -static_cast<int16>(0x10000u - unsigned_value);
+ } else {
+ return static_cast<int16>(unsigned_value);
+ }
+}
+
+uint16 Get16u(const RangeCheckedBytePtr &input, const bool big_endian,
+ MemoryStatus *status) {
+ if (input.remainingLength() < 2) {
+ if (status && *status == RANGE_CHECKED_BYTE_SUCCESS) {
+ *status = RANGE_CHECKED_BYTE_ERROR;
+ }
+ // Return an arbitrary value.
+ return 0;
+ }
+ if (big_endian) {
+ return (static_cast<uint16>(input[0]) << 8) | static_cast<uint16>(input[1]);
+ } else {
+ return (static_cast<uint16>(input[1]) << 8) | static_cast<uint16>(input[0]);
+ }
+}
+
+int32 Get32s(const RangeCheckedBytePtr &input, const bool big_endian,
+ MemoryStatus *status) {
+ const uint32 unsigned_value = Get32u(input, big_endian, status);
+ if (*status != RANGE_CHECKED_BYTE_SUCCESS) {
+ // Return an arbitrary value.
+ return 0;
+ }
+
+ // Convert the two's-complement signed integer encoded in 'unsigned_value'
+ // into a signed representation in the implementation's native representation
+ // for signed integers.
+ // For all practical purposes, the same result could be obtained simply by
+ // casting unsigned_value to int32; the result of this is
+ // implementation-defined, but on all of the platforms we care about, it does
+ // what we want.
+ // The code below, however, arguably has the aesthetic advantage of being
+ // independent of the representation for signed integers chosen by the
+ // implementation, as long as 'int' and 'unsigned' have the required range to
+ // represent all of the required values.
+ // An optimized Blaze build (x64) compiles all of the following code to a
+ // no-op (as of this writing); i.e. the value that Get32u() returned in %eax
+ // is left unchanged.
+ if (unsigned_value == 0x80000000u) {
+ // Read here on why the constant expression is written this way:
+ // http://stackoverflow.com/questions/14695118
+ return -0x7fffffff - 1;
+ } else if (unsigned_value > 0x80000000u) {
+ // The expression
+ // 0xffffffffu - unsigned_value + 1
+ // is a portable way of flipping the sign of a twos-complement signed
+ // integer whose binary representation is stored in an unsigned integer.
+ // '0xffffffffu + 1' is used in preference to simply '0' because it makes
+ // it clearer that the correct result will be obtained even if an int is
+ // greater than 32 bits. The '0xffffffffu + 1' is "spread out" around
+ // 'unsigned_value' to prevent the compiler from warning about an
+ // integral constant overflow. ('0' would produce the correct result in
+ // this case too but would rely in a more subtle way on the rules for
+ // unsigned wraparound.)
+ return -static_cast<int32>(0xffffffffu - unsigned_value + 1);
+ } else {
+ return static_cast<int32>(unsigned_value);
+ }
+}
+
+uint32 Get32u(const RangeCheckedBytePtr &input, const bool big_endian,
+ MemoryStatus *status) {
+ if (input.remainingLength() < 4) {
+ if (status && *status == RANGE_CHECKED_BYTE_SUCCESS) {
+ *status = RANGE_CHECKED_BYTE_ERROR;
+ }
+ // Return an arbitrary value.
+ return 0;
+ }
+ if (big_endian) {
+ return (static_cast<uint32>(input[0]) << 24) |
+ (static_cast<uint32>(input[1]) << 16) |
+ (static_cast<uint32>(input[2]) << 8) |
+ (static_cast<uint32>(input[3]) << 0);
+ } else {
+ return (static_cast<uint32>(input[3]) << 24) |
+ (static_cast<uint32>(input[2]) << 16) |
+ (static_cast<uint32>(input[1]) << 8) |
+ (static_cast<uint32>(input[0]) << 0);
+ }
+}
+
+} // namespace binary_parse
+} // namespace piex
diff --git a/src/binary_parse/range_checked_byte_ptr.h b/src/binary_parse/range_checked_byte_ptr.h
new file mode 100755
index 0000000..a0eadbb
--- /dev/null
+++ b/src/binary_parse/range_checked_byte_ptr.h
@@ -0,0 +1,609 @@
+// Copyright 2015 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef PIEX_BINARY_PARSE_RANGE_CHECKED_BYTE_PTR_H_
+#define PIEX_BINARY_PARSE_RANGE_CHECKED_BYTE_PTR_H_
+
+#include <assert.h>
+
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace piex {
+namespace binary_parse {
+
+// Since NaCl does not comply to C++11 we can not just use stdint.h.
+typedef unsigned short uint16; // NOLINT
+typedef short int16; // NOLINT
+typedef unsigned int uint32;
+typedef int int32;
+
+enum MemoryStatus {
+ RANGE_CHECKED_BYTE_SUCCESS = 0,
+ RANGE_CHECKED_BYTE_ERROR = 1,
+ RANGE_CHECKED_BYTE_ERROR_OVERFLOW = 2,
+ RANGE_CHECKED_BYTE_ERROR_UNDERFLOW = 3,
+};
+
+// Interface that RangeCheckedBytePtr uses to access the underlying array of
+// bytes. This allows RangeCheckedBytePtr to be used to access data as if it
+// were stored contiguously in memory, even if the data is in fact split up
+// into non-contiguous chunks and / or does not reside in memory.
+//
+// The only requirement is that the data can be read in pages of a fixed (but
+// configurable) size. Notionally, the byte array (which contains length()
+// bytes) is split up into non-overlapping pages of pageSize() bytes each.
+// (The last page may be shorter if length() is not a multiple of pageSize().)
+// There are therefore (length() - 1) / pageSize() + 1 such pages, with indexes
+// 0 through (length() - 1) / pageSize(). Page i contains the bytes from offset
+// i * pageSize() in the array up to and including the byte at offset
+// (i + 1) * pageSize() - 1 (or, in the case of the last page, length() - 1).
+//
+// In essence, RangeCheckedBytePtr and PagedByteArray together provide a poor
+// man's virtual-memory-and-memory-mapped-file work-alike in situations where
+// virtual memory cannot be used or would consume too much virtual address
+// space.
+//
+// Thread safety: In general, subclasses implementing this interface should
+// ensure that the member functions are thread-safe. It will then be safe to
+// access the same array from multiple threads. (Note that RangeCheckedBytePtr
+// itself is not thread-safe in the sense that a single instance of
+// RangeCheckedBytePtr cannot be used concurrently from multiple threads; it
+// is, however, safe to use different RangeCheckedBytePtr instances in
+// different threads to access the same PagedByteArray concurrently, assuming
+// that the PagedByteArray implementation is thread-safe.)
+class PagedByteArray {
+ public:
+ // Base class for pages in the byte array. Implementations of PagedByteArray
+ // can create a subclass of the Page class to manage the lifetime of buffers
+ // associated with a page returned by getPage(). For example, a
+ // PagedByteArray backed by a file might define a Page subclass like this:
+ //
+ // class FilePage : public Page {
+ // std::vector<unsigned char> bytes;
+ // };
+ //
+ // The corresponding getPage() implementation could then look like this:
+ //
+ // void getPage(size_t page_index, const unsigned char** begin,
+ // const unsigned char** end, std::shared_ptr<Page>* page)
+ // {
+ // // Create a new page.
+ // std::shared_ptr<FilePage> file_page(new FilePage());
+ //
+ // // Read contents of page from file into file_page->bytes.
+ // [...]
+ //
+ // // Set *begin and *end to point to beginning and end of
+ // // file_page->bytes vector.
+ // *begin = &file_page->bytes[0];
+ // *end = *begin + file_page->bytes.size();
+ //
+ // // Return page to caller
+ // *page = file_page;
+ // }
+ //
+ // In this way, the storage associated with the page (the FilePage::bytes
+ // vector) will be kept alive until the RangeCheckedBytePtr releases the
+ // shared pointer.
+ class Page {};
+
+ typedef std::shared_ptr<Page> PagePtr;
+
+ virtual ~PagedByteArray();
+
+ // Returns the length of the array in bytes. The value returned must remain
+ // the same on every call for the entire lifetime of the object.
+ virtual size_t length() const = 0;
+
+ // Returns the length of each page in bytes. (The last page may be shorter
+ // than pageSize() if length() is not a multiple of pageSize() -- see also
+ // the class-wide comment above.) The value returned must remain the same on
+ // every call for the entire lifetime of the object.
+ virtual size_t pageSize() const = 0;
+
+ // Returns a pointer to a memory buffer containing the data for the page
+ // with index "page_index".
+ //
+ // *begin is set to point to the first byte of the page; *end is set to point
+ // one byte beyond the last byte in the page. This implies that:
+ // - (*end - *begin) == pageSize() for every page except the last page
+ // - (*end - *begin) == length() - pageSize() * ((length() - 1) / pageSize())
+ // for the last page
+ //
+ // *page will be set to a SharedPtr that the caller will hold on to until
+ // it no longer needs to access the memory buffer. The memory buffer will
+ // remain valid until the SharedPtr is released or the PagedByteArray object
+ // is destroyed. An implementation may choose to return a null SharedPtr;
+ // this indicates that the memory buffer will remain valid until the
+ // PagedByteArray object is destroyed, even if the caller does not hold on to
+ // the SharedPtr. (This is intended as an optimization that some
+ // implementations may choose to take advantage of, as a null SharedPtr is
+ // cheaper to copy.)
+ virtual void getPage(size_t page_index, const unsigned char **begin,
+ const unsigned char **end, PagePtr *page) const = 0;
+};
+
+typedef std::shared_ptr<PagedByteArray> PagedByteArrayPtr;
+
+// Smart pointer that has the same semantics as a "const unsigned char *" (plus
+// some convenience functions) but provides range checking and the ability to
+// access arrays that are not contiguous in memory or do not reside entirely in
+// memory (through the PagedByteArray interface).
+//
+// In the following, we abbreviate RangeCheckedBytePtr as RCBP.
+//
+// The intent of this class is to allow easy security hardening of code that
+// parses binary data structures using raw byte pointers. To do this, only the
+// declarations of the pointers need to be changed; the code that uses the
+// pointers can remain unchanged.
+//
+// If an illegal operation occurs on a pointer, an error flag is set, and all
+// read operations from this point on return 0. This means that error checking
+// need not be done after every access; it is sufficient to check the error flag
+// (using errorOccurred()) once before the RCBP is destroyed. Again, this allows
+// the majority of the parsing code to remain unchanged. (Note caveats below
+// that apply if a copy of the pointer is created.)
+//
+// Legal operations are exactly the ones that would be legal on a raw C++
+// pointer. Read accesses are legal if they fall within the underlying array. A
+// RCBP may point to any element in the underlying array or one element beyond
+// the end of the array.
+//
+// For brevity, the documentation for individual member functions does not state
+// explicitly that the error flag will be set on out-of-range operations.
+//
+// Note:
+//
+// - Just as for raw pointers, it is legal for a pointer to point one element
+// beyond the end of the array, but it is illegal to use operator*() on such a
+// pointer.
+//
+// - If a copy of an RCBP is created, then performing illegal operations on the
+// copy affects the error flag of the copy, but not of the original pointer.
+// Note that using operator+ and operator- also creates a copy of the pointer.
+// For example:
+//
+// // Assume we have an RCBP called "p" and a size_t variable called
+// // "offset".
+// RangeCheckedBytePtr sub_data_structure = p + offset;
+//
+// If "offset" is large enough to cause an out-of-range access, then
+// sub_data_structure.errorOccurred() will be true, but p.errorOccurred() will
+// still be false. The error flag for sub_data_structure therefore needs to be
+// checked before it is destroyed.
+class RangeCheckedBytePtr {
+ private:
+ // This class maintains the following class invariants:
+ // - page_data_ always points to a buffer of at least current_page_len_
+ // bytes.
+ //
+ // - The current position lies within the sub-array, i.e.
+ // sub_array_begin_ <= current_pos_ <= sub_array_end_
+ //
+ // - The sub-array is entirely contained within the array, i.e.
+ // 0 <= sub_array_begin <= sub_array_end <= array_->length()
+ //
+ // - If the current page is non-empty, it lies completely within the
+ // sub-array, i.e.
+ // if _current_page_len_ >= 0, then
+ // sub_array_begin_ <= page_begin_offset_
+ // and
+ // page_begin_offset_ + current_page_len_ <= sub_array_end_
+ // (See also restrictPageToSubArray().)
+ // (If _current_page_len_ == 0, then page_begin_offset_ may lie outside
+ // the sub-array; this condition is harmless. Additional logic would be
+ // required to make page_begin_offset_ lie within the sub-array in this
+ // case, and it would serve no purpose other than to make the invariant
+ // slightly simpler.)
+ //
+ // Note that it is _not_ a class invariant that current_pos_ needs to lie
+ // within the current page. Making this an invariant would have two
+ // undesirable consequences:
+ // a) When operator[] is called with an index that lies beyond the end of
+ // the current page, it would need to temporarily load the page that
+ // contains this index, but it wouldn't be able to "retain" the page
+ // (i.e. make it the current page) because that would violate the
+ // proposed invariant. This would lead to inefficient behavior in the
+ // case where code accesses a large range of indices beyond the end of
+ // the page because a page would need to be loaded temporarily on each
+ // access.
+ // b) It would require more code: loadPageForOffset() would need to be
+ // called anywhere that current_pos_ changes (whereas, with the present
+ // approach, loadPageForOffset() is only called in operator[]).
+
+ // PagedByteArray that is accessed by this pointer.
+ PagedByteArrayPtr array_;
+
+ // Pointer to the current page.
+ mutable PagedByteArray::PagePtr page_;
+
+ // Pointer to the current page's data buffer.
+ mutable const unsigned char *page_data_;
+
+ // All of the following offsets are defined relative to the beginning of
+ // the array defined by the PagedByteArray array_.
+
+ // Array offset that the pointer points to.
+ size_t current_pos_;
+
+ // Start offset of the current sub-array.
+ size_t sub_array_begin_;
+
+ // End offset of the current sub-array.
+ size_t sub_array_end_;
+
+ // Array offset corresponding to the "page_data_" pointer.
+ mutable size_t page_begin_offset_;
+
+ // Length of the current page.
+ mutable size_t current_page_len_;
+
+ // Error flag. This is mutable because methods that don't affect the value
+ // of the pointer itself (such as operator[]) nevertheless need to be able to
+ // signal error conditions.
+ mutable MemoryStatus error_flag_;
+
+ RangeCheckedBytePtr();
+
+ public:
+ // Creates a pointer that points to the first element of 'array', which has a
+ // length of 'len'. The caller must ensure that the array remains valid until
+ // this pointer and any pointers created from it have been destroyed.
+ // Note: 'len' may be zero, but 'array' must in this case still be a valid,
+ // non-null pointer.
+ explicit RangeCheckedBytePtr(const unsigned char *array, const size_t len);
+
+ // Creates a pointer that points to the first element of the given
+ // PagedByteArray. The caller must ensure that this PagedByteArray remains
+ // valid until this pointer and any pointers created from it have been
+ // destroyed.
+ explicit RangeCheckedBytePtr(PagedByteArray *array);
+
+ // Creates an invalid RangeCheckedBytePtr. Calling errorOccurred() on the
+ // result of invalidPointer() always returns true.
+ // Do not check a RangeCheckedBytePtr for validity by comparing against
+ // invalidPointer(); use errorOccurred() instead.
+ static RangeCheckedBytePtr invalidPointer();
+
+ // Returns a RangeCheckedBytePtr that points to a sub-array of this pointer's
+ // underlying array. The sub-array starts at position 'pos' relative to this
+ // pointer and is 'length' bytes long. The sub-array must lie within this
+ // pointer's array, i.e. pos + length <= remainingLength() must hold. If this
+ // condition is violated, an invalid pointer is returned.
+ RangeCheckedBytePtr pointerToSubArray(size_t pos, size_t length) const;
+
+ // Returns the number of bytes remaining in the array from this pointer's
+ // present position.
+ inline size_t remainingLength() const;
+
+ // Returns the offset (or index) in the underlying array that this pointer
+ // points to. If this pointer was created using pointerToSubArray(), the
+ // offset is relative to the beginning of the sub-array (and not relative to
+ // the beginning of the original array).
+ size_t offsetInArray() const;
+
+ // Returns whether an out-of-bounds error has ever occurred on this pointer in
+ // the past. An error occurs if a caller attempts to read from a position
+ // outside the bounds of the array or to move the pointer outside the bounds
+ // of the array.
+ //
+ // The error flag is never reset. Once an error has occurred,
+ // all subsequent attempts to read from the pointer (even within the bounds of
+ // the array) return 0.
+ //
+ // Note that it is permissible for a pointer to point one element past the end
+ // of the array, but it is not permissible to read from this position. This is
+ // equivalent to the semantics of raw C++ pointers.
+ inline bool errorOccurred() const;
+
+ // Returns the substring of length 'length' located at position 'pos' relative
+ // to this pointer.
+ std::string substr(size_t pos, size_t length) const;
+
+ // Returns 'length' number of bytes from the array starting at position 'pos'
+ // relative to this pointer.
+ std::vector<unsigned char> extractBytes(size_t pos, size_t length) const;
+
+ // Equivalent to calling convert(0, output).
+ template <class T>
+ bool convert(T *output) const {
+ union {
+ T t;
+ unsigned char ch[sizeof(T)];
+ } buffer;
+ for (size_t i = 0; i < sizeof(T); i++) {
+ buffer.ch[i] = (*this)[i];
+ }
+ if (!errorOccurred()) {
+ *output = buffer.t;
+ }
+ return !errorOccurred();
+ }
+
+ // Reinterprets this pointer as a pointer to an array of T, then returns the
+ // element at position 'index' in this array of T. (Note that this position
+ // corresponds to position index * sizeof(T) in the underlying byte array.)
+ //
+ // Returns true if successful; false if an out-of-range error occurred or if
+ // the error flag was already set on the pointer when calling convert().
+ //
+ // The conversion from a sequence of sizeof(T) bytes to a T is performed in an
+ // implementation-defined fashion. This conversion is equivalent to the one
+ // obtained using the following union by filling the array 'ch' and then
+ // reading the member 't':
+ //
+ // union {
+ // T t;
+ // unsigned char ch[sizeof(T)];
+ // };
+ //
+ // Callers should note that, among other things, the conversion is not
+ // endian-agnostic with respect to the endianness of T.
+ template <class T>
+ bool convert(size_t index, T *output) const {
+ RangeCheckedBytePtr p = (*this) + index * sizeof(T);
+ bool valid = p.convert(output);
+ if (!valid) {
+ error_flag_ = p.error_flag_;
+ }
+ return valid;
+ }
+
+ // Operators. Unless otherwise noted, these operators have the same semantics
+ // as the same operators on an unsigned char pointer.
+
+ // If an out-of-range access is attempted, returns 0 (and sets the error
+ // flag).
+ inline unsigned char operator[](size_t i) const;
+
+ inline unsigned char operator*() const;
+
+ inline RangeCheckedBytePtr &operator++();
+
+ inline RangeCheckedBytePtr operator++(int);
+
+ inline RangeCheckedBytePtr &operator--();
+
+ inline RangeCheckedBytePtr operator--(int);
+
+ inline RangeCheckedBytePtr &operator+=(size_t x);
+
+ inline RangeCheckedBytePtr &operator-=(size_t x);
+
+ inline friend RangeCheckedBytePtr operator+(const RangeCheckedBytePtr &p,
+ size_t x);
+
+ inline friend RangeCheckedBytePtr operator-(const RangeCheckedBytePtr &p,
+ size_t x);
+
+ // Tests whether x and y point at the same position in the underlying array.
+ // Two pointers that point at the same position but have different
+ // sub-arrays still compare equal. It is not legal to compare two pointers
+ // that point into different paged byte arrays.
+ friend bool operator==(const RangeCheckedBytePtr &x,
+ const RangeCheckedBytePtr &y);
+
+ // Returns !(x == y).
+ friend bool operator!=(const RangeCheckedBytePtr &x,
+ const RangeCheckedBytePtr &y);
+
+ private:
+ void loadPageForOffset(size_t offset) const;
+ void restrictPageToSubArray() const;
+};
+
+// Returns the result of calling std::memcmp() on the sequences of 'num' bytes
+// pointed to by 'x' and 'y'. The result is undefined if either
+// x.remainingLength() or y.remainingLength() is less than 'num'.
+int memcmp(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y,
+ size_t num);
+
+// Returns the result of calling std::memcmp() (note: _not_ strcmp()) on the
+// y.length() number of bytes pointed to by 'x' and the string 'y'. The result
+// is undefined if x.remainingLength() is less than y.length().
+int strcmp(const RangeCheckedBytePtr &x, const std::string &y);
+
+// Returns the length of the zero-terminated string starting at 'src' (not
+// including the '\0' terminator). If no '\0' occurs before the end of the
+// array, the result is undefined.
+size_t strlen(const RangeCheckedBytePtr &src);
+
+// Integer decoding functions.
+//
+// These functions read signed (Get16s, Get32s) or unsigned (Get16u, Get32u)
+// integers from 'input'. The integer read from the input can be specified to be
+// either big-endian (big_endian == true) or little-endian
+// (little_endian == false). Signed integers are read in two's-complement
+// representation. The integer read in the specified format is then converted to
+// the implementation's native integer representation and returned. In other
+// words, the semantics of these functions are independent of the
+// implementation's endianness and signed integer representation.
+//
+// If an out-of-range error occurs, these functions do _not_ set the error flag
+// on 'input'. Instead, they set 'status' to RANGE_CHECKED_BYTE_ERROR and return
+// 0.
+//
+// Note:
+// - If an error occurs and 'status' is already set to an error value (i.e. a
+// value different from RANGE_CHECKED_BYTE_SUCCESS), the value of 'status' is
+// left unchanged.
+// - If the operation is successful, 'status' is left unchanged (i.e. it is not
+// actively set to RANGE_CHECKED_BYTE_SUCCESS).
+//
+// Together, these two properties mean that these functions can be used to read
+// a number of integers in succession with only a single error check, like this:
+//
+// MemoryStatus status = RANGE_CHECKED_BYTE_SUCCESS;
+// int16 val1 = Get16s(input, false, &status);
+// int32 val2 = Get32s(input + 2, false, &status);
+// uint32 val3 = Get32u(input + 6, false, &status);
+// if (status != RANGE_CHECKED_BYTE_SUCCESS) {
+// // error handling
+// }
+int16 Get16s(const RangeCheckedBytePtr &input, const bool big_endian,
+ MemoryStatus *status);
+uint16 Get16u(const RangeCheckedBytePtr &input, const bool big_endian,
+ MemoryStatus *status);
+int32 Get32s(const RangeCheckedBytePtr &input, const bool big_endian,
+ MemoryStatus *status);
+uint32 Get32u(const RangeCheckedBytePtr &input, const bool big_endian,
+ MemoryStatus *status);
+
+size_t RangeCheckedBytePtr::remainingLength() const {
+ if (!errorOccurred()) {
+ // current_pos_ <= sub_array_end_ is a class invariant, but protect
+ // against violations of this invariant.
+ if (current_pos_ <= sub_array_end_) {
+ return sub_array_end_ - current_pos_;
+ } else {
+ assert(false);
+ return 0;
+ }
+ } else {
+ return 0;
+ }
+}
+
+bool RangeCheckedBytePtr::errorOccurred() const {
+ return error_flag_ != RANGE_CHECKED_BYTE_SUCCESS;
+}
+
+unsigned char RangeCheckedBytePtr::operator[](size_t i) const {
+ // Check that pointer doesn't have an error flag set.
+ if (!errorOccurred()) {
+ // Offset in array to read from.
+ const size_t read_offset = current_pos_ + i;
+
+ // Check for the common case first: The byte we want to read lies in the
+ // current page. For performance reasons, we don't check for the case
+ // "read_offset < page_begin_offset_" explicitly; if it occurs, it will
+ // lead to wraparound (which is well-defined for unsigned quantities), and
+ // this will cause the test "pos_in_page < current_page_len_" to fail.
+ size_t pos_in_page = read_offset - page_begin_offset_;
+ if (pos_in_page < current_page_len_) {
+ return page_data_[pos_in_page];
+ }
+
+ // Check that the offset we're trying to read lies within the sub-array
+ // we're allowed to access.
+ if (read_offset >= sub_array_begin_ && read_offset < sub_array_end_) {
+ // Read the page that contains the offset "read_offset".
+ loadPageForOffset(read_offset);
+
+ // Compute the position within the new page from which we need to read.
+ pos_in_page = read_offset - page_begin_offset_;
+
+ // After the call to loadPageForOffset(), read_offset must lie within
+ // the current page, and therefore pos_in_page must be less than the
+ // length of the page. We nevertheless check for this to protect against
+ // potential bugs in loadPageForOffset().
+ assert(pos_in_page < current_page_len_);
+ if (pos_in_page < current_page_len_) {
+ return page_data_[pos_in_page];
+ }
+ }
+ }
+
+// All error cases fall through to here.
+#ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE
+ assert(false);
+#endif
+ error_flag_ = RANGE_CHECKED_BYTE_ERROR_OVERFLOW;
+ // return 0, which represents the invalid value
+ return static_cast<unsigned char>(0);
+}
+
+unsigned char RangeCheckedBytePtr::operator*() const { return (*this)[0]; }
+
+RangeCheckedBytePtr &RangeCheckedBytePtr::operator++() {
+ if (current_pos_ < sub_array_end_) {
+ current_pos_++;
+ } else {
+#ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE
+ assert(false);
+#endif
+ error_flag_ = RANGE_CHECKED_BYTE_ERROR_OVERFLOW;
+ }
+ return *this;
+}
+
+RangeCheckedBytePtr RangeCheckedBytePtr::operator++(int) {
+ RangeCheckedBytePtr result(*this);
+ ++(*this);
+ return result;
+}
+
+RangeCheckedBytePtr &RangeCheckedBytePtr::operator--() {
+ if (current_pos_ > sub_array_begin_) {
+ current_pos_--;
+ } else {
+#ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE
+ assert(false);
+#endif
+ error_flag_ = RANGE_CHECKED_BYTE_ERROR_UNDERFLOW;
+ }
+ return *this;
+}
+
+RangeCheckedBytePtr RangeCheckedBytePtr::operator--(int) {
+ RangeCheckedBytePtr result(*this);
+ --(*this);
+ return result;
+}
+
+RangeCheckedBytePtr &RangeCheckedBytePtr::operator+=(size_t x) {
+ if (remainingLength() >= x) {
+ current_pos_ += x;
+ } else {
+#ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE
+ assert(false);
+#endif
+ error_flag_ = RANGE_CHECKED_BYTE_ERROR_OVERFLOW;
+ }
+ return *this;
+}
+
+RangeCheckedBytePtr &RangeCheckedBytePtr::operator-=(size_t x) {
+ if (x <= current_pos_ - sub_array_begin_) {
+ current_pos_ -= x;
+ } else {
+#ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE
+ assert(false);
+#endif
+ error_flag_ = RANGE_CHECKED_BYTE_ERROR_UNDERFLOW;
+ }
+ return *this;
+}
+
+RangeCheckedBytePtr operator+(const RangeCheckedBytePtr &p, size_t x) {
+ RangeCheckedBytePtr result(p);
+ result += x;
+ return result;
+}
+
+RangeCheckedBytePtr operator-(const RangeCheckedBytePtr &p, size_t x) {
+ RangeCheckedBytePtr result(p);
+ result -= x;
+ return result;
+}
+
+} // namespace binary_parse
+} // namespace piex
+
+#endif // PIEX_BINARY_PARSE_RANGE_CHECKED_BYTE_PTR_H_