aboutsummaryrefslogtreecommitdiff
path: root/src/binary_parse/range_checked_byte_ptr.cc
blob: bbfdee2fac536aab673c07e3296521a2804ac8e2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
// Copyright 2015 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
////////////////////////////////////////////////////////////////////////////////

#include "src/binary_parse/range_checked_byte_ptr.h"

#include <assert.h>
#include <cstddef>
#include <cstring>

namespace piex {
namespace binary_parse {

#ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE
#define BREAK_IF_DEBUGGING() assert(false)
#else
#define BREAK_IF_DEBUGGING() assert(true)
#endif

namespace {
class MemoryPagedByteArray : public PagedByteArray {
 public:
  MemoryPagedByteArray(const unsigned char *buffer, const size_t len);

  virtual size_t length() const;
  virtual size_t pageSize() const;
  virtual void getPage(size_t page_index, const unsigned char **begin,
                       const unsigned char **end, PagePtr *page) const;

 private:
  const unsigned char *buffer_;
  const size_t len_;
};

MemoryPagedByteArray::MemoryPagedByteArray(const unsigned char *buffer,
                                           const size_t len)
    : buffer_(buffer), len_(len) {}

size_t MemoryPagedByteArray::length() const { return len_; }

size_t MemoryPagedByteArray::pageSize() const { return len_; }

void MemoryPagedByteArray::getPage(size_t /* page_index */,
                                   const unsigned char **begin,
                                   const unsigned char **end,
                                   PagePtr *page) const {
  *begin = buffer_;
  *end = buffer_ + len_;
  *page = PagePtr();
}

// A functor that does nothing. This is used as a no-op shared pointer
// deallocator below.
class NullFunctor {
 public:
  void operator()() {}
  void operator()(PagedByteArray * /* p */) const {}
};
}  // namespace

PagedByteArray::~PagedByteArray() {}

RangeCheckedBytePtr::RangeCheckedBytePtr()
    : array_(),
      page_data_(NULL),
      current_pos_(0),
      sub_array_begin_(0),
      sub_array_end_(0),
      page_begin_offset_(0),
      current_page_len_(0),
      error_flag_(RANGE_CHECKED_BYTE_ERROR) {}

RangeCheckedBytePtr::RangeCheckedBytePtr(const unsigned char *array,
                                         const size_t len)
    : array_(new MemoryPagedByteArray(array, len)),
      page_data_(NULL),
      current_pos_(0),
      sub_array_begin_(0),
      sub_array_end_(len),
      page_begin_offset_(0),
      current_page_len_(0),
      error_flag_(RANGE_CHECKED_BYTE_SUCCESS) {
  assert(array);
  if (array == NULL) {
    error_flag_ = RANGE_CHECKED_BYTE_ERROR;
  }
}

RangeCheckedBytePtr::RangeCheckedBytePtr(PagedByteArray *array)
    : array_(array, NullFunctor()),
      page_data_(NULL),
      current_pos_(0),
      sub_array_begin_(0),
      sub_array_end_(array->length()),
      page_begin_offset_(0),
      current_page_len_(0),
      error_flag_(RANGE_CHECKED_BYTE_SUCCESS) {}

RangeCheckedBytePtr RangeCheckedBytePtr::invalidPointer() {
  return RangeCheckedBytePtr();
}

RangeCheckedBytePtr RangeCheckedBytePtr::pointerToSubArray(
    size_t pos, size_t length) const {
  RangeCheckedBytePtr sub_result = (*this) + pos;
  if (!sub_result.errorOccurred() && length <= sub_result.remainingLength()) {
    sub_result.sub_array_begin_ = sub_result.current_pos_;
    sub_result.sub_array_end_ = sub_result.sub_array_begin_ + length;

    // Restrict the boundaries of the current page to the newly set sub-array.
    sub_result.restrictPageToSubArray();

    return sub_result;
  } else {
    error_flag_ = RANGE_CHECKED_BYTE_ERROR;
    return invalidPointer();
  }
}

size_t RangeCheckedBytePtr::offsetInArray() const {
  // sub_array_begin_ <= current_pos_ is a class invariant, but protect
  // against violations of this invariant.
  if (sub_array_begin_ <= current_pos_) {
    return current_pos_ - sub_array_begin_;
  } else {
    assert(false);
    return 0;
  }
}

std::string RangeCheckedBytePtr::substr(size_t pos, size_t length) const {
  std::vector<unsigned char> bytes = extractBytes(pos, length);
  std::string result;
  result.reserve(bytes.size());
  for (size_t i = 0; i < bytes.size(); ++i) {
    result.push_back(static_cast<char>(bytes[i]));
  }
  return result;
}

std::vector<unsigned char> RangeCheckedBytePtr::extractBytes(
    size_t pos, size_t length) const {
  std::vector<unsigned char> result;
  if (pos + length < pos /* overflow */ || remainingLength() < pos + length) {
    BREAK_IF_DEBUGGING();
    error_flag_ = RANGE_CHECKED_BYTE_ERROR_OVERFLOW;
    return result;
  }
  result.reserve(length);
  for (size_t i = 0; i < length; ++i) {
    result.push_back((*this)[pos + i]);
  }
  return result;
}

bool operator==(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y) {
  if (x.array_ != y.array_) {
    assert(false);
    return false;
  }

  return x.current_pos_ == y.current_pos_;
}

bool operator!=(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y) {
  return !(x == y);
}

void RangeCheckedBytePtr::loadPageForOffset(size_t offset) const {
  // The offset should always lie within the bounds of the sub-array (this
  // condition is enforced at the callsite). However, even if the offset lies
  // outside the sub-array, the restrictPageToSubArray() call at the end
  // ensures that the object is left in a consistent state that maintains the
  // class invariants.
  assert(offset >= sub_array_begin_ && offset < sub_array_end_);

  // Ensure that offset lies within the array.
  if (offset >= array_->length()) {
    assert(false);
    return;
  }

  // Determine the index of the page to request.
  size_t page_index = offset / array_->pageSize();

  // Get the page.
  const unsigned char *page_begin;
  const unsigned char *page_end;
  array_->getPage(page_index, &page_begin, &page_end, &page_);

  // Ensure that the page has the expected length (as specified in the
  // PagedByteArray interface).
  size_t expected_page_size = array_->pageSize();
  if (page_index == (array_->length() - 1) / array_->pageSize()) {
    expected_page_size = array_->length() - array_->pageSize() * page_index;
  }
  if ((page_end < page_begin) ||
      (static_cast<size_t>(page_end - page_begin) != expected_page_size)) {
    assert(false);
    return;
  }

  // Remember information about page.
  page_data_ = page_begin;
  page_begin_offset_ = page_index * array_->pageSize();
  current_page_len_ = static_cast<size_t>(page_end - page_begin);

  // Restrict the boundaries of the page to lie within the sub-array.
  restrictPageToSubArray();
}

void RangeCheckedBytePtr::restrictPageToSubArray() const {
  // Restrict the current page's boundaries so that it is always contained
  // completely within the extent of the sub-array.
  // This function is purposely designed to work correctly in the following
  // two special cases:
  // a) The current page lies entirely outside the sub-array. In this case,
  //    current_page_len_ will be set to zero. page_data_ may either remain
  //    unchanged or may be changed to point one element beyond the end of the
  //    page, depending on whether the current page lies before or after the
  //    sub-array.
  // b) The current page is in the state as initialized by the constructor
  //    (i.e. page_data_ is NULL and current_page_len_ is zero). In this case,
  //    page_data_ and current_page_len_ will remain unchanged.

  // Does the beginning of the page lie before the beginning of the sub-array?
  if (page_begin_offset_ < sub_array_begin_) {
    // Compute amount by which to shorten page.
    size_t amount_to_shorten = sub_array_begin_ - page_begin_offset_;
    if (amount_to_shorten > current_page_len_) {
      amount_to_shorten = current_page_len_;
    }

    // Adjust beginning of page accordingly.
    page_begin_offset_ += amount_to_shorten;
    page_data_ += amount_to_shorten;
    current_page_len_ -= amount_to_shorten;
  }

  // Does the end of the page lie beyond the end of the sub-array?
  if (page_begin_offset_ + current_page_len_ > sub_array_end_) {
    // Reduce length of page accordingly.
    size_t new_len = sub_array_end_ - page_begin_offset_;
    if (new_len > current_page_len_) {
      new_len = current_page_len_;
    }
    current_page_len_ = new_len;
  }
}

int memcmp(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y,
           size_t num) {
  std::vector<unsigned char> x_vec = x.extractBytes(0, num);
  std::vector<unsigned char> y_vec = y.extractBytes(0, num);

  if (!x.errorOccurred() && !y.errorOccurred()) {
    return ::memcmp(&x_vec[0], &y_vec[0], num);
  } else {
    // return an arbitrary value
    return -1;
  }
}

int strcmp(const RangeCheckedBytePtr &x, const std::string &y) {
  std::vector<unsigned char> x_vec = x.extractBytes(0, y.length());

  if (!x.errorOccurred()) {
    return ::memcmp(&x_vec[0], y.c_str(), y.length());
  } else {
    // return an arbitrary value
    return -1;
  }
}

size_t strlen(const RangeCheckedBytePtr &src) {
  size_t len = 0;
  RangeCheckedBytePtr str = src;
  while (!str.errorOccurred() && (str[0] != '\0')) {
    str++;
    len++;
  }
  return len;
}

int16 Get16s(const RangeCheckedBytePtr &input, const bool big_endian,
             MemoryStatus *status) {
  const uint16 unsigned_value = Get16u(input, big_endian, status);
  if (*status != RANGE_CHECKED_BYTE_SUCCESS) {
    // Return an arbitrary value.
    return 0;
  }

  // Convert the two's-complement signed integer encoded in 'unsigned_value'
  // into a signed representation in the implementation's native representation
  // for signed integers. An optimized Blaze build (x64) compiles all of the
  // following code to a no-op (as of this writing).
  // For further details, see the corresponding comment in Get32s().
  if (unsigned_value == 0x8000u) {
    return static_cast<int16>(-0x8000);
  } else if (unsigned_value > 0x8000u) {
    return -static_cast<int16>(0x10000u - unsigned_value);
  } else {
    return static_cast<int16>(unsigned_value);
  }
}

uint16 Get16u(const RangeCheckedBytePtr &input, const bool big_endian,
              MemoryStatus *status) {
  if (input.remainingLength() < 2) {
    if (status && *status == RANGE_CHECKED_BYTE_SUCCESS) {
      *status = RANGE_CHECKED_BYTE_ERROR;
    }
    // Return an arbitrary value.
    return 0;
  }
  if (big_endian) {
    return (static_cast<uint16>(input[0]) << 8) | static_cast<uint16>(input[1]);
  } else {
    return (static_cast<uint16>(input[1]) << 8) | static_cast<uint16>(input[0]);
  }
}

int32 Get32s(const RangeCheckedBytePtr &input, const bool big_endian,
             MemoryStatus *status) {
  const uint32 unsigned_value = Get32u(input, big_endian, status);
  if (*status != RANGE_CHECKED_BYTE_SUCCESS) {
    // Return an arbitrary value.
    return 0;
  }

  // Convert the two's-complement signed integer encoded in 'unsigned_value'
  // into a signed representation in the implementation's native representation
  // for signed integers.
  // For all practical purposes, the same result could be obtained simply by
  // casting unsigned_value to int32; the result of this is
  // implementation-defined, but on all of the platforms we care about, it does
  // what we want.
  // The code below, however, arguably has the aesthetic advantage of being
  // independent of the representation for signed integers chosen by the
  // implementation, as long as 'int' and 'unsigned' have the required range to
  // represent all of the required values.
  // An optimized Blaze build (x64) compiles all of the following code to a
  // no-op (as of this writing); i.e. the value that Get32u() returned in %eax
  // is left unchanged.
  if (unsigned_value == 0x80000000u) {
    // Read here on why the constant expression is written this way:
    // http://stackoverflow.com/questions/14695118
    return -0x7fffffff - 1;
  } else if (unsigned_value > 0x80000000u) {
    // The expression
    //   0xffffffffu - unsigned_value + 1
    // is a portable way of flipping the sign of a twos-complement signed
    // integer whose binary representation is stored in an unsigned integer.
    // '0xffffffffu + 1' is used in preference to simply '0' because it makes
    // it clearer that the correct result will be obtained even if an int is
    // greater than 32 bits. The '0xffffffffu + 1' is "spread out" around
    // 'unsigned_value' to prevent the compiler from warning about an
    // integral constant overflow. ('0' would produce the correct result in
    // this case too but would rely in a more subtle way on the rules for
    // unsigned wraparound.)
    return -static_cast<int32>(0xffffffffu - unsigned_value + 1);
  } else {
    return static_cast<int32>(unsigned_value);
  }
}

uint32 Get32u(const RangeCheckedBytePtr &input, const bool big_endian,
              MemoryStatus *status) {
  if (input.remainingLength() < 4) {
    if (status && *status == RANGE_CHECKED_BYTE_SUCCESS) {
      *status = RANGE_CHECKED_BYTE_ERROR;
    }
    // Return an arbitrary value.
    return 0;
  }
  if (big_endian) {
    return (static_cast<uint32>(input[0]) << 24) |
           (static_cast<uint32>(input[1]) << 16) |
           (static_cast<uint32>(input[2]) << 8) |
           (static_cast<uint32>(input[3]) << 0);
  } else {
    return (static_cast<uint32>(input[3]) << 24) |
           (static_cast<uint32>(input[2]) << 16) |
           (static_cast<uint32>(input[1]) << 8) |
           (static_cast<uint32>(input[0]) << 0);
  }
}

}  // namespace binary_parse
}  // namespace piex