diff options
author | Yujie Qin <yujieqin@yujie-linux.lbc.corp.google.com> | 2015-11-25 10:46:09 +0100 |
---|---|---|
committer | Yujie Qin <yujieqin@yujie-linux.lbc.corp.google.com> | 2015-11-25 10:46:09 +0100 |
commit | b3023f44494512d077d2737de9ead724d55c4f25 (patch) | |
tree | 96f2ffd0e5dd3afe7579f67ed52ca03453ab124c | |
download | piex-b3023f44494512d077d2737de9ead724d55c4f25.tar.gz |
Add the piex project
-rw-r--r-- | LICENSE | 202 | ||||
-rw-r--r-- | OWNERS | 4 | ||||
-rw-r--r-- | README | 2 | ||||
-rwxr-xr-x | internal_include_do_not_delete.gypi | 2 | ||||
-rwxr-xr-x | piex.gyp | 76 | ||||
-rw-r--r-- | src/binary_parse/cached_paged_byte_array.cc | 76 | ||||
-rw-r--r-- | src/binary_parse/cached_paged_byte_array.h | 73 | ||||
-rw-r--r-- | src/binary_parse/range_checked_byte_ptr.cc | 400 | ||||
-rw-r--r-- | src/binary_parse/range_checked_byte_ptr.h | 503 | ||||
-rw-r--r-- | src/image_type_recognition/image_type_recognition_lite.cc | 861 | ||||
-rw-r--r-- | src/image_type_recognition/image_type_recognition_lite.h | 79 | ||||
-rw-r--r-- | src/piex.cc | 544 | ||||
-rw-r--r-- | src/piex.h | 80 | ||||
-rw-r--r-- | src/piex_types.h | 98 | ||||
-rw-r--r-- | src/tiff_directory/tiff_directory.cc | 282 | ||||
-rw-r--r-- | src/tiff_directory/tiff_directory.h | 161 | ||||
-rw-r--r-- | src/tiff_parser.cc | 570 | ||||
-rw-r--r-- | src/tiff_parser.h | 170 |
18 files changed, 4183 insertions, 0 deletions
@@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License.
\ No newline at end of file @@ -0,0 +1,4 @@ +adaubert@google.com +ebrauer@google.com +mboehme@google.com +yujieqin@google.com @@ -0,0 +1,2 @@ +The Preview Image Extractor (PIEX) is designed to find and extract the largest +JPEG compressed preview image contained in a RAW file.
\ No newline at end of file diff --git a/internal_include_do_not_delete.gypi b/internal_include_do_not_delete.gypi new file mode 100755 index 0000000..91995b8 --- /dev/null +++ b/internal_include_do_not_delete.gypi @@ -0,0 +1,2 @@ +# Do NOT touch the file. +{} diff --git a/piex.gyp b/piex.gyp new file mode 100755 index 0000000..ea3fb52 --- /dev/null +++ b/piex.gyp @@ -0,0 +1,76 @@ +# Copyright 2015 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +{ +'includes': ['internal_include_do_not_delete.gypi'], +'targets': [{ + 'target_name': 'piex', + 'type': 'static_library', + 'sources': [ + 'src/piex.cc', + 'src/tiff_parser.cc', + 'src/tiff_parser.h', + ], + 'variables': { + 'headers': [ + 'src/piex.h', + 'src/piex_types.h', + ], + }, + 'include_dirs': ['.'], + 'cflags': ['-Wsign-compare'], + 'dependencies': [ + 'binary_parse', + 'image_type_recognition', + 'tiff_directory', + ], +}, { + 'target_name': 'binary_parse', + 'type': 'static_library', + 'sources': [ + 'src/binary_parse/cached_paged_byte_array.cc', + 'src/binary_parse/range_checked_byte_ptr.cc', + ], + 'variables': { + 'headers': [ + 'src/binary_parse/cached_paged_byte_array.h', + 'src/binary_parse/range_checked_byte_ptr.h', + ], + }, + 'include_dirs': ['.'], + 'cflags': ['-Wsign-compare'], +}, { + 'target_name': 'image_type_recognition', + 'type': 'static_library', + 'sources': [ + 'src/image_type_recognition/image_type_recognition_lite.cc', + ], + 'variables': { + 'headers': 
 ['src/image_type_recognition/image_type_recognition_lite.h'], + }, + 'include_dirs': ['.'], + 'cflags': ['-Wsign-compare'], + 'dependencies': ['binary_parse'], +}, { + 'target_name': 'tiff_directory', + 'type': 'static_library', + 'sources': [ + 'src/tiff_directory/tiff_directory.cc', + ], + 'variables': { + 'headers': ['src/tiff_directory/tiff_directory.h'], + }, + 'include_dirs': ['.'], + 'dependencies': ['binary_parse'], +}], +} diff --git a/src/binary_parse/cached_paged_byte_array.cc b/src/binary_parse/cached_paged_byte_array.cc new file mode 100644 index 0000000..a6ab3b0 --- /dev/null +++ b/src/binary_parse/cached_paged_byte_array.cc @@ -0,0 +1,76 @@ +// Copyright 2015 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//////////////////////////////////////////////////////////////////////////////// +// +// The cache layer works as follows: +// The cache is implemented as a vector (of size 'cache_size') of shared +// pointers to pages recently used. The least recently used page is stored +// at the beginning of the vector, the most recent at the end. 
+ +#include "src/binary_parse/cached_paged_byte_array.h" + +namespace piex { +namespace binary_parse { + +CachedPagedByteArray::CachedPagedByteArray( + const PagedByteArray* paged_byte_array, size_t cache_size) + : paged_byte_array_(paged_byte_array), cache_size_(cache_size) {} + +void CachedPagedByteArray::getPage(size_t page_index, + const unsigned char** begin, + const unsigned char** end, + PagedByteArray::PagePtr* page) const { + std::lock_guard<std::mutex> lock(mutex_); + size_t cache_index; + if (getFromCache(page_index, &cache_index)) { + // Cache hit, retrieve the page from the cache. + *begin = cached_pages_[cache_index].begin; + *end = cached_pages_[cache_index].end; + *page = cached_pages_[cache_index].page; + + // Remove the page to insert it at the end of the cache later. + cached_pages_.erase(cached_pages_.begin() + cache_index); + } else { + // Cache miss, ask PagedByteArray to load the page. + paged_byte_array_->getPage(page_index, begin, end, page); + + // If the cache is full, remove the first (least recently used) page. + if (cached_pages_.size() >= cache_size_) { + cached_pages_.erase(cached_pages_.begin()); + } + } + + // Cache the most recently used page to the end of the vector. + CachedPage cache_page; + cache_page.index = page_index; + cache_page.page = *page; + cache_page.begin = *begin; + cache_page.end = *end; + cached_pages_.push_back(cache_page); +} + +bool CachedPagedByteArray::getFromCache(size_t page_index, + size_t* cache_index) const { + for (size_t i = 0; i < cached_pages_.size(); ++i) { + if (cached_pages_[i].index == page_index) { + *cache_index = i; + return true; + } + } + return false; +} + +} // namespace binary_parse +} // namespace piex diff --git a/src/binary_parse/cached_paged_byte_array.h b/src/binary_parse/cached_paged_byte_array.h new file mode 100644 index 0000000..26f0eae --- /dev/null +++ b/src/binary_parse/cached_paged_byte_array.h @@ -0,0 +1,73 @@ +// Copyright 2015 Google Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//////////////////////////////////////////////////////////////////////////////// +// +// LRU cache decorator for binary_parse::PagedByteArray subclasses. + +#ifndef PIEX_BINARY_PARSE_CACHED_PAGED_BYTE_ARRAY_H_ +#define PIEX_BINARY_PARSE_CACHED_PAGED_BYTE_ARRAY_H_ + +#include <mutex> +#include <vector> + +#if !defined(WIN32_LEAN_AND_MEAN) +#define WIN32_LEAN_AND_MEAN +#endif +#include "src/binary_parse/range_checked_byte_ptr.h" + +namespace piex { +namespace binary_parse { + +class CachedPagedByteArray : public PagedByteArray { + public: + // Decorates 'paged_byte_array' with a LRU cache layer of the size + // 'cache_size'. + explicit CachedPagedByteArray(const PagedByteArray* paged_byte_array, + size_t cache_size); + + virtual size_t length() const { return paged_byte_array_->length(); } + + virtual size_t pageSize() const { return paged_byte_array_->pageSize(); } + + virtual void getPage(size_t page_index, const unsigned char** begin, + const unsigned char** end, + PagedByteArray::PagePtr* page) const; + + private: + struct CachedPage { + size_t index; + PagedByteArray::PagePtr page; + const unsigned char* begin; + const unsigned char* end; + }; + + // Disallow copy construction and assignment. + CachedPagedByteArray(const CachedPagedByteArray&); + void operator=(const CachedPagedByteArray&); + + // Gets the index of the page if it is in the cache and returns true, else + // returns false. 
+ bool getFromCache(size_t page_index, size_t* cache_index) const; + + mutable std::mutex mutex_; + const PagedByteArray* paged_byte_array_; + const size_t cache_size_; + mutable std::vector<CachedPage> cached_pages_; +}; + +} // namespace binary_parse +} // namespace piex + +#endif // PIEX_BINARY_PARSE_CACHED_PAGED_BYTE_ARRAY_H_ diff --git a/src/binary_parse/range_checked_byte_ptr.cc b/src/binary_parse/range_checked_byte_ptr.cc new file mode 100644 index 0000000..1f882ed --- /dev/null +++ b/src/binary_parse/range_checked_byte_ptr.cc @@ -0,0 +1,400 @@ +// Copyright 2015 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//////////////////////////////////////////////////////////////////////////////// + +#include "src/binary_parse/range_checked_byte_ptr.h" + +#include <assert.h> +#include <cstddef> +#include <cstring> + +namespace piex { +namespace binary_parse { + +#ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE +#define BREAK_IF_DEBUGGING() assert(false) +#else +#define BREAK_IF_DEBUGGING() assert(true) +#endif + +namespace { +class MemoryPagedByteArray : public PagedByteArray { + public: + MemoryPagedByteArray(const unsigned char *buffer, const size_t len); + + virtual size_t length() const; + virtual size_t pageSize() const; + virtual void getPage(size_t page_index, const unsigned char **begin, + const unsigned char **end, PagePtr *page) const; + + private: + const unsigned char *buffer_; + const size_t len_; +}; + +MemoryPagedByteArray::MemoryPagedByteArray(const unsigned char *buffer, + const size_t len) + : buffer_(buffer), len_(len) {} + +size_t MemoryPagedByteArray::length() const { return len_; } + +size_t MemoryPagedByteArray::pageSize() const { return len_; } + +void MemoryPagedByteArray::getPage(size_t page_index, + const unsigned char **begin, + const unsigned char **end, + PagePtr *page) const { + assert(page_index == 0); + + *begin = buffer_; + *end = buffer_ + len_; + *page = PagePtr(); +} + +// A functor that does nothing. This is used as a no-op shared pointer +// deallocator below. 
+class NullFunctor { + public: + void operator()() {} + void operator()(PagedByteArray *p) const {} +}; +} // namespace + +PagedByteArray::~PagedByteArray() {} + +RangeCheckedBytePtr::RangeCheckedBytePtr() + : array_(), + page_data_(NULL), + current_pos_(0), + sub_array_begin_(0), + sub_array_end_(0), + page_begin_offset_(0), + current_page_len_(0), + error_flag_(RANGE_CHECKED_BYTE_ERROR) {} + +RangeCheckedBytePtr::RangeCheckedBytePtr(const unsigned char *array, + const size_t len) + : array_(new MemoryPagedByteArray(array, len)), + page_data_(NULL), + current_pos_(0), + sub_array_begin_(0), + sub_array_end_(len), + page_begin_offset_(0), + current_page_len_(0), + error_flag_(RANGE_CHECKED_BYTE_SUCCESS) { + assert(array); + if (array == NULL) { + error_flag_ = RANGE_CHECKED_BYTE_ERROR; + } +} + +RangeCheckedBytePtr::RangeCheckedBytePtr(PagedByteArray *array) + : array_(array, NullFunctor()), + page_data_(NULL), + current_pos_(0), + sub_array_begin_(0), + sub_array_end_(array->length()), + page_begin_offset_(0), + current_page_len_(0), + error_flag_(RANGE_CHECKED_BYTE_SUCCESS) {} + +RangeCheckedBytePtr RangeCheckedBytePtr::invalidPointer() { + return RangeCheckedBytePtr(); +} + +RangeCheckedBytePtr RangeCheckedBytePtr::pointerToSubArray( + size_t pos, size_t length) const { + RangeCheckedBytePtr sub_result = (*this) + pos; + if (!sub_result.errorOccurred() && length <= sub_result.remainingLength()) { + sub_result.sub_array_begin_ = sub_result.current_pos_; + sub_result.sub_array_end_ = sub_result.sub_array_begin_ + length; + + // Restrict the boundaries of the current page to the newly set sub-array. + sub_result.restrictPageToSubArray(); + + return sub_result; + } else { + error_flag_ = RANGE_CHECKED_BYTE_ERROR; + return invalidPointer(); + } +} + +size_t RangeCheckedBytePtr::offsetInArray() const { + // sub_array_begin_ <= current_pos_ is a class invariant, but protect + // against violations of this invariant. 
+ if (sub_array_begin_ <= current_pos_) { + return current_pos_ - sub_array_begin_; + } else { + assert(false); + return 0; + } +} + +std::string RangeCheckedBytePtr::substr(size_t pos, size_t length) const { + std::vector<unsigned char> bytes = extractBytes(pos, length); + std::string result; + result.reserve(bytes.size()); + for (size_t i = 0; i < bytes.size(); ++i) { + result.push_back(static_cast<char>(bytes[i])); + } + return result; +} + +std::vector<unsigned char> RangeCheckedBytePtr::extractBytes( + size_t pos, size_t length) const { + std::vector<unsigned char> result; + if (pos + length < pos /* overflow */ || remainingLength() < pos + length) { + BREAK_IF_DEBUGGING(); + error_flag_ = RANGE_CHECKED_BYTE_ERROR_OVERFLOW; + return result; + } + result.reserve(length); + for (size_t i = 0; i < length; ++i) { + result.push_back((*this)[pos + i]); + } + return result; +} + +bool operator==(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y) { + if (x.array_ != y.array_) { + assert(false); + return false; + } + + return x.current_pos_ == y.current_pos_; +} + +bool operator!=(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y) { + return !(x == y); +} + +void RangeCheckedBytePtr::loadPageForOffset(size_t offset) const { + // The offset should always lie within the bounds of the sub-array (this + // condition is enforced at the callsite). However, even if the offset lies + // outside the sub-array, the restrictPageToSubArray() call at the end + // ensures that the object is left in a consistent state that maintains the + // class invariants. + assert(offset >= sub_array_begin_ && offset < sub_array_end_); + + // Ensure that offset lies within the array. + if (offset >= array_->length()) { + assert(false); + return; + } + + // Determine the index of the page to request. + size_t page_index = offset / array_->pageSize(); + + // Get the page. 
+ const unsigned char *page_begin; + const unsigned char *page_end; + array_->getPage(page_index, &page_begin, &page_end, &page_); + + // Ensure that the page has the expected length (as specified in the + // PagedByteArray interface). + size_t expected_page_size = array_->pageSize(); + if (page_index == (array_->length() - 1) / array_->pageSize()) { + expected_page_size = array_->length() - array_->pageSize() * page_index; + } + if ((page_end < page_begin) || + (static_cast<size_t>(page_end - page_begin) != expected_page_size)) { + assert(false); + return; + } + + // Remember information about page. + page_data_ = page_begin; + page_begin_offset_ = page_index * array_->pageSize(); + current_page_len_ = page_end - page_begin; + + // Restrict the boundaries of the page to lie within the sub-array. + restrictPageToSubArray(); +} + +void RangeCheckedBytePtr::restrictPageToSubArray() const { + // Restrict the current page's boundaries so that it is always contained + // completely within the extent of the sub-array. + // This function is purposely designed to work correctly in the following + // two special cases: + // a) The current page lies entirely outside the sub-array. In this case, + // current_page_len_ will be set to zero. page_data_ may either remain + // unchanged or may be changed to point one element beyond the end of the + // page, depending on whether the current page lies before or after the + // sub-array. + // b) The current page is in the state as initialized by the constructor + // (i.e. page_data_ is NULL and current_page_len_ is zero). In this case, + // page_data_ and current_page_len_ will remain unchanged. + + // Does the beginning of the page lie before the beginning of the sub-array? + if (page_begin_offset_ < sub_array_begin_) { + // Compute amount by which to shorten page. 
+ size_t amount_to_shorten = sub_array_begin_ - page_begin_offset_; + if (amount_to_shorten > current_page_len_) { + amount_to_shorten = current_page_len_; + } + + // Adjust beginning of page accordingly. + page_begin_offset_ += amount_to_shorten; + page_data_ += amount_to_shorten; + current_page_len_ -= amount_to_shorten; + } + + // Does the end of the page lie beyond the end of the sub-array? + if (page_begin_offset_ + current_page_len_ > sub_array_end_) { + // Reduce length of page accordingly. + size_t new_len = sub_array_end_ - page_begin_offset_; + if (new_len > current_page_len_) { + new_len = current_page_len_; + } + current_page_len_ = new_len; + } +} + +int memcmp(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y, + size_t num) { + std::vector<unsigned char> x_vec = x.extractBytes(0, num); + std::vector<unsigned char> y_vec = y.extractBytes(0, num); + + if (!x.errorOccurred() && !y.errorOccurred()) { + return ::memcmp(&x_vec[0], &y_vec[0], num); + } else { + // return an arbitrary value + return -1; + } +} + +int strcmp(const RangeCheckedBytePtr &x, const std::string &y) { + std::vector<unsigned char> x_vec = x.extractBytes(0, y.length()); + + if (!x.errorOccurred()) { + return ::memcmp(&x_vec[0], y.c_str(), y.length()); + } else { + // return an arbitrary value + return -1; + } +} + +size_t strlen(const RangeCheckedBytePtr &src) { + size_t len = 0; + RangeCheckedBytePtr str = src; + while (!str.errorOccurred() && (str[0] != '\0')) { + str++; + len++; + } + return len; +} + +int16 Get16s(const RangeCheckedBytePtr &input, const bool big_endian, + MemoryStatus *status) { + const uint16 unsigned_value = Get16u(input, big_endian, status); + if (*status != RANGE_CHECKED_BYTE_SUCCESS) { + // Return an arbitrary value. + return 0; + } + + // Convert the two's-complement signed integer encoded in 'unsigned_value' + // into a signed representation in the implementation's native representation + // for signed integers. 
An optimized Blaze build (x64) compiles all of the + // following code to a no-op (as of this writing). + // For further details, see the corresponding comment in Get32s(). + if (unsigned_value == 0x8000u) { + return static_cast<int16>(-0x8000); + } else if (unsigned_value > 0x8000u) { + return -static_cast<int16>(0x10000u - unsigned_value); + } else { + return static_cast<int16>(unsigned_value); + } +} + +uint16 Get16u(const RangeCheckedBytePtr &input, const bool big_endian, + MemoryStatus *status) { + if (input.remainingLength() < 2) { + if (status && *status == RANGE_CHECKED_BYTE_SUCCESS) { + *status = RANGE_CHECKED_BYTE_ERROR; + } + // Return an arbitrary value. + return 0; + } + if (big_endian) { + return (input[0] << 8) | input[1]; + } else { + return (input[1] << 8) | input[0]; + } +} + +int32 Get32s(const RangeCheckedBytePtr &input, const bool big_endian, + MemoryStatus *status) { + const uint32 unsigned_value = Get32u(input, big_endian, status); + if (*status != RANGE_CHECKED_BYTE_SUCCESS) { + // Return an arbitrary value. + return 0; + } + + // Convert the two's-complement signed integer encoded in 'unsigned_value' + // into a signed representation in the implementation's native representation + // for signed integers. + // For all practical purposes, the same result could be obtained simply by + // casting unsigned_value to int32; the result of this is + // implementation-defined, but on all of the platforms we care about, it does + // what we want. + // The code below, however, arguably has the aesthetic advantage of being + // independent of the representation for signed integers chosen by the + // implementation, as long as 'int' and 'unsigned' have the required range to + // represent all of the required values. + // An optimized Blaze build (x64) compiles all of the following code to a + // no-op (as of this writing); i.e. the value that Get32u() returned in %eax + // is left unchanged. 
+ if (unsigned_value == 0x80000000u) { + // Read here on why the constant expression is written this way: + // http://stackoverflow.com/questions/14695118 + return -0x7fffffff - 1; + } else if (unsigned_value > 0x80000000u) { + // The expression + // 0xffffffffu - unsigned_value + 1 + // is a portable way of flipping the sign of a twos-complement signed + // integer whose binary representation is stored in an unsigned integer. + // '0xffffffffu + 1' is used in preference to simply '0' because it makes + // it clearer that the correct result will be obtained even if an int is + // greater than 32 bits. The '0xffffffffu + 1' is "spread out" around + // 'unsigned_value' to prevent the compiler from warning about an + // integral constant overflow. ('0' would produce the correct result in + // this case too but would rely in a more subtle way on the rules for + // unsigned wraparound.) + return -static_cast<int32>(0xffffffffu - unsigned_value + 1); + } else { + return static_cast<int32>(unsigned_value); + } +} + +uint32 Get32u(const RangeCheckedBytePtr &input, const bool big_endian, + MemoryStatus *status) { + if (input.remainingLength() < 4) { + if (status && *status == RANGE_CHECKED_BYTE_SUCCESS) { + *status = RANGE_CHECKED_BYTE_ERROR; + } + // Return an arbitrary value. + return 0; + } + if (big_endian) { + return (input[0] << 24) | (input[1] << 16) | (input[2] << 8) | + (input[3] << 0); + } else { + return (input[3] << 24) | (input[2] << 16) | (input[1] << 8) | + (input[0] << 0); + } +} + +} // namespace binary_parse +} // namespace piex diff --git a/src/binary_parse/range_checked_byte_ptr.h b/src/binary_parse/range_checked_byte_ptr.h new file mode 100644 index 0000000..07f8888 --- /dev/null +++ b/src/binary_parse/range_checked_byte_ptr.h @@ -0,0 +1,503 @@ +// Copyright 2015 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef PIEX_BINARY_PARSE_RANGE_CHECKED_BYTE_PTR_H_ +#define PIEX_BINARY_PARSE_RANGE_CHECKED_BYTE_PTR_H_ + +#include <assert.h> + +#include <cstddef> +#include <memory> +#include <string> +#include <vector> + +namespace piex { +namespace binary_parse { + +// Since NaCl does not comply to C++11 we can not just use stdint.h. +typedef unsigned short uint16; // NOLINT +typedef short int16; // NOLINT +typedef unsigned int uint32; +typedef int int32; + +enum MemoryStatus { + RANGE_CHECKED_BYTE_SUCCESS = 0, + RANGE_CHECKED_BYTE_ERROR = 1, + RANGE_CHECKED_BYTE_ERROR_OVERFLOW = 2, + RANGE_CHECKED_BYTE_ERROR_UNDERFLOW = 3, +}; + +// Interface that RangeCheckedBytePtr uses to access the underlying array of +// bytes. This allows RangeCheckedBytePtr to be used to access data as if it +// were stored contiguously in memory, even if the data is in fact split up +// into non-contiguous chunks and / or does not reside in memory. +// +// The only requirement is that the data can be read in pages of a fixed (but +// configurable) size. Notionally, the byte array (which contains length() +// bytes) is split up into non-overlapping pages of pageSize() bytes each. +// (The last page may be shorter if length() is not a multiple of pageSize().) +// There are therefore (length() - 1) / pageSize() + 1 such pages, with indexes +// 0 through (length() - 1) / pageSize(). 
Page i contains the bytes from offset +// i * pageSize() in the array up to and including the byte at offset +// (i + 1) * pageSize() - 1 (or, in the case of the last page, length() - 1). +// +// In essence, RangeCheckedBytePtr and PagedByteArray together provide a poor +// man's virtual-memory-and-memory-mapped-file work-alike in situations where +// virtual memory cannot be used or would consume too much virtual address +// space. +// +// Thread safety: In general, subclasses implementing this interface should +// ensure that the member functions are thread-safe. It will then be safe to +// access the same array from multiple threads. (Note that RangeCheckedBytePtr +// itself is not thread-safe in the sense that a single instance of +// RangeCheckedBytePtr cannot be used concurrently from multiple threads; it +// is, however, safe to use different RangeCheckedBytePtr instances in +// different threads to access the same PagedByteArray concurrently, assuming +// that the PagedByteArray implementation is thread-safe.) +class PagedByteArray { + public: + // Base class for pages in the byte array. Implementations of PagedByteArray + // can create a subclass of the Page class to manage the lifetime of buffers + // associated with a page returned by getPage(). For example, a + // PagedByteArray backed by a file might define a Page subclass like this: + // + // class FilePage : public Page { + // std::vector<unsigned char> bytes; + // }; + // + // The corresponding getPage() implementation could then look like this: + // + // void getPage(size_t page_index, const unsigned char** begin, + // const unsigned char** end, photos::ncf::util::SharedPtr<Page>* page) + // { + // // Create a new page. + // photos::ncf::util::SharedPtr<FilePage> file_page(new FilePage()); + // + // // Read contents of page from file into file_page->bytes. + // [...] + // + // // Set *begin and *end to point to beginning and end of + // // file_page->bytes vector. 
+ // *begin = &file_page->bytes[0]; + // *end = *begin + file_page->bytes.size(); + // + // // Return page to caller + // *page = file_page; + // } + // + // In this way, the storage associated with the page (the FilePage::bytes + // vector) will be kept alive until the RangeCheckedBytePtr releases the + // shared pointer. + class Page {}; + + typedef std::shared_ptr<Page> PagePtr; + + virtual ~PagedByteArray(); + + // Returns the length of the array in bytes. The value returned must remain + // the same on every call for the entire lifetime of the object. + virtual size_t length() const = 0; + + // Returns the length of each page in bytes. (The last page may be shorter + // than pageSize() if length() is not a multiple of pageSize() -- see also + // the class-wide comment above.) The value returned must remain the same on + // every call for the entire lifetime of the object. + virtual size_t pageSize() const = 0; + + // Returns a pointer to a memory buffer containing the data for the page + // with index "page_index". + // + // *begin is set to point to the first byte of the page; *end is set to point + // one byte beyond the last byte in the page. This implies that: + // - (*end - *begin) == pageSize() for every page except the last page + // - (*end - *begin) == length() - pageSize() * ((length() - 1) / pageSize()) + // for the last page + // + // *page will be set to a SharedPtr that the caller will hold on to until + // it no longer needs to access the memory buffer. The memory buffer will + // remain valid until the SharedPtr is released or the PagedByteArray object + // is destroyed. An implementation may choose to return a null SharedPtr; + // this indicates that the memory buffer will remain valid until the + // PagedByteArray object is destroyed, even if the caller does not hold on to + // the SharedPtr. (This is intended as an optimization that some + // implementations may choose to take advantage of, as a null SharedPtr is + // cheaper to copy.) 
+ virtual void getPage(size_t page_index, const unsigned char **begin, + const unsigned char **end, PagePtr *page) const = 0; +}; + +typedef std::shared_ptr<PagedByteArray> PagedByteArrayPtr; + +class RangeCheckedBytePtr { + private: + // This class maintains the following class invariants: + // - page_data_ always points to a buffer of at least current_page_len_ + // bytes. + // + // - The current position lies within the sub-array, i.e. + // sub_array_begin_ <= current_pos_ <= sub_array_end_ + // + // - The sub-array is entirely contained within the array, i.e. + // 0 <= sub_array_begin <= sub_array_end <= array_->length() + // + // - If the current page is non-empty, it lies completely within the + // sub-array, i.e. + // if _current_page_len_ >= 0, then + // sub_array_begin_ <= page_begin_offset_ + // and + // page_begin_offset_ + current_page_len_ <= sub_array_end_ + // (See also restrictPageToSubArray().) + // (If _current_page_len_ == 0, then page_begin_offset_ may lie outside + // the sub-array; this condition is harmless. Additional logic would be + // required to make page_begin_offset_ lie within the sub-array in this + // case, and it would serve no purpose other than to make the invariant + // slightly simpler.) + // + // Note that it is _not_ a class invariant that current_pos_ needs to lie + // within the current page. Making this an invariant would have two + // undesirable consequences: + // a) When operator[] is called with an index that lies beyond the end of + // the current page, it would need to temporarily load the page that + // contains this index, but it wouldn't be able to "retain" the page + // (i.e. make it the current page) because that would violate the + // proposed invariant. This would lead to inefficient behavior in the + // case where code accesses a large range of indices beyond the end of + // the page because a page would need to be loaded temporarily on each + // access. 
+ // b) It would require more code: loadPageForOffset() would need to be + // called anywhere that current_pos_ changes (whereas, with the present + // approach, loadPageForOffset() is only called in operator[]). + + // PagedByteArray that is accessed by this pointer. + PagedByteArrayPtr array_; + + // Pointer to the current page. + mutable PagedByteArray::PagePtr page_; + + // Pointer to the current page's data buffer. + mutable const unsigned char *page_data_; + + // All of the following offsets are defined relative to the beginning of + // the array defined by the PagedByteArray array_. + + // Array offset that the pointer points to. + size_t current_pos_; + + // Start offset of the current sub-array. + size_t sub_array_begin_; + + // End offset of the current sub-array. + size_t sub_array_end_; + + // Array offset corresponding to the "page_data_" pointer. + mutable size_t page_begin_offset_; + + // Length of the current page. + mutable size_t current_page_len_; + + // Error flag. This is mutable because methods that don't affect the value + // of the pointer itself (such as operator+() and operator-()) + // nevertheless need to be able to signal error conditions. + mutable MemoryStatus error_flag_; + + RangeCheckedBytePtr(); + + public: + explicit RangeCheckedBytePtr(const unsigned char *array, const size_t len); + + // Creates a pointer that points to the first element of the given + // PagedByteArray. The caller must ensure that this PagedByteArray remains + // valid until this pointer and any pointers created from it have been + // destroyed. + explicit RangeCheckedBytePtr(PagedByteArray *array); + + // Creates an invalid RangeCheckedBytePtr. Calling errorOccurred() on the + // result of invalidPointer() always returns true. + // Do not check a RangeCheckedBytePtr for validity by comparing against + // invalidPointer(); use errorOccurred() instead. 
+ static RangeCheckedBytePtr invalidPointer(); + + // Returns a RangeCheckedBytePtr points to an array which start at the byte + // position "pos" and spans length bytes. + // If the desired range is is out of the RangeCheckedBytePtr's range returns + // an invalid pointer. + RangeCheckedBytePtr pointerToSubArray(size_t pos, size_t length) const; + + inline size_t remainingLength() const; + + size_t offsetInArray() const; + + // Returns whether an out-of-bounds error has ever occurred on this pointer in + // the past. An error occurs if a caller attempts to read from a position + // outside the bounds of the array or to move the pointer outside the bounds + // of the array. + // + // The error flag is never reset. Once an error has occurred, + // all subsequent attempts to read from the pointer (even within the bounds of + // the array) return 0. + // + // Note that it is permissible for a pointer to point one element past the end + // of the array, but it is not permissible to read from this position. This is + // equivalent to the semantics of raw C++ pointers. + inline bool errorOccurred() const; + + // DEPRECATED: Use "!errorOccurred()" instead (note negation), which returns + // the same result as isValid() in all cases. + inline bool isValid() const; + + std::string substr(size_t pos, size_t length) const; + + std::vector<unsigned char> extractBytes(size_t pos, size_t length) const; + + // This function is not endian-agnostic. 
But we think it better than using + // reinterpret_cast or simply casting the unsigned char * pointer to T * + // which is also not endian-agnostic + template <class T> + bool convert(T *output) const { + union { + T t; + unsigned char ch[sizeof(T)]; + } buffer; + for (size_t i = 0; i < sizeof(T); i++) { + buffer.ch[i] = (*this)[i]; + } + if (!errorOccurred()) { + *output = buffer.t; + } + return !errorOccurred(); + } + + template <class T> + bool convert(size_t index, T *output) const { + RangeCheckedBytePtr p = (*this) + index * sizeof(T); + bool valid = p.convert(output); + if (!valid) { + error_flag_ = p.error_flag_; + } + return valid; + } + + // operators + + // this returns a 0 (static_cast<unsigned char>(0)) if out of range + inline unsigned char operator[](size_t i) const; + + inline unsigned char operator*() const; + + inline RangeCheckedBytePtr &operator++(); + + inline RangeCheckedBytePtr operator++(int); + + inline RangeCheckedBytePtr &operator--(); + + inline RangeCheckedBytePtr operator--(int); + + inline RangeCheckedBytePtr &operator+=(size_t x); + + inline RangeCheckedBytePtr &operator-=(size_t x); + + inline friend RangeCheckedBytePtr operator+(const RangeCheckedBytePtr &p, + size_t x); + + inline friend RangeCheckedBytePtr operator-(const RangeCheckedBytePtr &p, + size_t x); + + // Tests whether x and y point at the same position in the underlying array. + // Two pointers that point at the same position but have different + // sub-arrays still compare equal. It is not legal to compare two pointers + // that point into different paged byte arrays. + friend bool operator==(const RangeCheckedBytePtr &x, + const RangeCheckedBytePtr &y); + + // Returns !(x == y). 
+ friend bool operator!=(const RangeCheckedBytePtr &x, + const RangeCheckedBytePtr &y); + + private: + void loadPageForOffset(size_t offset) const; + void restrictPageToSubArray() const; +}; + +// util functions +int memcmp(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y, + size_t num); + +int strcmp(const RangeCheckedBytePtr &x, const std::string &y); + +size_t strlen(const RangeCheckedBytePtr &src); + +// Decode 16-bit signed integer from binary input. +int16 Get16s(const RangeCheckedBytePtr &input, const bool big_endian, + MemoryStatus *status); + +// Decode 16-bit unsigned integer from binary input. +uint16 Get16u(const RangeCheckedBytePtr &input, const bool big_endian, + MemoryStatus *status); + +// Decode 32-bit signed integer from binary input. +int32 Get32s(const RangeCheckedBytePtr &input, const bool big_endian, + MemoryStatus *status); + +// Decode 32-bit unsigned integer from binary input. +uint32 Get32u(const RangeCheckedBytePtr &input, const bool big_endian, + MemoryStatus *status); + +size_t RangeCheckedBytePtr::remainingLength() const { + if (!errorOccurred()) { + // current_pos_ <= sub_array_end_ is a class invariant, but protect + // against violations of this invariant. + if (current_pos_ <= sub_array_end_) { + return sub_array_end_ - current_pos_; + } else { + assert(false); + return 0; + } + } else { + return 0; + } +} + +bool RangeCheckedBytePtr::errorOccurred() const { + return error_flag_ != RANGE_CHECKED_BYTE_SUCCESS; +} + +bool RangeCheckedBytePtr::isValid() const { + return error_flag_ == RANGE_CHECKED_BYTE_SUCCESS; +} + +unsigned char RangeCheckedBytePtr::operator[](size_t i) const { + // Check that pointer doesn't have an error flag set. + if (!errorOccurred()) { + // Offset in array to read from. + const size_t read_offset = current_pos_ + i; + + // Check for the common case first: The byte we want to read lies in the + // current page. 
For performance reasons, we don't check for the case + // "read_offset < page_begin_offset_" explicitly; if it occurs, it will + // lead to wraparound (which is well-defined for unsigned quantities), and + // this will cause the test "pos_in_page < current_page_len_" to fail. + size_t pos_in_page = read_offset - page_begin_offset_; + if (pos_in_page < current_page_len_) { + return page_data_[pos_in_page]; + } + + // Check that the offset we're trying to read lies within the sub-array + // we're allowed to access. + if (read_offset >= sub_array_begin_ && read_offset < sub_array_end_) { + // Read the page that contains the offset "read_offset". + loadPageForOffset(read_offset); + + // Compute the position within the new page from which we need to read. + pos_in_page = read_offset - page_begin_offset_; + + // After the call to loadPageForOffset(), read_offset must lie within + // the current page, and therefore pos_in_page must be less than the + // length of the page. We nevertheless check for this to protect against + // potential bugs in loadPageForOffset(). + assert(pos_in_page < current_page_len_); + if (pos_in_page < current_page_len_) { + return page_data_[pos_in_page]; + } + } + } + +// All error cases fall through to here. 
+#ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE + assert(false); +#endif + error_flag_ = RANGE_CHECKED_BYTE_ERROR_OVERFLOW; + // return 0, which represents the invalid value + return static_cast<unsigned char>(0); +} + +unsigned char RangeCheckedBytePtr::operator*() const { return (*this)[0]; } + +RangeCheckedBytePtr &RangeCheckedBytePtr::operator++() { + if (current_pos_ < sub_array_end_) { + current_pos_++; + } else { +#ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE + assert(false); +#endif + error_flag_ = RANGE_CHECKED_BYTE_ERROR_OVERFLOW; + } + return *this; +} + +RangeCheckedBytePtr RangeCheckedBytePtr::operator++(int) { + RangeCheckedBytePtr result(*this); + ++(*this); + return result; +} + +RangeCheckedBytePtr &RangeCheckedBytePtr::operator--() { + if (current_pos_ > sub_array_begin_) { + current_pos_--; + } else { +#ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE + assert(false); +#endif + error_flag_ = RANGE_CHECKED_BYTE_ERROR_UNDERFLOW; + } + return *this; +} + +RangeCheckedBytePtr RangeCheckedBytePtr::operator--(int) { + RangeCheckedBytePtr result(*this); + --(*this); + return result; +} + +RangeCheckedBytePtr &RangeCheckedBytePtr::operator+=(size_t x) { + if (remainingLength() >= x) { + current_pos_ += x; + } else { +#ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE + assert(false); +#endif + error_flag_ = RANGE_CHECKED_BYTE_ERROR_OVERFLOW; + } + return *this; +} + +RangeCheckedBytePtr &RangeCheckedBytePtr::operator-=(size_t x) { + if (x <= current_pos_ - sub_array_begin_) { + current_pos_ -= x; + } else { +#ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE + assert(false); +#endif + error_flag_ = RANGE_CHECKED_BYTE_ERROR_UNDERFLOW; + } + return *this; +} + +RangeCheckedBytePtr operator+(const RangeCheckedBytePtr &p, size_t x) { + RangeCheckedBytePtr result(p); + result += x; + return result; +} + +RangeCheckedBytePtr operator-(const RangeCheckedBytePtr &p, size_t x) { + RangeCheckedBytePtr result(p); + result -= x; + return result; +} + +} // namespace binary_parse +} // namespace 
piex + +#endif // PIEX_BINARY_PARSE_RANGE_CHECKED_BYTE_PTR_H_ diff --git a/src/image_type_recognition/image_type_recognition_lite.cc b/src/image_type_recognition/image_type_recognition_lite.cc new file mode 100644 index 0000000..520688a --- /dev/null +++ b/src/image_type_recognition/image_type_recognition_lite.cc @@ -0,0 +1,861 @@ +// Copyright 2015 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//////////////////////////////////////////////////////////////////////////////// +// +// This file implements the image type recognition algorithm. Functions, which +// will check each single image type, are implemented based on the comparisons +// of magic numbers or signature strings. Other checks (e.g. endianness, general +// tiff magic number "42", etc.) could also be used in some of those functions +// to make the type recognition more stable. Those checks are designed +// according to the format specifications and our own experiments. Notice that +// the magic numbers and signature strings may have different binary values +// according to different endiannesses.
+#include "src/image_type_recognition/image_type_recognition_lite.h" + +#include <algorithm> +#include <cassert> +#include <string> +#include <vector> + +#include "src/binary_parse/range_checked_byte_ptr.h" + +namespace piex { +namespace image_type_recognition { +namespace { + +using std::string; +using binary_parse::MemoryStatus; +using binary_parse::RangeCheckedBytePtr; + +// Base class for checking image type. For each image type, one should create an +// inherited class and do the implementation. +class TypeChecker { + public: + // Comparing function, whihc is used for sorting. + static bool Compare(const TypeChecker* a, const TypeChecker* b) { + assert(a); + assert(b); + return a->RequestedSize() < b->RequestedSize(); + } + + virtual ~TypeChecker() {} + + // Returns the type of current checker. + virtual RawImageTypes Type() const = 0; + + // Returns the requested data size (in bytes) for current checker. The checker + // guarantees that it will not read more than this size. + virtual size_t RequestedSize() const = 0; + + // Checks if source data belongs to current checker type. + virtual bool IsMyType(const RangeCheckedBytePtr& source) const = 0; +}; + +// Check if the uint16 value at (source + offset) is equal to the target value. +bool CheckUInt16Value(const RangeCheckedBytePtr& source, + const size_t source_offset, const bool use_big_endian, + const unsigned short target_value) { // NOLINT + MemoryStatus status = binary_parse::RANGE_CHECKED_BYTE_SUCCESS; + const unsigned short value = binary_parse::Get16u( // NOLINT + source + source_offset, use_big_endian, &status); + if (status != binary_parse::RANGE_CHECKED_BYTE_SUCCESS) { + return false; + } + return (target_value == value); +} + +// Check if the uint32 value at (source + offset) is equal to the target value. 
+bool CheckUInt32Value(const RangeCheckedBytePtr& source, + const size_t source_offset, const bool use_big_endian, + const unsigned int target_value) { + MemoryStatus status = binary_parse::RANGE_CHECKED_BYTE_SUCCESS; + const unsigned int value = + binary_parse::Get32u(source + source_offset, use_big_endian, &status); + if (status != binary_parse::RANGE_CHECKED_BYTE_SUCCESS) { + return false; + } + return (target_value == value); +} + +// Determine the endianness. The return value is NOT the endianness indicator, +// it's just that this function was successful. +bool DetermineEndianness(const RangeCheckedBytePtr& source, + bool* is_big_endian) { + if (source.remainingLength() < 2) { + return false; + } + + if (source[0] == 0x49 && source[1] == 0x49) { + *is_big_endian = false; + } else if (source[0] == 0x4D && source[1] == 0x4D) { + *is_big_endian = true; + } else { + return false; + } + return true; +} + +// Check if signature string can match to the same length string start from +// (source + offset). The signature string will be used as longer magic number +// series. +bool IsSignatureMatched(const RangeCheckedBytePtr& source, + const size_t source_offset, const string& signature) { + return source.substr(source_offset, signature.size()) == signature; +} + +// Check if signature is found in [source + offset, source + offset + range]. +bool IsSignatureFound(const RangeCheckedBytePtr& source, + const size_t search_offset, const size_t search_range, + const string& signature, size_t* first_matched) { + if (source.remainingLength() < search_offset + search_range) { + return false; + } + + // The index must be in range [offset, offset + range - sizeof(signature)], so + // that it can guarantee that it will not read outside of range. 
+ for (size_t i = search_offset; + i < search_offset + search_range - signature.size(); ++i) { + if (IsSignatureMatched(source, i, signature)) { + if (first_matched) { + *first_matched = i; + } + return true; + } + } + return false; +} + +// Sony RAW format. +class ArwTypeChecker : public TypeChecker { + public: + virtual RawImageTypes Type() const { return kArwImage; } + + virtual size_t RequestedSize() const { return 5000; } + + // Check multiple points: + // 1. valid endianness at the beginning of the file; + // 2. correct tiff magic number at the (offset == 8) position of the file; + // 3. signature "SONY" in first requested bytes; + // 4. correct signature for (section + version) in first requested bytes. + virtual bool IsMyType(const RangeCheckedBytePtr& source) const { + // Limits the source length to the RequestedSize(), using it guarantees that + // we will not read more than this size from the source. + RangeCheckedBytePtr limited_source = + source.pointerToSubArray(0 /* pos */, RequestedSize()); + + bool use_big_endian; + if (!DetermineEndianness(limited_source, &use_big_endian)) { + return false; + } + + const unsigned short kTiffMagic = 0x2A; // NOLINT + const unsigned int kTiffOffset = 8; + if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian, + kTiffMagic) || + !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian, + kTiffOffset)) { + return false; + } + + // Search for kSignatureSony in first requested bytes + const string kSignatureSony("SONY"); + if (!IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(), + kSignatureSony, NULL)) { + return false; + } + + // Search for (kSignatureFileTypeSection + kSignatureVersions[i]) in first + // requested bytes + const string kSignatureSection("\x00\xb0\x01\x00\x04\x00\x00\x00", 8); + const int kSignatureVersionsSize = 5; + const string kSignatureVersions[kSignatureVersionsSize] = { + string("\x02\x00", 2), // ARW 1.0 + string("\x03\x00", 2), // ARW 2.0 + 
string("\x03\x01", 2), // ARW 2.1 + string("\x03\x02", 2), // ARW 2.2 + string("\x03\x03", 2), // ARW 2.3 + }; + bool matched = false; + for (int i = 0; i < kSignatureVersionsSize; ++i) { + matched = matched || IsSignatureFound( + limited_source, 0 /* offset */, RequestedSize(), + kSignatureSection + kSignatureVersions[i], NULL); + } + return matched; + } +}; + +// Canon RAW (CR2 extension). +class Cr2TypeChecker : public TypeChecker { + public: + virtual RawImageTypes Type() const { return kCr2Image; } + + virtual size_t RequestedSize() const { return 16; } + + // Check multiple points: + // 1. valid endianness at the beginning of the file; + // 2. magic number "42" at the (offset == 2) position of the file; + // 3. signature "CR2" at the (offset == 8) position of the file. + virtual bool IsMyType(const RangeCheckedBytePtr& source) const { + // Limits the source length to the RequestedSize(), using it guarantees that + // we will not read more than this size from the source. + RangeCheckedBytePtr limited_source = + source.pointerToSubArray(0 /* pos */, RequestedSize()); + + bool use_big_endian; + if (!DetermineEndianness(limited_source, &use_big_endian)) { + return false; + } + + const unsigned short kTag = 42; // NOLINT + if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian, + kTag)) { + return false; + } + + const string kSignature("CR\2\0", 4); + return IsSignatureMatched(limited_source, 8 /* offset */, kSignature); + } +}; + +// Canon RAW (CRW extension). +class CrwTypeChecker : public TypeChecker { + public: + virtual RawImageTypes Type() const { return kCrwImage; } + + virtual size_t RequestedSize() const { return 14; } + + // Check only the signature at the (offset == 6) position of the file. + virtual bool IsMyType(const RangeCheckedBytePtr& source) const { + // Limits the source length to the RequestedSize(), using it guarantees that + // we will not read more than this size from the source. 
+ RangeCheckedBytePtr limited_source = + source.pointerToSubArray(0 /* pos */, RequestedSize()); + + bool use_big_endian; + if (!DetermineEndianness(limited_source, &use_big_endian)) { + return false; + } + + string signature; + if (use_big_endian) { + signature = string("\x00\x10\xba\xb0\xac\xbb\x00\x02", 8); + } else { + signature = string("HEAPCCDR"); + } + return IsSignatureMatched(limited_source, 6 /* offset */, signature); + } +}; + +// Kodak RAW. +class DcrTypeChecker : public TypeChecker { + public: + virtual RawImageTypes Type() const { return kDcrImage; } + + virtual size_t RequestedSize() const { return 5000; } + + // Check two different cases, only need to fulfill one of the two: + // 1. signature at the (offset == 16) position of the file; + // 2. two tags (OriginalFileName and FirmwareVersion) can be found in the + // first requested bytes of the file. + virtual bool IsMyType(const RangeCheckedBytePtr& source) const { + // Limits the source length to the RequestedSize(), using it guarantees that + // we will not read more than this size from the source. 
+ RangeCheckedBytePtr limited_source = + source.pointerToSubArray(0 /* pos */, RequestedSize()); + + bool use_big_endian; + if (!DetermineEndianness(limited_source, &use_big_endian)) { + return false; + } + + // Case 1: has signature + const string kSignature( + "\x4b\x4f\x44\x41\x4b\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20", 16); + if (IsSignatureMatched(limited_source, 16 /* offset */, kSignature)) { + return true; + } + + // Case 2: search for tags in first requested bytes + string kIfdTags[2]; + if (use_big_endian) { + kIfdTags[0] = string("\x03\xe9\x00\x02", 4); // OriginalFileName + kIfdTags[1] = string("\x0c\xe5\x00\x02", 4); // FirmwareVersion + } else { + kIfdTags[0] = string("\xe9\x03\x02\x00", 4); // OriginalFileName + kIfdTags[1] = string("\xe5\x0c\x02\x00", 4); // FirmwareVersion + } + return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(), + kIfdTags[0], NULL) && + IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(), + kIfdTags[1], NULL); + } +}; + +// Digital Negative RAW. +class DngTypeChecker : public TypeChecker { + public: + virtual RawImageTypes Type() const { return kDngImage; } + + virtual size_t RequestedSize() const { return 1024; } + + // Check multiple points: + // 1. valid endianness at the beginning of the file; + // 2. at least two dng specific tags in the first requested bytes of the + // file + virtual bool IsMyType(const RangeCheckedBytePtr& source) const { + // Limits the source length to the RequestedSize(), using it guarantees that + // we will not read more than this size from the source. + RangeCheckedBytePtr limited_source = + source.pointerToSubArray(0 /* pos */, RequestedSize()); + + bool use_big_endian; + if (!DetermineEndianness(limited_source, &use_big_endian)) { + return false; + } + + // Search tags in first requested bytes and verify the order of them. 
+ const int kTagsCount = 5; + string dng_tags[kTagsCount]; + if (use_big_endian) { + dng_tags[0] = + string("\xc6\x12\x00\x01\x00\x00\x00\x04", 8); // tag: 50706 + dng_tags[1] = + string("\xc6\x13\x00\x01\x00\x00\x00\x04", 8); // tag: 50707 + dng_tags[2] = string("\xc6\x14\x00\x02", 4); // tag: 50708 + dng_tags[3] = string("\xc6\x20", 2); // tag: 50720 + dng_tags[4] = + string("\xc6\x2d\x00\x04\x00\x00\x00\x01", 8); // tag: 50733 + } else { + dng_tags[0] = + string("\x12\xc6\x01\x00\x04\x00\x00\x00", 8); // tag: 50706 + dng_tags[1] = + string("\x13\xc6\x01\x00\x04\x00\x00\x00", 8); // tag: 50707 + dng_tags[2] = string("\x14\xc6\x02\x00", 4); // tag: 50708 + dng_tags[3] = string("\x20\xc6", 2); // tag: 50720 + dng_tags[4] = + string("\x2d\xc6\x04\x00\x01\x00\x00\x00", 8); // tag: 50733 + } + int tags_found = 0; + for (int i = 0; i < kTagsCount; ++i) { + if (IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(), + dng_tags[i], NULL)) { + tags_found++; + } + } + return tags_found >= 2; + } +}; + +// Kodak RAW. +class KdcTypeChecker : public TypeChecker { + public: + virtual RawImageTypes Type() const { return kKdcImage; } + + virtual size_t RequestedSize() const { return 5000; } + + // Check two points: + // 1. valid endianness at the beginning of the file; + // 2. two tags (WhiteBalance and SerialNumber) in the first requested bytes. + virtual bool IsMyType(const RangeCheckedBytePtr& source) const { + // Limits the source length to the RequestedSize(), using it guarantees that + // we will not read more than this size from the source. 
+ RangeCheckedBytePtr limited_source = + source.pointerToSubArray(0 /* pos */, RequestedSize()); + + bool use_big_endian; + if (!DetermineEndianness(limited_source, &use_big_endian)) { + return false; + } + + // Search in first requested bytes + const size_t kIfdTagsSize = 2; + string kIfdTags[kIfdTagsSize]; + if (use_big_endian) { + kIfdTags[0] = string("\xfa\x0d\x00\x01", 4); // WhiteBalance + kIfdTags[1] = string("\xfa\x00\x00\x02", 4); // SerialNumber + } else { + kIfdTags[0] = string("\x0d\xfa\x01\x00", 4); // WhiteBalance + kIfdTags[1] = string("\x00\xfa\x02\x00", 4); // SerialNumber + } + + return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(), + kIfdTags[0], NULL) && + IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(), + kIfdTags[1], NULL); + } +}; + +// Leaf RAW. +class MosTypeChecker : public TypeChecker { + public: + virtual RawImageTypes Type() const { return kMosImage; } + + virtual size_t RequestedSize() const { return 5000; } + + // Check two points: + // 1. valid endianness at the beginning of the file; + // 2. signature "PKTS " in the first requested bytes. Note the + // "whitespace". It's important as they are special binary values. + virtual bool IsMyType(const RangeCheckedBytePtr& source) const { + // Limits the source length to the RequestedSize(), using it guarantees that + // we will not read more than this size from the source. + RangeCheckedBytePtr limited_source = + source.pointerToSubArray(0 /* pos */, RequestedSize()); + + bool use_big_endian; + if (!DetermineEndianness(source, &use_big_endian)) { + return false; + } + + // Search kSignaturePKTS in first requested bytes + const string kSignaturePKTS("PKTS\x00\x00\x00\x001", 8); + return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(), + kSignaturePKTS, NULL); + } +}; + +// Minolta RAW. 
+class MrwTypeChecker : public TypeChecker { + public: + virtual RawImageTypes Type() const { return kMrwImage; } + + virtual size_t RequestedSize() const { return 4; } + + // Check only the signature at the beginning of the file. + virtual bool IsMyType(const RangeCheckedBytePtr& source) const { + // Limits the source length to the RequestedSize(), using it guarantees that + // we will not read more than this size from the source. + RangeCheckedBytePtr limited_source = + source.pointerToSubArray(0 /* pos */, RequestedSize()); + + const string kSignature("\0MRM", 4); + return IsSignatureMatched(limited_source, 0 /* offset */, kSignature); + } +}; + +// Check if the file contains a NRW signature "NRW " in the first requested +// bytes. Note the "whitespace". It's important as they are special binary +// values. +const size_t kRequestedSizeForNrwSignature = 4000; +bool ContainsNrwSignature(const RangeCheckedBytePtr& source) { + // Search for kSignatureNrw. + const string kSignatureNrw("NRW\x20\x20\x20", 6); + return IsSignatureFound(source, 0 /* offset */, kRequestedSizeForNrwSignature, + kSignatureNrw, NULL); +} + +// Checks if the file contains the signatures for Nikon formats: +// * the general Nikon singature "NIKON" string. +// * the ReferenceBlackWhite tag. +const size_t kRequestedSizeForNikonSignatures = 4000; +bool ContainsNikonSignatures(const RangeCheckedBytePtr& source, + const bool use_big_endian) { + const string kSignatureNikon("NIKON"); + const string kReferenceBlackWhiteTag = use_big_endian + ? string("\x02\x14\x00\x05", 4) + : string("\x14\x02\x05\x00", 4); + const std::vector<string> kSignatures = {kSignatureNikon, + kReferenceBlackWhiteTag}; + for (auto const& signature : kSignatures) { + if (!IsSignatureFound(source, 0, kRequestedSizeForNikonSignatures, + signature, NULL)) { + return false; + } + } + return true; +} + +// Nikon RAW (NEF extension). 
+class NefTypeChecker : public TypeChecker { + public: + virtual RawImageTypes Type() const { return kNefImage; } + + virtual size_t RequestedSize() const { + return std::max(kRequestedSizeForNikonSignatures, + kRequestedSizeForNrwSignature); + } + + // Check multiple points: + // 1. valid endianness at the beginning of the file; + // 2. magic number at the (offset == 2) position of the file; + // 3. the signature "NIKON" in the requested bytes of the file; + // 4. the ReferenceBlackWhite tag in the requested bytes of the file; + // 5. does not contain the NRW signature. We may also check a special + // signature "RAW " similar to the NRW case, but we got issues in some + // special images that the signature locates in the middle of the file, and it + // costs too long time to check; + virtual bool IsMyType(const RangeCheckedBytePtr& source) const { + // Limits the source length to the RequestedSize(), using it guarantees that + // we will not read more than this size from the source. + RangeCheckedBytePtr limited_source = + source.pointerToSubArray(0 /* pos */, RequestedSize()); + + bool use_big_endian; + if (!DetermineEndianness(limited_source, &use_big_endian)) { + return false; + } + + const unsigned short kTiffMagic = 0x2A; // NOLINT + if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian, + kTiffMagic)) { + return false; + } + + return ContainsNikonSignatures(limited_source, use_big_endian) && + !ContainsNrwSignature(limited_source); // not NRW + } +}; + +// Nikon RAW (NRW extension). +class NrwTypeChecker : public TypeChecker { + public: + virtual RawImageTypes Type() const { return kNrwImage; } + + virtual size_t RequestedSize() const { + return std::max(kRequestedSizeForNikonSignatures, + kRequestedSizeForNrwSignature); + } + + // Check multiple points: + // 1. valid endianness at the beginning of the file; + // 2. magic numbers at the (offset == 2 and offset == 4) positions of the + // file; + // 3. 
the signature "NIKON" in the first requested bytes of the file; + // 4. the ReferenceBlackWhite tag in the requested bytes of the file; + // 5. contains the NRW signature; + virtual bool IsMyType(const RangeCheckedBytePtr& source) const { + // Limits the source length to the RequestedSize(), using it guarantees that + // we will not read more than this size from the source. + RangeCheckedBytePtr limited_source = + source.pointerToSubArray(0 /* pos */, RequestedSize()); + + bool use_big_endian; + if (!DetermineEndianness(limited_source, &use_big_endian)) { + return false; + } + + const unsigned short kTiffMagic = 0x2A; // NOLINT + const unsigned int kTiffOffset = 8; + if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian, + kTiffMagic) || + !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian, + kTiffOffset)) { + return false; + } + + return ContainsNikonSignatures(limited_source, use_big_endian) && + ContainsNrwSignature(limited_source); + } +}; + +// Olympus RAW. +class OrfTypeChecker : public TypeChecker { + public: + virtual RawImageTypes Type() const { return kOrfImage; } + + virtual size_t RequestedSize() const { return 3000; } + + // Check multiple points: + // 1. valid endianness at the beginning of the file; + // 2. tag at the (offset == 2) position of the file; + // 3. signature "OLYMP" in the first requested bytes. + virtual bool IsMyType(const RangeCheckedBytePtr& source) const { + // Limits the source length to the RequestedSize(), using it guarantees that + // we will not read more than this size from the source. 
+ RangeCheckedBytePtr limited_source = + source.pointerToSubArray(0 /* pos */, RequestedSize()); + + bool use_big_endian; + if (!DetermineEndianness(limited_source, &use_big_endian)) { + return false; + } + + const size_t kTagSize = 2; + const unsigned short kTag[kTagSize] = {0x4F52, 0x5352}; // NOLINT + if (!(CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian, + kTag[0]) || + CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian, + kTag[1]))) { + return false; + } + + // Search for kSignatureOlymp in first requested bytes + const string kSignatureOlymp("OLYMP"); + return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(), + kSignatureOlymp, NULL); + } +}; + +// Pentax RAW. +class PefTypeChecker : public TypeChecker { + public: + virtual RawImageTypes Type() const { return kPefImage; } + + virtual size_t RequestedSize() const { return 1280; } + + // Check multiple points: + // 1. valid big endianness at the beginning of the file; + // 2. magic numbers at the (offset == 2 and offset==4) positions of the file; + // 3. signature "AOC " or "PENTAX " in first requested bytes. + virtual bool IsMyType(const RangeCheckedBytePtr& source) const { + // Limits the source length to the RequestedSize(), using it guarantees that + // we will not read more than this size from the source. 
+ RangeCheckedBytePtr limited_source = + source.pointerToSubArray(0 /* pos */, RequestedSize()); + + bool use_big_endian; + if (!DetermineEndianness(limited_source, &use_big_endian)) { + return false; + } + + const unsigned short kTiffMagic = 0x2A; // NOLINT + const unsigned int kTiffOffset = 8; + if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian, + kTiffMagic) || + !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian, + kTiffOffset)) { + return false; + } + + // Search for kSignatureAOC or kSignaturePENTAX in first requested bytes + const string kSignatureAOC("\x41\x4f\x43\x00\x4d\x4d", 6); + const string kSignaturePENTAX("\x50\x45\x4e\x54\x41\x58\x20\x00", 8); + return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(), + kSignatureAOC, NULL) || + IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(), + kSignaturePENTAX, NULL); + } +}; + +// Apple format. +class QtkTypeChecker : public TypeChecker { + public: + virtual RawImageTypes Type() const { return kQtkImage; } + + virtual size_t RequestedSize() const { return 8; } + + // Check only the signature at the beginning of the file. + virtual bool IsMyType(const RangeCheckedBytePtr& source) const { + // Limits the source length to the RequestedSize(), using it guarantees that + // we will not read more than this size from the source. + RangeCheckedBytePtr limited_source = + source.pointerToSubArray(0 /* pos */, RequestedSize()); + + const size_t kSignatureSize = 2; + const string kSignature[kSignatureSize] = { + string("qktk\x00\x00\x00\x08", 8), string("qktn\x00\x00\x00\x08", 8), + }; + return IsSignatureMatched(limited_source, 0 /* offset */, kSignature[0]) || + IsSignatureMatched(limited_source, 0 /* offset */, kSignature[1]); + } +}; + +// Fuji RAW. 
+class RafTypeChecker : public TypeChecker { + public: + virtual RawImageTypes Type() const { return kRafImage; } + + virtual size_t RequestedSize() const { return 8; } + + // Check only the signature at the beginning of the file. + virtual bool IsMyType(const RangeCheckedBytePtr& source) const { + // Limits the source length to the RequestedSize(), using it guarantees that + // we will not read more than this size from the source. + RangeCheckedBytePtr limited_source = + source.pointerToSubArray(0 /* pos */, RequestedSize()); + + const string kSignature("FUJIFILM"); + return IsSignatureMatched(limited_source, 0 /* offset */, kSignature); + } +}; + +// Contax N RAW. +class RawContaxNTypeChecker : public TypeChecker { + public: + virtual RawImageTypes Type() const { return kRawContaxNImage; } + + virtual size_t RequestedSize() const { return 36; } + + // Check only the signature at the (offset == 25) position of the + // file. + virtual bool IsMyType(const RangeCheckedBytePtr& source) const { + // Limits the source length to the RequestedSize(), using it guarantees that + // we will not read more than this size from the source. + RangeCheckedBytePtr limited_source = + source.pointerToSubArray(0 /* pos */, RequestedSize()); + + const string kSignature("ARECOYK"); + return IsSignatureMatched(limited_source, 25, kSignature); + } +}; + +// Panasonic RAW. +class Rw2TypeChecker : public TypeChecker { + public: + virtual RawImageTypes Type() const { return kRw2Image; } + + virtual size_t RequestedSize() const { return 4; } + + // Check two points: 1. valid endianness at the beginning of the + // file; 2. tag at the (offset == 2) position of the file. + virtual bool IsMyType(const RangeCheckedBytePtr& source) const { + // Limits the source length to the RequestedSize(), using it guarantees that + // we will not read more than this size from the source. 
+ RangeCheckedBytePtr limited_source = + source.pointerToSubArray(0 /* pos */, RequestedSize()); + + bool use_big_endian; + if (!DetermineEndianness(source, &use_big_endian)) { + return false; + } + + const unsigned short kTag = 0x55; // NOLINT + return CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian, + kTag); + } +}; + +// Sigma / Polaroid RAW. +class X3fTypeChecker : public TypeChecker { + public: + virtual RawImageTypes Type() const { return kX3fImage; } + + virtual size_t RequestedSize() const { return 4; } + + // Check only the signature at the beginning of the file. + virtual bool IsMyType(const RangeCheckedBytePtr& source) const { + // Limits the source length to the RequestedSize(), using it guarantees that + // we will not read more than this size from the source. + RangeCheckedBytePtr limited_source = + source.pointerToSubArray(0 /* pos */, RequestedSize()); + + const string kSignature("FOVb", 4); + return IsSignatureMatched(limited_source, 0 /* offset */, kSignature); + } +}; + +// This class contains the list of all type checkers. One should used this list +// as a whole to execute the image type recognition. +class TypeCheckerList { + public: + TypeCheckerList() { + // Add all supported RAW type checkers here. 
+ checkers_.push_back(new ArwTypeChecker()); + checkers_.push_back(new Cr2TypeChecker()); + checkers_.push_back(new CrwTypeChecker()); + checkers_.push_back(new DcrTypeChecker()); + checkers_.push_back(new DngTypeChecker()); + checkers_.push_back(new KdcTypeChecker()); + checkers_.push_back(new MosTypeChecker()); + checkers_.push_back(new MrwTypeChecker()); + checkers_.push_back(new NefTypeChecker()); + checkers_.push_back(new NrwTypeChecker()); + checkers_.push_back(new OrfTypeChecker()); + checkers_.push_back(new PefTypeChecker()); + checkers_.push_back(new QtkTypeChecker()); + checkers_.push_back(new RafTypeChecker()); + checkers_.push_back(new RawContaxNTypeChecker()); + checkers_.push_back(new Rw2TypeChecker()); + checkers_.push_back(new X3fTypeChecker()); + + // Sort the checkers by the ascending RequestedSize() to get better + // performance when checking type. + std::sort(checkers_.begin(), checkers_.end(), TypeChecker::Compare); + } + + ~TypeCheckerList() { + for (size_t i = 0; i < checkers_.size(); ++i) { + delete checkers_[i]; + checkers_[i] = NULL; + } + } + + // Returns the type of source data. If it can not be identified, returns + // kNonRawImage. + RawImageTypes GetType(const RangeCheckedBytePtr& source) const { + for (size_t i = 0; i < checkers_.size(); ++i) { + if (checkers_[i]->IsMyType(source)) { + return checkers_[i]->Type(); + } + } + return kNonRawImage; + } + + // Returns the maximum size of requested size of data for identifying image + // type using this class. The class guarantees that it will not read more than + // this size. + size_t RequestedSize() const { + assert(!checkers_.empty()); + // The checkers_ is ascending sorted. The last element is the maximum. 
+ return checkers_.back()->RequestedSize(); + } + + private: + std::vector<TypeChecker*> checkers_; +}; + +} // namespace + +bool IsRaw(const RawImageTypes type) { + switch (type) { + // Non-RAW-image type + case kNonRawImage: { + return false; + } + + // Raw image types + case kArwImage: + case kCr2Image: + case kCrwImage: + case kDcrImage: + case kDngImage: + case kKdcImage: + case kMosImage: + case kMrwImage: + case kNefImage: + case kNrwImage: + case kOrfImage: + case kPefImage: + case kQtkImage: + case kRafImage: + case kRawContaxNImage: + case kRw2Image: + case kX3fImage: { + return true; + } + + default: { + // Unsupported type! + assert(false); + } + } + return false; +} + +RawImageTypes RecognizeRawImageTypeLite(const RangeCheckedBytePtr& source) { + return TypeCheckerList().GetType(source); +} + +size_t GetNumberOfBytesForIsRawLite() { + return TypeCheckerList().RequestedSize(); +} + +bool IsRawLite(const RangeCheckedBytePtr& source) { + return IsRaw(RecognizeRawImageTypeLite(source)); +} + +} // namespace image_type_recognition +} // namespace piex diff --git a/src/image_type_recognition/image_type_recognition_lite.h b/src/image_type_recognition/image_type_recognition_lite.h new file mode 100644 index 0000000..da9caf5 --- /dev/null +++ b/src/image_type_recognition/image_type_recognition_lite.h @@ -0,0 +1,79 @@ +// Copyright 2015 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
//
////////////////////////////////////////////////////////////////////////////////
//
// This file offers functions to determine the type of a binary input source.
// The type recognition here is not 100% accurate; it only offers a quick and
// rough check of the input source. The functions declared here use
// RangeCheckedBytePtr as input.
// NOTE(review): an earlier wording of this comment mentioned Linux-only
// functions taking a StringPiece; no such functions are declared in this
// header.
// The "lite" implementation focuses on performance and guarantees to not read
// more than specified by GetNumberOfBytesForIsRawLite.

#ifndef PIEX_IMAGE_TYPE_RECOGNITION_IMAGE_TYPE_RECOGNITION_LITE_H_
#define PIEX_IMAGE_TYPE_RECOGNITION_IMAGE_TYPE_RECOGNITION_LITE_H_

#include <stddef.h>

#include "src/binary_parse/range_checked_byte_ptr.h"

namespace piex {
namespace image_type_recognition {

// Type of RAW images. Keep the entries in alphabetical order. kNonRawImage is
// fixed to 0; the RAW types follow with consecutive values.
enum RawImageTypes {
  // Non-RAW-image type
  kNonRawImage = 0,

  // raw image types
  kArwImage,
  kCr2Image,
  kCrwImage,
  kDcrImage,
  kDngImage,
  kKdcImage,
  kMosImage,
  kMrwImage,
  kNefImage,
  kNrwImage,
  kOrfImage,
  kPefImage,
  kQtkImage,
  kRafImage,
  kRawContaxNImage,
  kRw2Image,
  kX3fImage,
};

// Checks if the given type is a RAW image type.
bool IsRaw(const RawImageTypes type);

// This function will check the source and return the corresponding image type.
// If the source is not a recognizable type, this function will return
// kNonRawImage.
RawImageTypes RecognizeRawImageTypeLite(
    const binary_parse::RangeCheckedBytePtr& source);

// Returns the maximum number of bytes needed to recognize a RAW image type in
// IsRawLite().
size_t GetNumberOfBytesForIsRawLite();

// This function will check if the source belongs to one of the known RAW types.
+bool IsRawLite(const binary_parse::RangeCheckedBytePtr& source); + +} // namespace image_type_recognition +} // namespace piex + +#endif // PIEX_IMAGE_TYPE_RECOGNITION_IMAGE_TYPE_RECOGNITION_LITE_H_ diff --git a/src/piex.cc b/src/piex.cc new file mode 100644 index 0000000..83906f7 --- /dev/null +++ b/src/piex.cc @@ -0,0 +1,544 @@ +// Copyright 2015 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//////////////////////////////////////////////////////////////////////////////// + +#include "src/piex.h" + +#include <cstdint> +#include <set> +#include <vector> + +#include "src/binary_parse/range_checked_byte_ptr.h" +#include "src/image_type_recognition/image_type_recognition_lite.h" +#include "src/tiff_parser.h" + +namespace piex { +namespace { + +using binary_parse::RangeCheckedBytePtr; +using image_type_recognition::RawImageTypes; +using image_type_recognition::RecognizeRawImageTypeLite; +using tiff_directory::Endian; +using tiff_directory::TiffDirectory; + +Error GetPreviewData(const TagSet& extended_tags, + const std::uint32_t tiff_offset, + const std::uint32_t number_of_ifds, + StreamInterface* stream, TiffContent* tiff_content, + PreviewImageData* preview_image_data) { + TagSet desired_tags = {kExifTagColorSpace, kExifTagDateTimeOriginal, + kExifTagExposureTime, kExifTagFnumber, + kExifTagFocalLength, kExifTagGps, + kExifTagIsoSpeed, kTiffTagDateTime, + kTiffTagExifIfd, kTiffTagMake, + kTiffTagModel, kTiffTagOrientation}; + 
desired_tags.insert(extended_tags.cbegin(), extended_tags.cend()); + + TiffParser tiff_parser(stream, tiff_offset); + Error error = tiff_parser.Parse(desired_tags, number_of_ifds, tiff_content); + if (error != kOk) { + return error; + } + if (tiff_content->tiff_directory.empty()) { + // Returns kFail if the stream does not contain any TIFF structure. + return kFail; + } + return tiff_parser.GetPreviewImageData(*tiff_content, preview_image_data); +} + +Error GetPreviewData(const TagSet& extended_tags, + const std::uint32_t number_of_ifds, + StreamInterface* stream, + PreviewImageData* preview_image_data) { + const std::uint32_t kTiffOffset = 0; + TiffContent tiff_content; + return GetPreviewData(extended_tags, kTiffOffset, number_of_ifds, stream, + &tiff_content, preview_image_data); +} + +Error GetExifData(const std::uint32_t exif_offset, StreamInterface* stream, + PreviewImageData* preview_image_data) { + const TagSet kExtendedTags = {kTiffTagImageWidth, kTiffTagImageLength}; + const std::uint32_t kNumberOfIfds = 1; + TiffContent tiff_content; + return GetPreviewData(kExtendedTags, exif_offset, kNumberOfIfds, stream, + &tiff_content, preview_image_data); +} + +Error GetExifIfd(const Endian endian, StreamInterface* stream, + TiffDirectory* exif_ifd) { + const std::uint32_t kTiffOffset = 0; + std::uint32_t offset_to_ifd; + if (!Get32u(stream, sizeof(offset_to_ifd), endian, &offset_to_ifd)) { + return kFail; + } + + std::uint32_t next_ifd_offset; + TiffDirectory tiff_ifd(endian); + Error error = + ParseDirectory(kTiffOffset, offset_to_ifd, endian, {kTiffTagExifIfd}, + stream, &tiff_ifd, &next_ifd_offset); + if (error != kOk) { + return error; + } + + std::uint32_t exif_offset; + if (!tiff_ifd.Get(kTiffTagExifIfd, &exif_offset)) { + return kUnsupported; + } + + return ParseDirectory(kTiffOffset, exif_offset, endian, {kExifTagMakernotes}, + stream, exif_ifd, &next_ifd_offset); +} + +Error GetMakernoteIfd(const TiffDirectory& exif_ifd, const Endian endian, + 
StreamInterface* stream, std::uint32_t* makernote_offset, + TiffDirectory* makernote_ifd) { + std::uint32_t makernote_length; + if (!exif_ifd.GetOffsetAndLength(kExifTagMakernotes, + tiff_directory::TIFF_TYPE_UNDEFINED, + makernote_offset, &makernote_length)) { + return kUnsupported; + } + + std::uint32_t next_ifd_offset; + return ParseDirectory(*makernote_offset, *makernote_offset + 12, endian, + {kOlymTagCameraSettings, kOlymTagRawProcessing}, stream, + makernote_ifd, &next_ifd_offset); +} + +Error GetCameraSettingsIfd(const TiffDirectory& makernote_ifd, + const std::uint32_t makernote_offset, + const Endian endian, StreamInterface* stream, + TiffDirectory* camera_settings_ifd) { + std::uint32_t camera_settings_offset; + std::uint32_t camera_settings_length; + if (!makernote_ifd.GetOffsetAndLength( + kOlymTagCameraSettings, tiff_directory::TIFF_IFD, + &camera_settings_offset, &camera_settings_length)) { + return kUnsupported; + } + + std::uint32_t next_ifd_offset; + if (!Get32u(stream, camera_settings_offset, endian, + &camera_settings_offset)) { + return kFail; + } + return ParseDirectory(makernote_offset, + makernote_offset + camera_settings_offset, endian, + {kTiffTagBitsPerSample, kTiffTagImageLength}, stream, + camera_settings_ifd, &next_ifd_offset); +} + +Error GetRawProcessingIfd(const TagSet& desired_tags, + const TiffDirectory& makernote_ifd, + const std::uint32_t makernote_offset, + const Endian endian, StreamInterface* stream, + TiffDirectory* raw_processing_ifd) { + std::uint32_t raw_processing_offset; + std::uint32_t raw_processing_length; + if (!makernote_ifd.GetOffsetAndLength( + kOlymTagRawProcessing, tiff_directory::TIFF_IFD, + &raw_processing_offset, &raw_processing_length)) { + return kUnsupported; + } + + std::uint32_t next_ifd_offset; + if (!Get32u(stream, raw_processing_offset, endian, &raw_processing_offset)) { + return kFail; + } + + return ParseDirectory( + makernote_offset, makernote_offset + raw_processing_offset, endian, + 
desired_tags, stream, raw_processing_ifd, &next_ifd_offset); +} + +// Retrieves the preview image offset and length from the camera settings and +// the 'full_width' and 'full_height' from the raw processing ifd in 'stream'. +// Returns kUnsupported if the camera settings are missing, since it is not able +// to get the preview data. +Error GetOlympusPreviewImage(StreamInterface* stream, + PreviewImageData* preview_image_data) { + Endian endian; + if (!GetEndianness(0 /* tiff offset */, stream, &endian)) { + return kFail; + } + + TiffDirectory exif_ifd(endian); + Error error = GetExifIfd(endian, stream, &exif_ifd); + if (error != kOk) { + return error; + } + + std::uint32_t makernote_offset; + TiffDirectory makernote_ifd(endian); + error = GetMakernoteIfd(exif_ifd, endian, stream, &makernote_offset, + &makernote_ifd); + if (error != kOk) { + return error; + } + + TiffDirectory camera_settings_ifd(endian); + error = GetCameraSettingsIfd(makernote_ifd, makernote_offset, endian, stream, + &camera_settings_ifd); + if (error != kOk) { + return error; + } + + const std::uint32_t kPreviewOffset = 0x0101; + const std::uint32_t kPreviewLength = 0x0102; + if (!camera_settings_ifd.Has(kPreviewOffset) || + !camera_settings_ifd.Has(kPreviewLength)) { + return kUnsupported; + } + + camera_settings_ifd.Get(kPreviewOffset, &preview_image_data->jpeg_offset); + preview_image_data->jpeg_offset += makernote_offset; + camera_settings_ifd.Get(kPreviewLength, &preview_image_data->jpeg_length); + + // Get the crop size from the raw processing ifd. 
+ TiffDirectory raw_processing_ifd(endian); + error = GetRawProcessingIfd({kOlymTagAspectFrame}, makernote_ifd, + makernote_offset, endian, stream, + &raw_processing_ifd); + if (error != kOk) { + return error; + } + + if (raw_processing_ifd.Has(kOlymTagAspectFrame)) { + std::vector<std::uint32_t> aspect_frame(4); + if (raw_processing_ifd.Get(kOlymTagAspectFrame, &aspect_frame) && + aspect_frame[2] > aspect_frame[0] && + aspect_frame[3] > aspect_frame[1]) { + preview_image_data->full_width = aspect_frame[2] - aspect_frame[0] + 1; + preview_image_data->full_height = aspect_frame[3] - aspect_frame[1] + 1; + if (preview_image_data->full_width < preview_image_data->full_height) { + std::swap(preview_image_data->full_width, + preview_image_data->full_height); + } + } + } + + return kOk; +} + +// Parses the Fuji Cfa header for the image width and height. +bool RafGetDimension(StreamInterface* stream, std::uint32_t* width, + std::uint32_t* height) { + const Endian endian = tiff_directory::kBigEndian; + std::uint32_t cfa_header_index = 0; // actual position in the cfa header. + std::uint32_t cfa_header_entries = 0; + if (!Get32u(stream, 92 /* cfa header offset */, endian, &cfa_header_index) || + !Get32u(stream, cfa_header_index, endian, &cfa_header_entries)) { + return false; + } + + // Add 4 to point to the actual read position in the cfa header. 
+ cfa_header_index += 4; + + for (std::uint32_t i = 0; i < cfa_header_entries; ++i) { + std::uint16_t id = 0; + std::uint16_t length = 0; + if (!Get16u(stream, cfa_header_index, endian, &id) || + !Get16u(stream, cfa_header_index + 2, endian, &length)) { + return false; + } + + std::uint16_t tmp_width = 0; + std::uint16_t tmp_height = 0; + if (id == 0x0111 /* tags the crop dimensions */ && + Get16u(stream, cfa_header_index + 4, endian, &tmp_height) && + Get16u(stream, cfa_header_index + 6, endian, &tmp_width)) { + *width = tmp_width; + *height = tmp_height; + return true; + } + cfa_header_index += 4 + length; + } + return false; +} + +Error ArwGetPreviewData(StreamInterface* stream, + PreviewImageData* preview_image_data) { + const TagSet extended_tags = {kExifTagHeight, kExifTagWidth, + kTiffTagJpegByteCount, kTiffTagJpegOffset, + kTiffTagSubIfd}; + // This camera maker doesn't embed a full jpeg. + preview_image_data->full_preview = false; + const std::uint32_t kNumberOfIfds = 1; + return GetPreviewData(extended_tags, kNumberOfIfds, stream, + preview_image_data); +} + +Error Cr2GetPreviewData(StreamInterface* stream, + PreviewImageData* preview_image_data) { + const TagSet extended_tags = {kExifTagHeight, kExifTagWidth, + kTiffTagStripByteCounts, kTiffTagStripOffsets}; + // This camera maker embeds at least a full sized jpeg. 
+ preview_image_data->full_preview = true; + const std::uint32_t kNumberOfIfds = 1; + return GetPreviewData(extended_tags, kNumberOfIfds, stream, + preview_image_data); +} + +Error DngGetPreviewData(StreamInterface* stream, + PreviewImageData* preview_image_data) { + const TagSet extended_tags = { + kExifTagDefaultCropSize, kTiffTagCompression, kTiffTagPhotometric, + kTiffTagStripByteCounts, kTiffTagStripOffsets, kTiffTagSubIfd}; + + TiffContent tiff_content; + const std::uint32_t kNumberOfIfds = 4; + Error error = GetPreviewData(extended_tags, 0, kNumberOfIfds, stream, + &tiff_content, preview_image_data); + if (error != kOk) { + return error; + } + + // Find the largest jpeg compressed preview image. + std::uint32_t jpeg_length = 0; + for (const auto& ifd : tiff_content.tiff_directory[0].GetSubDirectories()) { + std::uint32_t compression; + std::uint32_t photometric_interpretation; + if (!ifd.Get(kTiffTagPhotometric, &photometric_interpretation) || + !ifd.Get(kTiffTagCompression, &compression)) { + continue; + } + if (photometric_interpretation == 6 /* YCbCr */ && + (compression == 6 /* JPEG(old) */ || compression == 7 /* JPEG */)) { + std::vector<std::uint32_t> strip_offsets; + std::vector<std::uint32_t> byte_counts; + if (ifd.Get(kTiffTagStripOffsets, &strip_offsets) && + ifd.Get(kTiffTagStripByteCounts, &byte_counts) && + strip_offsets.size() == 1 && byte_counts.size() == 1 && + byte_counts[0] > jpeg_length) { + jpeg_length = byte_counts[0]; + preview_image_data->jpeg_length = jpeg_length; + preview_image_data->jpeg_offset = strip_offsets[0]; + } + } + } + + // A 'jpeg_length' of 0 indicates that we could not find any jpeg preview. + if (jpeg_length == 0) { + return kUnsupported; + } + + // This format doesn't necessarily embed a full jpeg. 
+ preview_image_data->full_preview = false; + + return kOk; +} + +Error NefGetPreviewData(StreamInterface* stream, + PreviewImageData* preview_image_data) { + const TagSet extended_tags = {kTiffTagImageWidth, kTiffTagImageLength, + kTiffTagJpegByteCount, kTiffTagJpegOffset, + kTiffTagSubIfd}; + // This camera maker embeds a full jpeg. + preview_image_data->full_preview = true; + const std::uint32_t kNumberOfIfds = 2; + Error error = + GetPreviewData(extended_tags, kNumberOfIfds, stream, preview_image_data); + if (error != kOk) { + return error; + } + + // The Nikon RAW data provides the dimensions of the sensor image, which are + // slightly larger than the dimensions of the preview image. In order to + // determine the correct full width and height of the image, the preview image + // size needs to be taken into account. Based on experiments the preview image + // dimensions must be at least 90% of the sensor image dimensions to let it be + // a full size preview image. + const float kEpsilon = 0.9f; + + std::uint16_t width; + std::uint16_t height; + if (!GetPreviewDimensions(preview_image_data->jpeg_offset, stream, &width, + &height) || + preview_image_data->full_width == 0 || + preview_image_data->full_height == 0) { + return kUnsupported; + } + + if (static_cast<float>(width) / + static_cast<float>(preview_image_data->full_width) > + kEpsilon || + static_cast<float>(height) / + static_cast<float>(preview_image_data->full_height) > + kEpsilon) { + preview_image_data->full_width = width; + preview_image_data->full_height = height; + } + return kOk; +} + +Error OrfGetPreviewData(StreamInterface* stream, + PreviewImageData* preview_image_data) { + // This camera maker embeds a full jpeg. + preview_image_data->full_preview = true; + + // Omit kUnsupported, because the exif data does not contain any preview + // image. 
+ if (GetExifData(0, stream, preview_image_data) == kFail) { + return kFail; + } + + return GetOlympusPreviewImage(stream, preview_image_data); +} + +Error RafGetPreviewData(StreamInterface* stream, + PreviewImageData* preview_image_data) { + // Parse the Fuji RAW header to get the offset and length of the preview + // image, which contains the Exif information. + const Endian endian = tiff_directory::kBigEndian; + std::uint32_t jpeg_offset = 0; + std::uint32_t jpeg_length = 0; + if (!Get32u(stream, 84 /* jpeg offset */, endian, &jpeg_offset) || + !Get32u(stream, 88 /* jpeg length */, endian, &jpeg_length)) { + return kFail; + } + + if (!RafGetDimension(stream, &preview_image_data->full_width, + &preview_image_data->full_height)) { + return kFail; + } + + // Parse the Exif information from the preview image. Omit kUnsupported, + // because the exif data does not contain any preview image. + const std::uint32_t exif_offset = jpeg_offset + 12; + if (GetExifData(exif_offset, stream, preview_image_data) == kFail) { + return kFail; + } + + // Merge the Exif data with the RAW data to form the preview_image_data. + preview_image_data->jpeg_offset = jpeg_offset; + preview_image_data->jpeg_length = jpeg_length; + + // This camera maker doesn't embed a full jpeg. + preview_image_data->full_preview = false; + + return kOk; +} + +Error Rw2GetPreviewData(StreamInterface* stream, + PreviewImageData* preview_image_data) { + const TagSet extended_tags = {kPanaTagTopBorder, kPanaTagLeftBorder, + kPanaTagBottomBorder, kPanaTagRightBorder, + kPanaTagIso, kPanaTagJpegImage, + kTiffTagJpegByteCount, kTiffTagJpegOffset}; + + // This camera maker embeds not a full jpeg. + preview_image_data->full_preview = false; + + // Parse the RAW data to get the ISO, offset and length of the preview image, + // which contains the Exif information. 
+ const std::uint32_t kNumberOfIfds = 1; + PreviewImageData preview_data; + Error error = + GetPreviewData(extended_tags, kNumberOfIfds, stream, &preview_data); + if (error != kOk) { + return error; + } + + // Parse the Exif information from the preview image. Omit kUnsupported, + // because the exif data does not contain any preview image. + const std::uint32_t exif_offset = preview_data.jpeg_offset + 12; + if (GetExifData(exif_offset, stream, preview_image_data) == kFail) { + return kFail; + } + + // Merge the Exif data with the RAW data to form the preview_image_data. + preview_image_data->jpeg_offset = preview_data.jpeg_offset; + preview_image_data->jpeg_length = preview_data.jpeg_length; + preview_image_data->iso = preview_data.iso; + preview_image_data->full_width = preview_data.full_width; + preview_image_data->full_height = preview_data.full_height; + + return kOk; +} + +} // namespace + +size_t BytesRequiredForIsRaw() { + return image_type_recognition::GetNumberOfBytesForIsRawLite(); +} + +bool IsRaw(StreamInterface* data) { + const size_t bytes = BytesRequiredForIsRaw(); + if (data == nullptr) { + return false; + } + + // Read required number of bytes into a vector. 
+ std::vector<std::uint8_t> file_header(bytes); + if (data->GetData(0, file_header.size(), file_header.data()) != kOk) { + return false; + } + + RangeCheckedBytePtr data_buffer(file_header.data(), file_header.size()); + + return image_type_recognition::IsRawLite(data_buffer); +} + +Error GetPreviewImageData(StreamInterface* data, + PreviewImageData* preview_image_data) { + const size_t bytes = BytesRequiredForIsRaw(); + if (data == nullptr || bytes == 0) { + return kFail; + } + + std::vector<std::uint8_t> file_header(bytes); + Error error = data->GetData(0, file_header.size(), file_header.data()); + if (error != kOk) { + return error; + } + RangeCheckedBytePtr header_buffer(file_header.data(), file_header.size()); + + switch (RecognizeRawImageTypeLite(header_buffer)) { + case image_type_recognition::kArwImage: + return ArwGetPreviewData(data, preview_image_data); + case image_type_recognition::kCr2Image: + return Cr2GetPreviewData(data, preview_image_data); + case image_type_recognition::kDngImage: + return DngGetPreviewData(data, preview_image_data); + case image_type_recognition::kNefImage: + case image_type_recognition::kNrwImage: + return NefGetPreviewData(data, preview_image_data); + case image_type_recognition::kOrfImage: + return OrfGetPreviewData(data, preview_image_data); + case image_type_recognition::kRafImage: + return RafGetPreviewData(data, preview_image_data); + case image_type_recognition::kRw2Image: + return Rw2GetPreviewData(data, preview_image_data); + default: + return kUnsupported; + } +} + +std::vector<std::string> SupportedExtensions() { + std::vector<std::string> extensions; + extensions.push_back("ARW"); + extensions.push_back("CR2"); + extensions.push_back("DNG"); + extensions.push_back("NEF"); + extensions.push_back("NRW"); + extensions.push_back("ORF"); + extensions.push_back("RAF"); + extensions.push_back("RW2"); + return extensions; +} + +} // namespace piex diff --git a/src/piex.h b/src/piex.h new file mode 100644 index 
0000000..39ad578 --- /dev/null +++ b/src/piex.h @@ -0,0 +1,80 @@ +// Copyright 2015 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//////////////////////////////////////////////////////////////////////////////// +// +// The purpose of the preview-image-extractor (piex) is to find and extract the +// largest JPEG compressed preview image contained in a RAW file. +// For details: go/piex +// +// Even for unsupported RAW files we want to provide high quality images using a +// dedicated, small and portable library. That is possible by taking the preview +// image contained in all RAW files. +// +// Typically a preview image is stored as JPEG compressed, full size (or at +// least half size) image in a RAW file. +// +// A typical client code snippet: +// +// // In C++ +// PreviewImageData image_data; +// unique_ptr<StreamInterface> data_stream(new DataStream(file)); +// Error err = GetPreviewImageData(data_stream.get(), &image_data); +// if (err == Error::kFail) { +// // The input data seems to be broken. +// return; +// } else if (err == Error::kUnsupported) { +// // The input data is not supported. +// return; +// } +// +// // Uncompress the JPEG as usual, e.g.
on Android with the BitmapFactory: +// // In Java +// Bitmap bitmap = BitmapFactory.decodeByteArray( +// file.at(image_data.jpeg_offset), image_data.jpeg_length); + +#ifndef PIEX_PIEX_H_ +#define PIEX_PIEX_H_ + +#include <string> +#include <vector> + +#include "src/piex_types.h" + +namespace piex { + +// Returns the maximum number of bytes IsRaw() will read from the stream. +size_t BytesRequiredForIsRaw(); + +// Returns true if 'data' contains a RAW file format, even if it is not +// supported by Piex, false otherwise. Reads at most BytesRequiredForIsRaw() +// from the stream. +bool IsRaw(StreamInterface* data); + +// Gets the largest JPEG compressed preview image data. On success +// 'preview_image_data' contains image metadata, the unverified length and the +// offset to a JPEG compressed image from the beginning of the file. +// +// Returns 'kFail' when something with the data is wrong. +// Returns 'kUnsupported' if no preview image data was found. +Error GetPreviewImageData(StreamInterface* data, + PreviewImageData* preview_image_data); + +// Returns a vector of upper case file extensions, which are used as a first +// step to quickly guess a supported file format. +std::vector<std::string> SupportedExtensions(); + +} // namespace piex + +#endif // PIEX_PIEX_H_ diff --git a/src/piex_types.h b/src/piex_types.h new file mode 100644 index 0000000..58a1567 --- /dev/null +++ b/src/piex_types.h @@ -0,0 +1,98 @@ +// Copyright 2015 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and
// limitations under the License.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef PIEX_PIEX_TYPES_H_
#define PIEX_PIEX_TYPES_H_

#include <cstddef>
#include <cstdint>
#include <string>

namespace piex {

// Defines the error codes used by piex.
enum Error {
  kOk,
  kFail,
  kUnsupported,
};

// Contains relevant image information as well as the 'jpeg_offset' and the
// 'jpeg_length' which are used to obtain the JPEG compressed preview image.
// 'full_width' and 'full_height' are correctly cropped but not rotated.
//
// Every member has a default value, so a default constructed object can be
// read safely even if an extraction call returns early.
struct PreviewImageData {
  enum ColorSpace {
    kSrgb,
    kAdobeRgb,
  };
  struct Rational {
    std::uint32_t numerator = 0;
    std::uint32_t denominator = 1;
  };
  struct Gps {
    // Indicates if the gps data is valid to use.
    bool is_valid = false;

    char latitude_ref = '\0';  // Either 'N' or 'S' once set
    Rational latitude[3];
    char longitude_ref = '\0';  // Either 'E' or 'W' once set
    Rational longitude[3];
    bool altitude_ref = false;  // true is above, false below sea level
    Rational altitude;
  };

  // Required data to find the preview image and to handle it correctly.
  std::uint32_t jpeg_offset = 0;
  std::uint32_t jpeg_length = 0;
  std::uint32_t exif_orientation = 1;  // horizontal as default
  ColorSpace color_space = kSrgb;

  // Optional Exif metadata that describes the image.
  std::uint32_t full_width = 0;
  std::uint32_t full_height = 0;
  std::string maker;
  std::string model;
  std::string date_time;
  std::uint32_t iso = 0;
  Rational exposure_time;
  Rational fnumber;
  Rational focal_length;
  Gps gps;

  // Hint of the extracted preview size compared to the actual RAW image.
  // If full_preview == true, then the preview is supposed to have a similar or
  // larger size than the RAW image, else the preview image might be
  // significantly smaller, e.g. only half of the RAW image size.
  bool full_preview = false;
};

// Defines the StreamInterface that needs to be implemented by the client.
class StreamInterface {
 public:
  virtual ~StreamInterface() {}

  // Reads 'length' amount of bytes from 'offset' to 'data'. The 'data' buffer
  // provided by the caller, guaranteed to be at least "length" bytes long.
  // On 'kOk' the 'data' pointer contains 'length' valid bytes beginning at
  // 'offset' bytes from the start of the stream.
  // Returns 'kFail' if 'offset' + 'length' exceeds the stream and does not
  // change the contents of 'data'.
  virtual Error GetData(const size_t offset, const size_t length,
                        std::uint8_t* data) = 0;
};

}  // namespace piex

#endif  // PIEX_PIEX_TYPES_H_
// diff --git a/src/tiff_directory/tiff_directory.cc b/src/tiff_directory/tiff_directory.cc new file mode 100644 index 0000000..c0f2c49 --- /dev/null +++ b/src/tiff_directory/tiff_directory.cc @@ -0,0 +1,282 @@
// Copyright 2015 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+// +//////////////////////////////////////////////////////////////////////////////// + +#include "src/tiff_directory/tiff_directory.h" + +#include <assert.h> +#include <climits> + +#include "src/binary_parse/range_checked_byte_ptr.h" + +namespace piex { +namespace tiff_directory { +namespace { + +using binary_parse::Get16s; +using binary_parse::Get16u; +using binary_parse::Get32s; +using binary_parse::Get32u; +using binary_parse::MemoryStatus; +using binary_parse::RANGE_CHECKED_BYTE_SUCCESS; +using binary_parse::RangeCheckedBytePtr; + +} // namespace + +TiffDirectory::TiffDirectory(Endian endian) : endian_(endian) {} + +bool TiffDirectory::Has(const Tag tag) const { + return directory_entries_.count(tag) == 1; +} + +bool TiffDirectory::Get(const Tag tag, std::vector<std::uint8_t>* value) const { + const DirectoryEntry* directory_entry = Find(tag); + if (directory_entry == NULL || + (directory_entry->type != TIFF_TYPE_BYTE && + directory_entry->type != TIFF_TYPE_UNDEFINED)) { + return false; + } + + *value = directory_entry->value; + return true; +} + +bool TiffDirectory::Get(const Tag tag, std::string* value) const { + const DirectoryEntry* directory_entry = Find(tag); + if (directory_entry == NULL || directory_entry->type != TIFF_TYPE_ASCII) { + return false; + } + *value = + std::string(directory_entry->value.begin(), directory_entry->value.end()); + return true; +} + +bool TiffDirectory::Get(const Tag tag, std::uint32_t* value) const { + std::vector<std::uint32_t> my_values; + if (!Get(tag, &my_values) || my_values.size() != 1) { + return false; + } + *value = my_values[0]; + return true; +} + +bool TiffDirectory::Get(const Tag tag, + std::vector<std::uint32_t>* value) const { + const DirectoryEntry* directory_entry = Find(tag); + if (directory_entry == NULL || (directory_entry->type != TIFF_TYPE_SHORT && + directory_entry->type != TIFF_TYPE_LONG)) { + return false; + } + + RangeCheckedBytePtr value_ptr(&directory_entry->value[0], + 
directory_entry->value.size()); + std::vector<std::uint32_t> my_value(directory_entry->count); + const bool is_big_endian = (endian_ == kBigEndian); + + MemoryStatus err = RANGE_CHECKED_BYTE_SUCCESS; + for (std::uint32_t c = 0; c < directory_entry->count; ++c) { + if (directory_entry->type == TIFF_TYPE_SHORT) { + my_value[c] = Get16u(value_ptr + c * 2, is_big_endian, &err); + } else { + my_value[c] = Get32u(value_ptr + c * 4, is_big_endian, &err); + } + } + if (err != RANGE_CHECKED_BYTE_SUCCESS) { + return false; + } + + *value = my_value; + return true; +} + +bool TiffDirectory::Get(const Tag tag, Rational* value) const { + std::vector<Rational> my_values; + if (!Get(tag, &my_values) || my_values.size() != 1) { + return false; + } + *value = my_values[0]; + return true; +} + +bool TiffDirectory::Get(const Tag tag, std::vector<Rational>* value) const { + const DirectoryEntry* directory_entry = Find(tag); + if (directory_entry == NULL || + (directory_entry->type != TIFF_TYPE_SHORT && + directory_entry->type != TIFF_TYPE_LONG && + directory_entry->type != TIFF_TYPE_RATIONAL)) { + return false; + } + + RangeCheckedBytePtr value_ptr(&directory_entry->value[0], + directory_entry->value.size()); + std::vector<Rational> my_value(directory_entry->count); + const bool is_big_endian = (endian_ == kBigEndian); + + MemoryStatus err = RANGE_CHECKED_BYTE_SUCCESS; + for (std::uint32_t c = 0; c < directory_entry->count; ++c) { + switch (directory_entry->type) { + case TIFF_TYPE_SHORT: { + my_value[c].numerator = Get16u(value_ptr + c * 2, is_big_endian, &err); + my_value[c].denominator = 1; + break; + } + case TIFF_TYPE_LONG: { + my_value[c].numerator = Get32u(value_ptr + c * 4, is_big_endian, &err); + my_value[c].denominator = 1; + break; + } + case TIFF_TYPE_RATIONAL: { + my_value[c].numerator = Get32u(value_ptr + c * 8, is_big_endian, &err); + my_value[c].denominator = + Get32u(value_ptr + c * 8 + 4, is_big_endian, &err); + if (my_value[c].denominator == 0) { + return false; + } 
+ break; + } + } + } + if (err != RANGE_CHECKED_BYTE_SUCCESS) { + return false; + } + + *value = my_value; + return true; +} + +bool TiffDirectory::Get(const Tag tag, SRational* value) const { + std::vector<SRational> my_values; + if (!Get(tag, &my_values) || my_values.size() != 1) { + return false; + } + *value = my_values[0]; + return true; +} + +bool TiffDirectory::Get(const Tag tag, std::vector<SRational>* value) const { + const DirectoryEntry* directory_entry = Find(tag); + if (directory_entry == NULL || + (directory_entry->type != TIFF_TYPE_SSHORT && + directory_entry->type != TIFF_TYPE_SLONG && + directory_entry->type != TIFF_TYPE_SRATIONAL)) { + return false; + } + + RangeCheckedBytePtr value_ptr(&directory_entry->value[0], + directory_entry->value.size()); + std::vector<SRational> my_value(directory_entry->count); + const bool is_big_endian = (endian_ == kBigEndian); + + MemoryStatus err = RANGE_CHECKED_BYTE_SUCCESS; + for (std::uint32_t c = 0; c < directory_entry->count; ++c) { + switch (directory_entry->type) { + case TIFF_TYPE_SSHORT: { + my_value[c].numerator = Get16s(value_ptr + c * 2, is_big_endian, &err); + my_value[c].denominator = 1; + break; + } + case TIFF_TYPE_SLONG: { + my_value[c].numerator = Get32s(value_ptr + c * 4, is_big_endian, &err); + my_value[c].denominator = 1; + break; + } + case TIFF_TYPE_SRATIONAL: { + my_value[c].numerator = Get32s(value_ptr + c * 8, is_big_endian, &err); + my_value[c].denominator = + Get32s(value_ptr + c * 8 + 4, is_big_endian, &err); + if (my_value[c].denominator == 0) { + return false; + } + break; + } + } + } + if (err != RANGE_CHECKED_BYTE_SUCCESS) { + return false; + } + + *value = my_value; + return true; +} + +bool TiffDirectory::GetOffsetAndLength(const Tag tag, const Type type, + std::uint32_t* offset, + std::uint32_t* length) const { + const DirectoryEntry* directory_entry = Find(tag); + if (directory_entry == NULL || directory_entry->type != type) { + return false; + } + *offset = 
directory_entry->offset; + *length = directory_entry->value.size(); + return true; +} + +void TiffDirectory::AddEntry(const Tag tag, const Type type, + const std::uint32_t count, + const std::uint32_t offset, + const std::vector<std::uint8_t>& value) { + assert(SizeOfType(type, NULL /* success */) * count == value.size()); + + const DirectoryEntry directory_entry = {type, count, offset, value}; + directory_entries_[tag] = directory_entry; + tag_order_.push_back(tag); +} + +void TiffDirectory::AddSubDirectory(const TiffDirectory& sub_directory) { + sub_directories_.push_back(sub_directory); +} + +const std::vector<TiffDirectory>& TiffDirectory::GetSubDirectories() const { + return sub_directories_; +} + +const TiffDirectory::DirectoryEntry* TiffDirectory::Find(const Tag tag) const { + std::map<Tag, DirectoryEntry>::const_iterator iter = + directory_entries_.find(tag); + if (iter == directory_entries_.end()) { + return NULL; + } + return &iter->second; +} + +size_t SizeOfType(const TiffDirectory::Type type, bool* success) { + switch (type) { + case TIFF_TYPE_BYTE: + case TIFF_TYPE_ASCII: + case TIFF_TYPE_SBYTE: + case TIFF_TYPE_UNDEFINED: + return 1; + case TIFF_TYPE_SHORT: + case TIFF_TYPE_SSHORT: + return 2; + case TIFF_TYPE_LONG: + case TIFF_TYPE_SLONG: + case TIFF_TYPE_FLOAT: + case TIFF_IFD: + return 4; + case TIFF_TYPE_RATIONAL: + case TIFF_TYPE_SRATIONAL: + case TIFF_TYPE_DOUBLE: + return 8; + } + + if (success != NULL) { + *success = false; + } + return 0; +} + +} // namespace tiff_directory +} // namespace piex diff --git a/src/tiff_directory/tiff_directory.h b/src/tiff_directory/tiff_directory.h new file mode 100644 index 0000000..855adfc --- /dev/null +++ b/src/tiff_directory/tiff_directory.h @@ -0,0 +1,161 @@ +// Copyright 2015 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//////////////////////////////////////////////////////////////////////////////// +// +// TiffDirectory contains an abstraction of an image file directory (IFD) as +// proposed by the TIFF specification. + +#ifndef PIEX_TIFF_DIRECTORY_TIFF_DIRECTORY_H_ +#define PIEX_TIFF_DIRECTORY_TIFF_DIRECTORY_H_ + +#include <cstdint> +#include <map> +#include <string> +#include <vector> + +namespace piex { +namespace tiff_directory { + +enum Endian { + kLittleEndian = 0, + kBigEndian = 1, +}; + +struct Rational { + std::uint32_t numerator; + std::uint32_t denominator; +}; + +struct SRational { + std::int32_t numerator; + std::int32_t denominator; +}; + +enum TiffTypes { + TIFF_TYPE_NONE = 0, + TIFF_TYPE_BYTE, /* 8bit unsigned */ + TIFF_TYPE_ASCII, /* Ascii string (terminated by \0) */ + TIFF_TYPE_SHORT, /* 16bit unsigned */ + TIFF_TYPE_LONG, /* 32bit unsigned */ + TIFF_TYPE_RATIONAL, /* 32bit/32bit unsigned */ + TIFF_TYPE_SBYTE, /* 8bit signed */ + TIFF_TYPE_UNDEFINED, /* undefined (depend of tag) */ + TIFF_TYPE_SSHORT, /* 16bit signed*/ + TIFF_TYPE_SLONG, /* 32bit signed */ + TIFF_TYPE_SRATIONAL, /* 32bit/32bit signed */ + TIFF_TYPE_FLOAT, /* 32-bit IEEE float */ + TIFF_TYPE_DOUBLE, /* 64-bit IEEE float */ + TIFF_IFD, /* IFD type */ +}; + +// The TiffDirectory class stores all information necessary to interpret TIFF +// tags and manages also potential sub directories. 
+class TiffDirectory { + public: + typedef std::uint32_t Tag; + typedef std::uint32_t Type; + + explicit TiffDirectory(Endian endianness); + + // Returns true if the directory contains the specified tag. + bool Has(const Tag tag) const; + + // Gets the value of a tag of byte vector type. + // Returns false if the tag is not part of the directory or if the + // type is not BYTE or UNDEFINED. + bool Get(const Tag tag, std::vector<std::uint8_t>* value) const; + + // Gets the value of a tag of type "ASCII". + // Returns false if the tag is not part of the directory or if its + // type is not ASCII. + // If *err is not equal to ERR_OK initially, this method does nothing. + bool Get(const Tag tag, std::string* value) const; + + // Gets the value of a tag of type "SHORT" or "LONG". + // Returns false + // - if the tag is not part of the directory or + // - if the type is not SHORT or LONG, or + // - if, for the non-vector version, the number of elements is unequal to 1. + bool Get(const Tag tag, std::uint32_t* value) const; + bool Get(const Tag tag, std::vector<std::uint32_t>* value) const; + + // Gets the value of a tag of type "SHORT", "LONG" or "RATIONAL". + // Returns false + // - if the tag is not part of the directory or + // - if the type is not SHORT, LONG or RATIONAL, or + // - if, for the non-vector version, the number of elements is unequal to 1. + bool Get(const Tag tag, Rational* value) const; + bool Get(const Tag tag, std::vector<Rational>* value) const; + + // Gets the value of a tag of type "SSHORT", "SLONG" or "SRATIONAL". + // Returns false + // - if the tag is not part of the directory or + // - if the type is not SSHORT, SLONG or SRATIONAL, or + // - if, for the non-vector version, the number of elements is unequal to 1. + bool Get(const Tag tag, SRational* value) const; + bool Get(const Tag tag, std::vector<SRational>* value) const; + + // Gets the 'offset' to the value data in the file and its 'length' in bytes. 
+ // Returns false if the 'tag' is not part of the directory or if its type does + // not match the desired 'type'. + bool GetOffsetAndLength(const Tag tag, const Type type, std::uint32_t* offset, + std::uint32_t* length) const; + + // Adds a tag to the directory, setting its type, number of elements + // ('count'), the offset to the binary data in the file ('offset') and the + // associated binary data ('value'). The binary data is encoded according to + // the TIFF specification with the endianness that was specified when this + // object was constructed. The caller must ensure that the size of 'value' and + // the data it contains are consistent with 'type' and 'count'. It is not + // legal to call this method with a tag that is already contained in the + // directory. + void AddEntry(const Tag tag, const Type type, const std::uint32_t count, + const std::uint32_t offset, + const std::vector<std::uint8_t>& value); + + // Add a subdirectory to the directory. + void AddSubDirectory(const TiffDirectory& sub_directory); + + // Returns a vector of all subdirectories contained in this directory. + const std::vector<TiffDirectory>& GetSubDirectories() const; + + private: + struct DirectoryEntry { + Type type; + std::uint32_t count; // The number of values of type, not a byte count. + std::uint32_t offset; // Offset of the entry's data in the file. '0' means + // the offset is not set. + std::vector<std::uint8_t> value; + }; + + const DirectoryEntry* Find(const Tag tag) const; + + std::map<Tag, DirectoryEntry> directory_entries_; + std::vector<Tag> tag_order_; + std::vector<TiffDirectory> sub_directories_; + Endian endian_; +}; + +// Returns the number of bytes a single value of 'type' requires; this is +// guaranteed to be in the range of 0 to 8. +// Returns 0 if 'type' is TIFF_TYPE_NONE or invalid. Sets 'success' to false if +// 'type' is invalid. If you are not interested in 'success' you can set it to +// a nullptr. 
+size_t SizeOfType(const TiffDirectory::Type type, bool* success); + +} // namespace tiff_directory +} // namespace piex + +#endif // PIEX_TIFF_DIRECTORY_TIFF_DIRECTORY_H_ diff --git a/src/tiff_parser.cc b/src/tiff_parser.cc new file mode 100644 index 0000000..f46762a --- /dev/null +++ b/src/tiff_parser.cc @@ -0,0 +1,570 @@ +// Copyright 2015 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//////////////////////////////////////////////////////////////////////////////// + +#include "src/tiff_parser.h" + +#include "src/tiff_directory/tiff_directory.h" + +namespace piex { +namespace { + +using tiff_directory::Endian; +using tiff_directory::Rational; +using tiff_directory::SRational; +using tiff_directory::SizeOfType; +using tiff_directory::TIFF_TYPE_LONG; +using tiff_directory::TIFF_TYPE_UNDEFINED; +using tiff_directory::TiffDirectory; +using tiff_directory::kBigEndian; +using tiff_directory::kLittleEndian; + +// Specifies all tags that might be of interest to parse JPEG data. +const std::uint32_t kStartOfFrame = 0xFFC0; +const std::uint32_t kStartOfImage = 0xFFD8; +const std::uint32_t kStartOfScan = 0xFFDA; + +// Reads the width and height of the full resolution image. The tag groups are +// exclusive. 
+bool GetFullDimension(const TiffDirectory& tiff_directory, std::uint32_t* width, + std::uint32_t* height) { + if (tiff_directory.Has(kExifTagWidth) && tiff_directory.Has(kExifTagHeight)) { + if (!tiff_directory.Get(kExifTagWidth, width) || + !tiff_directory.Get(kExifTagHeight, height)) { + return false; + } + } else if (tiff_directory.Has(kTiffTagImageWidth) && + tiff_directory.Has(kTiffTagImageLength)) { + if (!tiff_directory.Get(kTiffTagImageWidth, width) || + !tiff_directory.Get(kTiffTagImageLength, height)) { + return false; + } + } else if (tiff_directory.Has(kPanaTagTopBorder) && + tiff_directory.Has(kPanaTagLeftBorder) && + tiff_directory.Has(kPanaTagBottomBorder) && + tiff_directory.Has(kPanaTagRightBorder)) { + std::uint32_t left; + std::uint32_t right; + std::uint32_t top; + std::uint32_t bottom; + if (tiff_directory.Get(kPanaTagLeftBorder, &left) && + tiff_directory.Get(kPanaTagRightBorder, &right) && + tiff_directory.Get(kPanaTagTopBorder, &top) && + tiff_directory.Get(kPanaTagBottomBorder, &bottom) && bottom > top && + right > left) { + *height = bottom - top; + *width = right - left; + } else { + return false; + } + } else if (tiff_directory.Has(kExifTagDefaultCropSize)) { + std::vector<std::uint32_t> crop(2); + std::vector<Rational> crop_rational(2); + if (tiff_directory.Get(kExifTagDefaultCropSize, &crop)) { + *width = crop[0]; + *height = crop[1]; + } else if (tiff_directory.Get(kExifTagDefaultCropSize, &crop_rational) && + crop_rational[0].denominator != 0 && + crop_rational[1].denominator != 0) { + *width = crop_rational[0].numerator / crop_rational[0].denominator; + *height = crop_rational[1].numerator / crop_rational[1].denominator; + } else { + return false; + } + } + return true; +} + +bool GetRational(const Tags& tag, const TiffDirectory& directory, + const int data_size, PreviewImageData::Rational* data) { + std::vector<Rational> value; + if (directory.Get(tag, &value)) { + for (size_t i = 0; i < value.size(); ++i) { + data[i].numerator = 
value[i].numerator; + data[i].denominator = value[i].denominator; + } + return true; + } + return false; +} + +void FillGpsPreviewImageData(const TiffDirectory& gps_directory, + PreviewImageData* preview_image_data) { + if (gps_directory.Has(kGpsTagLatitudeRef) && + gps_directory.Has(kGpsTagLatitude) && + gps_directory.Has(kGpsTagLongitudeRef) && + gps_directory.Has(kGpsTagLongitude)) { + preview_image_data->gps.is_valid = false; + std::string value; + if (!gps_directory.Get(kGpsTagLatitudeRef, &value) || value.empty() || + (value[0] != 'N' && value[0] != 'S') || + !GetRational(kGpsTagLatitude, gps_directory, 3, + &preview_image_data->gps.latitude[0])) { + return; + } + preview_image_data->gps.latitude_ref = value[0]; + + if (!gps_directory.Get(kGpsTagLongitudeRef, &value) || value.empty() || + (value[0] != 'E' && value[0] != 'W') || + !GetRational(kGpsTagLongitude, gps_directory, 3, + &preview_image_data->gps.longitude[0])) { + return; + } + preview_image_data->gps.longitude_ref = value[0]; + + if (gps_directory.Has(kGpsTagAltitudeRef) && + gps_directory.Has(kGpsTagAltitude)) { + std::vector<std::uint8_t> bytes; + if (!gps_directory.Get(kGpsTagAltitudeRef, &bytes) || bytes.empty() || + !GetRational(kGpsTagAltitude, gps_directory, 1, + &preview_image_data->gps.altitude)) { + return; + } + preview_image_data->gps.altitude_ref = bytes[0] != 0; + } + preview_image_data->gps.is_valid = true; + } +} + +Error FillPreviewImageData(const TiffDirectory& tiff_directory, + PreviewImageData* preview_image_data, + bool* has_preview) { + bool success = true; + // Get jpeg_offset and jpeg_length + if (tiff_directory.Has(kTiffTagStripOffsets) && + tiff_directory.Has(kTiffTagStripByteCounts)) { + std::vector<std::uint32_t> strip_offsets; + std::vector<std::uint32_t> strip_byte_counts; + if (!tiff_directory.Get(kTiffTagStripOffsets, &strip_offsets) || + !tiff_directory.Get(kTiffTagStripByteCounts, &strip_byte_counts)) { + return kFail; + } + if (strip_offsets.size() != 1 || 
strip_byte_counts.size() != 1) { + return kUnsupported; + } + preview_image_data->jpeg_offset = strip_offsets[0]; + preview_image_data->jpeg_length = strip_byte_counts[0]; + *has_preview = true; + } else if (tiff_directory.Has(kTiffTagJpegOffset) && + tiff_directory.Has(kTiffTagJpegByteCount)) { + success &= tiff_directory.Get(kTiffTagJpegOffset, + &preview_image_data->jpeg_offset); + success &= tiff_directory.Get(kTiffTagJpegByteCount, + &preview_image_data->jpeg_length); + *has_preview = true; + } else if (tiff_directory.Has(kPanaTagJpegImage)) { + if (!tiff_directory.GetOffsetAndLength(kPanaTagJpegImage, + TIFF_TYPE_UNDEFINED, + &preview_image_data->jpeg_offset, + &preview_image_data->jpeg_length)) { + return kFail; + } + *has_preview = true; + } + + // Get exif_orientation + if (tiff_directory.Has(kTiffTagOrientation)) { + success &= tiff_directory.Get(kTiffTagOrientation, + &preview_image_data->exif_orientation); + } + + // Get color_space + if (tiff_directory.Has(kExifTagColorSpace)) { + std::uint32_t color_space; + success &= tiff_directory.Get(kExifTagColorSpace, &color_space); + if (color_space == 1) { + preview_image_data->color_space = PreviewImageData::kSrgb; + } else if (color_space == 65535) { + preview_image_data->color_space = PreviewImageData::kAdobeRgb; + } + } + + success &= GetFullDimension(tiff_directory, &preview_image_data->full_width, + &preview_image_data->full_height); + + if (tiff_directory.Has(kTiffTagMake)) { + success &= tiff_directory.Get(kTiffTagMake, &preview_image_data->maker); + } + + if (tiff_directory.Has(kTiffTagModel)) { + success &= tiff_directory.Get(kTiffTagModel, &preview_image_data->model); + } + + if (tiff_directory.Has(kExifTagDateTimeOriginal)) { + success &= tiff_directory.Get(kExifTagDateTimeOriginal, + &preview_image_data->date_time); + } + + if (tiff_directory.Has(kExifTagIsoSpeed)) { + success &= tiff_directory.Get(kExifTagIsoSpeed, &preview_image_data->iso); + } else if (tiff_directory.Has(kPanaTagIso)) { + 
success &= tiff_directory.Get(kPanaTagIso, &preview_image_data->iso); + } + + if (tiff_directory.Has(kExifTagExposureTime)) { + success &= GetRational(kExifTagExposureTime, tiff_directory, 1, + &preview_image_data->exposure_time); + } + + if (tiff_directory.Has(kExifTagFnumber)) { + success &= GetRational(kExifTagFnumber, tiff_directory, 1, + &preview_image_data->fnumber); + } + + if (tiff_directory.Has(kExifTagFocalLength)) { + success &= GetRational(kExifTagFocalLength, tiff_directory, 1, + &preview_image_data->focal_length); + } + + if (!success) { + return kFail; + } + + return kOk; +} + +const TiffDirectory* FindFirstTagInIfds(const Tags& tag, + const IfdVector& tiff_directory) { + for (std::uint32_t i = 0; i < tiff_directory.size(); ++i) { + if (tiff_directory[i].Has(tag)) { + return &tiff_directory[i]; + } + + // Recursively search sub directories. + const TiffDirectory* sub_directory = + FindFirstTagInIfds(tag, tiff_directory[i].GetSubDirectories()); + if (sub_directory != NULL) { + return sub_directory; + } + } + return NULL; +} + +// Gets the SubIfd content. +void ParseSubIfds(const std::uint32_t tiff_offset, const TagSet& desired_tags, + const std::uint32_t max_number_ifds, const Endian endian, + StreamInterface* stream, TiffDirectory* tiff_ifd, + Error* error) { + if (*error == kOk && tiff_ifd->Has(kTiffTagSubIfd)) { + std::uint32_t offset = 0; + std::uint32_t length = 0; + tiff_ifd->GetOffsetAndLength(kTiffTagSubIfd, TIFF_TYPE_LONG, &offset, + &length); + length /= 4; // length in bytes divided by 4 gives number of IFDs. 
+ for (std::uint32_t j = 0; j < length && j < max_number_ifds; ++j) { + std::uint32_t sub_offset; + if (!Get32u(stream, offset + 4 * j, endian, &sub_offset)) { + *error = kFail; + return; + } + + std::uint32_t next_ifd_offset; + TiffDirectory sub_ifd(static_cast<Endian>(endian)); + *error = ParseDirectory(tiff_offset, sub_offset, endian, desired_tags, + stream, &sub_ifd, &next_ifd_offset); + if (*error != kOk) { + return; + } + + tiff_ifd->AddSubDirectory(sub_ifd); + } + } +} + +} // namespace + +bool Get16u(StreamInterface* stream, const std::uint32_t offset, + const Endian& endian, std::uint16_t* value) { + std::uint8_t data[2]; + if (stream->GetData(offset, 2, data) == kOk) { + if (endian == kBigEndian) { + *value = (data[0] * 0x100) | data[1]; + } else { + *value = (data[1] * 0x100) | data[0]; + } + return true; + } else { + return false; + } +} + +bool Get32u(StreamInterface* stream, const std::uint32_t offset, + const Endian& endian, std::uint32_t* value) { + std::uint8_t data[4]; + if (stream->GetData(offset, 4, data) == kOk) { + if (endian == kBigEndian) { + *value = (data[0] * 0x1000000) | (data[1] * 0x10000) | (data[2] * 0x100) | + data[3]; + } else { + *value = (data[3] * 0x1000000) | (data[2] * 0x10000) | (data[1] * 0x100) | + data[0]; + } + return true; + } else { + return false; + } +} + +std::vector<std::uint8_t> GetData(const size_t offset, const size_t length, + StreamInterface* stream, Error* error) { + // Read in chunks with a maximum size of 1 MiB. 
+ const size_t kChunkSize = 1048576; + + std::vector<std::uint8_t> data; + size_t processed_data = 0; + while (*error == kOk && processed_data < length) { + size_t chunk_length = kChunkSize; + if (length - data.size() < kChunkSize) { + chunk_length = length - data.size(); + } + + data.resize(processed_data + chunk_length); + *error = stream->GetData(offset + processed_data, chunk_length, + &data[processed_data]); + + processed_data += chunk_length; + } + return data; +} + +bool GetEndianness(const std::uint32_t tiff_offset, StreamInterface* stream, + Endian* endian) { + const std::uint8_t kTiffBigEndianMagic[] = {'M', 'M'}; + const std::uint8_t kTiffLittleEndianMagic[] = {'I', 'I'}; + std::uint8_t tiff_endian[sizeof(kTiffBigEndianMagic)]; + if (stream->GetData(tiff_offset, sizeof(tiff_endian), &tiff_endian[0]) != + kOk) { + return false; + } + + if (!memcmp(tiff_endian, kTiffLittleEndianMagic, sizeof(tiff_endian))) { + *endian = kLittleEndian; + return true; + } else if (!memcmp(tiff_endian, kTiffBigEndianMagic, sizeof(tiff_endian))) { + *endian = kBigEndian; + return true; + } else { + return false; + } +} + +bool GetPreviewDimensions(const std::uint32_t jpeg_offset, + StreamInterface* stream, std::uint16_t* width, + std::uint16_t* height) { + const Endian endian = kBigEndian; + std::uint32_t offset = jpeg_offset; + std::uint16_t segment; + + // Parse the JPEG header until we find Frame0 which contains the image width + // and height or the actual image data starts (StartOfScan) + do { + if (!Get16u(stream, offset, endian, &segment)) { + return false; + } + offset += 2; + + switch (segment) { + case kStartOfImage: + break; + case kStartOfFrame: + return Get16u(stream, offset + 3, endian, height) && + Get16u(stream, offset + 5, endian, width); + default: { + std::uint16_t length; + if (!Get16u(stream, offset, endian, &length)) { + return false; + } + offset += length; + } + } + } while (segment != kStartOfScan); + + // No width and hight information found. 
+ return false; +} + +Error ParseDirectory(const std::uint32_t tiff_offset, + const std::uint32_t ifd_offset, const Endian endian, + const TagSet& desired_tags, StreamInterface* stream, + TiffDirectory* tiff_directory, + std::uint32_t* next_ifd_offset) { + std::uint16_t number_of_entries; + if (!Get16u(stream, ifd_offset, endian, &number_of_entries)) { + return kFail; + } + + for (std::uint32_t i = 0; + i < static_cast<std::uint32_t>(number_of_entries) * 12; i += 12) { + std::uint16_t tag; + std::uint16_t type; + std::uint32_t number_of_elements; + if (Get16u(stream, ifd_offset + 2 + i, endian, &tag) && + Get16u(stream, ifd_offset + 4 + i, endian, &type) && + Get32u(stream, ifd_offset + 6 + i, endian, &number_of_elements)) { + // Check if the current tag should be handled. + if (desired_tags.count(static_cast<Tags>(tag)) != 1) { + continue; + } + } else { + return kFail; + } + + const size_t type_size = SizeOfType(type, nullptr /* no error */); + + // Check that type_size * number_of_elements does not exceed UINT32_MAX. + if (type_size != 0 && number_of_elements > UINT32_MAX / type_size) { + return kFail; + } + const size_t byte_count = + type_size * static_cast<size_t>(number_of_elements); + + std::uint32_t value_offset; + if (byte_count > 4 && + Get32u(stream, ifd_offset + 10 + i, endian, &value_offset)) { + value_offset += tiff_offset; + } else if (byte_count != 0) { + value_offset = ifd_offset + 10 + i; + } else { + // Ignore entries with an invalid byte count. 
+ continue; + } + + Error error = kOk; + const std::vector<std::uint8_t> data = + GetData(value_offset, byte_count, stream, &error); + if (error != kOk) { + return error; + } + tiff_directory->AddEntry(tag, type, number_of_elements, value_offset, data); + } + + if (Get32u(stream, ifd_offset + 2 + number_of_entries * 12, endian, + next_ifd_offset)) { + return kOk; + } else { + return kFail; + } +} + +TiffParser::TiffParser(StreamInterface* stream) : stream_(stream) {} + +TiffParser::TiffParser(StreamInterface* stream, const std::uint32_t offset) + : stream_(stream), tiff_offset_(offset) {} + +Error TiffParser::GetPreviewImageData(const TiffContent& tiff_content, + PreviewImageData* preview_image_data) { + bool has_preview = false; + Error error = kOk; + for (const auto& tiff_directory : tiff_content.tiff_directory) { + error = + FillPreviewImageData(tiff_directory, preview_image_data, &has_preview); + if (error == kOk && tiff_directory.Has(kTiffTagExifIfd) && + tiff_content.exif_directory) { + error = FillPreviewImageData(*tiff_content.exif_directory, + preview_image_data, &has_preview); + } + if (error == kOk && tiff_directory.Has(kExifTagGps) && + tiff_content.gps_directory) { + FillGpsPreviewImageData(*tiff_content.gps_directory, preview_image_data); + } + for (const auto& sub_directory : tiff_directory.GetSubDirectories()) { + if (error == kOk) { + error = FillPreviewImageData(sub_directory, preview_image_data, + &has_preview); + } + } + } + + if (error == kOk && !has_preview) { + return kUnsupported; + } + return error; +} + +Error TiffParser::Parse(const TagSet& desired_tags, + const std::uint16_t max_number_ifds, + TiffContent* tiff_content) { + if (!tiff_content->tiff_directory.empty()) { + return kFail; // You shall call Parse() only once. 
+ } + + const std::uint32_t kTiffIdentifierSize = 4; + std::uint32_t offset_to_ifd = 0; + if (!GetEndianness(tiff_offset_, stream_, &endian_) || + !Get32u(stream_, tiff_offset_ + kTiffIdentifierSize, endian_, + &offset_to_ifd)) { + return kFail; + } + + Error error = ParseIfd(tiff_offset_ + offset_to_ifd, desired_tags, + max_number_ifds, &tiff_content->tiff_directory); + if (error != kOk) { + return error; + } + + // Get the Exif data. + const TiffDirectory* tiff_ifd = + FindFirstTagInIfds(kTiffTagExifIfd, tiff_content->tiff_directory); + if (tiff_ifd != NULL) { + std::uint32_t offset; + if (tiff_ifd->Get(kTiffTagExifIfd, &offset)) { + tiff_content->exif_directory.reset(new TiffDirectory(endian_)); + std::uint32_t next_ifd_offset; + error = ParseDirectory( + tiff_offset_, tiff_offset_ + offset, endian_, desired_tags, stream_, + tiff_content->exif_directory.get(), &next_ifd_offset); + if (error != kOk) { + return error; + } + + if (tiff_ifd->Get(kExifTagGps, &offset)) { + tiff_content->gps_directory.reset(new TiffDirectory(endian_)); + const TagSet gps_tags = {kGpsTagLatitudeRef, kGpsTagLatitude, + kGpsTagLongitudeRef, kGpsTagLongitude, + kGpsTagAltitudeRef, kGpsTagAltitude}; + return ParseDirectory( + tiff_offset_, tiff_offset_ + offset, endian_, gps_tags, stream_, + tiff_content->gps_directory.get(), &next_ifd_offset); + } + } + } + + return error; +} + +Error TiffParser::ParseIfd(const std::uint32_t offset_to_ifd, + const TagSet& desired_tags, + const std::uint16_t max_number_ifds, + IfdVector* tiff_directory) { + std::uint32_t next_ifd_offset; + TiffDirectory tiff_ifd(static_cast<Endian>(endian_)); + Error error = + ParseDirectory(tiff_offset_, offset_to_ifd, endian_, desired_tags, + stream_, &tiff_ifd, &next_ifd_offset); + + ParseSubIfds(tiff_offset_, desired_tags, max_number_ifds, endian_, stream_, + &tiff_ifd, &error); + if (error == kOk) { + tiff_directory->push_back(tiff_ifd); + if (next_ifd_offset != 0 && tiff_directory->size() < max_number_ifds) { + error 
= ParseIfd(tiff_offset_ + next_ifd_offset, desired_tags, + max_number_ifds, tiff_directory); + } + } + + return error; +} + +} // namespace piex diff --git a/src/tiff_parser.h b/src/tiff_parser.h new file mode 100644 index 0000000..64dd99c --- /dev/null +++ b/src/tiff_parser.h @@ -0,0 +1,170 @@ +// Copyright 2015 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef PIEX_TIFF_PARSER_H_ +#define PIEX_TIFF_PARSER_H_ + +#include <cstdint> +#include <memory> +#include <set> +#include <vector> + +#include "src/piex_types.h" +#include "src/tiff_directory/tiff_directory.h" + +namespace piex { + +// Specifies all tags that might be of interest to get the preview data. 
+enum Tags { + kExifTagColorSpace = 0xA001, + kExifTagDateTimeOriginal = 0x9003, + kExifTagDefaultCropSize = 0xC620, + kExifTagExposureTime = 0x829a, + kExifTagFnumber = 0x829d, + kExifTagFocalLength = 0x920A, + kExifTagGps = 0x8825, + kExifTagHeight = 0xA003, + kExifTagIsoSpeed = 0x8827, + kExifTagMakernotes = 0x927C, + kExifTagWidth = 0xA002, + kGpsTagLatitudeRef = 1, + kGpsTagLatitude = 2, + kGpsTagLongitudeRef = 3, + kGpsTagLongitude = 4, + kGpsTagAltitudeRef = 5, + kGpsTagAltitude = 6, + kOlymTagAspectFrame = 0x1113, + kOlymTagCameraSettings = 0x2020, + kOlymTagRawProcessing = 0x2040, + kPanaTagBottomBorder = 0x006, + kPanaTagIso = 0x0017, + kPanaTagJpegImage = 0x002E, + kPanaTagLeftBorder = 0x0005, + kPanaTagRightBorder = 0x007, + kPanaTagTopBorder = 0x0004, + kTiffTagArtist = 0x013B, + kTiffTagBitsPerSample = 0x0102, + kTiffTagCompression = 0x0103, + kTiffTagDateTime = 0x0132, + kTiffTagExifIfd = 0x8769, + kTiffTagImageDescription = 0x010E, + kTiffTagImageLength = 0x0101, + kTiffTagImageWidth = 0x0100, + kTiffTagJpegByteCount = 0x0202, + kTiffTagJpegOffset = 0x0201, + kTiffTagMake = 0x010F, + kTiffTagModel = 0x0110, + kTiffTagOrientation = 0x0112, + kTiffTagPhotometric = 0x0106, + kTiffTagPlanarConfig = 0x011C, + kTiffTagResolutionUnit = 0x0128, + kTiffTagRowsPerStrip = 0x0116, + kTiffTagSamplesPerPixel = 0x0115, + kTiffTagSoftware = 0x0131, + kTiffTagStripByteCounts = 0x0117, + kTiffTagStripOffsets = 0x0111, + kTiffTagSubIfd = 0x014A, + kTiffTagTileByteCounts = 0x0145, + kTiffTagTileLength = 0x0143, + kTiffTagTileOffsets = 0x0144, + kTiffTagTileWidth = 0x0142, + kTiffTagXresolution = 0x011A, + kTiffTagYresolution = 0x011B, +}; + +typedef std::set<Tags> TagSet; +typedef std::vector<tiff_directory::TiffDirectory> IfdVector; + +struct TiffContent { + IfdVector tiff_directory; + std::unique_ptr<tiff_directory::TiffDirectory> exif_directory; + std::unique_ptr<tiff_directory::TiffDirectory> gps_directory; +}; + +// Reads 2 bytes, an unsigned 16bit from 'stream' 
at a certain 'offset'. The +// bytes get swapped according to the desired endianness returning true on +// success. Returns false when something is wrong. +bool Get16u(StreamInterface* stream, const std::uint32_t offset, + const tiff_directory::Endian& endian, std::uint16_t* value); + +// Reads 4 bytes, an unsigned 32bit 'value' from 'stream' at a certain 'offset'. +// The bytes get swapped according to the desired endianness returning true on +// success. Returns false when something is wrong. +bool Get32u(StreamInterface* stream, const std::uint32_t offset, + const tiff_directory::Endian& endian, std::uint32_t* value); + +// Retrieves a byte vector of size 'length' from 'stream' beginning at some +// 'offset' reading the data in chunks of one MiB. +// If 'error' is not set to kOk the returned value is invalid. +std::vector<std::uint8_t> GetData(const size_t offset, const size_t length, + StreamInterface* stream, Error* error); + +// Retrieves the endianness of TIFF compliant data at 'tiff_offset' from +// 'stream' returning true on success. Returns false if when something is wrong. +bool GetEndianness(const std::uint32_t tiff_offset, StreamInterface* stream, + tiff_directory::Endian* endian); + +// Retrieves the width and height from the jpeg preview returning true on +// success. Returns false when something is wrong. +bool GetPreviewDimensions(const std::uint32_t jpeg_offset, + StreamInterface* stream, std::uint16_t* width, + std::uint16_t* height); + +// Parses through a Tiff IFD and writes all 'desired_tags' to a +// 'tiff_directory'. +// Sets 'error' to kFail if something with the Tiff data is wrong. +Error ParseDirectory(const std::uint32_t tiff_offset, + const std::uint32_t ifd_offset, + const tiff_directory::Endian endian, + const TagSet& desired_tags, StreamInterface* stream, + tiff_directory::TiffDirectory* tiff_directory, + std::uint32_t* next_ifd_offset); + +// Enables us to parse through data that complies to the Tiff/EP specification. 
+class TiffParser { + public: + // The caller owns 'stream' and is responsible to keep it alive while the + // TiffParser object is used. + explicit TiffParser(StreamInterface* stream); + TiffParser(StreamInterface* stream, const std::uint32_t offset); + + // Runs over the Tiff IFD, Exif IFD and subIFDs to get the preview image data. + // Returns kFail if something with the Tiff tags is wrong. + Error GetPreviewImageData(const TiffContent& tiff_content, + PreviewImageData* image_metadata); + + // Returns kFail if called more that once or something with the Tiff data is + // wrong. + Error Parse(const TagSet& desired_tags, const std::uint16_t max_number_ifds, + TiffContent* tiff_content); + + private: + // Disallow copy and assignment. + TiffParser(const TiffParser&) = delete; + TiffParser& operator=(const TiffParser&) = delete; + + Error ParseIfd(const std::uint32_t ifd_offset, const TagSet& desired_tags, + const std::uint16_t max_number_ifds, + IfdVector* tiff_directory); + + StreamInterface* stream_ = nullptr; + std::uint32_t tiff_offset_ = 0; + tiff_directory::Endian endian_; +}; + +} // namespace piex + +#endif // PIEX_TIFF_PARSER_H_ |