diff options
author | Eino-Ville Talvala <etalvala@google.com> | 2018-11-15 16:07:46 -0800 |
---|---|---|
committer | Eino-Ville Talvala <etalvala@google.com> | 2018-11-15 16:07:46 -0800 |
commit | 2d6d3250dcb304c8ad081dedc8eef6ea48fd669d (patch) | |
tree | 68cc8d5a9bf5a558f46025d740c47cb292eea9f0 | |
parent | 840fc3b66a9e6593d542ada6fe14d91107fab98d (diff) | |
download | image_io-2d6d3250dcb304c8ad081dedc8eef6ea48fd669d.tar.gz |
Initial commit of libimage_io
Image_io is a library for manipulating image files, especially XMP
metadata within them.
Test: m libimage_io
Bug: 109735087
Bug: 119211681
Change-Id: I657f307be0459fe40154806c7cd388b97bcb0ea5
82 files changed, 7442 insertions, 0 deletions
diff --git a/Android.bp b/Android.bp new file mode 100644 index 0000000..37b4d9f --- /dev/null +++ b/Android.bp @@ -0,0 +1,36 @@ +cc_defaults { + name: "libimage_io-defaults", + cflags: [ + "-DUNIX_ENV=1", + "-Werror", + "-Wno-reorder", + "-Wno-unused-parameter", + ], + rtti: true, + cppflags: ["-fno-exceptions"], + clang: true, + sanitize: { + misc_undefined: [ + "unsigned-integer-overflow", + "signed-integer-overflow", + ], + }, +} + +cc_library_headers { + name: "libimage_io-headers", + export_include_dirs: ["includes"], +} + + +cc_library { + name: "libimage_io", + defaults: ["libimage_io-defaults"], + vendor_available: false, + header_libs: ["libimage_io-headers"], + export_include_dirs: ["includes"], + srcs: ["src/**/*.cc"], + static_libs: [ + "libmodpb64", + ], +} @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/METADATA b/METADATA new file mode 100644 index 0000000..e887ec6 --- /dev/null +++ b/METADATA @@ -0,0 +1,17 @@ +name: "image_io" +description: + "Image_io is a library for manipulating image files, especially XMP metadata" + +third_party { + url { + type: PIPER + value: "http://google3/photos/editing/formats/image_io" + } + version: "221162778" + last_upgrade_date { + year: 2018 + month: 11 + day: 12 + } + license_type: NOTICE +} diff --git a/MODULE_LICENSE_APACHE2 b/MODULE_LICENSE_APACHE2 new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/MODULE_LICENSE_APACHE2 @@ -0,0 +1 @@ +LICENSE
\ No newline at end of file @@ -0,0 +1,2 @@ +include platform/frameworks/av/camera:/OWNERS + diff --git a/includes/image_io/base/byte_buffer.h b/includes/image_io/base/byte_buffer.h new file mode 100644 index 0000000..77a55bc --- /dev/null +++ b/includes/image_io/base/byte_buffer.h @@ -0,0 +1,57 @@ +#ifndef IMAGE_IO_BASE_BYTE_BUFFER_H_ // NOLINT +#define IMAGE_IO_BASE_BYTE_BUFFER_H_ // NOLINT + +#include <memory> +#include <vector> + +#include "image_io/base/byte_data.h" + +namespace photos_editing_formats { +namespace image_io { + +/// This class provides a means to allocate and fill a Byte buffer with the +/// data specified in a vector of ByteData objects, and then to release that +/// buffer to be used in a DataSegment. This is used for testing purposes +/// initially, but has applicability for use in the image_io itself. +class ByteBuffer { + public: + /// Constructs a ByteBuffer using a previously allocated buffer. + /// @param size The size of the buffer. + /// @param buffer The previously allocated buffer. + ByteBuffer(size_t size, std::unique_ptr<Byte[]> buffer); + + /// Constructs a ByteBuffer using the vector of byte data. + /// @param byte_data_vector The data used to define the length and value of + /// the buffer. If any ByteData in the vector is of kHex type, and it + /// contains invalid hex digits, the size value will be set to 0, + /// resulting in a ByteBuffer the IsValid() function of which will return + /// false. + explicit ByteBuffer(const std::vector<ByteData>& byte_data_vector); + + /// @return Whether the byte buffer is valid. + bool IsValid() const { return size_ > 0; } + + /// @return The size of the byte buffer. + size_t GetSize() const { return size_; } + + /// @param location The location in the byte buffer to set. + /// @param value The two-byte value. + /// @return Whether the value was set successfully. 
+ bool SetBigEndianValue(size_t location, std::uint16_t value); + + /// Releases the buffer to the caller and sets this ByteBuffer object to an + /// invalid state. That is, after this call IsValid() will return false, and + /// GetSize() will return 0. + /// @return The buffer pointer or nullptr if the ByteBuffer was invalid. The + /// caller is responsible for deleting the buffer when done. + Byte* Release(); + + private: + std::unique_ptr<Byte[]> buffer_; + size_t size_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_BYTE_BUFFER_H_ // NOLINT diff --git a/includes/image_io/base/byte_data.h b/includes/image_io/base/byte_data.h new file mode 100644 index 0000000..7bfc97e --- /dev/null +++ b/includes/image_io/base/byte_data.h @@ -0,0 +1,137 @@ +#ifndef IMAGE_IO_BASE_BYTE_DATA_H_ // NOLINT +#define IMAGE_IO_BASE_BYTE_DATA_H_ // NOLINT + +#include <cctype> +#include <string> + +#include "image_io/base/types.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A string representation of byte data destined to be added to a ByteBuffer, +/// and thence defining a portion of a DataSegment. +class ByteData { + public: + /// The type of data represented in the string value. + enum Type { + /// The string value contains hex digits. + kHex, + + /// The string value contains ascii text. When adding the string to + /// a ByteBuffer, do not add the terminating null character. + kAscii, + + /// The string value contains ascii text. When adding the string to + /// a ByteBuffer, add the terminating null character as well. + kAscii0 + }; + + /// @param type The type of byte data + /// @param value The string value of the byte data. + ByteData(Type type, const std::string& value) : type_(type), value_(value) {} + + /// @return The type of byte data. + Type GetType() const { return type_; } + + /// @return The string value of the byte data. 
+ const std::string& GetValue() const { return value_; } + + /// @return Whether the byte data string value has a valid length and is made + /// up of a valid set of characters. + bool IsValid() const { return IsValidLength() && HasValidCharacters(); } + + /// @return Whether the byte data string value has a valid length. The kAscii + /// and kAscii0 type values have no restrictions, but the kHex type values + /// must have an even number of characters (zero length is ok). + bool IsValidLength() const { + return type_ != kHex || ((value_.length() % 2) == 0u); + } + + /// @return Whether the byte data string value is made up of valid characters. + /// The kAscii and kAscii0 type values have no restrictions, but the kHex + /// type values can only have these characters: [0-9][a-f][A-F] + bool HasValidCharacters() const { + if (type_ != kHex) { + return true; + } + for (const auto& chr : value_) { + if (Hex2Decimal(chr) < 0) { // Well defined for any char, unlike isxdigit(). + return false; + } + } + return true; + } + + /// @return The number of bytes this data requires when converted to Bytes, + /// or 0 if the byte data is invalid. + size_t GetByteCount() const { + if (!IsValid()) { + return 0; + } else if (type_ == kHex) { + return value_.length() / 2; + } else if (type_ == kAscii) { + return value_.length(); + } else { + return value_.length() + 1; + } + } + + /// @param hex_digit The hex character to convert to its decimal equivalent. + /// @return The decimal equivalent of the hex_digit, or -1 if the character is + /// not a valid hex digit. + static int Hex2Decimal(char hex_digit) { + if (hex_digit >= '0' && hex_digit <= '9') { + return static_cast<int>(hex_digit - '0'); + } else if (hex_digit >= 'a' && hex_digit <= 'f') { + return static_cast<int>(hex_digit - 'a' + 10); + } else if (hex_digit >= 'A' && hex_digit <= 'F') { + return static_cast<int>(hex_digit - 'A' + 10); + } else { + return -1; + } + } + + /// @param hi_char The hi-order nibble of the byte. + /// @param lo_char The lo-order nibble of the byte. 
+ /// @param value The pointer to the Byte to receive the value. + /// @return Whether the conversion was successful. + static bool Hex2Byte(char hi_char, char lo_char, Byte* value) { + int hi = Hex2Decimal(hi_char); + int lo = Hex2Decimal(lo_char); + if (hi < 0 || lo < 0 || value == nullptr) { + return false; + } + *value = ((hi << 4) | lo); + return true; + } + + /// @param value The byte value to convert to a two digit hex string. + /// @return The hex string equivalent of the value. + static std::string Byte2Hex(Byte value) { + const char kHexChars[] = "0123456789ABCDEF"; + std::string str(2, ' '); + str[0] = kHexChars[(value >> 4) & 0xF]; + str[1] = kHexChars[value & 0xF]; + return str; + } + + /// @param value The size_t value to convert to an eight digit hex string. + /// @return The big endian hex string equivalent of the value. + static std::string Size2BigEndianHex(size_t value) { + std::string hex_string = Byte2Hex((value >> 24) & 0xFF); + hex_string += Byte2Hex((value >> 16) & 0xFF); + hex_string += Byte2Hex((value >> 8) & 0xFF); + hex_string += Byte2Hex(value & 0xFF); + return hex_string; + } + + private: + Type type_; + std::string value_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_BYTE_DATA_H_ // NOLINT diff --git a/includes/image_io/base/cout_message_writer.h b/includes/image_io/base/cout_message_writer.h new file mode 100644 index 0000000..a124ff6 --- /dev/null +++ b/includes/image_io/base/cout_message_writer.h @@ -0,0 +1,22 @@ +#ifndef IMAGE_IO_BASE_COUT_MESSAGE_WRITER_H_ // NOLINT +#define IMAGE_IO_BASE_COUT_MESSAGE_WRITER_H_ // NOLINT + +#include <iostream> + +#include "image_io/base/message_writer.h" + +namespace photos_editing_formats { +namespace image_io { + +/// This subclass of MessageWriter writes messages to cout. 
+class CoutMessageWriter : public MessageWriter { + public: + void WriteMessage(const Message& message) override { + std::cout << GetFormattedMessage(message) << std::endl; + } +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_COUT_MESSAGE_WRITER_H_ // NOLINT diff --git a/includes/image_io/base/data_context.h b/includes/image_io/base/data_context.h new file mode 100644 index 0000000..bef5c98 --- /dev/null +++ b/includes/image_io/base/data_context.h @@ -0,0 +1,143 @@ +#ifndef IMAGE_IO_BASE_DATA_CONTEXT_H_ // NOLINT +#define IMAGE_IO_BASE_DATA_CONTEXT_H_ // NOLINT + +#include <list> +#include <string> + +#include "image_io/base/data_line_map.h" +#include "image_io/base/data_range.h" +#include "image_io/base/data_segment.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A class to represent a position in a textual subrange of a DataSegment, and +/// a means to create a usable error message that shows the relevant line +/// number and line text and the location as a "caret" position. The class also +/// provides a list of names that can be used to add context to the errors. +class DataContext { + public: + /// @param location A location in the data segment. + /// @param range A subrange of the data segment's range. + /// @param data_line_map A map for obtaining the line number and range given + /// the location. + DataContext(size_t location, const DataRange& range, + const DataSegment& segment, const DataLineMap& data_line_map) + : location_(location), + range_(range), + segment_(segment), + line_info_map_(data_line_map) {} + + /// @return The location of the context. + size_t GetLocation() const { return location_; } + + /// @param location A new value to use to set the location of the context. + void SetLocation(size_t location) { location_ = location; } + + /// @param delta A delta value that is added to the location of the context. + /// @return The new location of the context. 
+ size_t IncrementLocation(size_t delta) { + location_ += delta; + return location_; + } + + /// @return The range of the data segment defined by this context. + const DataRange& GetRange() const { return range_; } + + /// @param range Sets a new range to use for this context. + void SetRange(const DataRange& range) { range_ = range; } + + /// @return The data segment of this context. + const DataSegment& GetSegment() const { return segment_; } + + /// @return The line info map of this context. + const DataLineMap& GetDataLineMap() const { return line_info_map_; } + + /// @return Whether the context's location and range are valid for use with + /// the data segment's range. + bool IsValidLocationAndRange() const { + return range_.IsValid() && range_.Contains(location_) && + segment_.GetDataRange().Contains(range_); + } + + /// @return A pointer to the data segment's buffer, cast as a const char* type + /// pointer, or nullptr if the location and/or range are invalid. + const char* GetCharBytes() const { + return IsValidLocationAndRange() + ? reinterpret_cast<const char*>(segment_.GetBuffer(location_)) + : nullptr; + } + + /// @return The number of bytes available from the location of the context to + /// the end of the context's range, or 0 if the location and/or range are + /// invalid. + size_t GetBytesAvailable() const { + return IsValidLocationAndRange() ? range_.GetEnd() - location_ : 0; + } + + /// @return The context's name list that is used when creating error messages. + std::list<std::string>& GetNameList() { return name_list_; } + + /// @return The context's name list that is used when creating error messages. + const std::list<std::string>& GetNameList() const { return name_list_; } + + /// @return An error message that describes the location/range data segment + /// range that leads to the IsValidLocationAndRange() function returning false. + /// Great to use for internal error messages. 
+ std::string GetInvalidLocationAndRangeErrorText() const; + + /// @return An error message with the given descriptions for the error and the + /// expectation. See the other GetErrorText() function documentation for more + /// details on the format of the error message. + std::string GetErrorText(const std::string& error_description, + const std::string& expectation_description) const; + + /// @return An error message with the given descriptions for the error and the + /// expectation. The format of the error message is: + /// error_description + /// - prefix_name_list:name_list:postfix_name_list: + /// - at line:number:line_contents + /// - ^expected:expectation_description + /// If error_description is empty then the first line containing it is not + /// written. If expectation_description is empty, then the expected:... part + /// of the last line is not written. If the context's name list, and the + /// pre/postfix name lists are all empty, then that line is not written. + std::string GetErrorText(const std::list<std::string>& prefix_name_list, + const std::list<std::string>& postfix_name_list, + const std::string& error_description, + const std::string& expectation_description) const; + + private: + /// @return The string with the contents of the prefix_name_list, name_list_ + /// and the postfix_name_list concatenated with a ":" separator. + std::string GetNamesString( + const std::list<std::string>& prefix_name_list, + const std::list<std::string>& postfix_name_list) const; + + /// @return The line number string of the form line:XX, where XX is the data + /// line's number or "?" if the number is zero. + std::string GetLineNumberString(const DataLine& data_line) const; + + /// Gets the clipped and line ranges using the data line's range value. 
+ void GetClippedAndLineRange(const DataLine& data_line, + DataRange* clipped_range, + DataRange* line_range) const; + + /// Gets the line string using the clipped and line ranges and updates the + /// number of spaces before the caret depending on the contents of the line. + std::string GetLineString(const DataRange& clipped_range, + const DataRange& line_range, + size_t* spaces_before_caret) const; + + /// See the constructor for documentation on the data members. + size_t location_; + DataRange range_; + const DataSegment& segment_; + const DataLineMap& line_info_map_; + std::list<std::string> name_list_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_DATA_CONTEXT_H_ // NOLINT diff --git a/includes/image_io/base/data_destination.h b/includes/image_io/base/data_destination.h new file mode 100644 index 0000000..e3c7466 --- /dev/null +++ b/includes/image_io/base/data_destination.h @@ -0,0 +1,74 @@ +#ifndef IMAGE_IO_BASE_DATA_DESTINATION_H_ // NOLINT +#define IMAGE_IO_BASE_DATA_DESTINATION_H_ // NOLINT + +#include "image_io/base/data_range.h" +#include "image_io/base/data_segment.h" +#include "image_io/base/types.h" + +namespace photos_editing_formats { +namespace image_io { + +/// DataDestination is the abstract base class for implementations that can +/// efficiently move data from one location and/or form to another. In such +/// a transfer, the StartTransfer() and FinishTransfer() functions are always +/// called, and in between the Transfer() function may be called zero or more +/// times. See the DataSource class to see how to initiate a transfer operation. +class DataDestination { + public: + /// These values indicate what should be done after a DataSource calls a + /// DataDestination's Transfer() function. + enum TransferStatus { + /// An error occurred in the transfer process. 
DataSource's TransferData() + /// function should stop calling DataDestination's Transfer() function, and + /// return to its caller. + kTransferError, + + /// The transfer was successful. DataSource's TransferData() function can + /// keep calling DataDestination's Transfer() if needed, or if not, + /// return to its caller. + kTransferOk, + + /// The transfer was successful and the DataDestination has decided that + /// it has enough data. DataSource's TransferData() function should stop + /// calling DataDestination's Transfer() function and return to its caller. + kTransferDone + }; + + virtual ~DataDestination() = default; + + /// This function is called prior to the first call to the Transfer() function + /// to allow implementation subclasses a chance to initialize their data + /// members for the transfer process. If a data destination sends its bytes + /// to another data destination, this function must call its StartTransfer() + /// function. + virtual void StartTransfer() = 0; + + /// This function is called to transfer a portion or all of the data in the + /// data segment from the caller to wherever the receiver needs it to go. + /// @param transfer_range The portion of the data in the data_segment that is + /// to be transferred. + /// @param data_segment The data, some or all of which is to be transferred. + /// @return A transfer status value indicating what should be done next. + virtual TransferStatus Transfer(const DataRange& transfer_range, + const DataSegment& data_segment) = 0; + + /// This function is called after the final call to the Transfer() function to + /// allow implementation subclasses a chance to finalize their transfer + /// operations. If a data destination sends its bytes to another data + /// destination, this function must call its FinishTransfer() function. + virtual void FinishTransfer() = 0; + + /// @return The number of bytes written to the data destination. There is some + /// flexibility in the actual value returned. 
Most "end-point" destination + /// subclasses return the actual number of bytes received/written. Other + /// "mid-point" destinations are allowed to return the value from the next + /// destination in the chain, or the actual number of bytes they are asked + /// to transfer via the transfer_range parameter of the Transfer() + /// function. + virtual size_t GetBytesTransferred() const = 0; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_DATA_DESTINATION_H_ // NOLINT diff --git a/includes/image_io/base/data_line_map.h b/includes/image_io/base/data_line_map.h new file mode 100644 index 0000000..d934410 --- /dev/null +++ b/includes/image_io/base/data_line_map.h @@ -0,0 +1,55 @@ +#ifndef IMAGE_IO_BASE_DATA_LINE_MAP_H_ // NOLINT +#define IMAGE_IO_BASE_DATA_LINE_MAP_H_ // NOLINT + +#include <vector> + +#include "image_io/base/data_range.h" +#include "image_io/base/data_segment.h" + +namespace photos_editing_formats { +namespace image_io { + +/// The line number and range of a text line in a data source. The range does +/// not include the terminating new line. Valid line numbers are greater than 0. +struct DataLine { + DataLine() : number(0) {} + DataLine(size_t a_number, const DataRange& a_range) + : number(a_number), range(a_range) {} + size_t number; + DataRange range; +}; + +/// A class that maps a data source location to a data line structure that has +/// the line number and data range of the line. +class DataLineMap { + public: + DataLineMap() : last_line_incomplete_(false) {} + + /// Returns the number of data lines in the map. + size_t GetDataLineCount() const; + + /// Returns the data line associated with the location, or one the number of + /// which is zero and the range of which is invalid. + DataLine GetDataLine(size_t location) const; + + /// Finds the next set of data line numbers and ranges in the segment and adds + /// them to the map. 
If the map is empty, the line numbers will start at 1; + /// otherwise the numbering of the new lines will start at the next line + /// number indicated in the map. + void FindDataLines(const DataRange& range, const DataSegment& segment); + + /// Clears the map and returns it to its startup state. + void Clear(); + + private: + /// The data lines in the map, sorted by ascending range.GetBegin() value. + std::vector<DataLine> data_lines_; + + /// Whether the last data line is incomplete (no newline seen) - TODO confirm. + bool last_line_incomplete_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_DATA_LINE_MAP_H_ // NOLINT diff --git a/includes/image_io/base/data_match_result.h b/includes/image_io/base/data_match_result.h new file mode 100644 index 0000000..3bde081 --- /dev/null +++ b/includes/image_io/base/data_match_result.h @@ -0,0 +1,127 @@ +#ifndef IMAGE_IO_BASE_DATA_MATCH_RESULT_H_ // NOLINT +#define IMAGE_IO_BASE_DATA_MATCH_RESULT_H_ // NOLINT + +#include "image_io/base/message.h" + +namespace photos_editing_formats { +namespace image_io { + +/// The result of some sort of match operation of the text in a data segment. +/// The data associated with a match result include the number of bytes +/// consumed to produce the result, type of match, and in the case of an error +/// an optional Message describing the error. +class DataMatchResult { + public: + /// The type of match. + enum Type { + /// An error occurred while performing the match operation. + kError = -1, + + /// No match was found. + kNone = 0, + + /// A partial match of some sort was found. + kPartial = 1, + + /// A partial match was found, but the end of the data in the segment or + /// the available range was found. + kPartialOutOfData = 2, + + /// A full match was found. 
+ kFull = 3, + }; + + DataMatchResult() : DataMatchResult(kNone, 0) {} + explicit DataMatchResult(Type type) : DataMatchResult(type, 0) {} + DataMatchResult(Type type, size_t bytes_consumed) + : message_(Message::kStatus, 0, ""), + bytes_consumed_(bytes_consumed), + type_(type), + has_message_(false), + can_continue_(true) {} + + /// @return The type of the match result. + Type GetType() const { return type_; } + + /// @return Whether the result indicates processing can continue. + bool CanContinue() const { return can_continue_; } + + /// @return Whether the match result has a message associated with it. + bool HasMessage() const { return has_message_; } + + /// @return The message associated with the result. + const Message& GetMessage() const { return message_; } + + /// @return The number of bytes consumed to produce the result. + size_t GetBytesConsumed() const { return bytes_consumed_; } + + /// @param delta The byte count by which to increase the bytes consumed value. + size_t IncrementBytesConsumed(size_t delta) { + bytes_consumed_ += delta; + return bytes_consumed_; + } + + /// @param type The type to use for this match result. + /// @return A reference to this match result. + DataMatchResult& SetType(Type type) { + type_ = type; + return *this; + } + + /// Sets the flag that indicates whether processing can continue. + /// @param can_continue The new value for the can_continue_ flag. + DataMatchResult& SetCanContinue(bool can_continue) { + can_continue_ = can_continue; + return *this; + } + + /// @param bytes_consumed The byte count to use for this match result. + /// @return A reference to this match result. 
+ DataMatchResult& SetMessage(const Message& message) { + message_ = message; + has_message_ = true; + return *this; + } + + /// @param type The message type to use for this match result. + /// @param text The message text to use for this match result. + /// @return A reference to this match result. + DataMatchResult& SetMessage(const Message::Type type, + const std::string& text) { + return SetMessage(Message(type, 0, text)); + } + + /// @param other The other result to test for equality with this one. + /// @return Whether this and the other results are equal. + bool operator==(const DataMatchResult& other) const { + return can_continue_ == other.can_continue_ && + has_message_ == other.has_message_ && type_ == other.type_ && + bytes_consumed_ == other.bytes_consumed_ && + message_ == other.message_; + } + + /// @param other The other result to test for inequality with this one. + /// @return Whether this and the other results are not equal. + bool operator!=(const DataMatchResult& other) const { + return !(*this == other); + } + + private: + Message message_; + size_t bytes_consumed_; + Type type_; + bool has_message_; + bool can_continue_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_DATA_MATCH_RESULT_H_ // NOLINT diff --git a/includes/image_io/base/data_range.h b/includes/image_io/base/data_range.h new file mode 100644 index 0000000..e2e339a --- /dev/null +++ b/includes/image_io/base/data_range.h @@ -0,0 +1,89 @@ +#ifndef IMAGE_IO_BASE_DATA_RANGE_H_ // NOLINT +#define IMAGE_IO_BASE_DATA_RANGE_H_ // NOLINT + +#include <algorithm> + +namespace photos_editing_formats { +namespace image_io { + +/// A class to specify a range of bytes in some sort of array. The range is +/// defined like others in the STL to include the begin value and exclude the end +/// value: [begin,end). 
Invalid ranges where end <= begin are ok - no exceptions +/// are ever thrown - but the IsValid() function will return false, and other +/// functions will behave in an appropriate fashion. +class DataRange { + public: + /// The main constructor to define a range. + /// @param begin The begin location of the range. + /// @param end The end location of the range. + DataRange(size_t begin, size_t end) : begin_(begin), end_(end) {} + + /// The default constructor defines an invalid range in which both begin and + /// end are set to 0. + DataRange() : begin_(0), end_(0) {} + + DataRange(const DataRange& data_range) = default; + DataRange& operator=(const DataRange& data_range) = default; + + /// @return The begin value of the range. + size_t GetBegin() const { return begin_; } + + /// @return The end value of the range. + size_t GetEnd() const { return end_; } + + /// @return Whether the range is valid, i.e., whether begin < end. + bool IsValid() const { return begin_ < end_; } + + /// @return The length of the range, or 0 if the range is invalid. + size_t GetLength() const { return IsValid() ? end_ - begin_ : 0; } + + /// Determines if the location is in this range or not. + /// @param location The location being considered for this test. + /// @return True if the location is in the range, else false. + bool Contains(size_t location) const { + return location >= begin_ && location < end_; + } + + /// Determines if another DataRange is a subrange of this range or not. + /// @param data_range The DataRange being considered for this test. + /// @return True if both ranges are valid and data_range lies within this one. + bool Contains(const DataRange& data_range) const { + return IsValid() && data_range.IsValid() && data_range.begin_ >= begin_ && + data_range.end_ <= end_; + } + + /// Computes the DataRange that is the intersection of another range with this + /// one. If there is no intersection, the resulting range will be invalid. 
+ /// @param data_range The DataRange to use to compute the intersection with this + /// one. + /// @return The DataRange that represents the intersection, or one that + /// is invalid if the ranges do not overlap at all. + DataRange GetIntersection(const DataRange& data_range) const { + return DataRange(std::max(data_range.begin_, begin_), + std::min(data_range.end_, end_)); + } + + /// @param rhs A DataRange to compare with this one. + /// @return True if the two ranges are equal (even if invalid), else false. + bool operator==(const DataRange& rhs) const { + return begin_ == rhs.begin_ && end_ == rhs.end_; + } + + /// @param rhs A DataRange to compare with this one. + /// @return True if the two ranges are not equal (even if invalid), else false. + bool operator!=(const DataRange& rhs) const { + return begin_ != rhs.begin_ || end_ != rhs.end_; + } + + private: + /// The begin value of the range. + size_t begin_; + + /// The end value of the range. + size_t end_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_DATA_RANGE_H_ // NOLINT diff --git a/includes/image_io/base/data_range_tracking_destination.h b/includes/image_io/base/data_range_tracking_destination.h new file mode 100644 index 0000000..01fbf15 --- /dev/null +++ b/includes/image_io/base/data_range_tracking_destination.h @@ -0,0 +1,57 @@ +#ifndef IMAGE_IO_BASE_DATA_RANGE_TRACKING_DESTINATION_H_ // NOLINT +#define IMAGE_IO_BASE_DATA_RANGE_TRACKING_DESTINATION_H_ // NOLINT + +#include "image_io/base/data_destination.h" +#include "image_io/base/data_range.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A DataDestination that tracks the transfer_range values as they are passed +/// from the caller of the Transfer() function to the next DataDestination. 
+/// Instances of this class can be used to track the number of bytes transferred +/// and/or to ensure that multiple calls to the Transfer() function are made +/// with transfer_range values that join in an end-to-begin fashion. This data +/// can be used to make sure that the data transferred meets the expectations of +/// the client. +class DataRangeTrackingDestination : public DataDestination { + public: + /// @param destination The DataDestination that is next in the chain, or + /// nullptr if there is no destination. + explicit DataRangeTrackingDestination(DataDestination* destination) + : destination_(destination), + bytes_transferred_(0), + has_disjoint_transfer_ranges_(false) {} + + /// @return The number of bytes written to the data destination. Bytes are + /// considered "written" even if the next destination is a nullptr. + size_t GetBytesTransferred() const override { return bytes_transferred_; } + + /// @return The tracked data range (see the class comment for how this value + /// is computed). + const DataRange& GetTrackedDataRange() const { return tracked_data_range_; } + + /// @return Whether disjoint transfer data ranges were detected by the + /// Transfer() function. Disjoint transfer ranges occur when two calls + /// to the Transfer() function occur where first_range.GetEnd() is not + /// equal to the second_range.GetBegin(). 
+ bool HasDisjointTransferRanges() const { + return has_disjoint_transfer_ranges_; + } + + void StartTransfer() override; + TransferStatus Transfer(const DataRange& transfer_range, + const DataSegment& data_segment) override; + void FinishTransfer() override; + + private: + DataDestination* destination_; + DataRange tracked_data_range_; + size_t bytes_transferred_; + bool has_disjoint_transfer_ranges_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_DATA_RANGE_TRACKING_DESTINATION_H_ // NOLINT diff --git a/includes/image_io/base/data_scanner.h b/includes/image_io/base/data_scanner.h new file mode 100644 index 0000000..0d1af26 --- /dev/null +++ b/includes/image_io/base/data_scanner.h @@ -0,0 +1,188 @@ +#ifndef IMAGE_IO_BASE_DATA_SCANNER_H_ // NOLINT +#define IMAGE_IO_BASE_DATA_SCANNER_H_ // NOLINT + +#include <string> + +#include "image_io/base/data_context.h" +#include "image_io/base/data_match_result.h" +#include "image_io/base/data_range.h" +#include "image_io/base/data_segment.h" + +namespace photos_editing_formats { +namespace image_io { + +/// Provides a means to scan a textual portion of a data segment for a sequence +/// of characters and return the data associated with the resulting match. The +/// scanners also maintain state information for repeated calling in case the +/// text data is split over multiple data segments. The scanners also maintain +/// a data range where the result of the scanner's match can be found. These +/// scanners are written to allow copy semantics to make memory management +/// easier. Several types of scanners are provided. +class DataScanner { + public: + /// The type of scanner. + enum Type { + /// A scanner to look for text that matches exactly one or more characters. + /// The text to look for is given to the CreateLiteralScanner() function. + kLiteral, + + /// A scanner to look for text that matches a name. A name must begin with + /// one of the characters in "[A-Z][a-z]:_". 
Subsequent characters can + /// include "[0-9]-.". + kName, + + /// A scanner to look for a quoted string. A quoted string is delimited by + /// a single (') or double (") quote, and can include any character except the + /// quote mark. + kQuotedString, + + /// A scanner to look for one character from a set of characters. The set of + /// characters is given to the CreateSentinelScanner() function. + kSentinel, + + /// A scanner to accept all text up to and including a literal text value. + /// The text to look for is given to the CreateThroughLiteralScanner() + /// function. + kThroughLiteral, + + /// A scanner to skip white space characters. At least one whitespace + /// character must be scanned. The set of white space characters is given + /// by the GetWhitespaceChars() function. + kWhitespace, + + /// A scanner to skip white space characters, but unlike the kWhitespace + /// scanner, this scanner will not return an error result if there are no + /// whitespace characters scanned. + kOptionalWhitespace, + }; + + /// @return The set of whitespace characters: " \t\n\r". + static std::string GetWhitespaceChars(); + + /// @param literal The literal to use for the scanner. + /// @return A kLiteral type scanner. + static DataScanner CreateLiteralScanner(const std::string& literal); + + /// @return A kName type scanner. + static DataScanner CreateNameScanner(); + + /// @return A kQuotedString type scanner. + static DataScanner CreateQuotedStringScanner(); + + /// @param sentinels The set of sentinels to scan for. The "~" character is + /// used as an "abbreviation" for any of the characters that can make up the + /// first character of a kName type sentinel. + /// @return A kSentinel type scanner. + static DataScanner CreateSentinelScanner(const std::string& sentinels); + + /// @param literal The literal to use for the scanner. + /// @return A kThroughLiteral type scanner. 
+ static DataScanner CreateThroughLiteralScanner(const std::string& literal); + + /// @return A kWhitespace type scanner. + static DataScanner CreateWhitespaceScanner(); + + /// @return A kOptionalWhitespace type scanner. + static DataScanner CreateOptionalWhitespaceScanner(); + + /// @return The type of the scanner. + Type GetType() const { return type_; } + + /// @return A description of the scanner, based on the type. + std::string GetDescription() const; + + /// @return The literal value of a kLiteral or kThroughLiteral type scanner, + /// or an empty string otherwise. + std::string GetLiteral() const; + + /// @return The set of sentinels for a kSentinel type scanner, or an empty + /// string otherwise. + std::string GetSentenels() const; + + /// @return The sentinel character from the set of characters passed to the + /// CreateSentinelScanner() function that was matched by a successful scan + /// operation, or 0 otherwise. + char GetSentinel() const; + + /// @return The range of characters that the scanner found during one or more + /// successful Scan() function operations. + const DataRange& GetTokenRange() const { return token_range_; } + + /// @return The number of times the Scan() function has been called. + size_t GetScanCallCount() const { return scan_call_count_; } + + /// @param context The data context to use for the scan operation. + /// @return The match result of the scan operation. + DataMatchResult Scan(const DataContext& context); + + /// Resets the scanner's token range to an invalid value. + void ResetTokenRange(); + + /// Resets the scanner state to the value it had when it was first constructed. + void Reset(); + + private: + explicit DataScanner(Type type) : DataScanner(type, "") {} + DataScanner(Type type, const std::string& literal_or_sentinels) + : literal_or_sentinels_(literal_or_sentinels), + data_(0), + scan_call_count_(0), + type_(type) {} + + /// @param delta_length The byte count to use to extend the token range end. 
+ /// @return The new length of the token range. + size_t ExtendTokenLength(size_t delta_length); + + /// The worker functions for scanning each type of scanner. + /// @param cbytes The pointer value to the buffer at the context's location. + /// @param bytes_available The number of bytes available for the scan. + /// @param context The data context for message generation purposes. + DataMatchResult ScanLiteral(const char* cbytes, size_t bytes_available, + const DataContext& context); + DataMatchResult ScanName(const char* cbytes, size_t bytes_available, + const DataContext& context); + DataMatchResult ScanQuotedString(const char* cbytes, size_t bytes_available, + const DataContext& context); + DataMatchResult ScanSentinel(const char* cbytes, size_t bytes_available, + const DataContext& context); + DataMatchResult ScanThroughLiteral(const char* cbytes, size_t bytes_available, + const DataContext& context); + DataMatchResult ScanWhitespace(const char* cbytes, size_t bytes_available, + const DataContext& context); + + /// Sets the match result to kError and generates an internal error message. + /// @param context The data context for message generation purposes. + /// @param error_description A description of the type of internal error. + /// @param result The result to receive the kError type and message. + void SetInternalError(const DataContext& context, + const std::string& error_description, + DataMatchResult* result); + + /// Sets the match result to kError and generates a syntax error message. + /// @param context The data context for message generation purposes. + /// @param error_description A description of the type of syntax error. + /// @param result The result to receive the kError type and message. + void SetSyntaxError(const DataContext& context, + const std::string& error_description, + DataMatchResult* result); + + /// The string used for kLiteral, kThroughLiteral and kSentinel type scanners. 
+ std::string literal_or_sentinels_; + + /// The token range built by one or more calls to the Scan() function. + DataRange token_range_; + + /// State data used in different ways by different scanner types. + size_t data_; + + /// The number of times the scanner's Scan function has been called. + size_t scan_call_count_; + + /// The type of scanner. + Type type_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_DATA_SCANNER_H_ // NOLINT diff --git a/includes/image_io/base/data_segment.h b/includes/image_io/base/data_segment.h new file mode 100644 index 0000000..ec6d584 --- /dev/null +++ b/includes/image_io/base/data_segment.h @@ -0,0 +1,183 @@ +#ifndef IMAGE_IO_BASE_DATA_SEGMENT_H_ // NOLINT +#define IMAGE_IO_BASE_DATA_SEGMENT_H_ // NOLINT + +#include <memory> + +#include "image_io/base/data_range.h" +#include "image_io/base/types.h" + +namespace photos_editing_formats { +namespace image_io { + +class DataRange; + +/// A DataSegment represents a portion of a larger "array" of byte data. Each +/// data segment knows (via its DataRange member) its location in the larger +/// array. The byte data of each data segment is accessed by its location +/// in that larger array. Instances of DataSegment are typically created or +/// managed by instances of DataSource which offers them up to client code. +/// A shared_ptr is used to control the lifetime of DataSegments. For more +/// information on this, see the comments in DataSource. +class DataSegment { + public: + /// A creation parameter for indicating whether or not, upon destruction, the + /// DataSegment's buffer should be deallocated. + enum BufferDispositionPolicy { + /// Policy to deallocate the buffer upon destruction. + kDelete, + + /// Policy to leave the buffer upon destruction. + kDontDelete + }; + + /// Creates a new DataSegment with the given DataRange and byte buffer. + /// @param data_range The DataRange of the byte data in the buffer. 
+ /// @param buffer The byte data of the data segment. + /// @param buffer_policy The buffer ownership policy (kDelete or kDontDelete). + /// @return A shared pointer to the data segment. + static std::shared_ptr<DataSegment> Create( + const DataRange& data_range, const Byte* buffer, + BufferDispositionPolicy buffer_policy); + + /// Creates a new DataSegment with the given DataRange and byte buffer. + /// The DataSegment takes ownership of the buffer and will delete the buffer + /// when the DataSegment itself is destroyed. + /// @param data_range The DataRange of the byte data in the buffer. + /// @param buffer The byte data of the data segment; the DataSegment takes + /// ownership of the buffer and will delete it when it is deleted. + /// @return A shared pointer to the data segment. + static std::shared_ptr<DataSegment> Create(const DataRange& data_range, + const Byte* buffer) { + return Create(data_range, buffer, BufferDispositionPolicy::kDelete); + } + + /// @return The DataRange of the data in the segment. + const DataRange& GetDataRange() const { return data_range_; } + + /// @return The begin location of the segment's data range. + size_t GetBegin() const { return data_range_.GetBegin(); } + + /// @return The end location of the segment's data range. + size_t GetEnd() const { return data_range_.GetEnd(); } + + /// @return The length of the segment's data range. + size_t GetLength() const { return data_range_.GetLength(); } + + /// @return Whether the segment's range contains the location. + bool Contains(size_t location) const { + return data_range_.Contains(location); + } + + /// Gets the validated byte value of the segment at the given location. + /// @param location The location in the segment to get the byte value of. + /// @return The validated byte at the given location or InvalidByte() if the + /// segment's range does not contain the location. + ValidatedByte GetValidatedByte(size_t location) const { + return Contains(location) ? 
ValidatedByte(buffer_[location - GetBegin()]) + : InvalidByte(); + } + + /// Returns a pointer to the byte at the given location in the data segment. + /// @param location The location of the byte to get the buffer pointer of. + /// @return The pointer to the byte in the segment's buffer, or the nullptr + /// if the segment does not contain the location. + const Byte* GetBuffer(size_t location) const { + return Contains(location) ? &buffer_[location - GetBegin()] : nullptr; + } + + /// Finds the location of the string in the data segment. Although a data + /// segment has an array of Bytes (an unsigned quantity), very often the + /// data they contain are strings - a sequence of bytes, none of which have + /// the sign bit set. As an aid in expressing the algorithms for finding such + /// strings, this function allows client code to express the searched-for + /// string as a zero-terminated array of chars. + /// @param start_location The location at which to start looking. + /// @param str The string to find in the segment. The strlen function is + /// used to find the length of the string to search for. + /// @return The location of the start of the string, or the segment's end + /// location value. + size_t Find(size_t start_location, const char* str) const { + return Find(start_location, str, strlen(str)); + } + + /// Finds the location of the string in the data segment. Although a data + /// segment has an array of Bytes (an unsigned quantity), very often the + /// data they contain are strings - a sequence of bytes, none of which have + /// the sign bit set. As an aid in expressing the algorithms for finding such + /// strings, this function allows client code to express the searched-for + /// string as an array of chars and a length. + /// @param location The location at which to start looking. + /// @param str The string to find in the segment. + /// @param str_length The length of the string to find. 
+ /// @return The location of the start of the string, or the segment's end + /// location value. + size_t Find(size_t location, const char* str, size_t str_length) const; + + /// Finds the location of the given byte value in the data segment. + /// @param start_location The location at which to start looking. + /// @param value The byte value to search for. + /// @return The location of the value, or the segment's end location value. + size_t Find(size_t start_location, Byte value) const; + + /// Sometimes the data of concern spans two data segments. For instance, a + /// JPEG data segment marker may start at the end of one data segment and + /// extend into the following segment. This helper function makes it easier to + /// write code to treat two data segments as one entity for the purpose of + /// getting the byte value at the given location. + /// @param location The location in the segment to get the byte value of. + /// @param segment1 The first data segment to use (may be nullptr). + /// @param segment2 The second data segment to use (may be nullptr). + /// @return The validated byte at the given location, or InvalidByte() if + /// neither segment contains the location. + static ValidatedByte GetValidatedByte(size_t location, + const DataSegment* segment1, + const DataSegment* segment2); + + /// Sometimes the data of concern spans two data segments. For instance, a + /// JPEG data segment marker may start at the end of one data segment and + /// extend into the following segment. This helper function makes it easier to + /// write code to treat two data segments as one entity for the purpose of + /// finding a byte value. + /// @param start_location The location at which to start looking. + /// @param value The byte value to search for. + /// @param segment1 The first data segment to use. + /// @param segment2 The second data segment to use. 
+ /// @return The location of the value if it's found and the two segments are + /// contiguous (i.e., if segment1->GetEnd() == segment2->GetBegin()), + /// else the max(segment1->GetEnd(), segment2->GetEnd()). + static size_t Find(size_t start_location, Byte value, + const DataSegment* segment1, const DataSegment* segment2); + + private: + DataSegment(const DataRange& data_range, const Byte* buffer, + BufferDispositionPolicy buffer_policy) + : data_range_(data_range), + buffer_(buffer), + buffer_policy_(buffer_policy) {} + + ~DataSegment() { + // The buffer is freed only under the kDelete policy; otherwise it stays allocated. + if (buffer_policy_ == BufferDispositionPolicy::kDelete) { + delete[] buffer_; + } + } + + /// The default_delete needs to be a friend so that the shared pointer can + /// call the private destructor. + friend struct std::default_delete<DataSegment>; + + private: + /// The data range of the data segment. + DataRange data_range_; + + /// The buffer that contains the segment data. + const Byte* buffer_; + + /// The policy that dictates whether or not the buffer will be deallocated. + BufferDispositionPolicy buffer_policy_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_DATA_SEGMENT_H_ // NOLINT diff --git a/includes/image_io/base/data_segment_data_source.h b/includes/image_io/base/data_segment_data_source.h new file mode 100644 index 0000000..6ef6c3e --- /dev/null +++ b/includes/image_io/base/data_segment_data_source.h @@ -0,0 +1,30 @@ +#ifndef IMAGE_IO_BASE_DATA_SEGMENT_DATA_SOURCE_H_ // NOLINT +#define IMAGE_IO_BASE_DATA_SEGMENT_DATA_SOURCE_H_ // NOLINT + +#include "image_io/base/data_source.h" + +namespace photos_editing_formats { +namespace image_io { + +/// DataSegmentDataSource is an implementation of DataSource that provides +/// access to requested DataSegment instances from a single (possibly large) +/// in-memory DataSegment. 
+class DataSegmentDataSource : public DataSource { + public: + explicit DataSegmentDataSource( + const std::shared_ptr<DataSegment>& shared_data_segment) + : shared_data_segment_(shared_data_segment) {} + void Reset() override; + std::shared_ptr<DataSegment> GetDataSegment(size_t begin, + size_t min_size) override; + TransferDataResult TransferData(const DataRange& data_range, size_t best_size, + DataDestination* data_destination) override; + + private: + std::shared_ptr<DataSegment> shared_data_segment_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_DATA_SEGMENT_DATA_SOURCE_H_ // NOLINT diff --git a/includes/image_io/base/data_source.h b/includes/image_io/base/data_source.h new file mode 100644 index 0000000..8b96d4b --- /dev/null +++ b/includes/image_io/base/data_source.h @@ -0,0 +1,104 @@ +#ifndef IMAGE_IO_BASE_DATA_SOURCE_H_ // NOLINT +#define IMAGE_IO_BASE_DATA_SOURCE_H_ // NOLINT + +#include <memory> + +#include "image_io/base/data_destination.h" +#include "image_io/base/data_range.h" +#include "image_io/base/data_segment.h" +#include "image_io/base/types.h" + +namespace photos_editing_formats { +namespace image_io { + +/// DataSource is the abstract base class for implementations that can provide +/// data from a file or memory buffer or some other container. A data source +/// supports both a pull model for obtaining data, via the GetDataSegment() +/// function, and a push model via a collaborating DataDestination and the +/// TransferData() function. +/// +/// Pushing with a DataSource can be a convenient alternative to using a +/// DataDestination directly when there is a large amount of data that is +/// located in a file, or some type of memory structure that can be "wrapped" in +/// a DataSource. The push model provides the most efficient (i.e., least +/// copying of bytes) way to move data from one place to another. 
For usage of +/// this library on mobile devices with limited memory, this mode of operation +/// is the most attractive. Unfortunately, the push model typically assumes the +/// code knows what portion of bytes to push. The discovery of that portion is +/// most often easier to accomplish with a pull model. +/// +/// The pull model, while needed for efficient implementation of objects that +/// scan the contents of a data source, does represent a challenge when managing +/// the lifetime of the DataSegment instances returned by the GetDataSegment() +/// function - depending on the implementation of the DataSource, the segment it +/// returns might represent the entire array of data, or it might represent just +/// a portion of it that was read from a file. In the first case, the DataSource +/// would probably want to keep ownership of the DataSegment, while in the other +/// case, the DataSource might very well want to pass ownership on to the caller +/// of GetDataSegment(). This problem is solved by allowing sharing of the +/// ownership of the DataSegment via a std::shared_ptr. +/// +/// The push model implemented does not have these complications, so the +/// DataDestination class's Transfer() function takes a simple const reference +/// to a DataSegment, with the ownership firmly held by the DataSource. +class DataSource { + public: + /// The result of a TransferData() operation. + enum TransferDataResult { + /// An error occurred while calling DataDestination::Transfer(), or the + /// data destination was a nullptr. + kTransferDataError, + + /// The DataDestination::Transfer() function was not called because the + /// DataRange was empty or the DataSource was not able to supply any data + /// in the range. + kTransferDataNone, + + /// The data transfer was successful. 
+ kTransferDataSuccess + }; + + virtual ~DataSource() = default; + + /// Requests the data source to return a DataSegment with a range starting at + /// the given begin location and extending min_size bytes in length if + /// possible. (If not possible, a shorter range of data may be returned. A + /// larger range may also be returned, depending on the DataSource). + /// If a non-null data segment is returned, its DataRange is guaranteed to have + /// at least some overlap with the requested range. + /// @param begin The begin location of the requested data segment. + /// @param min_size The min size of the requested data segment. The size of + /// the data segment returned may be larger depending on the data source. + /// @return The data segment, or a nullptr if the range of data did not exist + /// in the data source. + virtual std::shared_ptr<DataSegment> GetDataSegment(size_t begin, + size_t min_size) = 0; + + /// Some data sources may need to be reset if they are accessed via repeated + /// calls to GetDataSegment() all the way to the end of the array of bytes. + /// (For example a file-based DataSource might have eof bits that need to be + /// cleared before re-reading data). This function does that kind of thing. + virtual void Reset() = 0; + + /// Requests the data source to transfer data in the given range to the given + /// DataDestination. Callers must call the data destination's StartTransfer() + /// function before calling this function, and call its FinishTransfer() + /// after this call. This function will call the data destination's Transfer() + /// function zero or more times. + /// @param data_range The range of data to transfer from this data source to + /// the destination. + /// @param best_size The "best" size of the requested data segment to be sent + /// to the data destination. 
The size of the data segment that is sent to + /// the data destination may be larger than this value, depending on the + /// data source, or it may be smaller if the requested data range extends + /// past the end of the data source's range. + /// @param data_destination The receiver of the data. + virtual TransferDataResult TransferData( + const DataRange& data_range, size_t best_size, + DataDestination* data_destination) = 0; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_DATA_SOURCE_H_ // NOLINT diff --git a/includes/image_io/base/istream_data_source.h b/includes/image_io/base/istream_data_source.h new file mode 100644 index 0000000..4564e14 --- /dev/null +++ b/includes/image_io/base/istream_data_source.h @@ -0,0 +1,46 @@ +#ifndef IMAGE_IO_BASE_ISTREAM_DATA_SOURCE_H_ // NOLINT +#define IMAGE_IO_BASE_ISTREAM_DATA_SOURCE_H_ // NOLINT + +#include <iostream> +#include <memory> +#include <utility> + +#include "image_io/base/data_source.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A DataSource that obtains data from an istream. +class IStreamDataSource : public DataSource { + public: + /// Constructs an IStreamDataSource using the given istream. + /// @param istream_ptr The istream from which to read; ownership is taken. + explicit IStreamDataSource(std::unique_ptr<std::istream> istream_ptr) + : istream_(std::move(istream_ptr)) {} + + void Reset() override; + std::shared_ptr<DataSegment> GetDataSegment(size_t begin, + size_t min_size) override; + TransferDataResult TransferData(const DataRange& data_range, size_t best_size, + DataDestination* data_destination) override; + + private: + /// The worker function to create a DataSegment and fill it with the given + /// number of bytes read from the istream, starting at the given location. + /// @param begin The location in the istream at which to start reading. + /// @param count The number of bytes to read. 
+ /// @return A DataSegment pointer, or nullptr if the read failed. + std::shared_ptr<DataSegment> Read(size_t begin, size_t count); + + private: + /// The istream from which to read. + std::unique_ptr<std::istream> istream_; + + /// The current data segment that was read in the GetDataSegment() function. + std::shared_ptr<DataSegment> current_data_segment_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_ISTREAM_DATA_SOURCE_H_ // NOLINT diff --git a/includes/image_io/base/message.h b/includes/image_io/base/message.h new file mode 100644 index 0000000..8c225d8 --- /dev/null +++ b/includes/image_io/base/message.h @@ -0,0 +1,84 @@ +#ifndef IMAGE_IO_BASE_MESSAGE_H_ // NOLINT +#define IMAGE_IO_BASE_MESSAGE_H_ // NOLINT + +#include <string> + +namespace photos_editing_formats { +namespace image_io { + +/// A message that is reported to and managed by the MessageHandler, and +/// possibly written by a MessageWriter. +class Message { + public: + /// The types of Messages. + enum Type { + /// A Status message. + kStatus, + + /// An error from the stdlib was detected. The std::errno variable can be + /// used to programmatically decide what to do, or use the std::strerror + /// function to get a string description of the error. + kStdLibError, + + /// A premature end of the data being processed was found. + kPrematureEndOfDataError, + + /// An expected string value was not found in the data being processed. + kStringNotFoundError, + + /// An error occurred while decoding the data being processed. + kDecodingError, + + /// An error occurred while parsing the data. + kSyntaxError, + + /// An error occurred while using the data. + kValueError, + + /// An internal error of some sort occurred. + kInternalError + }; + + /// @param type The type of message to create. + /// @param system_errno The errno value to use for kStdLibError type messages. + /// @param text The text of the message. 
+ Message(Type type, int system_errno, const std::string& text) + : type_(type), system_errno_(system_errno), text_(text) {} + + Message() = delete; + + bool operator==(const Message& rhs) const { + return type_ == rhs.type_ && system_errno_ == rhs.system_errno_ && + text_ == rhs.text_; + } + + bool operator!=(const Message& rhs) const { + return type_ != rhs.type_ || system_errno_ != rhs.system_errno_ || + text_ != rhs.text_; + } + + /// @return The type of message. + Type GetType() const { return type_; } + + /// @return The system errno value used for kStdLibError messages. + int GetSystemErrno() const { return system_errno_; } + + /// @return The text of the message. + const std::string& GetText() const { return text_; } + + private: + /// The type of message. + Type type_; + + /// If type == kStdLibError, the system's errno value at the time + /// the error was reported, else its value is 0. + int system_errno_; + + /// The text associated with the message. + std::string text_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_MESSAGE_H_ // NOLINT diff --git a/includes/image_io/base/message_handler.h b/includes/image_io/base/message_handler.h new file mode 100644 index 0000000..dc33679 --- /dev/null +++ b/includes/image_io/base/message_handler.h @@ -0,0 +1,102 @@ +#ifndef IMAGE_IO_BASE_MESSAGE_HANDLER_H_ // NOLINT +#define IMAGE_IO_BASE_MESSAGE_HANDLER_H_ // NOLINT + +#include <memory> +#include <vector> + +#include "image_io/base/message.h" +#include "image_io/base/message_store.h" +#include "image_io/base/message_writer.h" + +namespace photos_editing_formats { +namespace image_io { + +/// MessageHandler provides the functions that all the code in this library uses +/// to report status and error conditions. +class MessageHandler { + public: + /// Initializes the MessageHandler for client use.
Multithread applications + /// might find this function useful to call in their initialization section, + /// to guarantee that threads will not create race conditions when calling the + /// Get function for the first time. + static void Init(std::unique_ptr<MessageWriter> message_writer, + std::unique_ptr<MessageStore> message_store); + + /// This function is thread-safe as long as the Init() function is called in + /// non-multiple-threaded startup code; if the Init() function was not called + /// there may be race conditions that cause the message handler returned from + /// Get() called in one thread to be different from that returned by the call + /// in a different thread. + /// @return The message handler used by the code in this library. + static MessageHandler* Get(); + + /// Sets the message writer to use when ReportMessage() is called. If client + /// code does not call this function, the MessageHandler returned by the Get() + /// function will have a CoutMessageWriter by default. If client code calls + /// this function with a null, then ReportMessage() will not write messages at + /// all, but just add them to the messages store. + /// @param message_writer The message writer that ReportMessage uses, or null. + void SetMessageWriter(std::unique_ptr<MessageWriter> message_writer); + + /// Sets the message store to use when ReportMessage() is called. If client + /// code does not call this function, the MessageHandler returned by the Get() + /// function will have a VectorMessageStore by default. If client code calls + /// this function with a null, then ReportMessage() will not save messages at + /// all, but just write them to the messages writer. + /// @param message_store The message store that ReportMessage uses, or null. + void SetMessageStore(std::unique_ptr<MessageStore> message_store); + + /// Clears the messages maintained by the message handler's store.
Client code + /// should call this function before calling any other standalone or class + /// function in this library so as to provide a clean starting point with + /// respect to error and status messages. Once all the calls have been made, + /// client code should examine the messages or call HasErrorMessages() to + /// determine whether the calls succeeded or not. Finally client code + /// should call this function again so that memory is not leaked when it is + /// done using this library. + void ClearMessages() { + if (message_store_) { + message_store_->ClearMessages(); + } + } + + /// @return Whether the message handler's store has error messages or not. + bool HasErrorMessages() const { + return message_store_ ? message_store_->HasErrorMessages() : false; + } + + /// @return The vector of messages maintained by the message handler's store. + std::vector<Message> GetMessages() const { + return message_store_ ? message_store_->GetMessages() + : std::vector<Message>(); + } + + /// Reports an error or a status message. This function is called from library + /// code when it detects an error condition or wants to report status. If the + /// message type is Message::kStdLibError, then the current value of the + /// system's errno variable is used when the message is created. The message + /// is added to the messages vector and if the message writer is not null, its + /// WriteMessage function is called. + /// @param type The type of message. + /// @param text Text associated with the message. + void ReportMessage(Message::Type type, const std::string& text); + + /// @param message The message to report. + void ReportMessage(const Message& message); + + private: + MessageHandler() = default; + ~MessageHandler(); + + private: + /// The message writer used by ReportMessage, or null. + std::unique_ptr<MessageWriter> message_writer_; + + /// The message store for saving messages for later, or null.
+ std::unique_ptr<MessageStore> message_store_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_MESSAGE_HANDLER_H_ // NOLINT diff --git a/includes/image_io/base/message_store.h b/includes/image_io/base/message_store.h new file mode 100644 index 0000000..1d492f1 --- /dev/null +++ b/includes/image_io/base/message_store.h @@ -0,0 +1,80 @@ +#ifndef IMAGE_IO_BASE_MESSAGE_STORE_H_ // NOLINT +#define IMAGE_IO_BASE_MESSAGE_STORE_H_ // NOLINT + +#include <vector> +#include "image_io/base/message.h" + +namespace photos_editing_formats { +namespace image_io { + +/// An abstract base class for storing and reporting on Messages. +class MessageStore { + public: + virtual ~MessageStore() = default; + + /// Clears the messages maintained by the store. + virtual void ClearMessages() = 0; + + /// @param message The message to add to the store. + virtual void AddMessage(const Message& message) = 0; + + /// @return A vector of messages maintained by the store; this vector may be + /// empty even if the AddMessage function was called, depending on how the + /// concrete subclass is implemented. + virtual std::vector<Message> GetMessages() const = 0; + + /// @return Whether the store has error messages or not. This value is + /// guaranteed to be accurate based on the latest calls to the + /// ClearMessages and AddMessage functions. + virtual bool HasErrorMessages() const = 0; +}; + +/// A MessageStore that saves the messages in a vector. The implementation of +/// this class is not thread safe.
+class VectorMessageStore : public MessageStore { + public: + void ClearMessages() override { messages_.clear(); } + void AddMessage(const Message& message) override { + messages_.push_back(message); + } + std::vector<Message> GetMessages() const override { return messages_; } + bool HasErrorMessages() const override { + for (const auto& message : messages_) { + if (message.GetType() != Message::kStatus) { + return true; + } + } + return false; + } + + private: + std::vector<Message> messages_; +}; + +/// A MessageStore that simply keeps track of whether error messages have been +/// added or not, but does not store the messages themselves. The implementation +/// of this class should not cause any crashes if run in a multi-threaded +/// environment, though there may be some cases where erroneous results are +/// returned by the HasErrorMessages function. +class ErrorFlagMessageStore : public MessageStore { + public: + ErrorFlagMessageStore() : has_error_(false) {} + void ClearMessages() override { has_error_ = false; } + void AddMessage(const Message& message) override { + if (message.GetType() != Message::kStatus) { + has_error_ = true; + } + } + std::vector<Message> GetMessages() const override { + return std::vector<Message>(); + } + bool HasErrorMessages() const override { return has_error_; } + + private: + bool has_error_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_MESSAGE_STORE_H_ // NOLINT diff --git a/includes/image_io/base/message_writer.h b/includes/image_io/base/message_writer.h new file mode 100644 index 0000000..d3240a0 --- /dev/null +++ b/includes/image_io/base/message_writer.h @@ -0,0 +1,46 @@ +#ifndef IMAGE_IO_BASE_MESSAGE_WRITER_H_ // NOLINT +#define IMAGE_IO_BASE_MESSAGE_WRITER_H_ // NOLINT + +#include "image_io/base/message.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A message writer is used by MessageHandler to write messages as they are +/// reported via the
ReportMessage function. The main function, WriteMessage +/// must be implemented by subclasses. Subclasses can also override any or all +/// of the other virtual functions, GetFormattedMessage(), GetTypeCategory() +/// and GetTypeDescription() to suit their needs. +class MessageWriter { + public: + virtual ~MessageWriter() = default; + + /// This function is called to write a message. Implementations can call the + /// GetFormattedMessage function and write it wherever it needs to go, or + /// do something else entirely. + /// @param message The message to write. + virtual void WriteMessage(const Message& message) = 0; + + /// Formats the message into a single string suitable for writing. This + /// implementation returns a string that has the format + /// <GetTypeCategory()><GetTypeDescription()>:text + /// @param message The message for which a formatted string is wanted. + /// @return A string describing the message. + virtual std::string GetFormattedMessage(const Message& message) const; + + /// @param type The type of message to get the category of. + /// @return A string describing the type category; this implementation returns + /// (the obviously nonlocalized strings) "STATUS" or "ERROR" + virtual std::string GetTypeCategory(Message::Type type) const; + + /// @param type The type of message to get the description of. + /// @param system_errno Used for kStdLibError type messages. + /// @return A (non-localized) string description of the type. 
+ virtual std::string GetTypeDescription(Message::Type type, + int system_errno) const; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_MESSAGE_WRITER_H_ // NOLINT diff --git a/includes/image_io/base/ostream_data_destination.h b/includes/image_io/base/ostream_data_destination.h new file mode 100644 index 0000000..15a1155 --- /dev/null +++ b/includes/image_io/base/ostream_data_destination.h @@ -0,0 +1,58 @@ +#ifndef IMAGE_IO_BASE_OSTREAM_DATA_DESTINATION_H_ // NOLINT +#define IMAGE_IO_BASE_OSTREAM_DATA_DESTINATION_H_ // NOLINT + +#include <iostream> +#include <memory> +#include <string> +#include <utility> + +#include "image_io/base/data_destination.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A DataDestination that writes its output to an ostream. +class OStreamDataDestination : public DataDestination { + public: + /// Constructs an OStreamDataDestination using the given ostream. + /// @param ostream_ptr The ostream to which data is written. + explicit OStreamDataDestination(std::unique_ptr<std::ostream> ostream_ptr) + : ostream_(std::move(ostream_ptr)), + bytes_transferred_(0), + has_error_(false) {} + + /// @param name A name to associate with the ostream. Used for error messages. + void SetName(const std::string& name) { name_ = name; } + + /// @return The name associated with the ostream. + const std::string& GetName() const { return name_; } + + /// @return The number of bytes written to the ostream. + size_t GetBytesTransferred() const override { return bytes_transferred_; } + + /// @return True if errors were encountered while writing to the ostream. + bool HasError() const { return has_error_; } + + void StartTransfer() override; + TransferStatus Transfer(const DataRange& transfer_range, + const DataSegment& data_segment) override; + void FinishTransfer() override; + + private: + /// The ostream written to. 
+ std::unique_ptr<std::ostream> ostream_; + + /// The number of bytes written so far. + size_t bytes_transferred_; + + /// A (file) name to associate with the ostream, used with error messages. + std::string name_; + + /// If true, indicates an error has occurred writing to the ostream. + bool has_error_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_OSTREAM_DATA_DESTINATION_H_ // NOLINT diff --git a/includes/image_io/base/types.h b/includes/image_io/base/types.h new file mode 100644 index 0000000..aaa88b3 --- /dev/null +++ b/includes/image_io/base/types.h @@ -0,0 +1,42 @@ +#ifndef IMAGE_IO_BASE_TYPES_H_ // NOLINT +#define IMAGE_IO_BASE_TYPES_H_ // NOLINT + +#include <cstdint> +#include <cstdlib> + +namespace photos_editing_formats { +namespace image_io { + +/// Byte is the basic unit of data (an unsigned 8-bit value). +using Byte = std::uint8_t; + +/// A Byte value and a validity flag. +struct ValidatedByte { + explicit ValidatedByte(Byte value_arg) : value(value_arg), is_valid(true) {} + ValidatedByte(const ValidatedByte&) = default; + ValidatedByte& operator=(const ValidatedByte&) = default; + Byte value; + bool is_valid; +}; + +/// Equality operator for ValidatedByte +inline bool operator==(const ValidatedByte& lhs, const ValidatedByte& rhs) { + return lhs.value == rhs.value && lhs.is_valid == rhs.is_valid; +} + +/// Inequality operator for ValidatedByte +inline bool operator!=(const ValidatedByte& lhs, const ValidatedByte& rhs) { + return lhs.value != rhs.value || lhs.is_valid != rhs.is_valid; +} + +/// @return a validated byte that has a false is_valid value.
+inline ValidatedByte InvalidByte() { + ValidatedByte invalid_byte(0); + invalid_byte.is_valid = false; + return invalid_byte; +} + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_TYPES_H_ // NOLINT diff --git a/includes/image_io/extras/base64_decoder_data_destination.h b/includes/image_io/extras/base64_decoder_data_destination.h new file mode 100644 index 0000000..cde1dbc --- /dev/null +++ b/includes/image_io/extras/base64_decoder_data_destination.h @@ -0,0 +1,60 @@ +#ifndef IMAGE_IO_EXTRAS_BASE64_DECODER_DATA_DESTINATION_H_ // NOLINT +#define IMAGE_IO_EXTRAS_BASE64_DECODER_DATA_DESTINATION_H_ // NOLINT +#define IMAGE_IO_noumenon_base64_h + +#include <vector> + +#include "image_io/base/data_destination.h" + +namespace photos_editing_formats { +namespace image_io { + +/// Base64DecoderDataDestination is typically used in a chain of DataDestination +/// instances. For example, it can be used to decode base64 encoded JPEG data in +/// APP1/XMP data segments. +class Base64DecoderDataDestination : public DataDestination { + public: + /// @param next_destination The next DataDestination in the chain which will + /// be sent the decoded bytes received by the Transfer() function. + explicit Base64DecoderDataDestination(DataDestination* next_destination) + : next_destination_(next_destination), + next_decoded_location_(0), + has_error_(false) {} + + /// @return True if there was an error in the decoding process. + bool HasError() const { return has_error_; } + + void StartTransfer() override; + TransferStatus Transfer(const DataRange& transfer_range, + const DataSegment& data_segment) override; + void FinishTransfer() override; + + /// @return The number of bytes written not to this decoder destination, but + /// to the next destination. Returns zero if the next destination is null. + size_t GetBytesTransferred() const override { + return next_destination_ ? 
next_destination_->GetBytesTransferred() : 0; + } + + private: + /// The destination that the decoded data is sent to. + DataDestination* next_destination_; + + /// If the transfer_range parameter of the Transfer function does not have a + /// length that is a multiple of 4, then the leftover bytes are placed in this + /// vector and are prepended to the data in the next call to Transfer. + std::vector<Byte> leftover_bytes_; + + /// The DataRanges supplied to the Transfer function can't be sent down the + /// chain to the next destination because the number of bytes differ (by 4/3). + /// This value records the number of bytes decoded so far, and the beginning + /// of the DataRange sent to the destination's Transfer function. + size_t next_decoded_location_; + + /// A true value indicates that an error occurred in the decoding process. + bool has_error_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_EXTRAS_BASE64_DECODER_DATA_DESTINATION_H_ // NOLINT diff --git a/includes/image_io/extras/string_view_data_source.h b/includes/image_io/extras/string_view_data_source.h new file mode 100644 index 0000000..0a90b44 --- /dev/null +++ b/includes/image_io/extras/string_view_data_source.h @@ -0,0 +1,33 @@ +#ifndef IMAGE_IO_EXTRAS_STRING_VIEW_DATA_SOURCE_H_ // NOLINT +#define IMAGE_IO_EXTRAS_STRING_VIEW_DATA_SOURCE_H_ // NOLINT + +#include "image_io/base/data_destination.h" +#include "image_io/base/data_range.h" +#include "image_io/base/data_segment_data_source.h" + +#include "third_party/absl/strings/string_view.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A DataSource that reads bytes from a string_view. The underlying string data +/// must have a lifetime that exceeds the lifetime of this data source, and the +/// string contents must not change while the data source is referencing it. 
+class StringViewDataSource : public DataSegmentDataSource { + public: + /// Constructs a StringViewDataSource using the given string_view. + /// @param string_src The string_view to read from. + explicit StringViewDataSource(absl::string_view string_src); + + /// Returns the string view being used as the data source. + absl::string_view GetStringView() const { return string_src_; } + + private: + /// The string_view to read from. + absl::string_view string_src_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_EXTRAS_STRING_VIEW_DATA_SOURCE_H_ // NOLINT diff --git a/includes/image_io/gcontainer/gcontainer.h b/includes/image_io/gcontainer/gcontainer.h new file mode 100644 index 0000000..c0bd66f --- /dev/null +++ b/includes/image_io/gcontainer/gcontainer.h @@ -0,0 +1,36 @@ +#ifndef IMAGE_IO_GCONTAINER_GCONTAINER_H_ // NOLINT +#define IMAGE_IO_GCONTAINER_GCONTAINER_H_ // NOLINT + +#include <string> +#include <vector> + +namespace photos_editing_formats { +namespace image_io { +namespace gcontainer { + +// Writes an image to output_file_name, appending other_files (if they each +// exist) after the image's EOI marker. +// input_file_name must be a JPEG file. +bool WriteImageAndFiles(const std::string& input_file_name, + const std::vector<std::string>& other_files, + const std::string& output_file_name); + +// Retrieves the bytes (of size file_length) starting at file_start_offset +// bytes after the EOI marker in input_file_name. Returns true if parsing was +// successful, false otherwise. GContainer callers are expected to have +// file_start_offset and file_length from the image metadata. +// +// input_file_name must be a JPEG. +// file_start_offset is the nth byte after (and excluding) the EOI marker in +// input_file_name. file_length is the size (in bytes) of content to parse. +// out_file_contents is populated with the requested contents only if parsing is +// successful.
+bool ParseFileAfterImage(const std::string& input_file_name, + size_t file_start_offset, size_t file_length, + std::string* out_file_contents); + +} // namespace gcontainer +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_GCONTAINER_GCONTAINER_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_apple_depth_builder.h b/includes/image_io/jpeg/jpeg_apple_depth_builder.h new file mode 100644 index 0000000..7f5c595 --- /dev/null +++ b/includes/image_io/jpeg/jpeg_apple_depth_builder.h @@ -0,0 +1,102 @@ +#ifndef IMAGE_IO_JPEG_JPEG_APPLE_DEPTH_BUILDER_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_APPLE_DEPTH_BUILDER_H_ // NOLINT + +#include <vector> + +#include "image_io/base/data_destination.h" +#include "image_io/base/data_range.h" +#include "image_io/base/data_source.h" + +namespace photos_editing_formats { +namespace image_io { + +/// Builds an Apple depth file containing a (possibly scaled down) primary image +/// and original depth image. +class JpegAppleDepthBuilder { + public: + JpegAppleDepthBuilder() + : primary_image_data_source_(nullptr), + depth_image_data_source_(nullptr), + data_destination_(nullptr) {} + + /// @param primary_image_data_source The data source containing the primary + /// image. The builder uses the first image in this data source. + /// @param depth_image_data_source The data source containing the depth image. + /// The builder finds the depth image using a JpegInfoBuilder and the + /// JpegInfo::GetAppleDepthImageRange() function. Consequently, this + /// image source must refer to a valid Apple depth file. + /// @param data_destination The data destination for the combined primary + /// and depth images. + /// @return Whether the building and transfer was successful. + bool Run(DataSource* primary_image_data_source, + DataSource* depth_image_data_source, + DataDestination* data_destination); + + private: + /// Gets the data associated with the primary image from its data source.
+ /// @return Whether the primary image data was gotten successfully. + bool GetPrimaryImageData(); + + /// Gets the data associated with the depth image from its data source. + /// @return Whether the depth image data was gotten successfully. + bool GetDepthImageData(); + + /// Transfers the primary image from its data source to the data destination, + /// adding and transforming the jpeg segments it needs to make the resulting + /// data destination a valid Apple depth file. + /// @return Whether the transfer was successful or not. + bool TransferPrimaryImage(); + + /// Transfers the depth image from its data source to the data destination. + /// @return Whether the transfer was successful or not. + bool TransferDepthImage(); + + /// Modifies the existing primary Jfif segment to contain the information + /// needed for a valid Apple depth file, and transfers the result to the data + /// destination. + /// @param jfif_length_delta The increased size of the Jfif segment. + /// @return Whether the transfer was successful or not. + bool TransferNewJfifSegment(size_t *jfif_length_delta); + + /// Creates a new Mpf segment needed for a valid Apple depth file and + /// transfers the result to the data destination. + /// @param jfif_length_delta The increased size of the Jfif segment. + /// @return Whether the transfer was successful or not. + bool TransferNewMpfSegment(size_t jfif_length_delta); + + /// @param data_source The data source from which to transfer bytes to the + /// data destination. + /// @param data_range The data range in the data source to transfer. + bool TransferData(DataSource *data_source, const DataRange& data_range); + + /// The data source containing the primary image. + DataSource* primary_image_data_source_; + + /// The data source representing a valid Apple depth file. + DataSource* depth_image_data_source_; + + /// The final destination of the new Apple depth data. 
+ DataDestination* data_destination_; + + /// The range in the primary image data source containing the primary image. + DataRange primary_image_range_; + + /// The range in the primary image data source containing the primary image's + /// Jfif segment. + DataRange primary_image_jfif_segment_range_; + + /// The bytes of the primary image's Jfif segment. + std::vector<Byte> primary_image_jfif_segment_bytes_; + + /// The range in the primary image data source containing the primary image's + /// Mpf segment, or the location at which a new Mpf segment should be written. + DataRange primary_image_mpf_segment_range_; + + /// The range in the depth image data source containing the depth image. + DataRange depth_image_range_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_APPLE_DEPTH_BUILDER_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_image_extractor.h b/includes/image_io/jpeg/jpeg_image_extractor.h new file mode 100644 index 0000000..91237e5 --- /dev/null +++ b/includes/image_io/jpeg/jpeg_image_extractor.h @@ -0,0 +1,73 @@ +#ifndef IMAGE_IO_JPEG_JPEG_IMAGE_EXTRACTOR_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_IMAGE_EXTRACTOR_H_ // NOLINT + +#include "image_io/base/data_destination.h" +#include "image_io/base/data_source.h" +#include "image_io/jpeg/jpeg_info.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A class that can make use of the data in a JpegInfo instance to transfer +/// Apple depth and GDepth/GImage images from a DataSource and ship it to a +/// DataDestination. +class JpegImageExtractor { + public: + /// @param jpeg_info The JpegInfo instance containing depth/image data. + /// @param data_source The DataSource from which to transfer depth/image data.
+ JpegImageExtractor(const JpegInfo& jpeg_info, DataSource* data_source) + : jpeg_info_(jpeg_info), data_source_(data_source) {} + + /// This function extracts the Apple depth image from the DataSource and sends + /// the bytes to the DataDestination. + /// @param image_destination The DataDestination to receive the image data. + /// @return True if an image was extracted. + bool ExtractAppleDepthImage(DataDestination* image_destination); + + /// This function extracts the Apple matte image from the DataSource and sends + /// the bytes to the DataDestination. + /// @param image_destination The DataDestination to receive the image data. + /// @return True if an image was extracted. + bool ExtractAppleMatteImage(DataDestination* image_destination); + + /// This function extracts the GDepth type image from the DataSource and + /// sends the bytes to the DataDestination. + /// @param image_destination The DataDestination to receive the image data. + /// @return True if an image was extracted. + bool ExtractGDepthImage(DataDestination* image_destination); + + /// This function extracts the GImage type image from the DataSource and + /// sends the bytes to the DataDestination. + /// @param image_destination The DataDestination to receive the image data. + /// @return True if an image was extracted. + bool ExtractGImageImage(DataDestination* image_destination); + + private: + /// Worker function called for GDepth/GImage type image extraction. + /// @param xmp_info_type The type of image to extract. + /// @param image_destination The DataDestination to receive the image data. + /// @return True if an image was extracted. + bool ExtractImage(JpegXmpInfo::Type xmp_info_type, + DataDestination* image_destination); + + /// Worker function called for Apple depth/matte type image extraction. + /// @param image_range The range of the image data to extract. 
If invalid, + /// the image_destination's StartTransfer/FinishTransfer functions are + /// still called, and this function will return true (i.e., zero bytes + /// "successfully" transferred). + /// @param image_destination The DataDestination to receive the image data. + /// @return True if the transfer succeeded. + bool ExtractImage(const DataRange& image_range, + DataDestination* image_destination); + + /// The jpeg info object contains the location of the Apple and Google images. + JpegInfo jpeg_info_; + + /// The data source from which the images are extracted. + DataSource* data_source_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_IMAGE_EXTRACTOR_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_info.h b/includes/image_io/jpeg/jpeg_info.h new file mode 100644 index 0000000..8aedf9e --- /dev/null +++ b/includes/image_io/jpeg/jpeg_info.h @@ -0,0 +1,153 @@ +#ifndef IMAGE_IO_JPEG_JPEG_INFO_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_INFO_H_ // NOLINT + +#include <vector> + +#include "image_io/base/data_range.h" +#include "image_io/jpeg/jpeg_segment_info.h" +#include "image_io/jpeg/jpeg_xmp_info.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A class to represent interesting depth and image information in a JPEG file, +/// and where it is located so that it can be efficiently extracted. +class JpegInfo { + public: + JpegInfo() { JpegXmpInfo::InitializeVector(&xmp_info_vector_); } + JpegInfo(const JpegInfo&) = default; + JpegInfo& operator=(const JpegInfo&) = default; + + /// @return The vector of data ranges indicating the locations of the images. + const std::vector<DataRange>& GetImageRanges() const { return image_ranges_; } + + /// @return The vector of interesting segment info structures. + const std::vector<JpegSegmentInfo>& GetSegmentInfos() const { + return segment_infos_; + } + + /// @param image_index The image containing the sought after segment info. 
+ /// @param type The type of segment info to get. + /// @return The segment info, or one that is invalid if not found. + JpegSegmentInfo GetSegmentInfo(size_t image_index, + const std::string& type) const { + for (const auto& segment_info : GetSegmentInfos()) { + if (segment_info.GetImageIndex() == image_index && + segment_info.GetType() == type) { + return segment_info; + } + } + return JpegSegmentInfo(0, DataRange(), ""); + } + + /// @return True if there is Apple depth information. + bool HasAppleDepth() const { return apple_depth_image_range_.IsValid(); } + + /// @return True if there is Apple matte information. + bool HasAppleMatte() const { return apple_matte_image_range_.IsValid(); } + + /// @return True if there is GDepth type depth information. + bool HasGDepth() const { + return HasImage(JpegXmpInfo::kGDepthInfoType); + } + + /// @return True if there is GImage information. + bool HasGImage() const { + return HasImage(JpegXmpInfo::kGImageInfoType); + } + + /// @return True if there is either Apple depth or GDepth information. + bool HasDepth() const { return HasAppleDepth() || HasGDepth(); } + + /// @return True if there is an extractable image present. + bool HasExtractableImage() const { + return HasAppleDepth() || HasAppleMatte() || HasGDepth() || HasGImage(); + } + + /// @param xmp_info_type The type of xmp image information desired. + /// @return True if there is information of the given type. + bool HasImage(JpegXmpInfo::Type xmp_info_type) const { + return !GetSegmentDataRanges(xmp_info_type).empty(); + } + + /// @return The DataRange where the Apple depth information is located. + const DataRange& GetAppleDepthImageRange() const { + return apple_depth_image_range_; + } + + /// @return The DataRange where the Apple matte information is located. + const DataRange& GetAppleMatteImageRange() const { + return apple_matte_image_range_; + } + + /// @param type The type of Xmp data to get the mime type of.
+ /// @return The mime type for the Xmp data of the given type. + std::string GetMimeType(JpegXmpInfo::Type type) const { + return xmp_info_vector_[type].GetMimeType(); + } + + /// @param type The type of Xmp data to get the segment data ranges of. + /// @return The segment data ranges containing the Xmp data of the given type. + const std::vector<DataRange>& GetSegmentDataRanges( + JpegXmpInfo::Type type) const { + return xmp_info_vector_[type].GetSegmentDataRanges(); + } + + /// Adds a DataRange to the vector of image DataRanges. + /// @param image_range The data range of an image. + void AddImageRange(const DataRange& image_range) { + image_ranges_.push_back(image_range); + } + + /// Adds a JpegSegmentInfo to the vector of JpegSegmentInfos. + /// @param segment_info The info structure to add. + void AddSegmentInfo(const JpegSegmentInfo& segment_info) { + segment_infos_.push_back(segment_info); + } + + /// @param data_range The DataRange where Apple depth information is located. + void SetAppleDepthImageRange(const DataRange& data_range) { + apple_depth_image_range_ = data_range; + } + + /// @param data_range The DataRange where Apple matte information is located. + void SetAppleMatteImageRange(const DataRange& data_range) { + apple_matte_image_range_ = data_range; + } + + /// @param type The type of Xmp data to set the mime type of. + /// @param mime_type The mime type of the Xmp data. + void SetMimeType(JpegXmpInfo::Type type, const std::string& mime_type) { + xmp_info_vector_[type].SetMimeType(mime_type); + } + + /// @param type The type of Xmp data to set segment data ranges of. + /// @param segment_data_ranges The segments that contain the Xmp data. + void SetSegmentDataRanges(JpegXmpInfo::Type type, + const std::vector<DataRange>& segment_data_ranges) { + xmp_info_vector_[type].SetSegmentDataRanges(segment_data_ranges); + } + + private: + /// The DataRanges of all images. + std::vector<DataRange> image_ranges_; + + /// Interesting segment information.
Currently information about APP0/JFIF, + /// APP1/EXIF and APP2/MPF segments is saved here. + std::vector<JpegSegmentInfo> segment_infos_; + + /// The DataRange of the Apple depth information. + DataRange apple_depth_image_range_; + + /// The DataRange of the Apple matte information. + DataRange apple_matte_image_range_; + + /// A vector holding information about the Xmp segments containing GDepth and + /// GImage data. + std::vector<JpegXmpInfo> xmp_info_vector_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_INFO_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_info_builder.h b/includes/image_io/jpeg/jpeg_info_builder.h new file mode 100644 index 0000000..ee4d611 --- /dev/null +++ b/includes/image_io/jpeg/jpeg_info_builder.h @@ -0,0 +1,133 @@ +#ifndef IMAGE_IO_JPEG_JPEG_INFO_BUILDER_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_INFO_BUILDER_H_ // NOLINT + +#include <set> +#include <string> +#include <vector> + +#include "image_io/base/data_range.h" +#include "image_io/jpeg/jpeg_info.h" +#include "image_io/jpeg/jpeg_segment_processor.h" +#include "image_io/jpeg/jpeg_xmp_info_builder.h" + +namespace photos_editing_formats { +namespace image_io { + +/// JpegInfoBuilder is a JpegSegmentProcessor that collects the location and type +/// of depth information in the JPEG file so that subsequent operations can +/// efficiently manipulate it. +class JpegInfoBuilder : public JpegSegmentProcessor { + public: + JpegInfoBuilder(); + + /// @return The JpegInfo with the depth information obtained from the + /// scanner as a result of processing the segments it processes. + const JpegInfo& GetInfo() const { return jpeg_info_; } + + /// @param image_limit The max number of images to process. By default there + /// is no limit on the number of images processed.
+ void SetImageLimit(int image_limit) { image_limit_ = image_limit; } + + /// By default the info builder does not capture the value of the segment in + /// the segment infos contained in the @c JpegInfo object. Call this function + /// to capture the bytes of the indicated segment types. + /// @param segment_info_type The type of segment info to capture the value of. + void SetCaptureSegmentBytes(const std::string& segment_info_type); + + void Start(JpegScanner* scanner) override; + void Process(JpegScanner* scanner, const JpegSegment& segment) override; + void Finish(JpegScanner* scanner) override; + + private: + /// @return True if the data members indicate Apple depth is present. + bool HasAppleDepth() const; + + /// @return True if the data members indicate Apple matte is present. + bool HasAppleMatte() const; + + /// @return True if the segment is a primary Xmp segment. + bool IsPrimaryXmpSegment(const JpegSegment& segment) const; + + /// @return True if the segment is an extended Xmp segment. + bool IsExtendedXmpSegment(const JpegSegment& segment) const; + + /// @return True if the segment is an Mpf segment. + bool IsMpfSegment(const JpegSegment& segment) const; + + /// @return True if the segment is an Exif segment. + bool IsExifSegment(const JpegSegment& segment) const; + + /// @return True if the segment is a Jfif segment. + bool IsJfifSegment(const JpegSegment& segment) const; + + /// Captures the segment bytes into a JpegSegmentInfo's byte vector if + /// SetCaptureSegmentBytes() has been called for the segment info type. + /// @param type The type of segment info being processed. + /// @param segment The segment being processed. + /// @param bytes A vector to hold the segment bytes. + void MaybeCaptureSegmentBytes(const std::string& type, + const JpegSegment& segment, + std::vector<Byte>* bytes) const; + + /// @return True if the segment's extended xmp guid matches the one from the + /// primary xmp segment.
+ bool HasMatchingExtendedXmpGuid(const JpegSegment& segment) const; + + /// @return True if the segment contains the given id. + bool HasId(const JpegSegment& segment, const char* id) const; + + /// Sets the primary segment guid value using properties in the given segment. + /// @param segment The segment from which to obtain the primary xmp guid value. + void SetPrimaryXmpGuid(const JpegSegment& segment); + + /// Sets the Xmp mime type using property values in the given segment. + /// @param segment The segment from which to obtain the mime property value. + /// @param xmp_info_type The type of xmp data that determines the mime + /// property name to look for. + void SetXmpMimeType(const JpegSegment& segment, + JpegXmpInfo::Type xmp_info_type); + + /// The limit on the number of images to process. After this many images have + /// been found, the Process() function will tell the JpegScanner to stop. + int image_limit_; + + /// The number of images encountered in the JPEG file so far. + int image_count_; + + /// The number of APP2/MPF segments encountered per image. One criterion used + /// to determine if Apple depth data is present is that the first image has + /// an APP2/MPF segment. + std::vector<int> image_mpf_count_; + + /// The number of APP1/XMP segments encountered per image. Another criterion + /// used to determine if Apple depth data is present is that the second or + /// following image contains one of these segments. + std::vector<int> image_xmp_apple_depth_count_; + + /// The number of APP1/XMP segments encountered per image. Another criterion + /// used to determine if Apple matte data is present is that the second or + /// following image contains one of these segments. + std::vector<int> image_xmp_apple_matte_count_; + + /// The DataRange of the most recent SOI type segment. This is used to compute + /// the range of the image that represents the Apple depth data.
+ DataRange most_recent_soi_marker_range_; + + /// The GUID value of the APP1/XMP segments that contain GDepth/GImage data. + std::string primary_xmp_guid_; + + /// Builder helpers for gdepth and gimage xmp type segments. + JpegXmpInfoBuilder gdepth_info_builder_; + JpegXmpInfoBuilder gimage_info_builder_; + + /// The collected data describing the type/location of data in the JPEG file. + JpegInfo jpeg_info_; + + /// The types of the segment info type to capture the bytes of. + std::set<std::string> capture_segment_bytes_types_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_INFO_BUILDER_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_marker.h b/includes/image_io/jpeg/jpeg_marker.h new file mode 100644 index 0000000..507382d --- /dev/null +++ b/includes/image_io/jpeg/jpeg_marker.h @@ -0,0 +1,86 @@ +#ifndef IMAGE_IO_JPEG_JPEG_MARKER_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_MARKER_H_ // NOLINT + +#include <bitset> +#include <string> + +#include "image_io/base/types.h" + +namespace photos_editing_formats { +namespace image_io { + +/// The size of the array that would be needed to reference all marker types. +const size_t kJpegMarkerArraySize = 256; + +/// A JpegMarker begins each JpegSegment in a JPEG file. The first byte of a +/// marker is 0xFF, and the second byte is the marker type value. Bytes with +/// values 0x00 and 0xFF indicate not a JpegMarker, but a zero byte or fill +/// byte, respectively. That is the sequence FF00 must be interpreted as a +/// single byte with a 0 value. The specification says that multiple fill bytes +/// may appear before a valid marker start: FFFFFFDA - the leading FFFF should +/// be ignored. +class JpegMarker { + public: + /// The length of the marker in the JPEG file. One byte for the 0xFF value, + /// and one byte for the marker type. + static const size_t kLength = 2; + + /// The offset from the start of the JpegMarker that contains the marker type. 
+ static const size_t kTypeOffset = 1; + + /// The special byte value that may start a marker. + static const Byte kStart = 0xFF; + + /// Special marker type values referenced elsewhere in the code. + static const Byte kZERO = 0; + static const Byte kSOS = 0xDA; + static const Byte kSOI = 0xD8; + static const Byte kEOI = 0xD9; + static const Byte kAPP0 = 0xE0; + static const Byte kAPP1 = 0xE1; + static const Byte kAPP2 = 0xE2; + static const Byte kFILL = 0xFF; + + /// A set of bits, one for each type of marker. + using Flags = std::bitset<kJpegMarkerArraySize>; + + /// Creates a JpegMarker with the given type value. + explicit JpegMarker(Byte type) : type_(type) {} + + JpegMarker() = delete; + + /// Not all byte values are used to represent markers. Bytes with values 0x00 + /// and 0xFF indicate a zero byte or fill byte, respectively. + /// @return Whether this is a valid marker. + bool IsValid() const { return type_ != kZERO && type_ != kFILL; } + + /// @return The type of the marker. + Byte GetType() const { return type_; } + + /// @return The name of the marker type. + const std::string GetName() const; + + /// @param prefix A prefix for the returned string. + /// @return The <prefix>XX hex string representation of the type. + const std::string GetHexString(const std::string& prefix) const; + + /// Some markers have two extra bytes that indicate the size of the segment's + /// data payload. See https://www.w3.org/Graphics/JPEG/itu-t81.pdf, Table B-2. + /// @return Whether this marker type has such a variable length payload. + bool HasVariablePayloadSize() const; + + /// Some markers are delimiters in an otherwise continuous stream of bytes in + /// the JPEG file. See https://www.w3.org/Graphics/JPEG/itu-t81.pdf, Section + /// B.2.1. + /// @return Whether this is an entropy segment delimiter marker. + bool IsEntropySegmentDelimiter() const; + + private: + /// The type value of the marker. 
+ Byte type_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_MARKER_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_scanner.h b/includes/image_io/jpeg/jpeg_scanner.h new file mode 100644 index 0000000..0ab0488 --- /dev/null +++ b/includes/image_io/jpeg/jpeg_scanner.h @@ -0,0 +1,100 @@ +#ifndef IMAGE_IO_JPEG_JPEG_SCANNER_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_SCANNER_H_ // NOLINT + +#include <memory> + +#include "image_io/base/data_segment.h" +#include "image_io/base/data_source.h" +#include "image_io/jpeg/jpeg_marker.h" +#include "image_io/jpeg/jpeg_segment_processor.h" + +namespace photos_editing_formats { +namespace image_io { + +/// JpegScanner reads DataSegments from a DataSource, finds interesting +/// JpegSegments and passes them on to a JpegSegmentProcessor for further +/// examination. +class JpegScanner { + public: + JpegScanner() + : data_source_(nullptr), + segment_processor_(nullptr), + current_location_(0), + done_(false), + has_error_(false) {} + + /// Called to start and run the scanner. + /// @param data_source The DataSource from which to obtain DataSegments. + /// @param segment_processor The processor of the JpegSegment instances. + void Run(DataSource* data_source, JpegSegmentProcessor* segment_processor); + + /// If the JpegSegmentProcessor determines that it has seen enough JpegSegment + /// instances, it can call this function to terminate the scanner prematurely. + void SetDone() { done_ = true; } + + /// @return True if the done flag was set by SetDone(), else false. + bool IsDone() const { return done_; } + + /// @return True if the scanner encountered errors. + bool HasError() const { return has_error_; } + + /// @return The DataSource from which DataSegments are being read. 
+ DataSource* GetDataSource() const { return data_source_; } + + /// JpegSegmentProcessor instances can call this function to inform the + /// scanner about the types of JpegSegment instances it is interested in. + /// The JpegScanner will not send any uninteresting segments to the processor. + void UpdateInterestingMarkerFlags(const JpegMarker::Flags& marker_flags) { + interesting_marker_flags_ = marker_flags; + } + + private: + /// Called from the Run() function to do the heavy lifting. + void FindAndProcessSegments(); + + /// @param marker The marker of the JpegSegment under construction. + /// @param begin_location The start of the JpegSegment under construction. + /// @return The size of the segment payload of given marker type that starts + /// at the specified location. + size_t GetPayloadSize(const JpegMarker& marker, size_t begin_location); + + /// @return The validated byte value at the given location. + ValidatedByte GetValidatedByte(size_t location); + + /// Calls GetValidatedByte() and returns its value if the byte is valid, else + /// sets the has_error_ flag. + /// @return the byte value at the given location, or 0 if the byte is invalid. + Byte GetByte(size_t location); + + /// Asks the DataSource for the next DataSegment. + void GetNextSegment(); + + private: + /// The DataSource from which DataSegments are obtained. + DataSource* data_source_; + + /// The JpegSegmentProcessor to which JpegSegments are sent. + JpegSegmentProcessor* segment_processor_; + + /// The JpegSegment types of interest to the JpegSegmentProcessor. + JpegMarker::Flags interesting_marker_flags_; + + /// Depending on the DataSource, a given JpegSegment may span up to two + /// DataSegments. These are they. + std::shared_ptr<DataSegment> current_segment_; + std::shared_ptr<DataSegment> next_segment_; + + /// The current location of the scanner in the DataSource. + size_t current_location_; + + /// A flag that indicates the scanner is done, naturally or prematurely.
+ bool done_; + + /// A flag that indicates an error occurred while getting Byte data. + bool has_error_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_SCANNER_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_segment.h b/includes/image_io/jpeg/jpeg_segment.h new file mode 100644 index 0000000..c4a79fd --- /dev/null +++ b/includes/image_io/jpeg/jpeg_segment.h @@ -0,0 +1,178 @@ +#ifndef IMAGE_IO_JPEG_JPEG_SEGMENT_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_SEGMENT_H_ // NOLINT + +#include "image_io/base/data_range.h" +#include "image_io/base/data_segment.h" +#include "image_io/jpeg/jpeg_marker.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A JpegSegment is an entity in a JPEG file that starts with a JpegMarker and +/// is followed by zero or more payload bytes. The JpegSegment has a DataRange +/// that indicates the position of the segment in the originating DataSource. +/// A JpegScanner obtains DataSegment instances from a DataSource in such a way +/// that it can guarantee that a JpegSegment will span at most two DataSegment +/// instances. Clients of JpegSegment need not be concerned with the number of +/// underlying DataSegments if they use the member functions defined here to +/// access the segment's bytes. +class JpegSegment { + public: + /// If a JpegSegment has a variable length data payload, the payload data is + /// located at this offset from the start of the payload. + static constexpr size_t kVariablePayloadDataOffset = 2; + + /// Constructs a JpegSegment starting and ending at the indicated points in + /// the given DataSegment instances, the second of which may be null. + /// @param begin The start of JpegSegment range. + /// @param end The end of JpegSegment range. + /// @param begin_segment The DataSegment that contains the begin location of + /// the JpegSegment and the end if the end_segment is null. 
+ /// @param end_segment The DataSegment that contains the end location of the + /// JpegSegment if it is not null. + JpegSegment(size_t begin, size_t end, const DataSegment* begin_segment, + const DataSegment* end_segment) + : data_range_(begin, end), + begin_segment_(begin_segment), + end_segment_(end_segment){} + ~JpegSegment() = default; + + /// @return The DataRange of the data in the segment. + const DataRange& GetDataRange() const { return data_range_; } + + /// @return The begin location of the segment's data range. + size_t GetBegin() const { return data_range_.GetBegin(); } + + /// @return The end location of the segment's data range. + size_t GetEnd() const { return data_range_.GetEnd(); } + + /// @return The length of the segment's data range. + size_t GetLength() const { return data_range_.GetLength(); } + + /// @return True if the segment's range contains the location, else false. + bool Contains(size_t location) const { + return data_range_.Contains(location); + } + + /// @return The location of the segment's JpegMarker. + size_t GetMarkerLocation() const { return GetBegin(); } + + /// @return The location of the segment's payload, which includes the payload + /// length if applicable for the type of segment. + size_t GetPayloadLocation() const { return GetBegin() + JpegMarker::kLength; } + + /// @return The location of the segment's payload's data. + size_t GetPayloadDataLocation() const { + return GetMarker().HasVariablePayloadSize() + ? GetPayloadLocation() + kVariablePayloadDataOffset + : GetPayloadLocation(); + } + + /// @param The location at which to obtain the byte value. + /// @return The validated byte value at the location, or 0/false if the + /// segment's range does not contain the location. 
+ ValidatedByte GetValidatedByte(size_t location) const { + return DataSegment::GetValidatedByte(location, begin_segment_, + end_segment_); + } + + /// @return The payload size or zero if the segment's marker indicates the + /// segment does not have a payload. The payload size includes the two + /// bytes that encode the length of the payload. I.e., the payload data + /// size is two less than the value returned by this function. + size_t GetVariablePayloadSize() const; + + /// @param location The start location of the compare operation. + /// @param str The string to compare the bytes with. + /// @return True if the segment's bytes at the given location equals the str. + bool BytesAtLocationStartWith(size_t location, const char* str) const; + + /// @param location The start location of the search operation. + /// @param str The string to search for. + /// @return True if the segment's contains the string, starting at location. + bool BytesAtLocationContain(size_t location, const char* str) const; + + /// @param start_location The location at which to start the search. + /// @param value The byte value to search for. + /// @return The location in the segment's bytes of the next occurrence of the + /// given byte value, starting at the indicated location, or the segment's + /// range's GetEnd() location if not found. + size_t Find(size_t start_location, Byte value) const; + + /// @param start_location The location at which to start the search. + /// @param str The string to search for. + /// @return the location in the segment's bytes of the next occurrence of the + /// given string value, starting at the indicated location, or the + /// segment's range's GetEnd() location if not found. + size_t Find(size_t location, const char* str) const; + + /// XMP property names have the syntax property_name="property_value". + /// @param segment The segment in which to look for the property name/value. + /// @param start_location Where to start looking for the property name. 
+ /// @param property_name The name of the property to look for. + /// @return The string value associated with the xmp property name, or an + /// empty string if the property was not found. + std::string ExtractXmpPropertyValue(size_t start_location, + const char* property_name) const; + + /// XMP property names have the syntax property_name="property_value". + /// @start_location The location in the segment to begin looking for the + /// property_name=" syntax. + /// @return The location of the next byte following the quote, or GetEnd() if + /// the property_name=" syntax was not found. + size_t FindXmpPropertyValueBegin(size_t start_location, + const char* property_name) const; + + /// XMP property names have the syntax property_name="property_value". + /// @start_location The location in the segment to begin looking for the final + /// quote of the property value. + /// @return The location of quote that terminates the property_value, or + /// GetEnd() if the final quote was not found. + size_t FindXmpPropertyValueEnd(size_t start_location) const; + + /// @param The DataRange to use to extract a string from the segment's bytes. + /// @return The string extracted from the segment at locations indicated by + /// the data_range, or an empty string if the data_range is not contained + /// in the segment's range, or any invalid or zero bytes are encountered. + std::string ExtractString(const DataRange& data_range) const; + + /// @return the JpegMarker of this segment. + JpegMarker GetMarker() const { + size_t marker_type_location = GetMarkerLocation() + 1; + // An invalid ValidatedByte has a value of 0, and a JpegMarker with a 0 + // type value is invalid, so its ok to just grab the ValidatedByte's value. + return JpegMarker(GetValidatedByte(marker_type_location).value); + } + + /// Fills two strings with byte_count bytes from the start of the segment's + /// payload in a form suitable for creating a "hex dump" of the segment. 
Note + /// that if the jpeg segment has a entropy delimiter type marker, there is + /// technically no payload to dump. However in this case, as long as a valid + /// byte can be obtained from the jpeg segment's underlying data segments, a + /// byte value will be dumped to the strings. + /// @param byte_count The number of bytes to dump from the segment's payload. + /// @param hex_string A string that will be at most 2 * byte_count in length + /// that will contain the hex values of the bytes. + /// @param ascii_string A string that will be at most byte_count in length + /// that will contain the printable character of the bytes, or a '.' for + /// non-printable byte values. + void GetPayloadHexDumpStrings(size_t byte_count, std::string* hex_string, + std::string* ascii_string) const; + + private: + /// The DataRange of the JpegSegment. + DataRange data_range_; + + /// The DataSegment that contains the begin of the range and possibly the + /// end. This DataSegment will never be null. + const DataSegment* begin_segment_; + + /// The DataSegment, that if not null, will contain the end location of the + /// JPegSegment's DataRange. + const DataSegment* end_segment_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_SEGMENT_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_segment_builder.h b/includes/image_io/jpeg/jpeg_segment_builder.h new file mode 100644 index 0000000..e8e714f --- /dev/null +++ b/includes/image_io/jpeg/jpeg_segment_builder.h @@ -0,0 +1,140 @@ +#ifndef IMAGE_IO_JPEG_JPEG_SEGMENT_BUILDER_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_SEGMENT_BUILDER_H_ // NOLINT + +#include <string> +#include <vector> + +#include "image_io/base/byte_buffer.h" +#include "image_io/jpeg/jpeg_xmp_info.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A helper to assemble the data in a JpegSegment. 
Currently this is only used +/// for testing purposes, but in the future may prove useful in the image_io +/// library itself. +class JpegSegmentBuilder { + public: + /// Sets the payload size value of the JpegSegment data in the byte buffer. + /// This function assumes that the byte buffer contains the data for exactly + /// one JpegSegment, and that the segment type has a variable payload size. + /// The byte buffer must have a size in the range [4:65535] for this to work. + /// @param byte_buffer The data defining the JpegSegment. + /// @return Whether the byte buffer's size was valid and the payload size set. + static bool SetPayloadSize(ByteBuffer* byte_buffer); + + /// @return The vector of ByteData. + const std::vector<ByteData>& GetByteData() const { return byte_data_; } + + /// @return The concatenated string values of all byte data, or an empty + /// string if there are invalid byte data entries. Note that the string + /// may have embedded null characters if there are any kAscii0 type + /// byte data elements present. + std::string GetByteDataValues() const; + + /// Adds the byte data to the vector. + /// @param byte_data The byte data to add. + void AddByteData(const ByteData& byte_data) { + byte_data_.push_back(byte_data); + } + + /// Adds a segment marker of the given type and payload size. + /// @param marker_type The type of segment marker to add. + /// @param size The size of the payload if the marker has a variable + /// size payload. This value must be in the range [2:65535], although no + /// check is performed to ensure that is the case. + void AddMarkerAndSize(Byte marker_type, size_t size); + + /// Adds a segment marker of the given type, and a "0000" placeholder value if + /// the type has a variable payload size. The ReplaceSizePlaceholder() function + /// can be called later to set the actual size of the segment. + /// @param marker_type The type of segment marker to add.
+ /// @return The index in the vector of ByteData where the marker was added. + size_t AddMarkerAndSizePlaceholder(Byte marker_type); + + /// Replaces the size of the segment marker that was previously added using + /// the AddMarkerAndSizePlaceholder() function. The first two bytes of the + /// ByteData at the given index must represent a valid JpegMarker that has + /// a variable length payload size. + /// @param index The index in the vector of ByteData to set the size of. + /// @param size The size of the segment, including the size field itself. + /// This value must be in the range [2:65535]. + /// @return Whether the size was set successfully. + bool ReplaceSizePlaceholder(size_t index, size_t size); + + /// Adds the bytes that define an XMP header. + /// @param xmp_guid The guid value of the XMP data. If this value is not 32 + /// bytes long, it is either truncated or extended with 0s. + void AddExtendedXmpHeader(const std::string& xmp_guid); + + /// Adds the XMP syntax that appears at the start of an XMP segment. This + /// syntax appears after the XMP header in a segment, so this function should + /// be called after the AddExtendedXmpHeader() function. + void AddXmpMetaPrefix(); + + /// Adds the XMP syntax that appears at the end of an XMP segment. This syntax + /// finishes the XMP data, so it should be the last function called when + /// assembling the data for such a segment. + void AddXmpMetaSuffix(); + + /// Adds the RDF prefix that appears within the body of an XMP segment. This + /// syntax should be added before any XMP property names and values are added. + void AddRdfPrefix(); + + /// Adds the RDF suffix that appears within the body of an XMP segment. This + /// syntax should be added after all XMP property names and values are added. + void AddRdfSuffix(); + + /// Adds the RDF:Description prefix that appears within the body of an XMP + /// segment.
This syntax should be added after the RDF prefix is added, but + /// before any XMP property names and values are added. + void AddRdfDescriptionPrefix(); + + /// Adds the RDF:Description suffix that appears within the body of an XMP + /// segment. This syntax should be added after all XMP property names + /// and values are added, but before the RDF suffix is added. + void AddRdfDescriptionSuffix(); + + /// Adds the property name, and the '="' string that defines + /// the start of the name="value" string. After this call, you can + /// add the property value to the byte data vector, and then call the + /// AddXmpPropertySuffix() function to finish the definition. + /// @param property_name The name of the property to add. + void AddXmpPropertyPrefix(const std::string& property_name); + + /// Adds a final quote to finish off the definition of a name="value" string. + void AddXmpPropertySuffix(); + + /// Adds the name="value" strings to define the XMP property name and value. + /// @param property_name The name of the property to add. + /// @param property_value The value of the property to add. + void AddXmpPropertyNameAndValue(const std::string& property_name, + const std::string& property_value); + + /// Adds segment marker and the extended XMP header for an APP1/XMP type + /// segment that has extended XMP data. After this call you can either call the + /// AddXmpAndRdfPrefixes() function (if this is the first extended segment), or + /// just continue adding the property value contained in this segment. + /// @param xmp_guid The guid value of the XMP data. If this value is not 32 + /// bytes long, it is either truncated or extended with 0s. + void AddApp1XmpMarkerAndXmpExtendedHeader(const std::string& xmp_guid); + + /// Adds segment marker and all the prefixes to start the xmpmeta/rdf section + /// of the segment.
After this call property names and values can be added, + /// and optionally the section can be completed by calling the + /// AddXmpAndRdfSuffixes() function. + void AddXmpAndRdfPrefixes(); + + /// Adds the suffixes to complete the definition of an APP1/XMP segment. Call + /// this function after the AddXmpAndRdfPrefixes() and after adding property + /// names and values to the byte data. + void AddXmpAndRdfSuffixes(); + + private: + std::vector<ByteData> byte_data_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_SEGMENT_BUILDER_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_segment_info.h b/includes/image_io/jpeg/jpeg_segment_info.h new file mode 100644 index 0000000..6fb25db --- /dev/null +++ b/includes/image_io/jpeg/jpeg_segment_info.h @@ -0,0 +1,85 @@ +#ifndef IMAGE_IO_JPEG_JPEG_SEGMENT_INFO_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_SEGMENT_INFO_H_ // NOLINT + +#include <string> +#include <vector> + +#include "image_io/base/data_range.h" +#include "image_io/base/types.h" + +namespace photos_editing_formats { +namespace image_io { + +/// Interesting segment types. +const char kExif[] = "Exif"; +const char kJfif[] = "JFIF"; +const char kMpf[] = "MPF"; + +/// A class that holds interesting information about a JpegSegment. +class JpegSegmentInfo { + public: + /// @param image_index The index of the image in a @c DataSource that contains + /// the segment. + /// @param data_range The range of the segment in the @c DataSource. + /// @param type The type of segment. + JpegSegmentInfo(size_t image_index, const DataRange& data_range, + const std::string& type) + : image_index_(image_index), data_range_(data_range), type_(type) {} + + /// Constructs an empty, invalid segment info. + JpegSegmentInfo() : image_index_(0) {} + + JpegSegmentInfo(const JpegSegmentInfo&) = default; + JpegSegmentInfo& operator=(const JpegSegmentInfo&) = default; + + /// @param rhs The segment info to compare with this one.
+ /// @return Whether the segment infos are equal + bool operator==(const JpegSegmentInfo& rhs) const { + return image_index_ == rhs.image_index_ && data_range_ == rhs.data_range_ && + type_ == rhs.type_ && bytes_ == rhs.bytes_; + } + + /// @param rhs The segment info to compare with this one. + /// @return Whether the segment infos are not equal + bool operator!=(const JpegSegmentInfo& rhs) const { + return !(*this == rhs); + } + + /// @return Whether the segment info is valid. + bool IsValid() const { return !type_.empty() && data_range_.IsValid(); } + + /// @return The image index of the segment info. + size_t GetImageIndex() const { return image_index_; } + + /// @return The data range of the segment info. + const DataRange& GetDataRange() const { return data_range_; } + + /// @return The type of the segment info. + const std::string& GetType() const { return type_; } + + /// @return The (optional) bytes of the segment to which the info refers. The + /// vector will be empty unless the GetMutableBytes() function has been + /// called and the vector filled with the segment contents. + const std::vector<Byte>& GetBytes() const { return bytes_; } + + /// @return A non-const pointer to the bytes vector. + std::vector<Byte>* GetMutableBytes() { return &bytes_; } + + private: + // The image index where the segment is located. + size_t image_index_; + + // The data range of the segment. + DataRange data_range_; + + // The type of segment. + std::string type_; + + // The (optional) bytes of the segment. 
+ std::vector<Byte> bytes_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_SEGMENT_INFO_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_segment_lister.h b/includes/image_io/jpeg/jpeg_segment_lister.h new file mode 100644 index 0000000..ca2a19d --- /dev/null +++ b/includes/image_io/jpeg/jpeg_segment_lister.h @@ -0,0 +1,35 @@ +#ifndef IMAGE_IO_JPEG_JPEG_SEGMENT_LISTER_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_SEGMENT_LISTER_H_ // NOLINT + +#include <string> +#include <vector> + +#include "image_io/jpeg/jpeg_segment_processor.h" + +namespace photos_editing_formats { +namespace image_io { + +/// JpegSegmentLister is an implementation of JpegSegmentProcessor that creates +/// a listing (in the form of a vector of strings) describing the segments. +class JpegSegmentLister : public JpegSegmentProcessor { + public: + JpegSegmentLister(); + void Start(JpegScanner* scanner) override; + void Process(JpegScanner* scanner, const JpegSegment& segment) override; + void Finish(JpegScanner* scanner) override; + + /// @return The lines representing the listing of the segments. + const std::vector<std::string>& GetLines() const { return lines_; } + + private: + /// The number of occurrences of the various segment types. + std::vector<int> marker_type_counts_; + + /// The lines representing the listing output. 
+ std::vector<std::string> lines_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_SEGMENT_LISTER_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_segment_processor.h b/includes/image_io/jpeg/jpeg_segment_processor.h new file mode 100644 index 0000000..a193797 --- /dev/null +++ b/includes/image_io/jpeg/jpeg_segment_processor.h @@ -0,0 +1,44 @@ +#ifndef IMAGE_IO_JPEG_JPEG_SEGMENT_PROCESSOR_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_SEGMENT_PROCESSOR_H_ // NOLINT + +#include "image_io/jpeg/jpeg_segment.h" + +namespace photos_editing_formats { +namespace image_io { + +class JpegScanner; + +/// JpegSegmentProcessor is the abstract base class for implementations that do +/// something with the JPEG segments that the JpegScanner identifies. +class JpegSegmentProcessor { + public: + virtual ~JpegSegmentProcessor() = default; + + /// This function is called at the start of the JpegScanner::Run() function to + /// allow this JpegProcessor to initialize its data structures. It can also + /// inform the JpegScanner about preferences for the types of segments it is + /// interested in by calling the JpegScanner::UpdateInterestingMarkerFlags() + /// function. + /// @param scanner The scanner that is starting the JpegProcessor. + virtual void Start(JpegScanner* scanner) = 0; + + /// This function is called repeatedly by the JpegScanner as it identifies + /// segments in the JPEG file. The JpegProcessor can access the data in the + /// segment to do interesting things, or can update the scanner's preferences + /// like in the Start() function. + /// @param scanner The scanner that is providing the segment to the processor. + /// @param segment The segment provided by the scanner to the processor. 
+ virtual void Process(JpegScanner* scanner, const JpegSegment& segment) = 0; + + /// This function is called after the JpegScanner has provided all the + /// segments to the JpegProcessor to allow the processor to finish its work + /// processing the segments. + /// @param scanner The scanner that is informing the processor that it is done + /// finding segments. + virtual void Finish(JpegScanner* scanner) = 0; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_SEGMENT_PROCESSOR_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_xmp_data_extractor.h b/includes/image_io/jpeg/jpeg_xmp_data_extractor.h new file mode 100644 index 0000000..30d62a1 --- /dev/null +++ b/includes/image_io/jpeg/jpeg_xmp_data_extractor.h @@ -0,0 +1,66 @@ +#ifndef IMAGE_IO_JPEG_JPEG_XMP_DATA_EXTRACTOR_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_XMP_DATA_EXTRACTOR_H_ // NOLINT + +#include "image_io/base/data_destination.h" +#include "image_io/jpeg/jpeg_info.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A class that can make use of the data in a JpegInfo instance to extract +/// the xmp data JpegSegments passed to it and forward it to a DataDestination. +class JpegXmpDataExtractor : public DataDestination { + public: + /// @param xmp_info_type The type of xmp data being extracted. + /// @param segment_count The number of segment ranges over which the xmp + /// data is spread. + /// @param data_destination The destination to which the extracted xmp data + /// is to be sent. + JpegXmpDataExtractor(JpegXmpInfo::Type xmp_info_type, size_t segment_count, + DataDestination* data_destination) + : xmp_info_type_(xmp_info_type), + last_segment_index_(segment_count - 1), + data_destination_(data_destination), + has_error_(false) {} + + /// Set the current segment index to the given value. + /// @param segment_index The index of the segment currently being processed. 
+ void SetSegmentIndex(size_t segment_index) { segment_index_ = segment_index; } + + /// @return True if there was an error in the extraction process. + bool HasError() const { return has_error_; } + + void StartTransfer() override; + TransferStatus Transfer(const DataRange& transfer_range, + const DataSegment& data_segment) override; + void FinishTransfer() override; + + /// @return The number of bytes written not to this extractor destination, but + /// to the next destination. Returns zero if the next destination is null. + size_t GetBytesTransferred() const override { + return data_destination_ ? data_destination_->GetBytesTransferred() : 0; + } + + private: + /// The type of xmp data being extracted. + JpegXmpInfo::Type xmp_info_type_; + + /// The xmp data require special processing when the last segment is being + /// transferred. This value is the index of the last segment. + size_t last_segment_index_; + + /// The DataDestination that the extracted xmp data is sent to. + DataDestination* data_destination_; + + /// The xmp data is spread over one or more segments in the DataSource. This + /// index tracks which one is being transferred. + size_t segment_index_; + + /// A true value indicates that an error occurred in the decoding process. 
+ bool has_error_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_XMP_DATA_EXTRACTOR_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_xmp_info.h b/includes/image_io/jpeg/jpeg_xmp_info.h new file mode 100644 index 0000000..2bda3f5 --- /dev/null +++ b/includes/image_io/jpeg/jpeg_xmp_info.h @@ -0,0 +1,92 @@ +#ifndef IMAGE_IO_JPEG_JPEG_XMP_INFO_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_XMP_INFO_H_ // NOLINT + +#include <string> +#include <vector> + +#include "image_io/base/data_range.h" + +namespace photos_editing_formats { +namespace image_io { + +const size_t kXmpGuidSize = 32; +const char kXmpId[] = "http://ns.adobe.com/xap/1.0/"; +const char kXmpExtendedId[] = "http://ns.adobe.com/xmp/extension/"; +const size_t kXmpExtendedHeaderSize = + sizeof(kXmpExtendedId) + kXmpGuidSize + 2 * sizeof(std::uint32_t); + +/// Constants used to find and process information in APP1/XMP type segments. +const char kXmpAppleDepthId[] = "http://ns.apple.com/depthData/1.0"; +const char kXmpAppleMatteId[] = "http://ns.apple.com/portraitEffectsMatte/1.0/"; +const char kXmpGDepthV1Id[] = "http://ns.google.com/photos/1.0/depthmap/"; +const char kXmpGImageV1Id[] = "http://ns.google.com/photos/1.0/image/"; +const char kXmpHasExtendedId[] = "xmpNote:HasExtendedXMP"; + +/// JpegXmpInfo maintains information about the data in an Xmp property, such as +/// are used to store the GDepth and GImage data. +class JpegXmpInfo { + public: + /// The possible types of Xmp information. + enum Type { + /// GDepth:Data type information. + kGDepthInfoType, + + /// GImage:Data type information. + kGImageInfoType, + }; + + /// Initializes a vector of JpegXmpInfo instances, indexed by their type. + /// @param xmp_info_vector The vector to initialize. + static void InitializeVector(std::vector<JpegXmpInfo>* xmp_info_vector); + + /// @param jpeg_xmp_info_type The type to get the identifier of. 
+ /// @return The identifier that appears at the start of the Xmp segment. + static std::string GetIdentifier(Type jpeg_xmp_info_type); + + /// @param jpeg_xmp_info_type The type to get the data property name of. + /// @return The name of the data property that appears in the Xmp segment. + static std::string GetDataPropertyName(Type jpeg_xmp_info_type); + + /// @param jpeg_xmp_info_type The type to get the mime property name of. + /// @return The name of the mime property that appears in the primary + /// Xmp segment. + static std::string GetMimePropertyName(Type jpeg_xmp_info_type); + + explicit JpegXmpInfo(Type type) : type_(type) {} + JpegXmpInfo(const JpegXmpInfo&) = default; + JpegXmpInfo& operator=(const JpegXmpInfo&) = default; + + /// @return The type of the Xmp property information. + Type GetType() const { return type_; } + + /// @return The mime type of the Xmp data. + std::string GetMimeType() const { return mime_type_; } + + /// @param mime_type The mime type to assign to this instance. + void SetMimeType(const std::string& mime_type) { mime_type_ = mime_type; } + + /// @return The segment's data ranges where this Xmp data occurs. + const std::vector<DataRange>& GetSegmentDataRanges() const { + return segment_data_ranges_; + } + + /// @param segment_data_ranges The segment data ranges to assign to this + /// instance. + void SetSegmentDataRanges(const std::vector<DataRange>& segment_data_ranges) { + segment_data_ranges_ = segment_data_ranges; + } + + private: + /// The type of the Xmp information. + Type type_; + + /// The mime type of the Xmp data. + std::string mime_type_; + + /// The segment data ranges that contain the Xmp data. 
+ std::vector<DataRange> segment_data_ranges_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_XMP_INFO_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_xmp_info_builder.h b/includes/image_io/jpeg/jpeg_xmp_info_builder.h new file mode 100644 index 0000000..62b3ac4 --- /dev/null +++ b/includes/image_io/jpeg/jpeg_xmp_info_builder.h @@ -0,0 +1,42 @@ +#ifndef IMAGE_IO_JPEG_JPEG_XMP_INFO_BUILDER_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_XMP_INFO_BUILDER_H_ // NOLINT + +#include <vector> + +#include "image_io/jpeg/jpeg_segment.h" +#include "image_io/jpeg/jpeg_xmp_info.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A helper class for building information about the segments that contain +/// extended xmp data of various types. +class JpegXmpInfoBuilder { + public: + /// @param xmp_info_type The type of xmp information to build. + explicit JpegXmpInfoBuilder(JpegXmpInfo::Type xmp_info_type) + : xmp_info_type_(xmp_info_type) {} + + /// @param segment The segment to examine for xmp data. + void ProcessSegment(const JpegSegment& segment); + + /// @return The vector of segment data ranges that contains xmp property data. + const std::vector<DataRange>& GetPropertySegmentRanges() const { + return property_segment_ranges_; + } + + private: + /// The type of xmp data to collect. + JpegXmpInfo::Type xmp_info_type_; + + /// The vector of segment data ranges that contains xmp property data. + std::vector<DataRange> property_segment_ranges_; + + /// The segment data range that contains the xmp property data end. 
+ DataRange property_end_segment_range_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_XMP_INFO_BUILDER_H_ // NOLINT diff --git a/includes/image_io/tools/image_tool_function.h b/includes/image_io/tools/image_tool_function.h new file mode 100644 index 0000000..f730359 --- /dev/null +++ b/includes/image_io/tools/image_tool_function.h @@ -0,0 +1,28 @@ +#ifndef IMAGE_IO_TOOLS_IMAGE_TOOL_FUNCTION_H_ // NOLINT +#define IMAGE_IO_TOOLS_IMAGE_TOOL_FUNCTION_H_ // NOLINT + +#include <functional> +#include <string> + +namespace photos_editing_formats { +namespace image_io { + +/// All output of the ImageTool() function and the underlying image_io functions +/// are sent to this type of function that is passed to ImageTool(). Client code +/// can use a function that writes the line to stdout or to a log file. The +/// str parameter may have embedded new line characters in it. The function +/// should not write its own new line at the end of the str. +using ImageToolOutputter = std::function<void(const std::string& str)>; + +/// The ImageTool entry point, easily callable from a main() type function. +/// @param argc The number of strings in the argv array. +/// @param argv The options and values used in the command line. +/// @param outputter A function to output the strings produced by ImageTool(). +/// @return A zero value for successful, non-zero for an error. 
+int ImageTool(int argc, const char* argv[], + const ImageToolOutputter& outputter); + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_TOOLS_IMAGE_TOOL_FUNCTION_H_ // NOLINT diff --git a/includes/image_io/utils/file_utils.h b/includes/image_io/utils/file_utils.h new file mode 100644 index 0000000..d1a469d --- /dev/null +++ b/includes/image_io/utils/file_utils.h @@ -0,0 +1,41 @@ +#ifndef IMAGE_IO_UTILS_FILE_UTILS_H_ // NOLINT +#define IMAGE_IO_UTILS_FILE_UTILS_H_ // NOLINT + +#include <iostream> +#include <memory> +#include <string> + +#include "image_io/base/data_segment.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A policy that controls whether an error is reported or not. +enum class ReportErrorPolicy { kDontReportError, kReportError }; + +/// @param file_name The name of the file to get the size in bytes of. +/// @param size A pointer to a variable to receive the size. +/// @return Whether file size was obtained properly. +bool GetFileSize(const std::string& file_name, size_t* size); + +/// @param file_name The name of the file to open for output. +/// @return An ostream pointer or nullptr if the open failed. +std::unique_ptr<std::ostream> OpenOutputFile( + const std::string& file_name, ReportErrorPolicy report_error_policy); + +/// @param file_name The name of the file to open for input. +/// @return An istream pointer or nullptr if the open failed. +std::unique_ptr<std::istream> OpenInputFile( + const std::string& file_name, ReportErrorPolicy report_error_policy); + +/// Opens the named file for input, gets its size, and reads the entire contents +/// into a data segment that is returned to the caller. +/// @param file_name The name of the file to open for input. +/// @return A DataSegment pointer or nullptr if the open and reading failed. 
+std::shared_ptr<DataSegment> ReadEntireFile( + const std::string& file_name, ReportErrorPolicy report_error_policy); + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_UTILS_FILE_UTILS_H_ // NOLINT diff --git a/includes/image_io/xml/xml_action.h b/includes/image_io/xml/xml_action.h new file mode 100644 index 0000000..dce6c7d --- /dev/null +++ b/includes/image_io/xml/xml_action.h @@ -0,0 +1,57 @@ +#ifndef IMAGE_IO_XML_XML_ACTION_H_ // NOLINT +#define IMAGE_IO_XML_XML_ACTION_H_ // NOLINT + +#include <functional> + +#include "image_io/base/data_match_result.h" +#include "image_io/xml/xml_handler_context.h" + +namespace photos_editing_formats { +namespace image_io { + +class XmlActionContext; +class XmlTerminal; + +/// The definition for an action function associated with an XmlTerminal. +/// If the action does not need to change the result of the terminal, it can +/// simply return the value from XmlActionContext::GetResult(). +using XmlAction = + std::function<DataMatchResult(const XmlActionContext& context)>; + +/// The data context passed from an XmlTerminal to its action function. +class XmlActionContext : public XmlHandlerContext { + public: + XmlActionContext(const XmlHandlerContext& context, XmlTerminal* terminal, + const DataMatchResult& result) + : XmlHandlerContext(context), terminal_(terminal), result_(result) {} + XmlActionContext(size_t location, const DataRange& range, + const DataSegment& segment, const DataLineMap& data_line_map, + XmlHandler* handler, XmlTerminal* terminal, + const DataMatchResult& result) + : XmlHandlerContext(location, range, segment, data_line_map, handler), + terminal_(terminal), + result_(result) {} + + /// @return The terminal associated with the context. + XmlTerminal* GetTerminal() const { return terminal_; } + + /// @return The result associated with the context. 
+ const DataMatchResult& GetResult() const { return result_; } + + /// @param bytes_consumed The value to set in the returned result. + /// @return A result based on the context's action, but with its bytes + /// consumed value set to the given value. + DataMatchResult GetResultWithBytesConsumed(size_t bytes_consumed) const { + auto result = result_; + return result.SetBytesConsumed(bytes_consumed); + } + + private: + XmlTerminal* terminal_; + DataMatchResult result_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_ACTION_H_ // NOLINT diff --git a/includes/image_io/xml/xml_attribute_rule.h b/includes/image_io/xml/xml_attribute_rule.h new file mode 100644 index 0000000..564af07 --- /dev/null +++ b/includes/image_io/xml/xml_attribute_rule.h @@ -0,0 +1,33 @@ +#ifndef IMAGE_IO_XML_XML_ATTRIBUTE_RULE_H_ // NOLINT +#define IMAGE_IO_XML_XML_ATTRIBUTE_RULE_H_ // NOLINT + +#include "image_io/xml/xml_rule.h" + +namespace photos_editing_formats { +namespace image_io { + +/// The XmlAttributeRule parses the following syntax: +/// S? Name S? = S? 'Value' +/// S? Name S? = S? "Value" +class XmlAttributeRule : public XmlRule { + public: + XmlAttributeRule(); + + private: + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's AttributeName() function. + /// @param context The action context from the name terminal. + /// @return The result value from the handler's function. + DataMatchResult HandleName(const XmlActionContext& context); + + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's AttributeValue() function. + /// @param context The action context from the quoted string terminal. + /// @return The result value from the handler's function. 
+ DataMatchResult HandleValue(const XmlActionContext& context); +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_ATTRIBUTE_RULE_H_ // NOLINT diff --git a/includes/image_io/xml/xml_cdata_and_comment_rules.h b/includes/image_io/xml/xml_cdata_and_comment_rules.h new file mode 100644 index 0000000..0cc2e50 --- /dev/null +++ b/includes/image_io/xml/xml_cdata_and_comment_rules.h @@ -0,0 +1,69 @@ +#ifndef IMAGE_IO_XML_XML_CDATA_AND_COMMENT_RULES_H_ // NOLINT +#define IMAGE_IO_XML_XML_CDATA_AND_COMMENT_RULES_H_ // NOLINT + +#include "image_io/xml/xml_rule.h" + +namespace photos_editing_formats { +namespace image_io { + +/// The XmlCdataRule parses the following syntax "<![CDATA[ ... ]]>". +/// As mentioned in the comments for the XmlHandler::Cdata() function, the token +/// value that is passed to the handler never includes the leading "<![CDATA[" +/// syntax and always includes the trailing "]]>" syntax. This considerably +/// simplifies the parsing task. The alternate start point constructor is used +/// by the XmlCdataOrCommentRule. +class XmlCdataRule : public XmlRule { + public: + XmlCdataRule(); + explicit XmlCdataRule(StartPoint start_point); + + private: + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's Cdata() function. + /// @param context The action context from the rule's terminal. + /// @return The result value from the handler's function. + DataMatchResult HandleCdataValue(const XmlActionContext& context); +}; + +/// The XmlCommentRule parses the following syntax "<!-- ... -->". +/// As mentioned in the comments for the XmlHandler::Comment() function, the +/// token value that is passed to the handler never includes the leading "<!--" +/// syntax and always includes the trailing "-->" syntax. This considerably +/// simplifies the parsing task. The alternate start point constructor is used +/// by the XmlCdataOrCommentRule. 
+class XmlCommentRule : public XmlRule { + public: + XmlCommentRule(); + explicit XmlCommentRule(StartPoint start_point); + + private: + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's Comment() function. + /// @param context The action context from the rule's terminal. + /// @return The result value from the handler's function. + DataMatchResult HandleCommentValue(const XmlActionContext& context); +}; + +/// This rule will use chain delegation to start either the XmlCdataRule or the +/// XmlCommentRule, depending on the text being parsed. The syntax for XML is +/// pretty poor here - the parser needs to look ahead two characters from the < +/// character to determine what to do. The alternate start point constructor is +/// used by the XmlElementContentRule. +class XmlCdataOrCommentRule : public XmlRule { + public: + XmlCdataOrCommentRule(); + explicit XmlCdataOrCommentRule(StartPoint start_point); + + private: + /// Builds an XmlTokenContext from the XmlActionContext and creates the + /// XmlCdataRule or XmlCommentRule to chain to depending on what character + /// follows the exclamation point of the "<!" syntax. + /// @param context The action context from the rule's terminal. + /// @return The result value from the action context. 
+ DataMatchResult HandlePostBangChar(const XmlActionContext& context); +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_CDATA_AND_COMMENT_RULES_H_ // NOLINT diff --git a/includes/image_io/xml/xml_element_rules.h b/includes/image_io/xml/xml_element_rules.h new file mode 100644 index 0000000..f40f370 --- /dev/null +++ b/includes/image_io/xml/xml_element_rules.h @@ -0,0 +1,92 @@ +#ifndef IMAGE_IO_XML_XML_ELEMENT_RULES_H_ // NOLINT +#define IMAGE_IO_XML_XML_ELEMENT_RULES_H_ // NOLINT + +#include "image_io/xml/xml_rule.h" + +namespace photos_editing_formats { +namespace image_io { + +/// The XmlElementRule parses the following syntax: +/// Element ::= EmptyElemTag | STag content ETag +/// EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' +/// STag ::= '<' Name (S Attribute)* S? '>' +/// ETag ::= '</' Name S? '>' +/// The Attribute syntax is parsed by XmlAttributeRule, which this rule +/// delegates to as a child rule. The EmptyElemTag type syntax is handled by +/// this rule. The STag part of the syntax is handled by this rule, but the +/// element contents and the ETag syntax is handled by the XmlElementContentRule +/// that is chained to by this rule. +class XmlElementRule : public XmlRule { + public: + XmlElementRule(); + explicit XmlElementRule(StartPoint start_point); + + private: + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's StartElement() function. + /// @param context The action context from the rule's terminal. + /// @return The result value from the handler's function. + DataMatchResult HandleName(const XmlActionContext& context); + + /// Handles the book keeping after parsing the whitespace following the name + /// of the element, basically looking ahead to see if an XmlAttributeRule has + /// to be delegated to as a child rule, or if the element ends. + /// @param context The action context from the rule's terminal. + /// @return The result value action context. 
+ DataMatchResult HandlePostWhitespaceChar(const XmlActionContext& context); + + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's FinishElement() function in response to the final literal in + /// the EmptyElemTag type syntax. As written in the comment for the XmlHandler + /// FinishElement() function, the token context passed to the handler in this + /// case will have an invalid range and a XmlPortion value of kNone - i.e., + /// the element name is not available for this form of the element syntax. + /// @param context The action context from the rule's terminal. + /// @return The result value from the handler's function. + DataMatchResult HandleEmptyElemTagEnd(const XmlActionContext& context); + + /// Handles the book keeping after parsing the final ">" literal of the STag + /// syntax of the rule, creating an XmlElementContentRule for use as a chained + /// to rule. + /// @param context The action context from the rule's terminal. + /// @return The result value from the action context. + DataMatchResult HandleSTagEnd(const XmlActionContext& context); +}; + +/// The XmlElementContentRule parses the following syntax: +/// (c? Element | PI | CDATA | Comment )+ ETag +/// The "c?" syntax represents the character data passed to the XmlHandler's +/// ElementContent() function. The syntax for Element, PI, CDATA and Comment +/// all cause a child rule to be created and delegated to. The ETag syntax will +/// cause this element to be finished with a DataMatchResult type of kFull. +class XmlElementContentRule : public XmlRule { + public: + XmlElementContentRule(); + + private: + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's ElementContent() function. + /// @param context The action context from the rule's terminal. + /// @return The result value from the handler's function. 
+ DataMatchResult HandleContent(const XmlActionContext& context); + + /// Handles the book keeping after parsing the element's content characters, + /// and the first character literal ("<") of the Element, PI, CDATA or Comment + /// syntax, creating an appropriate child rule to delegate the processing to. + /// @param context The action context from the rule's terminal. + /// @return The result value action context. + DataMatchResult HandlePostOpenChar(const XmlActionContext& context); + + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's FinishElement() function. No check is done by the rule to verify + /// that the element name matches the one that was passed to the handler's + /// StartElement. + /// @param context The action context from the rule's terminal. + /// @return The result value from the handler's function. + DataMatchResult HandleEndTag(const XmlActionContext& context); +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_ELEMENT_RULES_H_ // NOLINT diff --git a/includes/image_io/xml/xml_handler.h b/includes/image_io/xml/xml_handler.h new file mode 100644 index 0000000..cdf3d30 --- /dev/null +++ b/includes/image_io/xml/xml_handler.h @@ -0,0 +1,107 @@ +#ifndef IMAGE_IO_XML_XML_HANDLER_H_ // NOLINT +#define IMAGE_IO_XML_XML_HANDLER_H_ // NOLINT + +#include "image_io/base/data_match_result.h" +#include "image_io/xml/xml_token_context.h" + +namespace photos_editing_formats { +namespace image_io { + +/// The handler that is called by XmlRule instances as they parse XML syntax +/// and produce tokens defined in the XmlTokenContext. Each handler function +/// may be called multiple times with different XmlPortion values. The first +/// time the XmlPortion::kBegin bit will be set. The last time, XmlPortion::kEnd +/// will be set. In between, XmlPortion::kMiddle will be set. If the entire +/// token value is available for the handler, all three bits will be set. 
+/// The implementation of each function in this base class returns the +/// DataMatchResult value that the context provides. The function overrides in +/// subclasses can return the same context value, or a copy that is modified +/// with a different result type, message and "can continue" flag. +class XmlHandler { + public: + virtual ~XmlHandler() = default; + + /// This function is called to start an XML element. Once started, any of + /// the other handler functions may be called. + /// @param context The token context used to specify the element name. + /// @return The match result from the context, or one that is modified to + /// contain an error message if needed. + virtual DataMatchResult StartElement(const XmlTokenContext& context); + + /// This function is called to finish an XML element. Each call to this + /// function should be paired with a call to a StartElement function. + /// @param context The token context used to obtain the match result for + /// returning. For this function, the context might not have a valid token + /// value: the XmlPortion will always be kNone and the token range invalid. + /// This is the case if the syntax parsed is an empty element like this: + /// "<SomeElement [Attribute=Name]... />". For non empty elements with syntax: + /// "<SomeElement>...</SomeElement>", the value will be the element name. + /// @return The match result from the context, or one that is modified to + /// contain an error message if needed. + virtual DataMatchResult FinishElement(const XmlTokenContext& context); + + /// This function is called to define an attribute name. This function will + /// never be called unless an element has been started with a prior call to + /// the StartElement() function. + /// @param context The token context used to specify the attribute name. + /// @return The match result from the context, or one that is modified to + /// contain an error message if needed. 
+ virtual DataMatchResult AttributeName(const XmlTokenContext& context); + + /// This function is called to define an attribute value. The token value + /// passed to this function always includes the quote marks at the begin and + /// end of the token value. The quote marks always match and may be either a + /// single quote (') or a double quote ("). Sometimes attribute values can be + /// very long, so implementations of this function should use care if they + /// retain the value as a string for later processing. This function will + /// never be called unless an element has been started with a prior call to + /// the StartElement() and AttributeName() functions. + /// @param context The token context used to specify the attribute value. + /// @return The match result from the context, or one that is modified to + /// contain an error message if needed. + virtual DataMatchResult AttributeValue(const XmlTokenContext& context); + + /// This function is called to define a block of characters in the body of + /// an element. This function may be called multiple times for a given + /// element. Handlers that are interested in the character content for an + /// element should concatenate the token values from all calls to obtain the + /// full value for the element. + /// @param context The token context used to specify the content value. + /// @return The match result from the context, or one that is modified to + /// contain an error message if needed. + virtual DataMatchResult ElementContent(const XmlTokenContext& context); + + /// This function is called to inform the handler of a comment. A comment in + /// XML has the syntax "<!--...-->". In order to simplify the XML parsing + /// task, the tokens passed to this function never include the leading "<!--" + /// characters, but always include the trailing "-->". + /// @param context The token context used to specify the comment. 
+ /// @return The match result from the context, or one that is modified to + /// contain an error message if needed. + virtual DataMatchResult Comment(const XmlTokenContext& context); + + /// This function is called to inform the handler of a CDATA block. A CDATA block + /// in XML has the syntax "<![CDATA[...]]>". In order to simplify the XML + /// parsing task, the tokens passed to this function never include the leading + /// "<![CDATA[" characters, but always include the trailing "]]". + /// @param context The token context used to specify the CDATA block. + /// @return The match result from the context, or one that is modified to + /// contain an error message if needed. + virtual DataMatchResult Cdata(const XmlTokenContext& context); + + /// This function is called to define a processing instruction. Processing + /// instructions have an XML syntax "<?...?>". In order to simplify the XML + /// parsing task, no parsing of the processing instruction is done: handlers + /// that need the contents parsed are on their own. Also, again to simplify + /// the XML parsing task, the tokens passed to this function never include the + /// leading "<?" characters, but always include the trailing "?>". + /// @param context The token context used to specify the processing data. + /// @return The match result from the context, or one that is modified to + /// contain an error message if needed.
+ virtual DataMatchResult Pi(const XmlTokenContext& context); +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_HANDLER_H_ // NOLINT diff --git a/includes/image_io/xml/xml_handler_context.h b/includes/image_io/xml/xml_handler_context.h new file mode 100644 index 0000000..5595118 --- /dev/null +++ b/includes/image_io/xml/xml_handler_context.h @@ -0,0 +1,31 @@ +#ifndef IMAGE_IO_XML_XML_HANDLER_CONTEXT_H_ // NOLINT +#define IMAGE_IO_XML_XML_HANDLER_CONTEXT_H_ // NOLINT + +#include "image_io/base/data_context.h" + +namespace photos_editing_formats { +namespace image_io { + +class XmlHandler; + +class XmlHandlerContext : public DataContext { + public: + XmlHandlerContext(const DataContext& context, XmlHandler* handler) + : DataContext(context), handler_(handler) {} + + XmlHandlerContext(size_t location, const DataRange& range, + const DataSegment& segment, + const DataLineMap& data_line_map, XmlHandler* handler) + : DataContext(location, range, segment, data_line_map), + handler_(handler) {} + + XmlHandler* GetHandler() const { return handler_; } + + private: + XmlHandler* handler_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_HANDLER_CONTEXT_H_ // NOLINT diff --git a/includes/image_io/xml/xml_pi_rule.h b/includes/image_io/xml/xml_pi_rule.h new file mode 100644 index 0000000..674a3fa --- /dev/null +++ b/includes/image_io/xml/xml_pi_rule.h @@ -0,0 +1,32 @@ +#ifndef IMAGE_IO_XML_XML_PI_RULE_H_ // NOLINT +#define IMAGE_IO_XML_XML_PI_RULE_H_ // NOLINT + +#include "image_io/xml/xml_rule.h" + +namespace photos_editing_formats { +namespace image_io { + +/// The XmlPiRule parses the processing information syntax: "<?...?>". This +/// syntax is considerably simplified from the official XML specification. As +/// documented in the comments for the XmlHandler Pi() function, The leading +/// "<?" 
syntax is never sent to the handler, while the trailing "?>" literal +/// is always sent as part of the processing content token. This approach makes +/// it much easier to parse XML syntax. The alternate start point constructor +/// is used by the XmlElementContentRule. +class XmlPiRule : public XmlRule { + public: + XmlPiRule(); + explicit XmlPiRule(StartPoint start_point); + + private: + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's Pi() function. + /// @param context The action context from the rule's terminal. + /// @return The result value from the handler's function. + DataMatchResult HandlePiValue(const XmlActionContext& context); +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_PI_RULE_H_ // NOLINT diff --git a/includes/image_io/xml/xml_portion.h b/includes/image_io/xml/xml_portion.h new file mode 100644 index 0000000..673c958 --- /dev/null +++ b/includes/image_io/xml/xml_portion.h @@ -0,0 +1,48 @@ +#ifndef IMAGE_IO_XML_XML_PORTION_H_ // NOLINT +#define IMAGE_IO_XML_XML_PORTION_H_ // NOLINT + +namespace photos_editing_formats { +namespace image_io { + +/// A bit-type enum for indicating what part of an entity is defined: the +/// begin, middle and/or end. Bitwise "and" and "or" operators are defined to +/// combine and test values. +enum class XmlPortion { + kNone = 0, + kBegin = 1, + kMiddle = 2, + kEnd = 4, +}; + +/// @return The value that results from the bitwise "and" of given portions. +inline XmlPortion operator&(XmlPortion lhs, XmlPortion rhs) { + int lhs_value = static_cast<int>(lhs); + int rhs_value = static_cast<int>(rhs); + return static_cast<XmlPortion>(lhs_value & rhs_value); +} + +/// @return The value that results from the bitwise "or" of given portions.
+inline XmlPortion operator|(XmlPortion lhs, XmlPortion rhs) { + int lhs_value = static_cast<int>(lhs); + int rhs_value = static_cast<int>(rhs); + return static_cast<XmlPortion>(lhs_value | rhs_value); +} + +/// @param value The value to use for the test. +/// @param mask The mask to use for the test. +/// @return Whether any of the bits in the mask are set in the value. +inline bool ContainsAny(XmlPortion value, XmlPortion mask) { + return (value & mask) != XmlPortion::kNone; +} + +/// @param value The value to use for the test. +/// @param mask The mask to use for the test. +/// @return Whether all of the bits in the mask are set in the value. +inline bool ContainsAll(XmlPortion value, XmlPortion mask) { + return (value & mask) == mask; +} + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_PORTION_H_ // NOLINT diff --git a/includes/image_io/xml/xml_reader.h b/includes/image_io/xml/xml_reader.h new file mode 100644 index 0000000..9a42b74 --- /dev/null +++ b/includes/image_io/xml/xml_reader.h @@ -0,0 +1,105 @@ +#ifndef IMAGE_IO_XML_XML_READER_H_ // NOLINT +#define IMAGE_IO_XML_XML_READER_H_ // NOLINT + +#include <memory> +#include <string> +#include <vector> + +#include "image_io/base/data_line_map.h" +#include "image_io/base/data_match_result.h" +#include "image_io/base/message.h" +#include "image_io/xml/xml_handler_context.h" +#include "image_io/xml/xml_rule.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A class for reading and parsing the text of a data segment, resulting in the +/// functions of an XmlHandler to be called. This reader's Parse() function can +/// be called multiple times for text that spans multiple data segments. Errors +/// are reported to the message handler as they are encountered. In general, +/// there will be three types of errors: internal (programming), syntax, and +/// value errors. 
Internal errors can come from anywhere in this code base; +/// only one such error is permitted per StartParse/Parse... sequence. Syntax +/// errors are usually issued by XmlRule instances; like internal errors, only +/// one such error is tolerated per StartParse/Parse... sequence. XmlHandler +/// functions may issue value errors; multiple such value errors are tolerated. +class XmlReader { + public: + explicit XmlReader(XmlHandler* handler) + : handler_(handler), + bytes_parsed_(0), + has_internal_or_syntax_error_(false), + has_errors_(false) {} + + /// Sets up the reader for parsing data segment text using the given XmlRule. + /// @param rule The top level rule to use when parsing the data segment text. + /// @return Whether the reader was set up properly. + bool StartParse(std::unique_ptr<XmlRule> rule); + + /// Parses the text portion of the data segment starting at a location. This + /// function may be called multiple times for text that spans multiple data + /// segments. + /// @param start_location The location at which to start reading/parsing. + /// This location must be contained in the range parameter. + /// @param range The portion of the data segment to parse. This range value + /// must be contained in the range returned by DataSegment::GetRange() + /// @param segment The segment containing the text to parse. + /// @return Whether the parsing was successful. + bool Parse(size_t start_location, const DataRange& range, + const DataSegment& segment); + + /// Finishes up the reading/parsing process. The rule passed to StartParse() + /// must have consumed all the text of the segments and be "done", otherwise + /// this function will issue an error message. + /// @return Whether the reading/parsing operation was completed successfully. + bool FinishParse(); + + /// @return The total number of bytes of text that have been read/parsed.
+ size_t GetBytesParsed() const { return bytes_parsed_; } + + /// @return Whether errors have been encountered in reading/parsing the text. + bool HasErrors() const { return has_errors_; } + + /// @return The handler that handles the output of the parsing operations. + XmlHandler* GetHandler() const { return handler_; } + + private: + /// Sets up the context's name list that is used when creating error messages. + /// @param context The context to set up. + void InitializeContextNameList(XmlHandlerContext* context); + + /// Reports the message indicated in the result to the message handler and + /// updates the boolean data members indicating errors. + /// @param result The result value for an XmlRule::Parse function. + /// @param context The context for generating an error message if needed. + void ReportError(const DataMatchResult& result, const DataContext& context); + + /// Reports the message to the message handler and updates the boolean + /// data members indicating errors. + /// @param message The message to send to the message handler. + void ReportError(const Message& message); + + /// The reader's handler. + XmlHandler* handler_; + + /// A data line map used for error message creation. + DataLineMap data_line_map_; + + /// The pending and active rules. + std::vector<std::unique_ptr<XmlRule>> rule_stack_; + + /// The total number of bytes that have been parsed. + size_t bytes_parsed_; + + /// Whether an internal or syntax error has occurred. + bool has_internal_or_syntax_error_; + + /// Whether any type of error has occurred.
+ bool has_errors_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_READER_H_ // NOLINT diff --git a/includes/image_io/xml/xml_rule.h b/includes/image_io/xml/xml_rule.h new file mode 100644 index 0000000..c76f87c --- /dev/null +++ b/includes/image_io/xml/xml_rule.h @@ -0,0 +1,175 @@ +#ifndef IMAGE_IO_XML_XML_RULE_H_ // NOLINT +#define IMAGE_IO_XML_XML_RULE_H_ // NOLINT + +#include <memory> +#include <string> +#include <vector> + +#include "image_io/base/data_match_result.h" +#include "image_io/xml/xml_handler_context.h" +#include "image_io/xml/xml_terminal.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A rule represents a sequence of terminals to match text from a DataSource, +/// and the state needed to keep track the parsing operation in case the text +/// is split across multiple DataSegments. XmlRules collaborate with an instance +/// of XmlHandler to process the token values the terminals produce. +/// +/// Terminals are added in the constructors of the rule subclasses, and are +/// not typically accessed directly from the clients of an XmlRule. Instead, +/// XmlRule clients normally just call the rule's Parse function and take action +/// based on the DataMatchResult value that is returned. The functions of the +/// XmlHandler are called internally by the rule's terminals as they parse the +/// text in the data segment. +/// +/// Normally, the terminals are parsed by the Parse() function in a sequential +/// manner until they are exhausted. At which time the Parse function returns +/// with a DataMatchResult that has a type equal to kFull. If the DataSegment +/// runs out of data before the end of the final terminal, the result type will +/// be kPartialOutOfData. Of course if any of the terminals' scanners detect an +/// error the result type will be kError. +/// +/// Rules may decide to delegate the parsing process to another rule. There are +/// two types of delegation: +/// 1. 
Rule chaining - in this case a rule decides that another rule should +/// be used instead to continue the parsing process. This situation is +/// indicated when the result type is kFull and the rule's HasNextRule() +/// function returns true. The chained-to rule is obtained by calling the +/// rule's ReleaseNextRule() function. The current rule can be discarded. +/// 2. Child rules - in this case a "parent" rule decides that the next set of +/// syntax should be parsed by another "child" rule, and after that rule +/// completes, the parsing task should be returned to the parent rule. This +/// situation is indicated when the result type is kPartial and the rule's +/// HasNextRule() returns true. The child rule is obtained by calling the +/// rule's ReleaseNextRule() function. The current parent rule should be placed +/// on a stack until the child rule is done, and then the child discarded and +/// the parent rule used for the next Parse operation. +/// The action functions associated with a terminal are typically used to create +/// the next rule and set the result type and thus initiate the delegation +/// process. When the XmlRule::Parse function detects a delegation has been +/// requested, it returns to its caller so that the caller can handle the +/// delegation in the appropriate fashion. For an example, see the XmlReader's +/// Parse() function. +/// +/// In addition to delegation the action functions associated with a terminal +/// can change the order of the terminals processed from a strictly sequential +/// order to whatever the rule so desires. This is done by calling the rule's +/// SetTerminalIndex() function. Terminals can be identified by name using the +/// GetTerminalIndexFromName() function if the rule's terminals were +/// constructed with names. If the terminal index of a rule is set to a +/// terminal that has already been used, the terminal's scanner state must be +/// reset in order for it to parse successfully again.
Sometimes the entire +/// rule is "restarted" in which case the ResetTerminalScanners() function can +/// be called to reset the scanners of all the rule's terminals. +/// +/// Finally, because of the look-ahead needs of the XML grammar, some rules +/// support alternate "starting points", allowing them to skip some set of +/// initial terminals when the rule's Parse() function is called. Rules that +/// support this feature will have a constructor with a StartPoint parameter. +class XmlRule { + public: + /// For rules that support alternate starting points, this enum provides the + /// values at which a rule's Parse() function can begin. + enum StartPoint { + /// Start parsing at the first terminal position. + kFirstStartPoint, + + /// Start parsing at a second (alternative) position. + kSecondStartPoint, + }; + + virtual ~XmlRule() = default; + explicit XmlRule(const std::string& name); + + /// @return The name of the rule. + const std::string& GetName() const { return name_; } + + /// Parse the text indicated in the context's data segment and range and call + /// the context's XmlHandler functions as needed. The implementation of this + /// function makes use of the terminals contained by the rule, but it is + /// declared virtual so that subclasses can customize as needed. + /// @param context The context describing the text to parse and the handler + /// to call. + /// @return A result that indicates the type of match that occurred, the number + /// of bytes consumed and an error message if needed. + virtual DataMatchResult Parse(XmlHandlerContext context); + + /// Adds a literal terminal to the rule. + /// @param literal The literal value to scan for. + /// @return The terminal, enabling direct calls to WithName()/WithAction(). + XmlTerminal& AddLiteralTerminal(const std::string& literal); + + /// Adds a name terminal to the rule. + /// @return The terminal, enabling direct calls to WithName()/WithAction().
+ XmlTerminal& AddNameTerminal(); + + /// Adds a quoted string terminal to the rule. + /// @return The terminal, enabling direct calls to WithName()/WithAction(). + XmlTerminal& AddQuotedStringTerminal(); + + /// Adds a sentinel terminal to the rule. + /// @param sentinels The sentinel values to scan for. + /// @return The terminal, enabling direct calls to WithName()/WithAction(). + XmlTerminal& AddSentinelTerminal(const std::string& sentinels); + + /// Adds a scan through literal terminal to the rule. + /// @param literal The literal value to scan through. + /// @return The terminal, enabling direct calls to WithName()/WithAction(). + XmlTerminal& AddThroughLiteralTerminal(const std::string& literal); + + /// Adds a whitespace terminal to the rule. + /// @return The terminal, enabling direct calls to WithName()/WithAction(). + XmlTerminal& AddWhitespaceTerminal(); + + /// Adds an optional whitespace terminal to the rule. + /// @return The terminal, enabling direct calls to WithName()/WithAction(). + XmlTerminal& AddOptionalWhitespaceTerminal(); + + /// @return The number of terminals in the rule. + size_t GetTerminalCount() const { return terminals_.size(); } + + /// @return The index of the terminal currently parsing text. + size_t GetTerminalIndex() const { return terminal_index_; } + + /// @param name The name of the terminal to look for. + /// @return The index of the terminal with the given name, or the value + /// returned by the rule's GetTerminalCount() if not found. + size_t GetTerminalIndexFromName(const std::string name) const; + + /// @param terminal_index The index of the terminal that should next be used + /// for parsing the input text. + void SetTerminalIndex(size_t terminal_index); + + /// @return The terminal currently parsing text, or nullptr if there is none. + XmlTerminal* GetCurrentTerminal(); + + /// @param index The index of the terminal to get. + /// @return The terminal at the given index, or nullptr if index is invalid. 
+ XmlTerminal* GetTerminal(size_t index); + + /// Resets the scanner state of all the terminals in the rule. + void ResetTerminalScanners(); + + /// @return Whether the rule has a next rule for delegation. + bool HasNextRule() const; + + /// @return The next rule, released to the caller. If there is no next rule, + /// the get function of the returned unique_ptr will return nullptr. + std::unique_ptr<XmlRule> ReleaseNextRule(); + + /// @param next_rule The new rule to use for delegation purposes. + void SetNextRule(std::unique_ptr<XmlRule> next_rule); + + private: + std::string name_; + std::vector<XmlTerminal> terminals_; + std::unique_ptr<XmlRule> next_rule_; + size_t terminal_index_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_RULE_H_ // NOLINT diff --git a/includes/image_io/xml/xml_terminal.h b/includes/image_io/xml/xml_terminal.h new file mode 100644 index 0000000..7d999f0 --- /dev/null +++ b/includes/image_io/xml/xml_terminal.h @@ -0,0 +1,61 @@ +#ifndef IMAGE_IO_XML_XML_TERMINAL_H_ // NOLINT +#define IMAGE_IO_XML_XML_TERMINAL_H_ // NOLINT + +#include <string> + +#include "image_io/base/data_scanner.h" +#include "image_io/xml/xml_action.h" +#include "image_io/xml/xml_token_context.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A terminal represents a part of a rule that uses a DataScanner to match +/// zero or more characters from a DataSource. A terminal can also have a name +/// that can be used in error messages and also used to identify it in a +/// rule. A terminal can also have an action function associated with it that it +/// can use to validate the token produced by the terminal/scanner, and do +/// further processing with the token. Finally, the terminal's action function +/// can manipulate the DataMatchResult that was produced by the terminal's +/// scanner and is accessible via the action function's XmlActionContext param.
+class XmlTerminal { + public: + explicit XmlTerminal(const DataScanner& scanner) : scanner_(scanner){} + + /// Sets the name of the terminal. Looks best with an XmlRule::AddTerminal + /// function: AddWhitespaceTerminal().WithName("SomeName"); + /// @param name The name to give to the terminal. + /// @return A reference to the terminal. + XmlTerminal& WithName(const std::string& name) { + name_ = name; + return *this; + } + + /// Sets the action of the terminal. Looks best with an XmlRule::AddTerminal + /// function: AddWhitespaceTerminal().WithAction(SomeAction); + /// @param action The action to give to the terminal. + /// @return A reference to the terminal. + XmlTerminal& WithAction(const XmlAction& action) { + action_ = action; + return *this; + } + + /// @return The terminal's scanner. + DataScanner* GetScanner() { return &scanner_; } + + /// @return The terminal's name. + const std::string& GetName() const { return name_; } + + /// @return The terminal's action function. + const XmlAction& GetAction() const { return action_; } + + private: + DataScanner scanner_; + XmlAction action_; + std::string name_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_TERMINAL_H_ // NOLINT diff --git a/includes/image_io/xml/xml_token_context.h b/includes/image_io/xml/xml_token_context.h new file mode 100644 index 0000000..cceca5c --- /dev/null +++ b/includes/image_io/xml/xml_token_context.h @@ -0,0 +1,62 @@ +#ifndef IMAGE_IO_XML_XML_TOKEN_CONTEXT_H_ // NOLINT +#define IMAGE_IO_XML_XML_TOKEN_CONTEXT_H_ // NOLINT + +#include <string> + +#include "image_io/base/data_context.h" +#include "image_io/base/data_match_result.h" +#include "image_io/base/data_range.h" +#include "image_io/xml/xml_portion.h" + +namespace photos_editing_formats { +namespace image_io { + +class XmlActionContext; + +/// A token context is passed from the action of an XmlTerminal to an XmlHandler +/// associated with the XmlActionContext used to call the 
action function. +class XmlTokenContext : public DataContext { + public: + explicit XmlTokenContext(const XmlActionContext& context); + XmlTokenContext(size_t location, const DataRange& range, + const DataSegment& segment, const DataLineMap& data_line_map, + const DataMatchResult& result, const DataRange& token_range, + const XmlPortion& token_portion); + + /// @return The result associated with the context. + const DataMatchResult& GetResult() const { return result_; } + + /// @return The token range for the token. Note that the token range may not + /// be a subrange of the context's GetRange() or even the context's segment's + /// data range. Such would be the case when a token's value is split across + /// two or more data segments. + const DataRange& GetTokenRange() const { return token_range_; } + + /// @return The portion of the token that this context represents. This + /// portion value can be the bitwise or of any of the XmlPortion bit values. + const XmlPortion& GetTokenPortion() const { return token_portion_; } + + /// Builds the string value of the token. If the context's token portion has + /// the XmlPortion::kBegin bit set, the string value is first cleared. Then + /// the string is extracted from the context's data source and appended onto + /// the value. Remember that some token values (especially attribute values) + /// can be quite long so care should be exercised when obtaining values with + /// this function. + /// @param value The value of the token being built. + /// @return Whether the token value is complete (i.e., the context's portion + /// had the XmlPortion::kEnd bit set).
+ bool BuildTokenValue(std::string* value) const; + + static XmlPortion ComputeTokenPortion(size_t token_scan_count, + DataMatchResult::Type result_type); + + private: + DataMatchResult result_; + DataRange token_range_; + XmlPortion token_portion_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_TOKEN_CONTEXT_H_ // NOLINT diff --git a/src/base/byte_buffer.cc b/src/base/byte_buffer.cc new file mode 100644 index 0000000..55fbc2f --- /dev/null +++ b/src/base/byte_buffer.cc @@ -0,0 +1,80 @@ +#include "image_io/base/byte_buffer.h" + +#include <utility> + +namespace photos_editing_formats { +namespace image_io { + +using std::string; +using std::unique_ptr; + +/// @param byte_data The byte data to write to the buffer at pos. +/// @param pos The location in a buffer to write the byte data to. +/// @return The number of bytes written to the buffer at pos. +static size_t WriteBytes(const ByteData& byte_data, Byte* pos) { + size_t byte_count = byte_data.GetByteCount(); + if (!byte_count) { + return 0; + } + if (byte_data.GetType() == ByteData::kHex) { + const string& value = byte_data.GetValue(); + for (size_t index = 0; index < byte_count; ++index) { + if (!ByteData::Hex2Byte(value[2 * index], value[2 * index + 1], pos++)) { + return 0; + } + } + } else { + memcpy(pos, byte_data.GetValue().c_str(), byte_count); + } + return byte_count; +} + +ByteBuffer::ByteBuffer(size_t size, std::unique_ptr<Byte[]> buffer) + : buffer_(std::move(buffer)), size_(size) { + if (!buffer_) { + size_ = 0; + } + if (!size_) { + buffer_.reset(); + } +} + +ByteBuffer::ByteBuffer(const std::vector<ByteData>& byte_data_vector) { + size_ = 0; + for (const auto& byte_data : byte_data_vector) { + size_ += byte_data.GetByteCount(); + } + if (!size_) { + return; + } + // Note that within google3, std::make_unique is not available, and clangtidy + // says use absl::make_unique. 
This library attempts to minimize the number of + // dependencies on google3, hence the no lint on the next line. + buffer_.reset(new Byte[size_]); // NOLINT + Byte* pos = buffer_.get(); + for (const auto& byte_data : byte_data_vector) { + size_t bytes_written = WriteBytes(byte_data, pos); + if (bytes_written == 0 && byte_data.GetByteCount() != 0) { + // The buffer is released here, so pos dangles: stop now instead of + // letting later iterations write through it. + size_ = 0; + buffer_.reset(nullptr); + return; + } + pos += bytes_written; + } +} + +/// Writes value at location as two bytes, most significant byte first. +/// @param location The index in the buffer of the most significant byte. +/// @param value The value to write. +/// @return Whether both bytes fit in the buffer; nothing is written otherwise. +bool ByteBuffer::SetBigEndianValue(size_t location, std::uint16_t value) { + // Written so that no addition can wrap: location + 1 overflows to 0 when + // location is SIZE_MAX, which would slip past a "location + 1 >= size_" test. + if (size_ < 2 || location > size_ - 2) { + return false; + } + buffer_[location] = static_cast<Byte>(value >> 8); + buffer_[location + 1] = static_cast<Byte>(value & 0xFF); + return true; +} + +Byte* ByteBuffer::Release() { + size_ = 0; + return buffer_.release(); +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/base/data_context.cc b/src/base/data_context.cc new file mode 100644 index 0000000..3d58cd2 --- /dev/null +++ b/src/base/data_context.cc @@ -0,0 +1,168 @@ +#include "image_io/base/data_context.h" + +#include <cctype> +#include <iomanip> +#include <sstream> + +#include "image_io/base/byte_data.h" + +namespace photos_editing_formats { +namespace image_io { + +namespace { + +void AddNames(const std::list<std::string>& name_list, std::stringstream* ss) { + for (const auto& name : name_list) { + *ss << name << ":"; + } +} + +} // namespace + +std::string DataContext::GetInvalidLocationAndRangeErrorText() const { + std::stringstream ss; + ss << "Invalid location:" << location_ << " range:[" << range_.GetBegin() + << "," << range_.GetEnd() << ") segment_range:[" + << segment_.GetDataRange().GetBegin() << "," + << segment_.GetDataRange().GetEnd() << ")"; + return GetErrorText(ss.str(), ""); +} + +std::string DataContext::GetErrorText( + const std::string& error_description, + const std::string& expectation_description) const { + std::list<std::string> none; + return GetErrorText(none, none, error_description, expectation_description); +} +
+std::string DataContext::GetErrorText( + const std::list<std::string>& prefix_name_list, + const std::list<std::string>& postfix_name_list, + const std::string& error_description, + const std::string& expectation_description) const { + const std::string kContinue("- "); + std::stringstream ss; + + // Write error description if present. + if (!error_description.empty()) { + ss << error_description << std::endl; + } + + // Write name:name:... if present. + std::string names_string = + GetNamesString(prefix_name_list, postfix_name_list); + if (!names_string.empty()) { + ss << kContinue << names_string << std::endl; + } + + // Get the line:XX part of the line string. + DataLine data_line; + std::string line_number_string; + if (IsValidLocationAndRange()) { + data_line = line_info_map_.GetDataLine(location_); + line_number_string = GetLineNumberString(data_line); + } + + // Get the line_string related ranges and the line string. + DataRange clipped_range, line_range; + size_t spaces_before_caret = line_number_string.length(); + GetClippedAndLineRange(data_line, &clipped_range, &line_range); + std::string line_string = + GetLineString(clipped_range, line_range, &spaces_before_caret); + + // Write the line string + ss << kContinue << line_number_string << line_string << std::endl; + + // Write the caret and expectation description + size_t spaces_count = location_ + spaces_before_caret - line_range.GetBegin(); + std::string spaces(spaces_count, ' '); + ss << kContinue << spaces << '^'; + if (!expectation_description.empty()) { + ss << "expected:" << expectation_description; + } + return ss.str(); +} + +std::string DataContext::GetNamesString( + const std::list<std::string>& prefix_name_list, + const std::list<std::string>& postfix_name_list) const { + std::stringstream ss; + if (!prefix_name_list.empty() || !name_list_.empty() || + !postfix_name_list.empty()) { + AddNames(prefix_name_list, &ss); + AddNames(name_list_, &ss); + AddNames(postfix_name_list, &ss); + } + 
return ss.str(); +} + +std::string DataContext::GetLineNumberString(const DataLine& data_line) const { + std::stringstream liness; + liness << "line:"; + if (data_line.number == 0) { + liness << "?:"; + } else { + liness << data_line.number << ":"; + } + return liness.str(); +} + +void DataContext::GetClippedAndLineRange(const DataLine& data_line, + DataRange* clipped_range, + DataRange* line_range) const { + // Lines could be really long, so provide some sane limits: some kLimit chars + // on either side of the current location. + const size_t kLimit = 25; + size_t line_begin, line_end; + *clipped_range = data_line.range.IsValid() + ? range_.GetIntersection(data_line.range) + : range_; + if (clipped_range->IsValid() && clipped_range->Contains(location_)) { + line_begin = (clipped_range->GetBegin() + kLimit < location_) + ? location_ - kLimit + : clipped_range->GetBegin(); + line_end = std::min(line_begin + 2 * kLimit, clipped_range->GetEnd()); + } else { + line_begin = location_; + line_end = std::min(location_ + 2 * kLimit, range_.GetEnd()); + *clipped_range = DataRange(line_begin, line_end); + } + *line_range = DataRange(line_begin, line_end); +} + +std::string DataContext::GetLineString(const DataRange& clipped_range, + const DataRange& line_range, + size_t* spaces_before_caret) const { + std::stringstream ss; + if (!IsValidLocationAndRange()) { + ss << "Invalid location or range"; + return ss.str(); + } + + const char* cbytes = + reinterpret_cast<const char*>(segment_.GetBuffer(line_range.GetBegin())); + if (cbytes != nullptr) { + if (line_range.GetBegin() != clipped_range.GetBegin()) { + ss << "..."; + *spaces_before_caret += 3; + } + for (size_t index = 0; index < line_range.GetLength(); ++index) { + char cbyte = cbytes[index]; + if (isprint(cbyte)) { + ss << cbyte; + } else { + ss << "\\x" << ByteData::Byte2Hex(cbyte); + if (index + line_range.GetBegin() < location_) { + *spaces_before_caret += 4; + } + } + } + if (line_range.GetEnd() != 
clipped_range.GetEnd()) { + ss << "..."; + } + } + return ss.str(); +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/base/data_line_map.cc b/src/base/data_line_map.cc new file mode 100644 index 0000000..06ecfd9 --- /dev/null +++ b/src/base/data_line_map.cc @@ -0,0 +1,64 @@ +#include "image_io/base/data_line_map.h" + +#include <algorithm> + +namespace photos_editing_formats { +namespace image_io { + +size_t DataLineMap::GetDataLineCount() const { return data_lines_.size(); } + +DataLine DataLineMap::GetDataLine(size_t location) const { + if (data_lines_.empty()) { + return DataLine(); + } + DataLine key(0, DataRange(location, location)); + auto not_less_pos = + std::lower_bound(data_lines_.begin(), data_lines_.end(), key, + [](const DataLine& lhs, const DataLine& rhs) { + return lhs.range.GetBegin() < rhs.range.GetBegin(); + }); + if (not_less_pos == data_lines_.end()) { + --not_less_pos; + } else if (not_less_pos != data_lines_.begin()) { + auto prev_pos = not_less_pos - 1; + if (location < prev_pos->range.GetEnd()) { + not_less_pos = prev_pos; + } + } + if (not_less_pos->range.Contains(location)) { + return *not_less_pos; + } + return DataLine(); +} + +void DataLineMap::FindDataLines(const DataRange& range, + const DataSegment& segment) { + size_t line_end; + size_t range_end = range.GetEnd(); + size_t line_begin = range.GetBegin(); + size_t next_number = GetDataLineCount() + 1; + // An empty range has no text to scan. Return before the loop so that + // line_end, which is only assigned inside it, is never read uninitialized + // and the map's state is left untouched. + if (line_begin >= range_end) { + return; + } + while (line_begin < range_end) { + line_end = std::min(range_end, segment.Find(line_begin, '\n')); + if (last_line_incomplete_ && !data_lines_.empty()) { + line_begin = data_lines_.back().range.GetBegin(); + data_lines_.back().range = DataRange(line_begin, line_end); + if (line_end < range_end && + segment.GetValidatedByte(line_end).value == '\n') { + last_line_incomplete_ = false; + } + } else { + data_lines_.emplace_back(next_number++, DataRange(line_begin, line_end)); + } + line_begin = line_end + 1; + } + last_line_incomplete_ = + line_end ==
range_end || segment.GetValidatedByte(line_end).value != '\n'; +} + +void DataLineMap::Clear() { + data_lines_.clear(); + last_line_incomplete_ = false; +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/base/data_range_tracking_destination.cc b/src/base/data_range_tracking_destination.cc new file mode 100644 index 0000000..98e5b06 --- /dev/null +++ b/src/base/data_range_tracking_destination.cc @@ -0,0 +1,47 @@ +#include "image_io/base/data_range_tracking_destination.h" + +namespace photos_editing_formats { +namespace image_io { + +void DataRangeTrackingDestination::StartTransfer() { + tracked_data_range_ = DataRange(); + bytes_transferred_ = 0; + has_disjoint_transfer_ranges_ = false; + if (destination_ != nullptr) { + destination_->StartTransfer(); + } +} + +DataDestination::TransferStatus DataRangeTrackingDestination::Transfer( + const DataRange& transfer_range, const DataSegment& data_segment) { + DataDestination::TransferStatus transfer_status = + destination_ ? 
destination_->Transfer(transfer_range, data_segment) + : DataDestination::kTransferOk; + if (transfer_status != kTransferError) { + bytes_transferred_ += transfer_range.GetLength(); + } + if (has_disjoint_transfer_ranges_) { + return transfer_status; + } + if (!tracked_data_range_.IsValid()) { + tracked_data_range_ = transfer_range; + return transfer_status; + } + if (tracked_data_range_.GetEnd() == transfer_range.GetBegin()) { + tracked_data_range_ = + DataRange(tracked_data_range_.GetBegin(), transfer_range.GetEnd()); + return transfer_status; + } else { + has_disjoint_transfer_ranges_ = true; + return transfer_status; + } +} + +void DataRangeTrackingDestination::FinishTransfer() { + if (destination_ != nullptr) { + destination_->FinishTransfer(); + } +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/base/data_scanner.cc b/src/base/data_scanner.cc new file mode 100644 index 0000000..e6677a5 --- /dev/null +++ b/src/base/data_scanner.cc @@ -0,0 +1,390 @@ +#include "image_io/base/data_scanner.h" + +namespace photos_editing_formats { +namespace image_io { + +namespace { + +const char kWhitespaceChars[] = " \t\n\r"; + +/// This function is like strspn but does not assume a null-terminated string. +/// @param s The bytes to scan; need not be null-terminated. +/// @param slen The number of bytes available at s. +/// @param accept A null-terminated set of accepted characters. +/// @return The length of the leading run of s, at most slen, that consists only +/// of characters found in accept. +size_t memspn(const char* s, size_t slen, const char* accept) { + size_t span = 0; + // Test the remaining length before reading each byte so that s[slen], one + // past the end of the buffer, is never read and slen is never decremented + // below zero. + for (; span < slen; ++span) { + const char c = s[span]; + const char* spanp = accept; + while (*spanp != '\0' && *spanp != c) { + ++spanp; + } + if (*spanp == '\0') { + // c is not in accept (this includes c == '\0'), so the run ends here. + break; + } + } + return span; +} + +/// @return Whether value is in the range [lo:hi]. +bool InRange(char value, char lo, char hi) { + return value >= lo && value <= hi; +} + +/// @return Whether the value is the first character of a kName type scanner. +bool IsFirstNameChar(char value) { + return InRange(value, 'A', 'Z') || InRange(value, 'a', 'z') || value == '_' || + value == ':'; +} + +/// Scans the characters in the s string, where the characters can be any legal +/// character in the name. 
+/// @return The number of name characters scanned. +size_t ScanOptionalNameChars(const char* s, size_t slen) { + const char* kOptionalChars = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.-_:"; + return memspn(s, slen, kOptionalChars); +} + +/// Scans the whitespace characters in the s string. +/// @return The number of whitespace characters scanned. +size_t ScanWhitespaceChars(const char* s, size_t slen) { + return memspn(s, slen, kWhitespaceChars); +} + +} // namespace + +std::string DataScanner::GetWhitespaceChars() { return kWhitespaceChars; } + +DataScanner DataScanner::CreateLiteralScanner(const std::string& literal) { + return DataScanner(DataScanner::kLiteral, literal); +} + +DataScanner DataScanner::CreateNameScanner() { + return DataScanner(DataScanner::kName); +} + +DataScanner DataScanner::CreateQuotedStringScanner() { + return DataScanner(DataScanner::kQuotedString); +} + +DataScanner DataScanner::CreateSentinelScanner(const std::string& sentinels) { + return DataScanner(DataScanner::kSentinel, sentinels); +} + +DataScanner DataScanner::CreateThroughLiteralScanner( + const std::string& literal) { + return DataScanner(DataScanner::kThroughLiteral, literal); +} + +DataScanner DataScanner::CreateWhitespaceScanner() { + return DataScanner(DataScanner::kWhitespace); +} + +DataScanner DataScanner::CreateOptionalWhitespaceScanner() { + return DataScanner(DataScanner::kOptionalWhitespace); +} + +/// Moves the end of the token range forward by delta_length bytes. +/// @return The length of the token range after the extension. +size_t DataScanner::ExtendTokenLength(size_t delta_length) { + token_range_ = + DataRange(token_range_.GetBegin(), token_range_.GetEnd() + delta_length); + return token_range_.GetLength(); +} + +void DataScanner::SetInternalError(const DataContext& context, + const std::string& error_description, + DataMatchResult* result) { + result->SetType(DataMatchResult::kError); + result->SetMessage( + Message::kInternalError, + context.GetErrorText({}, {GetDescription()}, error_description, "")); +} + +void DataScanner::SetSyntaxError(const DataContext& 
context, + const std::string& error_description, + DataMatchResult* result) { + result->SetType(DataMatchResult::kError); + result->SetMessage(Message::kSyntaxError, + context.GetErrorText(error_description, GetDescription())); +} + +DataMatchResult DataScanner::ScanLiteral(const char* cbytes, + size_t bytes_available, + const DataContext& context) { + DataMatchResult result; + size_t token_length = token_range_.GetLength(); + if (token_length >= literal_or_sentinels_.length()) { + SetInternalError(context, "Literal already scanned", &result); + return result; + } + size_t bytes_still_needed = literal_or_sentinels_.length() - token_length; + size_t bytes_to_compare = std::min(bytes_still_needed, bytes_available); + if (strncmp(&literal_or_sentinels_[token_length], cbytes, bytes_to_compare) == + 0) { + token_length = ExtendTokenLength(bytes_to_compare); + result.SetBytesConsumed(bytes_to_compare); + result.SetType(token_length == literal_or_sentinels_.length() + ? DataMatchResult::kFull + : DataMatchResult::kPartialOutOfData); + } else { + SetSyntaxError(context, "Expected literal", &result); + } + return result; +} + +DataMatchResult DataScanner::ScanName(const char* cbytes, + size_t bytes_available, + const DataContext& context) { + DataMatchResult result; + size_t token_length = token_range_.GetLength(); + if (token_length == 0) { + if (!IsFirstNameChar(*cbytes)) { + SetSyntaxError(context, "Expected first character of a name", &result); + return result; + } + token_length = ExtendTokenLength(1); + result.SetBytesConsumed(1); + bytes_available -= 1; + cbytes += 1; + } + size_t optional_bytes_consumed = + ScanOptionalNameChars(cbytes, bytes_available); + token_length = ExtendTokenLength(optional_bytes_consumed); + result.IncrementBytesConsumed(optional_bytes_consumed); + if (result.GetBytesConsumed() == 0 && token_length > 0) { + result.SetType(DataMatchResult::kFull); + } else if (optional_bytes_consumed < bytes_available) { + 
result.SetType(DataMatchResult::kFull); + } else { + result.SetType(DataMatchResult::kPartialOutOfData); + } + return result; +} + +DataMatchResult DataScanner::ScanQuotedString(const char* cbytes, + size_t bytes_available, + const DataContext& context) { + const size_t kStart = 0; + const size_t kDone = '.'; + const size_t kSquote = '\''; + const size_t kDquote = '"'; + DataMatchResult result; + size_t token_length = token_range_.GetLength(); + if ((data_ == kStart && token_length != 0) || + (data_ != kStart && data_ != kSquote && data_ != kDquote)) { + SetInternalError(context, "Inconsistent state", &result); + return result; + } + if (data_ == kStart) { + if (*cbytes != kSquote && *cbytes != kDquote) { + SetSyntaxError(context, "Expected start of a quoted string", &result); + return result; + } + data_ = *cbytes++; + bytes_available--; + result.SetBytesConsumed(1); + token_length = ExtendTokenLength(1); + } + const char* ebytes = reinterpret_cast<const char*>( + memchr(cbytes, static_cast<int>(data_), bytes_available)); + size_t bytes_scanned = ebytes ? 
ebytes - cbytes : bytes_available; + result.IncrementBytesConsumed(bytes_scanned); + token_length = ExtendTokenLength(bytes_scanned); + if (bytes_scanned == bytes_available) { + result.SetType(DataMatchResult::kPartialOutOfData); + } else { + result.SetType(DataMatchResult::kFull); + result.IncrementBytesConsumed(1); + ExtendTokenLength(1); + data_ = kDone; + } + return result; +} + +DataMatchResult DataScanner::ScanSentinel(const char* cbytes, + size_t bytes_available, + const DataContext& context) { + DataMatchResult result; + if (data_ != 0) { + SetInternalError(context, "Sentinel already scanned", &result); + return result; + } + char cbyte = *cbytes; + for (size_t index = 0; index < literal_or_sentinels_.size(); ++index) { + char sentinel = literal_or_sentinels_[index]; + if ((sentinel == '~' && IsFirstNameChar(cbyte)) || cbyte == sentinel) { + ExtendTokenLength(1); + result.SetBytesConsumed(1).SetType(DataMatchResult::kFull); + data_ = sentinel; + break; + } + } + if (result.GetBytesConsumed() == 0) { + SetSyntaxError(context, "Expected sentinal character", &result); + } + return result; +} + +DataMatchResult DataScanner::ScanThroughLiteral(const char* cbytes, + size_t bytes_available, + const DataContext& context) { + DataMatchResult result; + size_t& scanned_literal_length = data_; + if (scanned_literal_length >= literal_or_sentinels_.length()) { + SetInternalError(context, "Literal already scanned", &result); + return result; + } + while (bytes_available > 0) { + if (scanned_literal_length == 0) { + // Literal scan not in progress. Find the first char of the literal. + auto* matched_byte = reinterpret_cast<const char*>( + memchr(cbytes, literal_or_sentinels_[0], bytes_available)); + if (matched_byte == nullptr) { + // first char not found and chars exhausted. 
+ ExtendTokenLength(bytes_available); + result.IncrementBytesConsumed(bytes_available); + result.SetType(DataMatchResult::kPartialOutOfData); + break; + } else { + // found the first char of the literal. + size_t bytes_scanned = (matched_byte - cbytes) + 1; + result.IncrementBytesConsumed(bytes_scanned); + bytes_available -= bytes_scanned; + cbytes += bytes_scanned; + ExtendTokenLength(bytes_scanned); + scanned_literal_length = 1; + } + } + // check if the rest of the literal is there. + size_t bytes_still_needed = + literal_or_sentinels_.length() - scanned_literal_length; + size_t bytes_to_compare = std::min(bytes_still_needed, bytes_available); + if (strncmp(&literal_or_sentinels_[scanned_literal_length], cbytes, + bytes_to_compare) == 0) { + // Yes, the whole literal is there or chars are exhausted. + ExtendTokenLength(bytes_to_compare); + scanned_literal_length += bytes_to_compare; + result.IncrementBytesConsumed(bytes_to_compare); + result.SetType(scanned_literal_length == literal_or_sentinels_.length() + ? DataMatchResult::kFull + : DataMatchResult::kPartialOutOfData); + break; + } + // false alarm, the firsts char of the literal were found, but not the + // whole enchilada. Keep searching at one past the first char of the match. + scanned_literal_length = 0; + } + return result; +} + +DataMatchResult DataScanner::ScanWhitespace(const char* cbytes, + size_t bytes_available, + const DataContext& context) { + DataMatchResult result; + size_t token_length = token_range_.GetLength(); + result.SetBytesConsumed(ScanWhitespaceChars(cbytes, bytes_available)); + token_length = ExtendTokenLength(result.GetBytesConsumed()); + if (result.GetBytesConsumed() == 0) { + if (token_length == 0 && type_ == kWhitespace) { + SetSyntaxError(context, "Expected whitespace", &result); + } else { + result.SetType(DataMatchResult::kFull); + } + } else { + result.SetType((result.GetBytesConsumed() < bytes_available) + ? 
DataMatchResult::kFull + : DataMatchResult::kPartialOutOfData); + } + return result; +} + +DataMatchResult DataScanner::Scan(const DataContext& context) { + scan_call_count_ += 1; + DataMatchResult result; + if (!context.IsValidLocationAndRange()) { + SetInternalError(context, context.GetInvalidLocationAndRangeErrorText(), + &result); + return result; + } + if (!token_range_.IsValid()) { + token_range_ = DataRange(context.GetLocation(), context.GetLocation()); + } + size_t bytes_available = context.GetRange().GetEnd() - context.GetLocation(); + const char* cbytes = context.GetCharBytes(); + switch (type_) { + case kLiteral: + result = ScanLiteral(cbytes, bytes_available, context); + break; + case kName: + result = ScanName(cbytes, bytes_available, context); + break; + case kQuotedString: + result = ScanQuotedString(cbytes, bytes_available, context); + break; + case kSentinel: + result = ScanSentinel(cbytes, bytes_available, context); + break; + case kThroughLiteral: + result = ScanThroughLiteral(cbytes, bytes_available, context); + break; + case kWhitespace: + case kOptionalWhitespace: + result = ScanWhitespace(cbytes, bytes_available, context); + break; + default: + SetInternalError(context, "Undefined scanner type", &result); + break; + } + return result; +} + +void DataScanner::ResetTokenRange() { token_range_ = DataRange(); } + +void DataScanner::Reset() { + data_ = 0; + scan_call_count_ = 0; + ResetTokenRange(); +} + +std::string DataScanner::GetDescription() const { + std::string description; + switch (type_) { + case kLiteral: + description = "Literal:'"; + description += literal_or_sentinels_; + description += "'"; + break; + case kName: + description = "Name"; + break; + case kQuotedString: + description = "QuotedString"; + break; + case kSentinel: + description = "OneOf:'"; + description += literal_or_sentinels_; + description += "'"; + break; + case kThroughLiteral: + description = "ThruLiteral:'"; + description += literal_or_sentinels_; + description += 
"'"; + break; + case kWhitespace: + description = "Whitespace"; + break; + case kOptionalWhitespace: + description = "OptionalWhitespace"; + break; + } + return description; +} + +std::string DataScanner::GetLiteral() const { + return type_ == kLiteral || type_ == kThroughLiteral ? literal_or_sentinels_ + : ""; +} + +std::string DataScanner::GetSentenels() const { + return type_ == kSentinel ? literal_or_sentinels_ : ""; +} + +char DataScanner::GetSentinel() const { return type_ == kSentinel ? data_ : 0; } + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/base/data_segment.cc b/src/base/data_segment.cc new file mode 100644 index 0000000..95b4cc7 --- /dev/null +++ b/src/base/data_segment.cc @@ -0,0 +1,81 @@ +#include "image_io/base/data_segment.h" + +#include <cstring> + +namespace photos_editing_formats { +namespace image_io { + +using std::default_delete; +using std::shared_ptr; + +shared_ptr<DataSegment> DataSegment::Create( + const DataRange& data_range, const Byte* buffer, + DataSegment::BufferDispositionPolicy buffer_policy) { + return shared_ptr<DataSegment>( + new DataSegment(data_range, buffer, buffer_policy), + default_delete<DataSegment>()); +} + +size_t DataSegment::Find(size_t start_location, Byte value) const { + if (!Contains(start_location)) { + return GetEnd(); + } + const Byte* location = reinterpret_cast<const Byte*>( + memchr((buffer_ + start_location) - GetBegin(), value, + GetEnd() - start_location)); + return location ? 
(location - buffer_) + GetBegin() : GetEnd(); +} + +/// Finds the first occurrence of the str_length bytes at str, starting the +/// search at location. +/// @param location The location in this segment at which to start searching. +/// @param str The bytes to look for; they are compared as raw bytes, so the +/// pattern and the segment may both contain '\0'. +/// @param str_length The number of bytes at str. +/// @return The location of the first match, or GetEnd() if there is none. +size_t DataSegment::Find(size_t location, const char* str, + size_t str_length) const { + const char char0 = *str; + while (Contains(location)) { + const void* void0_ptr = + memchr(GetBuffer(location), char0, GetEnd() - location); + if (void0_ptr == nullptr) { + // The first byte does not occur again, so no later location can match. + break; + } + const Byte* byte0_ptr = reinterpret_cast<const Byte*>(void0_ptr); + const size_t byte0_location = (byte0_ptr - buffer_) + GetBegin(); + if (GetEnd() - byte0_location < str_length) { + // Too few bytes remain for a whole match here or at any later location. + break; + } + // memcmp rather than strncmp: strncmp stops at the first '\0' and would + // report a match without comparing the bytes that follow it. + if (memcmp(byte0_ptr, str, str_length) == 0) { + return byte0_location; + } + // Resume just past this candidate instead of rescanning the bytes that + // memchr has already skipped. + location = byte0_location + 1; + } + return GetEnd(); +} + +ValidatedByte DataSegment::GetValidatedByte(size_t location, + const DataSegment* segment1, + const DataSegment* segment2) { + for (const DataSegment* segment : {segment1, segment2}) { + if (segment && segment->Contains(location)) { + return segment->GetValidatedByte(location); + } + } + return InvalidByte(); +} + +size_t DataSegment::Find(size_t start_location, Byte value, + const DataSegment* segment1, + const DataSegment* segment2) { + if (segment1 && segment2 && segment1->GetEnd() == segment2->GetBegin()) { + size_t value_location = segment2->GetEnd(); + if (segment1->Contains(start_location)) { + value_location = segment1->Find(start_location, value); + if (value_location == segment1->GetEnd()) { + value_location = segment2->Find(segment2->GetBegin(), value); + } + } else { + value_location = segment2->Find(start_location, value); + } + return value_location; + } + size_t segment1_end = segment1 ? segment1->GetEnd() : 0; + return segment2 ? 
std::max(segment1_end, segment2->GetEnd()) : segment1_end; +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/base/data_segment_data_source.cc b/src/base/data_segment_data_source.cc new file mode 100644 index 0000000..79dc55d --- /dev/null +++ b/src/base/data_segment_data_source.cc @@ -0,0 +1,42 @@ +#include "image_io/base/data_segment_data_source.h" + +#include "image_io/base/data_destination.h" +#include "image_io/base/data_range.h" +#include "image_io/base/data_segment.h" + +namespace photos_editing_formats { +namespace image_io { + +void DataSegmentDataSource::Reset() {} + +std::shared_ptr<DataSegment> DataSegmentDataSource::GetDataSegment( + size_t begin, size_t min_size) { + DataRange range(begin, begin + min_size); + if (range.GetIntersection(shared_data_segment_->GetDataRange()).IsValid()) { + return shared_data_segment_; + } else { + return std::shared_ptr<DataSegment>(nullptr); + } +} + +DataSource::TransferDataResult DataSegmentDataSource::TransferData( + const DataRange& data_range, size_t /*best_size*/, + DataDestination* data_destination) { + bool data_transferred = false; + DataDestination::TransferStatus status = DataDestination::kTransferDone; + DataRange transfer_range = + shared_data_segment_->GetDataRange().GetIntersection(data_range); + if (data_destination && transfer_range.IsValid()) { + data_transferred = true; + status = data_destination->Transfer(transfer_range, *shared_data_segment_); + } + if (data_transferred) { + return status == DataDestination::kTransferError ? kTransferDataError + : kTransferDataSuccess; + } else { + return data_destination ? 
kTransferDataNone : kTransferDataError; + } +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/base/istream_data_source.cc b/src/base/istream_data_source.cc new file mode 100644 index 0000000..d1d66f0 --- /dev/null +++ b/src/base/istream_data_source.cc @@ -0,0 +1,81 @@ +#include "image_io/base/istream_data_source.h" + +#include "image_io/base/data_destination.h" +#include "image_io/base/data_segment.h" + +namespace photos_editing_formats { +namespace image_io { + +void IStreamDataSource::Reset() { + istream_->clear(); + istream_->seekg(0); + current_data_segment_.reset(); +} + +std::shared_ptr<DataSegment> IStreamDataSource::GetDataSegment( + size_t begin, size_t min_size) { + if (current_data_segment_ && current_data_segment_->Contains(begin)) { + return current_data_segment_; + } + current_data_segment_ = Read(begin, min_size); + return current_data_segment_; +} + +DataSource::TransferDataResult IStreamDataSource::TransferData( + const DataRange &data_range, size_t best_size, + DataDestination *data_destination) { + bool data_transferred = false; + DataDestination::TransferStatus status = DataDestination::kTransferDone; + if (data_destination && data_range.IsValid()) { + size_t min_size = std::min(data_range.GetLength(), best_size); + if (current_data_segment_ && + current_data_segment_->GetLength() >= min_size && + current_data_segment_->GetDataRange().Contains(data_range)) { + status = data_destination->Transfer(data_range, *current_data_segment_); + data_transferred = true; + } else { + istream_->clear(); + size_t chunk_size = min_size; + for (size_t begin = data_range.GetBegin(); begin < data_range.GetEnd(); + begin += chunk_size) { + size_t segment_length = 0; + size_t end = std::min(data_range.GetEnd(), begin + chunk_size); + std::shared_ptr<DataSegment> data_segment = Read(begin, end - begin); + if (data_segment) { + segment_length = data_segment->GetLength(); + if (segment_length) { + status = 
data_destination->Transfer(data_segment->GetDataRange(), + *data_segment); + data_transferred = true; + } + } + if (status != DataDestination::kTransferOk || segment_length == 0) { + break; + } + } + } + } + if (data_transferred) { + return status == DataDestination::kTransferError ? kTransferDataError + : kTransferDataSuccess; + } else { + return data_destination ? kTransferDataNone : kTransferDataError; + } +} + +std::shared_ptr<DataSegment> IStreamDataSource::Read(size_t begin, + size_t count) { + std::shared_ptr<DataSegment> shared_data_segment; + istream_->seekg(begin); + if (istream_->rdstate() == std::ios_base::goodbit) { + Byte *buffer = new Byte[count]; + istream_->read(reinterpret_cast<char *>(buffer), count); + size_t bytes_read = istream_->gcount(); + shared_data_segment = + DataSegment::Create(DataRange(begin, begin + bytes_read), buffer); + } + return shared_data_segment; +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/base/message_handler.cc b/src/base/message_handler.cc new file mode 100644 index 0000000..70959c0 --- /dev/null +++ b/src/base/message_handler.cc @@ -0,0 +1,69 @@ +#include "image_io/base/message_handler.h" + +#include <memory> +#include <string> +#include <utility> + +#include "image_io/base/cout_message_writer.h" + +namespace photos_editing_formats { +namespace image_io { + +using std::string; +using std::unique_ptr; + +/// The message handler. No effort made to delete it at program's end. 
+static MessageHandler* gMessageHandler = nullptr; + +void MessageHandler::Init(std::unique_ptr<MessageWriter> message_writer, + std::unique_ptr<MessageStore> message_store) { + auto* old_handler = gMessageHandler; + gMessageHandler = new MessageHandler; + gMessageHandler->SetMessageWriter(std::move(message_writer)); + gMessageHandler->SetMessageStore(std::move(message_store)); + delete old_handler; +} + +MessageHandler* MessageHandler::Get() { + if (!gMessageHandler) { + gMessageHandler = new MessageHandler; + gMessageHandler->SetMessageWriter( + unique_ptr<MessageWriter>(new CoutMessageWriter)); + gMessageHandler->SetMessageStore( + unique_ptr<MessageStore>(new VectorMessageStore)); + } + return gMessageHandler; +} + +MessageHandler::~MessageHandler() { + if (gMessageHandler == this) { + gMessageHandler = nullptr; + } +} + +void MessageHandler::SetMessageWriter( + std::unique_ptr<MessageWriter> message_writer) { + message_writer_ = std::move(message_writer); +} + +void MessageHandler::SetMessageStore( + std::unique_ptr<MessageStore> message_store) { + message_store_ = std::move(message_store); +} + +void MessageHandler::ReportMessage(Message::Type type, const string& text) { + int system_errno = (type == Message::kStdLibError) ? 
errno : 0; + ReportMessage(Message(type, system_errno, text)); +} + +void MessageHandler::ReportMessage(const Message& message) { + if (message_store_) { + message_store_->AddMessage(message); + } + if (message_writer_) { + message_writer_->WriteMessage(message); + } +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/base/message_writer.cc b/src/base/message_writer.cc new file mode 100644 index 0000000..d13dc41 --- /dev/null +++ b/src/base/message_writer.cc @@ -0,0 +1,62 @@ +#include "image_io/base/message_writer.h" + +#include <cstring> +#include <sstream> +#include <string> + +namespace photos_editing_formats { +namespace image_io { + +using std::string; +using std::stringstream; + +string MessageWriter::GetFormattedMessage(const Message& message) const { + stringstream message_stream; + message_stream << GetTypeCategory(message.GetType()) << ":" + << GetTypeDescription(message.GetType(), + message.GetSystemErrno()) + << ":" << message.GetText(); + return message_stream.str(); +} + +string MessageWriter::GetTypeCategory(Message::Type type) const { + if (type == Message::kStatus) { + return "STATUS"; + } else { + return "ERROR"; + } +} + +string MessageWriter::GetTypeDescription(Message::Type type, + int system_errno) const { + string description; + switch (type) { + case Message::kStatus: + break; + case Message::kStdLibError: + description = system_errno > 0 ? 
std::strerror(system_errno) : "Unknown"; + break; + case Message::kPrematureEndOfDataError: + description = "Premature end of data"; + break; + case Message::kStringNotFoundError: + description = "String not found"; + break; + case Message::kDecodingError: + description = "Decoding error"; + break; + case Message::kSyntaxError: + description = "Syntax error"; + break; + case Message::kValueError: + description = "Value error"; + break; + case Message::kInternalError: + description = "Internal error"; + break; + } + return description; +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/base/ostream_data_destination.cc b/src/base/ostream_data_destination.cc new file mode 100644 index 0000000..97915c9 --- /dev/null +++ b/src/base/ostream_data_destination.cc @@ -0,0 +1,45 @@ +#include "image_io/base/ostream_data_destination.h" + +#include "image_io/base/data_range.h" +#include "image_io/base/data_segment.h" +#include "image_io/base/message_handler.h" + +namespace photos_editing_formats { +namespace image_io { + +using std::ostream; + +void OStreamDataDestination::StartTransfer() {} + +DataDestination::TransferStatus OStreamDataDestination::Transfer( + const DataRange& transfer_range, const DataSegment& data_segment) { + if (ostream_ && transfer_range.IsValid() && !HasError()) { + size_t bytes_written = 0; + size_t bytes_to_write = transfer_range.GetLength(); + const Byte* buffer = data_segment.GetBuffer(transfer_range.GetBegin()); + if (buffer) { + ostream::pos_type prewrite_pos = ostream_->tellp(); + ostream_->write(reinterpret_cast<const char*>(buffer), bytes_to_write); + ostream::pos_type postwrite_pos = ostream_->tellp(); + if (postwrite_pos != EOF) { + bytes_written = ostream_->tellp() - prewrite_pos; + bytes_transferred_ += bytes_written; + } + } + if (bytes_written != bytes_to_write) { + MessageHandler::Get()->ReportMessage(Message::kStdLibError, name_); + has_error_ = true; + return kTransferError; + } + } + return 
kTransferOk; +} + +void OStreamDataDestination::FinishTransfer() { + if (ostream_) { + ostream_->flush(); + } +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/extras/base64_decoder_data_destination.cc b/src/extras/base64_decoder_data_destination.cc new file mode 100644 index 0000000..a15b997 --- /dev/null +++ b/src/extras/base64_decoder_data_destination.cc @@ -0,0 +1,187 @@ +#include "image_io/extras/base64_decoder_data_destination.h" + +#include <memory> +#include <sstream> +#include <vector> + +#include "image_io/base/data_segment.h" +#include "image_io/base/message_handler.h" +#include <modp_b64/modp_b64.h> + +namespace photos_editing_formats { +namespace image_io { + +using std::shared_ptr; +using std::unique_ptr; +using std::vector; + +// Set this flag to 1 for debugging output. +#define PHOTOS_EDITING_FORMATS_IMAGE_IO_EXTRAS_BASE64_DECODER_DATA_DEST_DEBUG 0 + +/// A helper function to adjust the parameters for the base64 decoder function +/// that are used by the Base64DecoderDataDestination to those that are required +/// to call the modp_b64_decode function. +/// @param src The source bytes to decode. +/// @param len The number of source bytes to decode. +/// @param out The output buffer to receive the decoded bytes, assumed to be +/// large enough (which the Base64DecoderDataDestination code does). +/// @param pad_count The number of pad characters detected at the end of the +/// src buffer. +/// @return The number of decoded bytes placed in the out buffer. +static size_t base64_decode(const Byte* src, size_t len, Byte* out, + size_t* pad_count) { + // The base64 encoding is described at https://en.wikipedia.org/wiki/Base64. + // It uses these 64 printable characters: [0-9], [a-z], [A-Z], + and /. Since + // each character can represent 6 bits, 4 encoded characters can be used to + // represent 3 decoded bytes (6*4 = 3*8). 
There is the possibility that up to + // two padding bytes have to be added to the src that is encoded to ensure + // that the total number of encoded bytes is evenly divisible by 3. The = char + // is used for the purpose of completing the multiple-of-4 encoded bytes. The + // = may appear only at the end of the buffer being decoded, or else its an + // error. + const char kPadChar = '='; + if (len > 2 && src[len - 1] == kPadChar && src[len - 2] == kPadChar) { + // If the final two chars of the src buffer are pads then pad count is 2. + *pad_count = 2; + } else if (len > 1 && src[len - 1] == kPadChar) { + // If the final char of the src buffer is a pad then pad count is 1. + *pad_count = 1; + } else { + *pad_count = 0; + } + int bytes_decoded = modp_b64_decode(reinterpret_cast<char*>(out), + reinterpret_cast<const char*>(src), + static_cast<int>(len)); + return bytes_decoded > 0 ? bytes_decoded : 0; +} + +void Base64DecoderDataDestination::StartTransfer() { + next_destination_->StartTransfer(); +} + +DataDestination::TransferStatus Base64DecoderDataDestination::Transfer( + const DataRange& transfer_range, const DataSegment& data_segment) { + const Byte* encoded_buffer = + data_segment.GetBuffer(transfer_range.GetBegin()); + if (!encoded_buffer || !transfer_range.IsValid() || HasError()) { + return kTransferError; + } + + // If there are left over bytes from the last call, steal enough bytes from + // the current encoded buffer to make up chunk's worth. If there are no more + // bytes in the encoded buffer (must be a small buffer) then we're done. 
+#if PHOTOS_EDITING_FORMATS_IMAGE_IO_EXTRAS_BASE64_DECODER_DATA_DEST_DEBUG + // Debug trace of how many undecoded bytes were carried over from the last + // call. The message type is Message::kStatus, the same enumerator the trace + // at the end of this function uses. + std::stringstream sstream1; + sstream1 << " " << leftover_bytes_.size() << " bytes left over"; + MessageHandler::Get()->ReportMessage(Message::kStatus, sstream1.str()); +#endif // PHOTOS_EDITING_FORMATS_IMAGE_IO_EXTRAS_BASE64_DECODER_DATA_DEST_DEBUG + size_t number_stolen_bytes = 0; + std::vector<Byte> leftover_and_stolen_bytes; + if (!leftover_bytes_.empty()) { + // Note that because of the way the leftover_bytes are captured at the end + // of this function, leftover_bytes.size() will be in the range [0:4). The + // number_stolen_bytes is always less than or equal to the number of bytes + // in the transfer_range. If the transfer_range happens to be small, and + // the leftover_bytes.size() + number_stolen_bytes does not equal 4, then + // no decoding can be done, and so the function just returns kTransferOk, + // indicating that the transfer operation should continue. The next call to + // Transfer() will either have enough bytes available to be stolen so that + // the bytes can be decoded, or the process of premature return will be + // repeated, up to 3 times, worst case, where the transfer_range length is + // 1 each time Transfer is called. + number_stolen_bytes = + std::min(transfer_range.GetLength(), 4 - leftover_bytes_.size() % 4); + leftover_bytes_.insert(leftover_bytes_.end(), encoded_buffer, + encoded_buffer + number_stolen_bytes); + if (number_stolen_bytes == transfer_range.GetLength() && + leftover_bytes_.size() % 4) { + return kTransferOk; + } + using std::swap; + swap(leftover_and_stolen_bytes, leftover_bytes_); + } + + // Figure out the size of the buffer to hold the decoded bytes. 
When computing + // the number_remaining_bytes, note that number_stolen_bytes is 0 if there are + // no leftover_bytes, or in the range [1:3], and that it never exceeds the + // transfer_range length because it is clamped with std::min in the above + // code block. Thus it is safe to subtract number_stolen_bytes from the + // transfer_range's length to obtain a (guaranteed) non-negative value for + // number_remaining_bytes. + size_t number_remaining_bytes = + transfer_range.GetLength() - number_stolen_bytes; + size_t number_leftover_and_stolen_decoded_bytes = + leftover_and_stolen_bytes.size() / 4 * 3; + size_t number_remaining_chunks = number_remaining_bytes / 4; + size_t number_remaining_decoded_bytes = number_remaining_chunks * 3; + size_t decoded_buffer_length = + number_leftover_and_stolen_decoded_bytes + number_remaining_decoded_bytes; + unique_ptr<Byte[]> decoded_buffer(new Byte[decoded_buffer_length]); + + // Decode the left over and stolen bytes first. + size_t pad_count1 = 0; + size_t total_bytes_decoded = 0; + if (number_leftover_and_stolen_decoded_bytes) { + total_bytes_decoded = base64_decode(leftover_and_stolen_bytes.data(), + leftover_and_stolen_bytes.size(), + decoded_buffer.get(), &pad_count1); + if (total_bytes_decoded + pad_count1 != + number_leftover_and_stolen_decoded_bytes) { + MessageHandler::Get()->ReportMessage(Message::kDecodingError, ""); + has_error_ = true; + return kTransferError; + } + } + + // Decode the remaining bytes from the encoded buffer. 
+ size_t pad_count2 = 0; + if (number_remaining_decoded_bytes) { + size_t number_bytes_decoded = base64_decode( + encoded_buffer + number_stolen_bytes, number_remaining_chunks * 4, + decoded_buffer.get() + total_bytes_decoded, &pad_count2); + total_bytes_decoded += number_bytes_decoded; + if (total_bytes_decoded + pad_count1 + pad_count2 != + decoded_buffer_length) { + MessageHandler::Get()->ReportMessage(Message::kDecodingError, ""); + has_error_ = true; + return kTransferError; + } + } + + // Capture any new left over bytes. The number_new_leftover_bytes will always + // be in the range [0:4). + size_t number_processed_bytes = + number_stolen_bytes + number_remaining_chunks * 4; + size_t number_new_leftover_bytes = + transfer_range.GetLength() - number_processed_bytes; + if (number_new_leftover_bytes) { + leftover_bytes_.insert( + leftover_bytes_.end(), encoded_buffer + number_processed_bytes, + encoded_buffer + number_processed_bytes + number_new_leftover_bytes); + } + +#if PHOTOS_EDITING_FORMATS_IMAGE_IO_EXTRAS_BASE64_DECODER_DATA_DEST_DEBUG + std::stringstream sstream2; + sstream2 << " " << leftover_bytes_.size() << " new bytes left over"; + MessageHandler::Get()->ReportMessage(Message::kStatus, sstream2.str()); +#endif // PHOTOS_EDITING_FORMATS_IMAGE_IO_EXTRAS_BASE64_DECODER_DATA_DEST_DEBUG + + // And call the next stage + size_t decoded_location = next_decoded_location_; + next_decoded_location_ += (total_bytes_decoded); + DataRange decoded_range(decoded_location, next_decoded_location_); + shared_ptr<DataSegment> decoded_data_segment = + DataSegment::Create(decoded_range, decoded_buffer.release()); + return next_destination_->Transfer(decoded_range, *decoded_data_segment); +} + +void Base64DecoderDataDestination::FinishTransfer() { + if (leftover_bytes_.size() % 4) { + MessageHandler::Get()->ReportMessage(Message::kDecodingError, ""); + has_error_ = true; + } + next_destination_->FinishTransfer(); +} + +} // namespace image_io +} // namespace 
photos_editing_formats diff --git a/src/gcontainer/gcontainer.cc b/src/gcontainer/gcontainer.cc new file mode 100644 index 0000000..1179778 --- /dev/null +++ b/src/gcontainer/gcontainer.cc @@ -0,0 +1,163 @@ +#include "image_io/gcontainer/gcontainer.h" + +#include <fstream> + +#include "image_io/base/data_segment.h" +#include "image_io/base/data_segment_data_source.h" +#include "image_io/base/message_handler.h" +#include "image_io/base/ostream_data_destination.h" +#include "image_io/jpeg/jpeg_info.h" +#include "image_io/jpeg/jpeg_info_builder.h" +#include "image_io/jpeg/jpeg_scanner.h" +#include "image_io/utils/file_utils.h" + +namespace photos_editing_formats { +namespace image_io { +namespace gcontainer { +namespace { + +using photos_editing_formats::image_io::DataRange; +using photos_editing_formats::image_io::DataSegment; +using photos_editing_formats::image_io::DataSegmentDataSource; +using photos_editing_formats::image_io::JpegInfoBuilder; +using photos_editing_formats::image_io::JpegScanner; +using photos_editing_formats::image_io::Message; +using photos_editing_formats::image_io::MessageHandler; +using photos_editing_formats::image_io::OStreamDataDestination; +using photos_editing_formats::image_io::ReportErrorPolicy; +using std::string; + +// Populates first_image_range with the first image (from the header metadata +// to the EOI marker) present in the JPEG file input_file_name. Returns true if +// such a first image is found, false otherwise. +// +// input_file_name must be a JPEG file. +// image_data_segment is populated with the DataSegment for +// input_file_name, and is populated only in the successful case. +// first_image_range is populated with the first image found in the input file, +// only if such an image is found. 
+bool ExtractFirstImageInJpeg(const string& input_file_name, + std::shared_ptr<DataSegment>* image_data_segment, + DataRange* first_image_range) { + if (first_image_range == nullptr) { + return false; + } + + // Get the input and output setup. + MessageHandler::Get()->ClearMessages(); + auto data_segment = + ReadEntireFile(input_file_name, ReportErrorPolicy::kReportError); + if (!data_segment) { + return false; + } + + // Get the jpeg info and first image range from the input. + DataSegmentDataSource data_source(data_segment); + JpegInfoBuilder jpeg_info_builder; + jpeg_info_builder.SetImageLimit(1); + JpegScanner jpeg_scanner; + jpeg_scanner.Run(&data_source, &jpeg_info_builder); + if (jpeg_scanner.HasError()) { + return false; + } + + const auto& jpeg_info = jpeg_info_builder.GetInfo(); + const auto& image_ranges = jpeg_info.GetImageRanges(); + if (image_ranges.empty()) { + MessageHandler::Get()->ReportMessage(Message::kPrematureEndOfDataError, + "No Images Found"); + return false; + } + + *image_data_segment = data_segment; + *first_image_range = image_ranges[0]; + return true; +} + +} // namespace + +bool WriteImageAndFiles(const string& input_file_name, + const std::vector<string>& other_files, + const string& output_file_name) { + auto output_stream = + OpenOutputFile(output_file_name, ReportErrorPolicy::kReportError); + if (!output_stream) { + return false; + } + + OStreamDataDestination output_destination(std::move(output_stream)); + output_destination.SetName(output_file_name); + + DataRange image_range; + std::shared_ptr<DataSegment> data_segment; + if (!ExtractFirstImageInJpeg(input_file_name, &data_segment, &image_range)) { + return false; + } + + output_destination.StartTransfer(); + DataSegmentDataSource data_source(data_segment); + data_source.TransferData(image_range, image_range.GetLength(), + &output_destination); + + size_t bytes_transferred = image_range.GetLength(); + for (const string& tack_on_file : other_files) { + if (tack_on_file.empty()) 
{ + continue; + } + auto tack_on_data_segment = + ReadEntireFile(tack_on_file, ReportErrorPolicy::kReportError); + if (!tack_on_data_segment) { + continue; + } + + DataSegmentDataSource tack_on_source(tack_on_data_segment); + DataRange tack_on_range = tack_on_data_segment->GetDataRange(); + bytes_transferred += tack_on_range.GetLength(); + tack_on_source.TransferData(tack_on_range, tack_on_range.GetLength(), + &output_destination); + } + + output_destination.FinishTransfer(); + return output_destination.GetBytesTransferred() == bytes_transferred && + !output_destination.HasError(); +} + +bool ParseFileAfterImage(const string& input_file_name, + size_t file_start_offset, size_t file_length, + string* out_file_contents) { + if (out_file_contents == nullptr || file_start_offset < 0 || + file_length == 0) { + return false; + } + + DataRange image_range; + std::shared_ptr<DataSegment> data_segment; + if (!ExtractFirstImageInJpeg(input_file_name, &data_segment, &image_range)) { + return false; + } + + size_t image_bytes_end_offset = image_range.GetEnd(); + size_t image_file_end = data_segment->GetEnd(); + size_t file_start_in_image = image_bytes_end_offset + file_start_offset; + size_t file_end_in_image = file_start_in_image + file_length; + if (image_file_end < file_end_in_image) { + // Requested file is past the end of the image file. + return false; + } + + // Get the file's contents. + const DataRange file_range(file_start_in_image, file_end_in_image); + size_t file_range_size = file_range.GetLength(); + // TODO(miraleung): Consider subclassing image_io/data_destination.h and + // transferring bytes directly into the string. TBD pending additional mime + // type getters. 
+ std::ifstream input_file_stream(input_file_name); + input_file_stream.seekg(file_range.GetBegin()); + out_file_contents->resize(file_range_size); + input_file_stream.read(&(*out_file_contents)[0], file_range_size); + return true; +} + +} // namespace gcontainer +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/jpeg/jpeg_apple_depth_builder.cc b/src/jpeg/jpeg_apple_depth_builder.cc new file mode 100644 index 0000000..ce83f9f --- /dev/null +++ b/src/jpeg/jpeg_apple_depth_builder.cc @@ -0,0 +1,252 @@ +#include "image_io/jpeg/jpeg_apple_depth_builder.h" + +#include <cstring> +#include <sstream> + +#include "image_io/base/byte_buffer.h" +#include "image_io/base/data_segment_data_source.h" +#include "image_io/base/message.h" +#include "image_io/base/message_handler.h" +#include "image_io/jpeg/jpeg_info.h" +#include "image_io/jpeg/jpeg_info_builder.h" +#include "image_io/jpeg/jpeg_scanner.h" +#include "image_io/jpeg/jpeg_segment_info.h" + +namespace photos_editing_formats { +namespace image_io { + +using std::string; +using std::vector; + +namespace { + +/// The special Apple depth JFIF segment suffix and length. The -1 in the +/// kAmpfLength compuration is because the size of kAmpf is 5 bytes, including +/// the terminating null character, but the kAmpfLength should be 4. Can't use +/// strlen (which would be better) because it is not constexpr-able. +const char kAmpf[] = "AMPF"; +constexpr size_t kAmpfLength = sizeof(kAmpf) - 1; + +/// The contents of the MPF segment length and value in three parts. For more +/// information, see go/photos-image-io-phase2-summary. 
+const size_t kMpfSegmentLength = 0x5A; +const char kMpfHex0[] = + "FFE200584D5046004D4D002A000000080003B00000070000000430313030B0010004000000" + "0100000002B002000700000020000000320000000000030000"; +// Four byte primary image length value +const char kMpfHex1[] = "000000000000000000000000"; +// Four byte depth image length value +// Four byte depth image offset value +const char kMpfHex2[] = "00000000"; + +/// The optimum size to use for the DataSource::TransferData() function. +constexpr size_t kBestDataSize = 0x10000; + +/// @param image_limit The limit on the number of images to get info of. +/// @param data_source The data source from which to get info. +/// @param info A pointer to the jpeg_info object to receive the info. +/// @return Whether the info was obtained successfully or not. +bool GetJpegInfo(int image_limit, DataSource* data_source, JpegInfo* info) { + JpegInfoBuilder info_builder; + info_builder.SetImageLimit(image_limit); + info_builder.SetCaptureSegmentBytes(kJfif); + JpegScanner scanner; + scanner.Run(data_source, &info_builder); + if (scanner.HasError()) { + return false; + } + *info = info_builder.GetInfo(); + return true; +} + +} // namespace + +bool JpegAppleDepthBuilder::Run(DataSource* primary_image_data_source, + DataSource* depth_image_data_source, + DataDestination* data_destination) { + primary_image_data_source_ = primary_image_data_source; + depth_image_data_source_ = depth_image_data_source; + data_destination_ = data_destination; + if (!GetPrimaryImageData()) { + MessageHandler::Get()->ReportMessage(Message::kDecodingError, + "Primary image data"); + return false; + } + if (!GetDepthImageData()) { + MessageHandler::Get()->ReportMessage(Message::kDecodingError, + "Depth image data"); + return false; + } + data_destination->StartTransfer(); + bool status = TransferPrimaryImage(); + if (status) { + status = TransferDepthImage(); + } + data_destination->FinishTransfer(); + return status; +} + +bool 
JpegAppleDepthBuilder::GetPrimaryImageData() { + JpegInfo info; + if (!GetJpegInfo(1, primary_image_data_source_, &info)) { + return false; + } + if (info.GetImageRanges().empty()) { + return false; + } + primary_image_range_ = info.GetImageRanges()[0]; + JpegSegmentInfo jfif_segment_info = info.GetSegmentInfo(0, kJfif); + if (!jfif_segment_info.IsValid() || + jfif_segment_info.GetBytes().size() < kAmpfLength) { + return false; + } + primary_image_jfif_segment_range_ = jfif_segment_info.GetDataRange(); + primary_image_jfif_segment_bytes_ = jfif_segment_info.GetBytes(); + + JpegSegmentInfo exif_info = info.GetSegmentInfo(0, kExif); + if (!exif_info.IsValid()) { + return false; + } + JpegSegmentInfo mpf_info = info.GetSegmentInfo(0, kMpf); + if (mpf_info.IsValid()) { + primary_image_mpf_segment_range_ = mpf_info.GetDataRange(); + } else { + size_t exif_end = exif_info.GetDataRange().GetEnd(); + primary_image_mpf_segment_range_ = DataRange(exif_end, exif_end); + } + return true; +} + +bool JpegAppleDepthBuilder::GetDepthImageData() { + JpegInfo info; + if (!GetJpegInfo(2, depth_image_data_source_, &info)) { + return false; + } + if (!info.HasAppleDepth()) { + return false; + } + depth_image_range_ = info.GetAppleDepthImageRange(); + return true; +} + +bool JpegAppleDepthBuilder::TransferPrimaryImage() { + // The first move involves all from the start of the data source to the + // mpf location or the beginning of the jfif segment, which ever comes first. + size_t first_end = std::min(primary_image_jfif_segment_range_.GetBegin(), + primary_image_mpf_segment_range_.GetBegin()); + DataRange first_range(0, first_end); + if (!TransferData(primary_image_data_source_, first_range)) { + return false; + } + + // Move the new Jfif segment. If the primary image jfif came right after the + // SOI then the first_end is positioned at the start of the jfif segment. So + // move it to the end so that the original jfif segment does not get copied + // to the output destination. 
+ size_t jfif_length_delta = 0; + if (!TransferNewJfifSegment(&jfif_length_delta)) { + return false; + } + if (first_end == primary_image_jfif_segment_range_.GetBegin()) { + first_end = primary_image_jfif_segment_range_.GetEnd(); + } + + // The second move is from the end of the first move or the end of the jfif + // segment, which ever comes first to the mpf location. + size_t second_begin = + std::min(first_end, primary_image_jfif_segment_range_.GetEnd()); + DataRange second_range(second_begin, + primary_image_mpf_segment_range_.GetBegin()); + if (second_range.IsValid()) { + if (!TransferData(primary_image_data_source_, second_range)) { + return false; + } + } + + // Move the new Mpf segment. + if (!TransferNewMpfSegment(jfif_length_delta)) { + return false; + } + + // The third move is from from the end of the mpf to the end of the image. + DataRange mpf_eoi_range(primary_image_mpf_segment_range_.GetEnd(), + primary_image_range_.GetEnd()); + if (!mpf_eoi_range.IsValid()) { + return false; + } + return TransferData(primary_image_data_source_, mpf_eoi_range); +} + +bool JpegAppleDepthBuilder::TransferNewJfifSegment(size_t* jfif_length_delta) { + *jfif_length_delta = 0; + size_t jfif_size = primary_image_jfif_segment_bytes_.size(); + Byte* jfif_bytes = new Byte[jfif_size + kAmpfLength]; + memcpy(jfif_bytes, primary_image_jfif_segment_bytes_.data(), jfif_size); + if (memcmp(jfif_bytes + jfif_size - kAmpfLength, kAmpf, kAmpfLength) != 0) { + memcpy(jfif_bytes + jfif_size, kAmpf, kAmpfLength); + *jfif_length_delta = kAmpfLength; + jfif_size += kAmpfLength; + size_t jfif_data_length = jfif_size - 2; + jfif_bytes[2] = ((jfif_data_length >> 8) & 0xFF); + jfif_bytes[3] = (jfif_data_length & 0xFF); + } + DataRange jfif_range(0, jfif_size); + auto jfif_segment = DataSegment::Create(jfif_range, jfif_bytes); + DataSegmentDataSource jfif_data_source(jfif_segment); + return TransferData(&jfif_data_source, jfif_range); +} + +bool 
JpegAppleDepthBuilder::TransferNewMpfSegment(size_t jfif_length_delta) { + size_t primary_image_length = + primary_image_range_.GetLength() + jfif_length_delta - + primary_image_mpf_segment_range_.GetLength() + kMpfSegmentLength; + size_t depth_image_length = depth_image_range_.GetLength(); + size_t depth_image_offset = + primary_image_length - primary_image_mpf_segment_range_.GetBegin() - 8; + vector<ByteData> mpf_bytes; + mpf_bytes.reserve(5); + mpf_bytes.emplace_back(ByteData::kHex, kMpfHex0); + mpf_bytes.emplace_back(ByteData::kHex, + ByteData::Size2BigEndianHex(primary_image_length)); + mpf_bytes.emplace_back(ByteData::kHex, kMpfHex1); + mpf_bytes.emplace_back(ByteData::kHex, + ByteData::Size2BigEndianHex(depth_image_length)); + mpf_bytes.emplace_back(ByteData::kHex, + ByteData::Size2BigEndianHex(depth_image_offset)); + mpf_bytes.emplace_back(ByteData::kHex, kMpfHex2); + ByteBuffer mpf_byte_buffer(mpf_bytes); + size_t mpf_segment_size = mpf_byte_buffer.GetSize(); + if (!mpf_byte_buffer.IsValid() || mpf_segment_size != kMpfSegmentLength) { + return false; + } + DataRange mpf_range(0, mpf_segment_size); + auto mpf_segment = DataSegment::Create(mpf_range, mpf_byte_buffer.Release()); + DataSegmentDataSource mpf_data_source(mpf_segment); + return TransferData(&mpf_data_source, mpf_range); +} + +bool JpegAppleDepthBuilder::TransferDepthImage() { + return TransferData(depth_image_data_source_, depth_image_range_); +} + +bool JpegAppleDepthBuilder::TransferData(DataSource* data_source, + const DataRange& data_range) { + size_t old_byte_count = data_destination_->GetBytesTransferred(); + DataSource::TransferDataResult result = + data_source->TransferData(data_range, kBestDataSize, data_destination_); + if (result == DataSource::kTransferDataSuccess) { + size_t bytes_transferred = + data_destination_->GetBytesTransferred() - old_byte_count; + if (bytes_transferred != data_range.GetLength()) { + result = DataSource::kTransferDataError; + std::stringstream ss; + ss << 
"JpegAppleDepthBuilder:data source transferred " + << bytes_transferred << " bytes instead of " << data_range.GetLength(); + MessageHandler::Get()->ReportMessage(Message::kInternalError, ss.str()); + } + } + return result == DataSource::kTransferDataSuccess; +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/jpeg/jpeg_image_extractor.cc b/src/jpeg/jpeg_image_extractor.cc new file mode 100644 index 0000000..82f8fce --- /dev/null +++ b/src/jpeg/jpeg_image_extractor.cc @@ -0,0 +1,110 @@ +#include "image_io/jpeg/jpeg_image_extractor.h" + +#include <sstream> + +#include "image_io/base/data_range_tracking_destination.h" +#include "image_io/base/message_handler.h" +#include "image_io/extras/base64_decoder_data_destination.h" +#include "image_io/jpeg/jpeg_segment.h" +#include "image_io/jpeg/jpeg_xmp_data_extractor.h" + +/// Set this macro to 1 for debug output. +#define PHOTOS_EDITING_FORMATS_IMAGE_IO_JPEG_JPEG_IMAGE_EXTRACTOR_DEBUG 0 + +namespace photos_editing_formats { +namespace image_io { + +using std::vector; + +namespace { + +/// The optimim size to use for the DataSource::TransferData() function. 
+constexpr size_t kBestDataSize = 0x10000; + +} // namespace + +bool JpegImageExtractor::ExtractAppleDepthImage( + DataDestination* image_destination) { + bool succeeded = + ExtractImage(jpeg_info_.GetAppleDepthImageRange(), image_destination); + return jpeg_info_.HasAppleDepth() && succeeded; +} + +bool JpegImageExtractor::ExtractAppleMatteImage( + DataDestination* image_destination) { + bool succeeded = + ExtractImage(jpeg_info_.GetAppleMatteImageRange(), image_destination); + return jpeg_info_.HasAppleMatte() && succeeded; +} + +bool JpegImageExtractor::ExtractImage(const DataRange& image_range, + DataDestination* image_destination) { + DataRangeTrackingDestination data_range_destination(image_destination); + bool has_errors = false; + data_range_destination.StartTransfer(); + if (image_range.IsValid()) { + DataSource::TransferDataResult result = data_source_->TransferData( + image_range, kBestDataSize, &data_range_destination); + if (result == DataSource::kTransferDataError) { + has_errors = true; + } else if (result == DataSource::kTransferDataNone || + data_range_destination.HasDisjointTransferRanges() || + data_range_destination.GetTrackedDataRange() != image_range) { + has_errors = true; + MessageHandler::Get()->ReportMessage(Message::kPrematureEndOfDataError, + ""); + } + } + data_range_destination.FinishTransfer(); + return !has_errors; +} + +bool JpegImageExtractor::ExtractGDepthImage( + DataDestination* image_destination) { + return ExtractImage(JpegXmpInfo::kGDepthInfoType, image_destination); +} + +bool JpegImageExtractor::ExtractGImageImage( + DataDestination* image_destination) { + return ExtractImage(JpegXmpInfo::kGImageInfoType, image_destination); +} + +bool JpegImageExtractor::ExtractImage(JpegXmpInfo::Type xmp_info_type, + DataDestination* image_destination) { + bool has_errors = false; + const bool has_image = jpeg_info_.HasImage(xmp_info_type); + Base64DecoderDataDestination base64_decoder(image_destination); + const vector<DataRange>& 
data_ranges = + jpeg_info_.GetSegmentDataRanges(xmp_info_type); + size_t data_ranges_count = data_ranges.size(); + JpegXmpDataExtractor xmp_data_extractor(xmp_info_type, data_ranges_count, + &base64_decoder); + xmp_data_extractor.StartTransfer(); + if (has_image) { + for (size_t index = 0; index < data_ranges_count; ++index) { + const DataRange& data_range = data_ranges[index]; + xmp_data_extractor.SetSegmentIndex(index); +#if PHOTOS_EDITING_FORMATS_IMAGE_IO_JPEG_JPEG_IMAGE_EXTRACTOR_DEBUG + std::stringstream sstream; + sstream << "Segment " << index << " from " << data_range.GetBegin() + << " to " << data_range.GetEnd(); + MessageHandler::Get()->ReportMessage(Message::kStatus, sstream.str()); +#endif // PHOTOS_EDITING_FORMATS_IMAGE_IO_JPEG_JPEG_IMAGE_EXTRACTOR_DEBUG + DataSource::TransferDataResult result = data_source_->TransferData( + data_range, kBestDataSize, &xmp_data_extractor); + if (result == DataSource::kTransferDataError) { + has_errors = true; + break; + } else if (result == DataSource::kTransferDataNone) { + has_errors = true; + MessageHandler::Get()->ReportMessage(Message::kPrematureEndOfDataError, + ""); + } + } + } + xmp_data_extractor.FinishTransfer(); + return has_image && !has_errors; +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/jpeg/jpeg_info_builder.cc b/src/jpeg/jpeg_info_builder.cc new file mode 100644 index 0000000..29ad8bc --- /dev/null +++ b/src/jpeg/jpeg_info_builder.cc @@ -0,0 +1,232 @@ +#include "image_io/jpeg/jpeg_info_builder.h" + +#include <sstream> +#include <string> + +#include "image_io/base/message_handler.h" +#include "image_io/jpeg/jpeg_marker.h" +#include "image_io/jpeg/jpeg_scanner.h" +#include "image_io/jpeg/jpeg_segment.h" + +namespace photos_editing_formats { +namespace image_io { + +using std::string; +using std::stringstream; +using std::vector; + +JpegInfoBuilder::JpegInfoBuilder() + : image_limit_(std::numeric_limits<int>::max()), image_count_(0), + 
gdepth_info_builder_(JpegXmpInfo::kGDepthInfoType), + gimage_info_builder_(JpegXmpInfo::kGImageInfoType) {} + +void JpegInfoBuilder::SetCaptureSegmentBytes( + const std::string& segment_info_type) { + capture_segment_bytes_types_.insert(segment_info_type); +} + +void JpegInfoBuilder::Start(JpegScanner* scanner) { + JpegMarker::Flags marker_flags; + marker_flags[JpegMarker::kSOI] = true; + marker_flags[JpegMarker::kEOI] = true; + marker_flags[JpegMarker::kAPP0] = true; + marker_flags[JpegMarker::kAPP1] = true; + marker_flags[JpegMarker::kAPP2] = true; + scanner->UpdateInterestingMarkerFlags(marker_flags); +} + +void JpegInfoBuilder::Process(JpegScanner* scanner, + const JpegSegment& segment) { + // SOI segments are used to track of the number of images in the JPEG file. + // Apple depth images start with a SOI marker, so store its range for later. + JpegMarker marker = segment.GetMarker(); + if (marker.GetType() == JpegMarker::kSOI) { + image_count_++; + image_mpf_count_.push_back(0); + image_xmp_apple_depth_count_.push_back(0); + image_xmp_apple_matte_count_.push_back(0); + most_recent_soi_marker_range_ = + DataRange(segment.GetBegin(), segment.GetBegin() + JpegMarker::kLength); + } else if (marker.GetType() == JpegMarker::kEOI) { + if (most_recent_soi_marker_range_.IsValid()) { + DataRange image_range(most_recent_soi_marker_range_.GetBegin(), + segment.GetBegin() + JpegMarker::kLength); + jpeg_info_.AddImageRange(image_range); + // This image range might represent the Apple depth or matte image if + // other info indicates such an image is in progress and the apple image + // range has not yet been set. 
+ if (HasAppleDepth() && !jpeg_info_.GetAppleDepthImageRange().IsValid()) { + jpeg_info_.SetAppleDepthImageRange(image_range); + } + if (HasAppleMatte() && !jpeg_info_.GetAppleMatteImageRange().IsValid()) { + jpeg_info_.SetAppleMatteImageRange(image_range); + } + if (image_count_ >= image_limit_) { + scanner->SetDone(); + } + } + } else if (marker.GetType() == JpegMarker::kAPP0) { + // APP0/JFIF segments are interesting. + if (image_count_ > 0 && IsJfifSegment(segment)) { + const auto& data_range = segment.GetDataRange(); + JpegSegmentInfo segment_info(image_count_ - 1, data_range, kJfif); + MaybeCaptureSegmentBytes(kJfif, segment, segment_info.GetMutableBytes()); + jpeg_info_.AddSegmentInfo(segment_info); + } + } else if (marker.GetType() == JpegMarker::kAPP2) { + // APP2/MPF segments. JPEG files with Apple depth information have this + // segment in the primary (first) image of the file, but note their presence + // where ever they are found. + if (image_count_ > 0 && IsMpfSegment(segment)) { + ++image_mpf_count_[image_count_ - 1]; + const auto& data_range = segment.GetDataRange(); + JpegSegmentInfo segment_info(image_count_ - 1, data_range, kMpf); + MaybeCaptureSegmentBytes(kMpf, segment, segment_info.GetMutableBytes()); + jpeg_info_.AddSegmentInfo(segment_info); + } + } else if (marker.GetType() == JpegMarker::kAPP1) { + // APP1/XMP segments. Both Apple depth and GDepthV1 image formats have + // APP1/XMP segments with important information in them. There are two types + // of XMP segments, a primary one (that starts with kXmpId) and an extended + // one (that starts with kExtendedXmpId). Apple depth information is only in + // the former, while GDepthV1/GImageV1 information is in both. + if (IsPrimaryXmpSegment(segment)) { + // The primary XMP segment in a non-primary image (i.e., not the first + // image in the file) may contain Apple depth/matte information. 
+ if (image_count_ > 1 && HasId(segment, kXmpAppleDepthId)) { + ++image_xmp_apple_depth_count_[image_count_ - 1]; + } else if (image_count_ > 1 && HasId(segment, kXmpAppleMatteId)) { + ++image_xmp_apple_matte_count_[image_count_ - 1]; + } else if (image_count_ == 1 && (HasId(segment, kXmpGDepthV1Id) || + HasId(segment, kXmpGImageV1Id))) { + // The primary XMP segment in the primary image may contain GDepthV1 + // and/or GImageV1 data. + SetPrimaryXmpGuid(segment); + SetXmpMimeType(segment, JpegXmpInfo::kGDepthInfoType); + SetXmpMimeType(segment, JpegXmpInfo::kGImageInfoType); + } + } else if (image_count_ == 1 && IsExtendedXmpSegment(segment)) { + // The extended XMP segment in the primary image may contain GDepth and/or + // GImage data. + if (HasMatchingExtendedXmpGuid(segment)) { + gdepth_info_builder_.ProcessSegment(segment); + gimage_info_builder_.ProcessSegment(segment); + } + } else if (image_count_ > 0 && IsExifSegment(segment)) { + const auto& data_range = segment.GetDataRange(); + JpegSegmentInfo segment_info(image_count_ - 1, data_range, kExif); + MaybeCaptureSegmentBytes(kExif, segment, segment_info.GetMutableBytes()); + jpeg_info_.AddSegmentInfo(segment_info); + } + } +} + +void JpegInfoBuilder::Finish(JpegScanner* scanner) { + jpeg_info_.SetSegmentDataRanges( + JpegXmpInfo::kGDepthInfoType, + gdepth_info_builder_.GetPropertySegmentRanges()); + jpeg_info_.SetSegmentDataRanges( + JpegXmpInfo::kGImageInfoType, + gimage_info_builder_.GetPropertySegmentRanges()); +} + +bool JpegInfoBuilder::HasAppleDepth() const { + if (image_count_ > 1 && image_mpf_count_[0]) { + for (size_t image = 1; image < image_xmp_apple_depth_count_.size(); + ++image) { + if (image_xmp_apple_depth_count_[image]) { + return true; + } + } + } + return false; +} + +bool JpegInfoBuilder::HasAppleMatte() const { + if (image_count_ > 1 && image_mpf_count_[0]) { + for (size_t image = 1; image < image_xmp_apple_matte_count_.size(); + ++image) { + if (image_xmp_apple_matte_count_[image]) { + 
return true; + } + } + } + return false; +} + +bool JpegInfoBuilder::IsPrimaryXmpSegment(const JpegSegment& segment) const { + size_t location = segment.GetPayloadDataLocation(); + return segment.BytesAtLocationStartWith(location, kXmpId); +} + +bool JpegInfoBuilder::IsExtendedXmpSegment(const JpegSegment& segment) const { + size_t location = segment.GetPayloadDataLocation(); + return segment.BytesAtLocationStartWith(location, kXmpExtendedId); +} + +bool JpegInfoBuilder::IsMpfSegment(const JpegSegment& segment) const { + size_t payload_data_location = segment.GetPayloadDataLocation(); + return segment.BytesAtLocationStartWith(payload_data_location, kMpf); +} + +bool JpegInfoBuilder::IsExifSegment(const JpegSegment& segment) const { + size_t payload_data_location = segment.GetPayloadDataLocation(); + return segment.BytesAtLocationStartWith(payload_data_location, kExif); +} + +bool JpegInfoBuilder::IsJfifSegment(const JpegSegment& segment) const { + size_t payload_data_location = segment.GetPayloadDataLocation(); + return segment.BytesAtLocationStartWith(payload_data_location, kJfif); +} + +void JpegInfoBuilder::MaybeCaptureSegmentBytes(const std::string& type, + const JpegSegment& segment, + std::vector<Byte>* bytes) const { + if (capture_segment_bytes_types_.count(type) == 0) { + return; + } + bytes->clear(); + bytes->reserve(segment.GetLength()); + size_t segment_begin = segment.GetBegin(); + size_t segment_end = segment.GetEnd(); + for (size_t location = segment_begin; location < segment_end; ++location) { + ValidatedByte validated_byte = segment.GetValidatedByte(location); + if (!validated_byte.is_valid) { + bytes->clear(); + return; + } + bytes->emplace_back(validated_byte.value); + } +} + +bool JpegInfoBuilder::HasMatchingExtendedXmpGuid( + const JpegSegment& segment) const { + if (primary_xmp_guid_.empty()) { + return false; + } + if (segment.GetLength() <= kXmpExtendedHeaderSize) { + return false; + } + size_t start = segment.GetPayloadDataLocation() + 
sizeof(kXmpExtendedId); + return segment.BytesAtLocationStartWith(start, primary_xmp_guid_.c_str()); +} + +bool JpegInfoBuilder::HasId(const JpegSegment& segment, const char* id) const { + return segment.BytesAtLocationContain(segment.GetPayloadDataLocation(), id); +} + +void JpegInfoBuilder::SetPrimaryXmpGuid(const JpegSegment& segment) { + primary_xmp_guid_ = segment.ExtractXmpPropertyValue( + segment.GetPayloadDataLocation(), kXmpHasExtendedId); +} + +void JpegInfoBuilder::SetXmpMimeType(const JpegSegment& segment, + JpegXmpInfo::Type xmp_info_type) { + string property_name = JpegXmpInfo::GetMimePropertyName(xmp_info_type); + jpeg_info_.SetMimeType(xmp_info_type, segment.ExtractXmpPropertyValue( + segment.GetPayloadDataLocation(), + property_name.c_str())); +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/jpeg/jpeg_marker.cc b/src/jpeg/jpeg_marker.cc new file mode 100644 index 0000000..1afe5d9 --- /dev/null +++ b/src/jpeg/jpeg_marker.cc @@ -0,0 +1,98 @@ +#include "image_io/jpeg/jpeg_marker.h" + +#include <iomanip> +#include <limits> +#include <sstream> +#include <string> +#include <vector> + +namespace photos_editing_formats { +namespace image_io { + +using std::string; +using std::stringstream; +using std::vector; + +// Storage for class (static) data members. 
+const size_t JpegMarker::kLength; // = 2; +const size_t JpegMarker::kTypeOffset; // = 1; +const Byte JpegMarker::kZERO; // = 0x00; +const Byte JpegMarker::kStart; // = 0xFF; +const Byte JpegMarker::kSOS; // = 0xDA; +const Byte JpegMarker::kSOI; // = 0xD8; +const Byte JpegMarker::kEOI; // = 0xD9; +const Byte JpegMarker::kAPP0; // = 0xE0; +const Byte JpegMarker::kAPP1; // = 0xE1; +const Byte JpegMarker::kAPP2; // = 0xE2; +const Byte JpegMarker::kFILL; // = 0xFF; + +const std::string JpegMarker::GetName() const { + switch (type_) { + case 0x01: + return "TEM"; + case 0xC4: + return "DHT"; + case 0xC8: + return "JPG"; + case 0xCC: + return "DAC"; + case JpegMarker::kSOI: + return"SOI"; + case JpegMarker::kEOI: + return "EOI"; + case JpegMarker::kSOS: + return "SOS"; + case 0xDB: + return "DQT"; + case 0xDC: + return "DNL"; + case 0xDD: + return "DRI"; + case 0xDE: + return "DHP"; + case 0xDF: + return "EXP"; + case 0xFE: + return "COM"; + } + + stringstream name_stream; + + if (0xC0 <= type_ && type_ <= 0xC0+15) { + name_stream << "SOF" << type_-0xC0; + return name_stream.str(); + } + if (0xD0 <= type_ && type_ <= 0xD0+7) { + name_stream << "RST" << type_-0xD0; + return name_stream.str(); + } + if (JpegMarker::kAPP0 <= type_ && type_ <= JpegMarker::kAPP0+15) { + name_stream << "APP" << type_-JpegMarker::kAPP0; + return name_stream.str(); + } + if (0xF0 <= type_ && type_ <= 0xF0+13) { + name_stream << "JPG" << type_-0xF0; + return name_stream.str(); + } + return GetHexString("0x"); +} + +const std::string JpegMarker::GetHexString(const std::string& prefix) const { + stringstream name_stream; + name_stream << prefix << std::hex << std::uppercase << std::setfill('0') + << std::setw(2) << static_cast<int>(type_); + return name_stream.str(); +} + +bool JpegMarker::HasVariablePayloadSize() const { + return type_ != 0x00 && type_ != 0x01 && (type_ < 0xD0 || type_ > 0xD7) && + type_ != JpegMarker::kSOI && type_ != JpegMarker::kEOI && + type_ != 0xFF; +} + +bool 
JpegMarker::IsEntropySegmentDelimiter() const { + return (type_ == kSOS || (type_ >= 0xD0 && type_ <= 0xD7)); +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/jpeg/jpeg_scanner.cc b/src/jpeg/jpeg_scanner.cc new file mode 100644 index 0000000..85426b2 --- /dev/null +++ b/src/jpeg/jpeg_scanner.cc @@ -0,0 +1,120 @@ +#include "image_io/jpeg/jpeg_scanner.h" + +#include <algorithm> +#include <sstream> + +#include "image_io/base/message_handler.h" +#include "image_io/jpeg/jpeg_segment.h" + +namespace photos_editing_formats { +namespace image_io { + +using std::stringstream; + +/// The minimum size for the DataSegments requested from the DataSource. Using +/// this value will guarantee that a JpegSegment will occupy at most two +/// DataSegments. +const size_t kMinBufferDataRequestSize = 0x10000; + +/// Scans data_source for JPEG segments, handing each interesting one to +/// segment_processor between its Start() and Finish() calls. Returns without +/// doing anything if a scan is already active on this scanner. +/// @param data_source The source of the bytes to scan. +/// @param segment_processor The processor notified of the segments found. +void JpegScanner::Run(DataSource* data_source, + JpegSegmentProcessor* segment_processor) { + if (data_source_) { + // The Run() function is already active. + return; + } + data_source_ = data_source; + segment_processor_ = segment_processor; + current_location_ = 0; + done_ = false; + has_error_ = false; + data_source_->Reset(); + current_segment_ = data_source_->GetDataSegment(current_location_, + kMinBufferDataRequestSize); + segment_processor_->Start(this); + // GetDataSegment() can yield a null segment (GetNextSegment() treats that as + // the end of the data), so only scan when an initial segment was obtained. + if (current_segment_) { + FindAndProcessSegments(); + } + segment_processor_->Finish(this); + data_source_ = nullptr; + segment_processor_ = nullptr; + current_segment_.reset(); + next_segment_.reset(); +} + +void JpegScanner::FindAndProcessSegments() { + while (!IsDone() && !HasError()) { + size_t begin_segment_location = + current_segment_->Find(current_location_, JpegMarker::kStart); + if (begin_segment_location == current_segment_->GetEnd()) { + GetNextSegment(); + if (next_segment_) { + current_location_ = + std::max(current_location_, next_segment_->GetBegin()); + current_segment_ = next_segment_; + next_segment_.reset(); + continue; + } + SetDone(); + break; + } + size_t payload_size = 0; + JpegMarker marker( + 
GetByte(begin_segment_location + JpegMarker::kTypeOffset)); + if (marker.IsValid() && !HasError()) { + payload_size = GetPayloadSize(marker, begin_segment_location); + if (marker.IsValid() && interesting_marker_flags_[marker.GetType()]) { + size_t end_segment_location = + begin_segment_location + JpegMarker::kLength + payload_size; + GetByte(end_segment_location - 1); + if (!HasError()) { + JpegSegment segment(begin_segment_location, end_segment_location, + current_segment_.get(), next_segment_.get()); + segment_processor_->Process(this, segment); + } + } + } + current_location_ = + begin_segment_location + JpegMarker::kLength + payload_size; + } +} + +size_t JpegScanner::GetPayloadSize(const JpegMarker& marker, + size_t begin_location) { + if (marker.HasVariablePayloadSize()) { + return (GetByte(begin_location + JpegMarker::kLength) << 8) | + GetByte(begin_location + JpegMarker::kLength + 1); + } else { + return 0; + } +} + +ValidatedByte JpegScanner::GetValidatedByte(size_t location) { + if (current_segment_->Contains(location)) { + return current_segment_->GetValidatedByte(location); + } + GetNextSegment(); + if (next_segment_ && next_segment_->Contains(location)) { + return next_segment_->GetValidatedByte(location); + } + stringstream sstream; + sstream << location; + MessageHandler::Get()->ReportMessage(Message::kPrematureEndOfDataError, + sstream.str()); + return InvalidByte(); +} + +Byte JpegScanner::GetByte(size_t location) { + ValidatedByte validated_byte = GetValidatedByte(location); + if (validated_byte.is_valid) { + return validated_byte.value; + } + has_error_ = true; + return 0; +} + +void JpegScanner::GetNextSegment() { + if (!next_segment_ && current_segment_) { + next_segment_ = data_source_->GetDataSegment(current_segment_->GetEnd(), + kMinBufferDataRequestSize); + } +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/jpeg/jpeg_segment.cc b/src/jpeg/jpeg_segment.cc new file mode 100644 index 0000000..61a9c97 --- 
/dev/null +++ b/src/jpeg/jpeg_segment.cc @@ -0,0 +1,174 @@ +#include "image_io/jpeg/jpeg_segment.h" + +#include <algorithm> +#include <cctype> +#include <cstring> +#include <iomanip> +#include <sstream> +#include <string> + +namespace photos_editing_formats { +namespace image_io { + +using std::string; +using std::stringstream; + +/// Finds the character allowing it to be preceded by whitespace characters. +/// @param segment The segment in which to look for the character. +/// @param start_location The location at which to start looking. +/// @param value The character value to look for. +/// @return The location of the character, or segment.GetEnd() if it is not +/// found, if an invalid byte is read, or if a non-whitespace character is +/// found first. +static size_t SkipWhiteSpaceFindChar(const JpegSegment& segment, + size_t start_location, char value) { + for (size_t location = start_location; location < segment.GetEnd(); + ++location) { + ValidatedByte validated_byte = segment.GetValidatedByte(location); + if (!validated_byte.is_valid) { + return segment.GetEnd(); + } + if (validated_byte.value == Byte(value)) { + return location; + } + if (!std::isspace(validated_byte.value)) { + return segment.GetEnd(); + } + } + return segment.GetEnd(); +} + +size_t JpegSegment::GetVariablePayloadSize() const { + if (!GetMarker().HasVariablePayloadSize()) { + return 0; + } + size_t payload_location = GetPayloadLocation(); + ValidatedByte hi = GetValidatedByte(payload_location); + ValidatedByte lo = GetValidatedByte(payload_location + 1); + if (!hi.is_valid || !lo.is_valid) { + return 0; + } + return static_cast<size_t>(hi.value) << 8 | static_cast<size_t>(lo.value); +} + +bool JpegSegment::BytesAtLocationStartWith(size_t location, + const char* str) const { + while (*str && Contains(location)) { + ValidatedByte validated_byte = GetValidatedByte(location++); + if (!validated_byte.is_valid || Byte(*str++) != validated_byte.value) { + return false; + } + } + return *str == 0; +} + +bool JpegSegment::BytesAtLocationContain(size_t location, + const char* str) 
const { + return Find(location, str) != GetEnd(); +} + +size_t JpegSegment::Find(size_t location, const char* str) const { + Byte byte0 = static_cast<Byte>(*str); + while ((location = Find(location, byte0)) < GetEnd()) { + if (BytesAtLocationStartWith(location, str)) { + return location; + } + ++location; + } + return GetEnd(); +} + +size_t JpegSegment::Find(size_t start_location, Byte value) const { + if (!begin_segment_ && !end_segment_) { + return GetEnd(); + } + size_t value_location = GetEnd(); + if (begin_segment_ && !end_segment_) { + value_location = begin_segment_->Find(start_location, value); + } else { + value_location = + DataSegment::Find(start_location, value, begin_segment_, end_segment_); + } + return Contains(value_location) ? value_location : GetEnd(); +} + +std::string JpegSegment::ExtractXmpPropertyValue( + size_t start_location, const char* property_name) const { + size_t begin_value_location = + FindXmpPropertyValueBegin(start_location, property_name); + if (begin_value_location != GetEnd()) { + size_t end_value_location = FindXmpPropertyValueEnd(begin_value_location); + if (end_value_location != GetEnd()) { + DataRange data_range(begin_value_location, end_value_location); + return ExtractString(data_range); + } + } + return ""; +} + +size_t JpegSegment::FindXmpPropertyValueBegin(size_t start_location, + const char* property_name) const { + size_t property_location = Find(start_location, property_name); + if (property_location != GetEnd()) { + size_t equal_location = SkipWhiteSpaceFindChar( + *this, property_location + strlen(property_name), '='); + if (equal_location != GetEnd()) { + size_t quote_location = + SkipWhiteSpaceFindChar(*this, equal_location + 1, '"'); + if (quote_location != GetEnd()) { + return quote_location + 1; + } + } + } + return GetEnd(); +} + +size_t JpegSegment::FindXmpPropertyValueEnd(size_t start_location) const { + return Find(start_location, Byte('"')); +} + +std::string JpegSegment::ExtractString(const DataRange& 
data_range) const { + std::string value; + if (Contains(data_range.GetBegin()) && data_range.GetEnd() <= GetEnd()) { + size_t start_location = data_range.GetBegin(); + size_t length = data_range.GetLength(); + value.resize(length, ' '); + for (size_t index = 0; index < length; ++index) { + ValidatedByte validated_byte = GetValidatedByte(start_location + index); + if (!validated_byte.value) { // Invalid bytes have a zero value. + value.resize(0); + break; + } + value[index] = static_cast<char>(validated_byte.value); + } + } + return value; +} + +void JpegSegment::GetPayloadHexDumpStrings(size_t byte_count, + std::string* hex_string, + std::string* ascii_string) const { + stringstream ascii_stream; + stringstream hex_stream; + hex_stream << std::hex << std::uppercase; + + size_t dump_count = GetMarker().IsEntropySegmentDelimiter() + ? byte_count + : std::min(byte_count, GetLength() - 2); + for (size_t index = 0; index < dump_count; ++index) { + ValidatedByte payload_byte = GetValidatedByte(GetPayloadLocation() + index); + if (!payload_byte.is_valid) { + break; + } + Byte value = payload_byte.value; + hex_stream << std::setfill('0') << std::setw(2) << static_cast<int>(value); + ascii_stream << (isprint(value) ? 
static_cast<char>(value) : '.'); + } + size_t current_count = ascii_stream.str().length(); + for (size_t index = current_count; index < byte_count; ++index) { + hex_stream << " "; + ascii_stream << "."; + } + *hex_string = hex_stream.str(); + *ascii_string = ascii_stream.str(); +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/jpeg/jpeg_segment_builder.cc b/src/jpeg/jpeg_segment_builder.cc new file mode 100644 index 0000000..aafb525 --- /dev/null +++ b/src/jpeg/jpeg_segment_builder.cc @@ -0,0 +1,160 @@ +#include "image_io/jpeg/jpeg_segment_builder.h" + +#include "image_io/jpeg/jpeg_marker.h" + +namespace photos_editing_formats { +namespace image_io { + +using std::string; + +// The strings needed to build the xml data associated with XMP data. See +// https://wwwimages2.adobe.com/content/dam/acom/en/devnet/xmp/pdfs/ +// XMP%20SDK%20Release%20cc-2016-08/XMPSpecificationPart1.pdf +const char kXmpMetaPrefix[] = "<x:xmpmeta xmlns:x=\"adobe:ns:meta/\">"; +const char kXmpMetaSuffix[] = "</x:xmpmeta>"; +// The two literals below are concatenated by the compiler, so the second one +// starts with a space: XML requires whitespace between the xmlns:rdf and +// xmlns:xmp attributes. +const char kRdfPrefix[] = + "<rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"" + " xmlns:xmp=\"http://ns.adobe.com/xap/1.0/\">"; +const char kRdfSuffix[] = "</rdf:RDF>"; +const char kRdfDescriptionPrefix[] = "<rdf:Description rdf:about=\"\""; +const char kRdfDescriptionSuffix[] = "/>"; + +bool JpegSegmentBuilder::SetPayloadSize(ByteBuffer* byte_buffer) { + std::uint16_t size = byte_buffer->GetSize(); + if (size == byte_buffer->GetSize() && size >= 4) { + return byte_buffer->SetBigEndianValue(2, size - 2); + } + return false; +} + +string JpegSegmentBuilder::GetByteDataValues() const { + string values; + for (const auto& byte_datum : byte_data_) { + if (!byte_datum.IsValid()) { + return ""; + } + values += byte_datum.GetValue(); + if (byte_datum.GetType() == ByteData::kAscii0) { + values.append(1, 0); + } + } + return values; +} + +void JpegSegmentBuilder::AddMarkerAndSize(Byte marker_type, size_t size) { + JpegMarker 
marker(marker_type); + string hex_string = marker.GetHexString("FF"); + if (marker.HasVariablePayloadSize()) { + hex_string += ByteData::Byte2Hex((size >> 8) & 0xFF); + hex_string += ByteData::Byte2Hex(size & 0xFF); + } + byte_data_.emplace_back(ByteData::kHex, hex_string); +} + +size_t JpegSegmentBuilder::AddMarkerAndSizePlaceholder(Byte marker_type) { + JpegMarker marker(marker_type); + string hex_string = marker.GetHexString("FF"); + if (marker.HasVariablePayloadSize()) { + hex_string += "0000"; + } + byte_data_.emplace_back(ByteData::kHex, hex_string); + return byte_data_.size() - 1; +} + +/// Overwrites the "0000" size placeholder written by +/// AddMarkerAndSizePlaceholder() with the big-endian hex form of size. +/// @param index The index of the marker's entry in byte_data_. +/// @param size The payload size to record; must be in the range [2, 0xFFFF]. +/// @return Whether the entry was a hex marker entry for a marker with a +/// variable size payload and its size digits were replaced. +bool JpegSegmentBuilder::ReplaceSizePlaceholder(size_t index, size_t size) { + if (index >= byte_data_.size() || size < 2 || size > 0xFFFF) { + return false; + } + const ByteData& byte_datum = byte_data_[index]; + if (byte_datum.GetType() != ByteData::kHex) { + return false; + } + string value = byte_datum.GetValue(); + // The entry is "FF", two marker type digits, then four size digits. + if (value.length() < 8) { + return false; + } + Byte flag, type; + if (!ByteData::Hex2Byte(value[0], value[1], &flag) || + !ByteData::Hex2Byte(value[2], value[3], &type)) { + return false; + } + JpegMarker marker(type); + if (flag != JpegMarker::kStart || !marker.IsValid() || + !marker.HasVariablePayloadSize()) { + return false; + } + // The size digits start at offset 4; offsets 2-3 hold the marker type and + // must be left intact. + value.replace(4, 2, ByteData::Byte2Hex((size >> 8) & 0xFF)); + value.replace(6, 2, ByteData::Byte2Hex(size & 0xFF)); + byte_data_[index] = ByteData(ByteData::kHex, value); + return true; +} + +void JpegSegmentBuilder::AddExtendedXmpHeader(const std::string& xmp_guid) { + string guid_value(xmp_guid); + guid_value.resize(kXmpGuidSize, '0'); + byte_data_.emplace_back(ByteData::kAscii0, kXmpExtendedId); + byte_data_.emplace_back(ByteData::kAscii, guid_value); + byte_data_.emplace_back(ByteData::kAscii, string(8, '0')); +} + +void JpegSegmentBuilder::AddXmpMetaPrefix() { + byte_data_.emplace_back(ByteData::kAscii, kXmpMetaPrefix); +} + +void JpegSegmentBuilder::AddXmpMetaSuffix() { + byte_data_.emplace_back(ByteData::kAscii, 
kXmpMetaSuffix); +} + +void JpegSegmentBuilder::AddRdfPrefix() { + byte_data_.emplace_back(ByteData::kAscii, kRdfPrefix); +} + +void JpegSegmentBuilder::AddRdfSuffix() { + byte_data_.emplace_back(ByteData::kAscii, kRdfSuffix); +} + +void JpegSegmentBuilder::AddRdfDescriptionPrefix() { + byte_data_.emplace_back(ByteData::kAscii, kRdfDescriptionPrefix); +} + +void JpegSegmentBuilder::AddRdfDescriptionSuffix() { + byte_data_.emplace_back(ByteData::kAscii, kRdfDescriptionSuffix); +} + +void JpegSegmentBuilder::AddXmpPropertyPrefix( + const std::string& property_name) { + string property_name_equals_quote = property_name + "=\""; + byte_data_.emplace_back(ByteData::kAscii, property_name_equals_quote); +} + +void JpegSegmentBuilder::AddXmpPropertySuffix() { + byte_data_.emplace_back(ByteData::kAscii, "\""); +} + +void JpegSegmentBuilder::AddXmpPropertyNameAndValue( + const std::string& property_name, const std::string& property_value) { + AddXmpPropertyPrefix(property_name); + byte_data_.emplace_back(ByteData::kAscii, property_value); + AddXmpPropertySuffix(); +} + +void JpegSegmentBuilder::AddApp1XmpMarkerAndXmpExtendedHeader( + const std::string& xmp_guid) { + AddMarkerAndSizePlaceholder(JpegMarker::kAPP1); + AddExtendedXmpHeader(xmp_guid); +} + +void JpegSegmentBuilder::AddXmpAndRdfPrefixes() { + AddXmpMetaPrefix(); + AddRdfPrefix(); + AddRdfDescriptionPrefix(); +} + +void JpegSegmentBuilder::AddXmpAndRdfSuffixes() { + AddRdfDescriptionSuffix(); + AddRdfSuffix(); + AddXmpMetaSuffix(); +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/jpeg/jpeg_segment_lister.cc b/src/jpeg/jpeg_segment_lister.cc new file mode 100644 index 0000000..ffdd3f2 --- /dev/null +++ b/src/jpeg/jpeg_segment_lister.cc @@ -0,0 +1,158 @@ +#include "image_io/jpeg/jpeg_segment_lister.h" + +#include <iomanip> +#include <sstream> +#include <string> + +#include "image_io/jpeg/jpeg_marker.h" +#include "image_io/jpeg/jpeg_scanner.h" +#include 
"image_io/jpeg/jpeg_segment.h" + +namespace photos_editing_formats { +namespace image_io { + +/// The width of the type column. +constexpr size_t kTypeWidth = 5; + +/// The width of the number columns. +constexpr size_t kNumWidth = 12; + +/// The number of bytes to dump from each segment. +constexpr size_t kDumpCount = 16; + +/// The width of the ascii dump column, including the surrounding [] brackets. +constexpr size_t kAscWidth = kDumpCount + 2; + +/// The width of the hex dump column, including the surrounding [] brackets. +constexpr size_t kHexWidth = 2 * kDumpCount + 2; + +using std::string; +using std::stringstream; + +namespace { + +/// @param value The value to convert to a string. +/// @return The value paraemter as a string of length kNumWidth. +string Size2String(size_t value) { + stringstream stream; + stream << std::setw(kNumWidth) << std::right << value; + return stream.str(); +} + +/// @param value The value to convert to a hex string. +/// @return The value paraemter as a hex string of length kNumWidth. +string Size2HexString(size_t value) { + stringstream stream; + stream << std::hex << std::uppercase << std::setw(kNumWidth) << std::right + << value; + return stream.str(); +} + +/// @param str The string to add brackets to. +/// @return The str value enclosed by square brackets. +string BracketedString(const string& str) { + stringstream stream; + stream << '[' << str << ']'; + return stream.str(); +} + +/// @param str The string to center. +/// @param width The width to center the string in. +/// @return A string with leading/trailing spaces added so that it is centered. +string CenteredString(const string& str, size_t width) { + if (str.length() >= width) { + return str; + } + size_t spacing = width - str.length(); + size_t leading = spacing / 2; + size_t trailing = spacing - leading; + return string(leading, ' ') + str + string(trailing, ' '); +} + +/// @param type The type value of the segment. 
If this value is empty, then a +/// divider line with dashes is created. +/// @param begin The begin value of the segment. +/// @param count The count (size) of the segment. +/// @param hex_string The hex dump string of the segment. +/// @param asc_string The ascii dump string of the segment. +/// @return A line with the various parameters properly spaced. +string SegmentLine(string type, string begin, string count, string hex_string, + string asc_string) { + if (type.empty()) { + type = string(kTypeWidth, '-'); + begin = count = string(kNumWidth, '-'); + hex_string = string(kHexWidth, '-'); + asc_string = string(kAscWidth, '-'); + } + stringstream line_stream; + line_stream << std::setw(kTypeWidth) << std::left << type << " " + << std::setw(kNumWidth) << std::right << begin << " " + << std::setw(kNumWidth) << std::right << count << " " + << std::setw(kHexWidth) << std::right << hex_string << " " + << std::setw(kAscWidth) << std::right << asc_string; + return line_stream.str(); +} + +/// @param type The type value of the summary. If this value is empty, then a +/// divider line with dashes is created. +/// @param count The number of the segments of the given type. +/// @return A line with the parameters properly spaced. 
+string SummaryLine(string type, string count) { + if (type.empty()) { + type = string(kTypeWidth, '-'); + count = string(kNumWidth, '-'); + } + stringstream line_stream; + line_stream << std::setw(kTypeWidth) << std::left << type << " " + << std::setw(kNumWidth) << std::right << count; + return line_stream.str(); +} + +} // namespace + +JpegSegmentLister::JpegSegmentLister() + : marker_type_counts_(kJpegMarkerArraySize, 0) {} + +void JpegSegmentLister::Start(JpegScanner* scanner) { + scanner->UpdateInterestingMarkerFlags(JpegMarker::Flags().set()); + string divider_line = SegmentLine("", "", "", "", ""); + lines_.push_back(divider_line); + lines_.push_back(SegmentLine("Type", "Offset", "Payload Size", + CenteredString("Hex Payload", kHexWidth), + CenteredString("Ascii Payload", kAscWidth))); + lines_.push_back(divider_line); +} + +void JpegSegmentLister::Process(JpegScanner* scanner, + const JpegSegment& segment) { + JpegMarker marker = segment.GetMarker(); + string hex_payload, ascii_payload; + ++marker_type_counts_[marker.GetType()]; + segment.GetPayloadHexDumpStrings(kDumpCount, &hex_payload, &ascii_payload); + lines_.push_back(SegmentLine( + marker.GetName(), Size2HexString(segment.GetBegin()), + Size2HexString(segment.GetEnd() - segment.GetBegin() - 2), + BracketedString(hex_payload), BracketedString(ascii_payload))); +} + +void JpegSegmentLister::Finish(JpegScanner* scanner) { + lines_.push_back(""); + string divider_line = SummaryLine("", ""); + lines_.push_back(divider_line); + lines_.push_back(SummaryLine("Type", "Count")); + lines_.push_back(divider_line); + int total_segments = 0; + for (int type = 0; type < kJpegMarkerArraySize; ++type) { + int count = marker_type_counts_[type]; + if (count) { + total_segments += count; + lines_.push_back( + SummaryLine(JpegMarker(type).GetName(), Size2String(count))); + } + } + lines_.push_back(divider_line); + lines_.push_back(SummaryLine("TOTAL", Size2String(total_segments))); +} + +} // namespace image_io +} // 
namespace photos_editing_formats diff --git a/src/jpeg/jpeg_xmp_data_extractor.cc b/src/jpeg/jpeg_xmp_data_extractor.cc new file mode 100644 index 0000000..f59dea5 --- /dev/null +++ b/src/jpeg/jpeg_xmp_data_extractor.cc @@ -0,0 +1,103 @@ +#include "image_io/jpeg/jpeg_xmp_data_extractor.h" + +#include <iomanip> +#include <sstream> +#include <string> + +#include "image_io/base/message_handler.h" +#include "image_io/jpeg/jpeg_marker.h" +#include "image_io/jpeg/jpeg_segment.h" + +/// Set this flag to 1 for debugging output. +#define PHOTOS_EDITING_FORMATS_IMAGE_IO_JPEG_JPEG_XMP_DATA_EXTRACTOR_DEBUG 0 + +namespace photos_editing_formats { +namespace image_io { + +using std::string; +using std::stringstream; + +void JpegXmpDataExtractor::StartTransfer() { + data_destination_->StartTransfer(); +} + +DataDestination::TransferStatus JpegXmpDataExtractor::Transfer( + const DataRange& transfer_range, const DataSegment& data_segment) { + if (HasError()) { + return kTransferError; + } +#if PHOTOS_EDITING_FORMATS_IMAGE_IO_JPEG_JPEG_XMP_DATA_EXTRACTOR_DEBUG + stringstream sstream1; + sstream1 << "Segment " << segment_index_ << " of " << last_segment_index_ + << " - data range from " << transfer_range.GetBegin() << " to " + << transfer_range.GetEnd(); + MessageHandler::Get()->ReportMessage(Message::kStatus, sstream1.str()); +#endif // PHOTOS_EDITING_FORMATS_IMAGE_IO_JPEG_JPEG_XMP_DATA_EXTRACTOR_DEBUG + const size_t xmp_header_length = JpegMarker::kLength + + JpegSegment::kVariablePayloadDataOffset + + kXmpExtendedHeaderSize; + size_t encoded_data_begin = transfer_range.GetBegin() + xmp_header_length; + size_t xmp_data_begin = encoded_data_begin; + size_t xmp_data_end = transfer_range.GetEnd(); + if (segment_index_ == 0) { + string property_name = JpegXmpInfo::GetDataPropertyName(xmp_info_type_); + size_t gdepth_data_location = data_segment.Find( + encoded_data_begin, property_name.c_str(), property_name.length()); + if (gdepth_data_location != transfer_range.GetEnd()) { + size_t 
quote_location = data_segment.Find(gdepth_data_location, '"'); + if (quote_location != transfer_range.GetEnd()) { + xmp_data_begin = quote_location + 1; + } + } + if (xmp_data_begin == encoded_data_begin) { + MessageHandler::Get()->ReportMessage(Message::kStringNotFoundError, + property_name + "=\""); + has_error_ = true; + return kTransferError; + } + } + if (segment_index_ == last_segment_index_) { + xmp_data_end = data_segment.Find(xmp_data_begin, '"'); + if (xmp_data_end == transfer_range.GetEnd()) { + MessageHandler::Get()->ReportMessage(Message::kStringNotFoundError, "\""); + has_error_ = true; + return kTransferError; + } + } + + DataRange xmp_data_range(xmp_data_begin, xmp_data_end); +#if PHOTOS_EDITING_FORMATS_IMAGE_IO_JPEG_JPEG_XMP_DATA_EXTRACTOR_DEBUG + string strb((const char*)data_segment.GetBuffer(xmp_data_range.GetBegin()), + 50); + string stre((const char*)data_segment.GetBuffer(xmp_data_end - 50), 50); + stringstream sstream2; + sstream2 << " " << xmp_data_begin << ":" << xmp_data_end << " = " + << xmp_data_range.GetLength() << " bytes: [" << strb << "..." << stre + << "] - "; + MessageHandler::Get()->ReportMessage(Message::kStatus, sstream2.str()); + for (size_t i = transfer_range.GetBegin(); i < data_segment.GetEnd(); + i += 32) { + stringstream hex_stream, ascii_stream; + hex_stream << std::hex << std::setfill('0') << std::setw(2) + << std::uppercase; + for (size_t j = 0; j < 32 && (i + j) < data_segment.GetEnd(); ++j) { + Byte value = data_segment.GetValidatedByte(i + j).value; + hex_stream << " " << size_t(value); + ascii_stream << (isprint(value) ? 
static_cast<char>(value) : '.'); + } + stringstream sstream3; + sstream3 << " * " << std::hex << std::setfill('0') << std::setw(8) + << std::uppercase << i; + sstream3 << ":" << hex_stream.str() << " [" << ascii_stream.str() << "]"; + MessageHandler::Get()->ReportMessage(Message::kStatus, sstream3.str()); + } +#endif // PHOTOS_EDITING_FORMATS_IMAGE_IO_JPEG_JPEG_XMP_DATA_EXTRACTOR_DEBUG + return data_destination_->Transfer(xmp_data_range, data_segment); +} + +void JpegXmpDataExtractor::FinishTransfer() { + data_destination_->FinishTransfer(); +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/jpeg/jpeg_xmp_info.cc b/src/jpeg/jpeg_xmp_info.cc new file mode 100644 index 0000000..e6ad8c6 --- /dev/null +++ b/src/jpeg/jpeg_xmp_info.cc @@ -0,0 +1,48 @@ +#include "image_io/jpeg/jpeg_xmp_info.h" + +namespace photos_editing_formats { +namespace image_io { + +using std::string; +using std::vector; + +const char kGDepthDataPropertyName[] = "GDepth:Data"; +const char kGImageDataPropertyName[] = "GImage:Data"; +const char kGDepthMimePropertyName[] = "GDepth:Mime"; +const char kGImageMimePropertyName[] = "GImage:Mime"; + +void JpegXmpInfo::InitializeVector(vector<JpegXmpInfo>* xmp_info_vector) { + xmp_info_vector->clear(); + xmp_info_vector->push_back(JpegXmpInfo(JpegXmpInfo::kGDepthInfoType)); + xmp_info_vector->push_back(JpegXmpInfo(JpegXmpInfo::kGImageInfoType)); +} + +string JpegXmpInfo::GetIdentifier(Type jpeg_xmp_info_type) { + switch (jpeg_xmp_info_type) { + case kGDepthInfoType: + return kXmpGDepthV1Id; + case kGImageInfoType: + return kXmpGImageV1Id; + } +} + +string JpegXmpInfo::GetDataPropertyName(Type jpeg_xmp_info_type) { + switch (jpeg_xmp_info_type) { + case kGDepthInfoType: + return kGDepthDataPropertyName; + case kGImageInfoType: + return kGImageDataPropertyName; + } +} + +string JpegXmpInfo::GetMimePropertyName(Type jpeg_xmp_info_type) { + switch (jpeg_xmp_info_type) { + case kGDepthInfoType: + return kGDepthMimePropertyName; + 
case kGImageInfoType: + return kGImageMimePropertyName; + } +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/jpeg/jpeg_xmp_info_builder.cc b/src/jpeg/jpeg_xmp_info_builder.cc new file mode 100644 index 0000000..c899f93 --- /dev/null +++ b/src/jpeg/jpeg_xmp_info_builder.cc @@ -0,0 +1,42 @@ +#include "image_io/jpeg/jpeg_xmp_info_builder.h" + +#include <string> + +namespace photos_editing_formats { +namespace image_io { + +void JpegXmpInfoBuilder::ProcessSegment(const JpegSegment& segment) { + // If the property has not yet been found, look for it, and if found, add the + // segment's range to the vector of ranges. + size_t extended_xmp_data_begin = + segment.GetPayloadDataLocation() + kXmpExtendedHeaderSize; + size_t property_value_begin = extended_xmp_data_begin; + if (property_segment_ranges_.empty()) { + std::string property_name = + JpegXmpInfo::GetDataPropertyName(xmp_info_type_); + property_value_begin = segment.FindXmpPropertyValueBegin( + extended_xmp_data_begin, property_name.c_str()); + if (property_value_begin != segment.GetEnd()) { + property_segment_ranges_.push_back(segment.GetDataRange()); + } + } else if (!property_end_segment_range_.IsValid()) { + // The start of the property value was encountered in a previous segment - + // if the closing quote has not yet been found, then add the segment's range + // to the vector or ranges. + property_segment_ranges_.push_back(segment.GetDataRange()); + } + + // If the start of the property value has been found but the end has not, look + // for the end in this segment. 
+ if (!property_segment_ranges_.empty() && + !property_end_segment_range_.IsValid()) { + size_t property_value_end = + segment.FindXmpPropertyValueEnd(property_value_begin); + if (property_value_end != segment.GetEnd()) { + property_end_segment_range_ = segment.GetDataRange(); + } + } +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/utils/file_utils.cc b/src/utils/file_utils.cc new file mode 100644 index 0000000..d61a2cd --- /dev/null +++ b/src/utils/file_utils.cc @@ -0,0 +1,85 @@ +#include "image_io/utils/file_utils.h" + +#include <sys/stat.h> +#include <fstream> +#include <iostream> +#include <memory> + +#include "image_io/base/data_range.h" +#include "image_io/base/message_handler.h" + +namespace photos_editing_formats { +namespace image_io { + +using std::fstream; +using std::istream; +using std::ostream; +using std::unique_ptr; + +/// @param file_name The name of the file to get the size of. +/// @param size If not null, receives the st_size value reported by stat(). +/// @return Whether the stat() call on the file succeeded. +bool GetFileSize(const std::string& file_name, size_t* size) { + struct stat stat_buf; + if (stat(file_name.c_str(), &stat_buf)) { + return false; + } else { + if (size) { + *size = stat_buf.st_size; + } + return true; + } +} + +/// @param file_name The name of the file to open for binary output. +/// @param report_error_policy Whether a failure to open the file is reported +/// to the MessageHandler. +/// @return The opened stream, or a null pointer if the file was not opened. +unique_ptr<ostream> OpenOutputFile(const std::string& file_name, + ReportErrorPolicy report_error_policy) { + auto* file_stream = new fstream(file_name, std::ios::out | std::ios::binary); + if (file_stream && !file_stream->is_open()) { + delete file_stream; + file_stream = nullptr; + if (report_error_policy == ReportErrorPolicy::kReportError) { + MessageHandler::Get()->ReportMessage(Message::kStdLibError, file_name); + } + } + return unique_ptr<ostream>(file_stream); +} + +unique_ptr<istream> OpenInputFile(const std::string& file_name, + ReportErrorPolicy report_error_policy) { + auto* file_stream = new fstream(file_name, std::ios::in | std::ios::binary); + if (file_stream && !file_stream->is_open()) { + delete file_stream; + file_stream = nullptr; + if (report_error_policy == ReportErrorPolicy::kReportError) { + MessageHandler::Get()->ReportMessage(Message::kStdLibError, 
file_name); + } + } + return unique_ptr<istream>(file_stream); +} + +std::shared_ptr<DataSegment> ReadEntireFile( + const std::string& file_name, ReportErrorPolicy report_error_policy) { + size_t buffer_size = 0; + std::shared_ptr<DataSegment> shared_data_segment; + if (GetFileSize(file_name, &buffer_size)) { + unique_ptr<istream> shared_istream = + OpenInputFile(file_name, ReportErrorPolicy::kDontReportError); + if (shared_istream) { + Byte* buffer = new Byte[buffer_size]; + if (buffer) { + shared_data_segment = + DataSegment::Create(DataRange(0, buffer_size), buffer); + shared_istream->read(reinterpret_cast<char*>(buffer), buffer_size); + size_t bytes_read = shared_istream->tellg(); + if (bytes_read != buffer_size) { + shared_data_segment.reset(); + } + } + } + } + if (!shared_data_segment && + report_error_policy == ReportErrorPolicy::kReportError) { + MessageHandler::Get()->ReportMessage(Message::kStdLibError, file_name); + } + return shared_data_segment; +} + +} // namespace image_io +} // namespace photos_editing_formats |