diff options
author | Eino-Ville Talvala <etalvala@google.com> | 2018-11-15 16:07:46 -0800 |
---|---|---|
committer | Eino-Ville Talvala <etalvala@google.com> | 2018-11-15 16:07:46 -0800 |
commit | 2d6d3250dcb304c8ad081dedc8eef6ea48fd669d (patch) | |
tree | 68cc8d5a9bf5a558f46025d740c47cb292eea9f0 /includes/image_io | |
parent | 840fc3b66a9e6593d542ada6fe14d91107fab98d (diff) | |
download | image_io-2d6d3250dcb304c8ad081dedc8eef6ea48fd669d.tar.gz |
Initial commit of libimage_io
Image_io is a library for manipulating image files, especially XMP
metadata within them.
Test: m libimage_io
Bug: 109735087
Bug: 119211681
Change-Id: I657f307be0459fe40154806c7cd388b97bcb0ea5
Diffstat (limited to 'includes/image_io')
51 files changed, 4123 insertions, 0 deletions
diff --git a/includes/image_io/base/byte_buffer.h b/includes/image_io/base/byte_buffer.h new file mode 100644 index 0000000..77a55bc --- /dev/null +++ b/includes/image_io/base/byte_buffer.h @@ -0,0 +1,57 @@ +#ifndef IMAGE_IO_BASE_BYTE_BUFFER_H_ // NOLINT +#define IMAGE_IO_BASE_BYTE_BUFFER_H_ // NOLINT + +#include <memory> +#include <vector> + +#include "image_io/base/byte_data.h" + +namespace photos_editing_formats { +namespace image_io { + +/// This class provides a means to allocate and fill a Byte buffer with the +/// data specified in a vector of ByteData objects, and then to release that +/// buffer to be used in a DataSegment. This is used for testing purposes +/// initially, but has applicability for use in the image_io itself. +class ByteBuffer { + public: + /// Constructs a ByteBuffer using a previously allocated buffer. + /// @param size The size of the buffer. + /// @param buffer The previously allocated buffer + ByteBuffer(size_t size, std::unique_ptr<Byte[]> buffer); + + /// Constructs a ByteBuffer using the vector of byte data. + /// @param byte_data_vector The data to used to define the length and value of + /// the buffer. If any ByteData in the vector is of kHex type, and it + /// contains invalid hex digits, the size value will be set to 0, + /// resulting in a ByteBuffer the IsValid() function of which will return + /// false. + explicit ByteBuffer(const std::vector<ByteData>& byte_data_vector); + + /// @return Whether the byte buffer is valid. + bool IsValid() const { return size_ > 0; } + + /// @return The size of the byte buffer. + size_t GetSize() const { return size_; } + + /// @param location The location in the byte buffer to set. + /// @param value The two-byte value. + /// @return Whether the value was set successfully. + bool SetBigEndianValue(size_t location, std::uint16_t value); + + /// Releases the buffer to the caller and sets this ByteBuffer object to an + /// invalid state. 
That is, after this call IsValid() will return false, and + /// GetSize() will return 0. + /// @return The buffer pointer or nullptr if the ByteBuffer was invalid. The + /// caller is responsible for deleting the buffer when done. + Byte* Release(); + + private: + std::unique_ptr<Byte[]> buffer_; + size_t size_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_BYTE_BUFFER_H_ // NOLINT diff --git a/includes/image_io/base/byte_data.h b/includes/image_io/base/byte_data.h new file mode 100644 index 0000000..7bfc97e --- /dev/null +++ b/includes/image_io/base/byte_data.h @@ -0,0 +1,137 @@ +#ifndef IMAGE_IO_BASE_BYTE_DATA_H_ // NOLINT +#define IMAGE_IO_BASE_BYTE_DATA_H_ // NOLINT + +#include <cctype> +#include <string> + +#include "image_io/base/types.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A string representation of byte data destined to be added to a ByteBuffer, +/// and thence defining a portion of a DataSegment. +class ByteData { + public: + /// The type of data represented in the string value. + enum Type { + /// The string value contains hex digits. + kHex, + + /// The string value contains ascii text. When adding the string to + /// a ByteBuffer, do not add the terminating null character. + kAscii, + + /// The string value contains ascii text. When adding the string to + /// a ByteBuffer, add the terminating null character as well. + kAscii0 + }; + + /// @param type The type of byte data + /// @param value The string value of the byte data. + ByteData(Type type, const std::string& value) : type_(type), value_(value) {} + + /// @return The type of byte data. + Type GetType() const { return type_; } + + /// @return The string value of the byte data. + const std::string& GetValue() const { return value_; } + + /// @return Whether the byte data string value has a valid length and is made + /// up of a valid set of characters. 
+ bool IsValid() const { return IsValidLength() && HasValidCharacters(); } + + /// @return Whether the byte data string value has a valid length. The kAscii + /// and kAscii0 type values have no restrictions, but the kHex type values + /// must have an even number of characters (zero length is ok). + bool IsValidLength() const { + return type_ != kHex || ((value_.length() % 2) == 0u); + } + + /// @return Whether the byte data string value is made up of valid characters. + /// The kAscii and kAscii0 type values have no restrictions, but the kHex + /// type values can only have these characters: [0-9][a-f][A-F] + bool HasValidCharacters() const { + if (type_ != kHex) { + return true; + } + for (const auto& chr : value_) { + if (!isxdigit(chr)) { + return false; + } + } + return true; + } + + /// @return The number of bytes this data requires when converted to Bytes, + /// or 0 if the byte data is invalid. + size_t GetByteCount() const { + if (!IsValid()) { + return 0; + } else if (type_ == kHex) { + return value_.length() / 2; + } else if (type_ == kAscii) { + return value_.length(); + } else { + return value_.length() + 1; + } + } + + /// @param hex_digit The hex character to convert to its decimal equivalent. + /// @return The decimal equivalent of the hex_digit, or -1 if the character is + /// not a valid hex digit. + static int Hex2Decimal(char hex_digit) { + if (hex_digit >= '0' && hex_digit <= '9') { + return static_cast<int>(hex_digit - '0'); + } else if (hex_digit >= 'a' && hex_digit <= 'f') { + return static_cast<int>(hex_digit - 'a' + 10); + } else if (hex_digit >= 'A' && hex_digit <= 'F') { + return static_cast<int>(hex_digit - 'A' + 10); + } else { + return -1; + } + } + + /// @param hi_char The hi-order nibble of the byte. + /// @param lo_char The lo-order nibble of the byte. + /// @param value The pointer to the Byte to receive the value. + /// @return Whether the conversion was successful.
+ static bool Hex2Byte(char hi_char, char lo_char, Byte* value) { + int hi = Hex2Decimal(hi_char); + int lo = Hex2Decimal(lo_char); + if (hi < 0 || lo < 0 || value == nullptr) { + return false; + } + *value = ((hi << 4) | lo); + return true; + } + + /// @param value The byte value to convert to a two digit hex string. + /// @return The hex string equivalent of the value. + static std::string Byte2Hex(Byte value) { + const char kHexChars[] = "0123456789ABCDEF"; + std::string str(2, ' '); + str[0] = kHexChars[(value >> 4) & 0xF]; + str[1] = kHexChars[value & 0xF]; + return str; + } + + /// @param value The size_t value to convert to an eight digit hex string. + /// @return The big endian hex string equivalent of the value. + static std::string Size2BigEndianHex(size_t value) { + std::string hex_string = Byte2Hex((value >> 24) & 0xFF); + hex_string += Byte2Hex((value >> 16) & 0xFF); + hex_string += Byte2Hex((value >> 8) & 0xFF); + hex_string += Byte2Hex(value & 0xFF); + return hex_string; + } + + private: + Type type_; + std::string value_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_BYTE_DATA_H_ // NOLINT diff --git a/includes/image_io/base/cout_message_writer.h b/includes/image_io/base/cout_message_writer.h new file mode 100644 index 0000000..a124ff6 --- /dev/null +++ b/includes/image_io/base/cout_message_writer.h @@ -0,0 +1,22 @@ +#ifndef IMAGE_IO_BASE_COUT_MESSAGE_WRITER_H_ // NOLINT +#define IMAGE_IO_BASE_COUT_MESSAGE_WRITER_H_ // NOLINT + +#include <iostream> + +#include "image_io/base/message_writer.h" + +namespace photos_editing_formats { +namespace image_io { + +/// This subclass of MessageWriter writes messages to cout. 
+class CoutMessageWriter : public MessageWriter { + public: + void WriteMessage(const Message& message) override { + std::cout << GetFormattedMessage(message) << std::endl; + } +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_COUT_MESSAGE_WRITER_H_ // NOLINT diff --git a/includes/image_io/base/data_context.h b/includes/image_io/base/data_context.h new file mode 100644 index 0000000..bef5c98 --- /dev/null +++ b/includes/image_io/base/data_context.h @@ -0,0 +1,143 @@ +#ifndef IMAGE_IO_BASE_DATA_CONTEXT_H_ // NOLINT +#define IMAGE_IO_BASE_DATA_CONTEXT_H_ // NOLINT + +#include <list> +#include <string> + +#include "image_io/base/data_line_map.h" +#include "image_io/base/data_range.h" +#include "image_io/base/data_segment.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A class to represent a position in a textual subrange of a DataSegment, and +/// a means to create an usable error message that shows the relevant line +/// number and line text and the location as a "caret" position. The class also +/// provides a list of names that can be used to add context to the errors. +class DataContext { + public: + /// @param location A location in the data segment. + /// @param range A subrange of the data segment's range. + /// @param data_line_map A map for obtaining the line number and range given + /// the location. + DataContext(size_t location, const DataRange& range, + const DataSegment& segment, const DataLineMap& data_line_map) + : location_(location), + range_(range), + segment_(segment), + line_info_map_(data_line_map) {} + + /// @return The location of the context. + size_t GetLocation() const { return location_; } + + /// @param location A new value to use to set the location of the context. + void SetLocation(size_t location) { location_ = location; } + + /// @param delta A delta value that is added to the location of the context. + /// @return The new location of the context. 
+ size_t IncrementLocation(size_t delta) { + location_ += delta; + return location_; + } + + /// @return The range of the data segment defined by this context. + const DataRange& GetRange() const { return range_; } + + /// @param range Sets a new range to use for this context. + void SetRange(const DataRange& range) { range_ = range; } + + /// @return The data segment of this context. + const DataSegment& GetSegment() const { return segment_; } + + /// @return The line info map of this context. + const DataLineMap& GetDataLineMap() const { return line_info_map_; } + + /// @return Whether the context's location and range are valid for use with + /// the data segment's range. + bool IsValidLocationAndRange() const { + return range_.IsValid() && range_.Contains(location_) && + segment_.GetDataRange().Contains(range_); + } + + /// @return A pointer to the data segment's buffer, cast as a const char* type + /// pointer, or nullptr if the location and/or range are invalid. + const char* GetCharBytes() const { + return IsValidLocationAndRange() + ? reinterpret_cast<const char*>(segment_.GetBuffer(location_)) + : nullptr; + } + + /// @return The number of bytes available from the location of the context to + /// the end of the context's range, or 0 if the location and/or range are + /// invalid. + size_t GetBytesAvailable() const { + return IsValidLocationAndRange() ? range_.GetEnd() - location_ : 0; + } + + /// @return The context's name list that is used when creating error messages. + std::list<std::string>& GetNameList() { return name_list_; } + + /// @return The context's name list that is used when creating error messages. + const std::list<std::string>& GetNameList() const { return name_list_; } + + /// @return An error message that describes the location/range data segment + /// range that leads to the IsValidLocationAndRange() function returning false. + /// Great to use for internal error messages.
+ std::string GetInvalidLocationAndRangeErrorText() const; + + /// @return An error message with the given descriptions for the error and the + /// expectation. See the other GetErrorText() function documentation for more + /// details on the format of the error message. + std::string GetErrorText(const std::string& error_description, + const std::string& expectation_description) const; + + /// @return An error message with the given descriptions for the error and the + /// expectation. The format of the error message is: + /// error_description + /// - prefix_name_list:name_list:postfix_name_list: + /// - at line:number:line_contents + /// - ^expected:expectation_description + /// If error_description is empty then the first line containing it is not + /// written. If expectation_description is empty, then the expected:... part + /// of the last line is not written. If the context's name list, and the + /// pre/postfix name lists are all empty, then that line is not written. + std::string GetErrorText(const std::list<std::string>& prefix_name_list, + const std::list<std::string>& postfix_name_list, + const std::string& error_description, + const std::string& expectation_description) const; + + private: + /// @return The string with the contents of the prefix_name_list, name_list_ + /// and the postfix namelist concatenated with a ":" separator. + std::string GetNamesString( + const std::list<std::string>& prefix_name_list, + const std::list<std::string>& postfix_name_list) const; + + /// @return The line number string of the form line:XX, where XX is the data + /// line's number or "?" if the number is zero. + std::string GetLineNumberString(const DataLine& data_line) const; + + /// Gets the clipped and line ranges using the data line's range value.
+ void GetClippedAndLineRange(const DataLine& data_line, + DataRange* clipped_range, + DataRange* line_range) const; + + /// Gets the line string using the clipped and line ranges and updates the + /// number of spaces before the caret depending on the contents of the line. + std::string GetLineString(const DataRange& clipped_range, + const DataRange& line_range, + size_t* spaces_before_caret) const; + + /// See the constructor for documentation on the data members. + size_t location_; + DataRange range_; + const DataSegment& segment_; + const DataLineMap& line_info_map_; + std::list<std::string> name_list_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_DATA_CONTEXT_H_ // NOLINT diff --git a/includes/image_io/base/data_destination.h b/includes/image_io/base/data_destination.h new file mode 100644 index 0000000..e3c7466 --- /dev/null +++ b/includes/image_io/base/data_destination.h @@ -0,0 +1,74 @@ +#ifndef IMAGE_IO_BASE_DATA_DESTINATION_H_ // NOLINT +#define IMAGE_IO_BASE_DATA_DESTINATION_H_ // NOLINT + +#include "image_io/base/data_range.h" +#include "image_io/base/data_segment.h" +#include "image_io/base/types.h" + +namespace photos_editing_formats { +namespace image_io { + +/// DataDestination is the abstract base class for implementations that can +/// efficiently move data from one location and/or form to another. In such +/// a transfer, the StartTransfer() and FinishTransfer() functions are always +/// called, and in between the Transfer() function may be called zero or more +/// times. See the DataSource class to see how to initiate a transfer operation. +class DataDestination { + public: + /// These values indicate what should be done after a DataSource calls a + /// DataDestination's Transfer() function. + enum TransferStatus { + /// An error occurred in the transfer process. 
DataSource's TransferData() + /// function should stop calling DataDestination's Transfer() function, and + /// return to its caller. + kTransferError, + + /// The transfer was successful. DataSource's TransferData() function can + /// keep calling DataDestination's Transfer() if needed, or if not, + /// return to its caller. + kTransferOk, + + /// The transfer was successful and the DataDestination has decided that + /// it has enough data. DataSource's TransferData() function should stop + /// calling DataDestination's Transfer() function and return to its caller. + kTransferDone + }; + + virtual ~DataDestination() = default; + + /// This function is called prior to the first call to the Transfer() function + /// to allow implementation subclasses a chance to initialize their data + /// members for the transfer process. If a data destination sends its bytes + /// to another data destination, this function must call its StartTransfer() + /// function. + virtual void StartTransfer() = 0; + + /// This function is called to transfer a portion or all of the data in the + /// data segment from the caller to wherever the receiver needs it to go. + /// @param transfer_range The portion of the data in the data_segment that is + /// to be transferred. + /// @param data_segment The data, some or all of which is to be transferred. + /// @return A transfer status value indicating what should be done next. + virtual TransferStatus Transfer(const DataRange& transfer_range, + const DataSegment& data_segment) = 0; + + /// This function is called after the final call to the Transfer() function to + /// allow implementation subclasses a chance to finalize their transfer + /// operations. If a data destination sends its bytes to another data + /// destination, this function must call its FinishTransfer() function. + virtual void FinishTransfer() = 0; + + /// @return The number of bytes written to the data destination. There is some + /// flexibility in the actual value returned.
Most "end-point" destination + /// subclasses return the actual number of bytes received/written. Other + /// "mid-point" destinations are allowed to return the value from the next + /// destination in the chain, or the actual number of bytes they are asked + /// to transfer via the transfer_range parameter of the Transfer() + /// function. + virtual size_t GetBytesTransferred() const = 0; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_DATA_DESTINATION_H_ // NOLINT diff --git a/includes/image_io/base/data_line_map.h b/includes/image_io/base/data_line_map.h new file mode 100644 index 0000000..d934410 --- /dev/null +++ b/includes/image_io/base/data_line_map.h @@ -0,0 +1,55 @@ +#ifndef IMAGE_IO_BASE_DATA_LINE_MAP_H_ // NOLINT +#define IMAGE_IO_BASE_DATA_LINE_MAP_H_ // NOLINT + +#include <vector> + +#include "image_io/base/data_range.h" +#include "image_io/base/data_segment.h" + +namespace photos_editing_formats { +namespace image_io { + +/// The line number and range of a text line in a data source. The range does +/// not include the terminating new line. Valid line numbers are greater than 0. +struct DataLine { + DataLine() : number(0) {} + DataLine(size_t a_number, const DataRange& a_range) + : number(a_number), range(a_range) {} + size_t number; + DataRange range; +}; + +/// A class that maps a data source location to a data line structure that has +/// the line number and data range of the line. +class DataLineMap { + public: + DataLineMap() : last_line_incomplete_(false) {} + + /// Returns the number of data lines in the map. + size_t GetDataLineCount() const; + + /// Returns the data line assocated with the location, or one the number of + /// which is zero and the range of which is invalid. + DataLine GetDataLine(size_t location) const; + + /// Finds the next set of data line numbers and ranges in the segment and adds + /// them to the map. 
If the map is empty, the line numbers will start at 1; + /// otherwise the numbering of the new lines will start at the next line + /// number indicated in the map. + void FindDataLines(const DataRange& range, const DataSegment& segment); + + /// Clears the map and returns it to its startup state. + void Clear(); + + private: + /// The data lines in the map, sorted by ascending range.GetBegin() value. + std::vector<DataLine> data_lines_; + + /// Whether the last data line in the vector is incomplete (has not yet ended + /// in a newline). + bool last_line_incomplete_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_DATA_LINE_MAP_H_ // NOLINT diff --git a/includes/image_io/base/data_match_result.h b/includes/image_io/base/data_match_result.h new file mode 100644 index 0000000..3bde081 --- /dev/null +++ b/includes/image_io/base/data_match_result.h @@ -0,0 +1,127 @@ +#ifndef IMAGE_IO_BASE_DATA_MATCH_RESULT_H_ // NOLINT +#define IMAGE_IO_BASE_DATA_MATCH_RESULT_H_ // NOLINT + +#include "image_io/base/message.h" + +namespace photos_editing_formats { +namespace image_io { + +/// The result of some sort of match operation of the text in a data segment. +/// The data associated with a match result include the number of bytes +/// consumed to produce the result, type of match, and in the case of an error +/// an optional Message describing the error. +class DataMatchResult { + public: + /// The type of match. + enum Type { + /// An error occurred while performing the match operation. + kError = -1, + + /// No match was found. + kNone = 0, + + /// A partial match of some sort was found. + kPartial = 1, + + /// A partial match was found, but the end of the data in the segment or + /// the available range was found. + kPartialOutOfData = 2, + + /// A full match was found.
+ kFull = 3, + }; + + DataMatchResult() : DataMatchResult(kNone, 0) {} + explicit DataMatchResult(Type type) : DataMatchResult(type, 0) {} + DataMatchResult(Type type, size_t bytes_consumed) + : message_(Message::kStatus, 0, ""), + bytes_consumed_(bytes_consumed), + type_(type), + has_message_(false), + can_continue_(true) {} + + /// @return The type of the match result. + Type GetType() const { return type_; } + + /// @return Whether the result indicates processing can continue. + bool CanContinue() const { return can_continue_; } + + /// @return Whether the match result has a message associated with it. + bool HasMessage() const { return has_message_; } + + /// @return The message associated with the result. + const Message& GetMessage() const { return message_; } + + /// @return The number of bytes consumed to produce the result. + size_t GetBytesConsumed() const { return bytes_consumed_; } + + /// @param delta The byte count to increase the bytes consumed value with. + size_t IncrementBytesConsumed(size_t delta) { + bytes_consumed_ += delta; + return bytes_consumed_; + } + + /// @param type The type to use for this match result. + /// @return A reference to this match result. + DataMatchResult& SetType(Type type) { + type_ = type; + return *this; + } + + /// Sets the flag that indicates whether processing can continue. + /// @param can_continue The new value for the can_continue_ flag. + DataMatchResult& SetCanContinue(bool can_continue) { + can_continue_ = can_continue; + return *this; + } + + /// @param bytes_consumed The byte count to use for this match result. + /// @return A reference to this match result. + DataMatchResult& SetBytesConsumed(size_t bytes_consumed) { + bytes_consumed_ = bytes_consumed; + return *this; + } + + /// @param message The message to use for this match result. + /// @return A reference to this match result. 
+ DataMatchResult& SetMessage(const Message& message) { + message_ = message; + has_message_ = true; + return *this; + } + + /// @param type The message type to use for this match result. + /// @param text The message text to use for this match result. + /// @return A reference to this match result. + DataMatchResult& SetMessage(const Message::Type type, + const std::string& text) { + return SetMessage(Message(type, 0, text)); + } + + /// @param other The other result to test for equality with this one. + /// @return Whether this and the other results are equal + bool operator==(const DataMatchResult& other) const { + return can_continue_ == other.can_continue_ && + has_message_ == other.has_message_ && type_ == other.type_ && + bytes_consumed_ == other.bytes_consumed_ && + message_ == other.message_; + } + + /// @param other The other result to test for inequality with this one. + /// @return Whether this and the other results are not equal + bool operator!=(const DataMatchResult& other) const { + return !(*this == other); + } + + private: + Message message_; + size_t bytes_consumed_; + Type type_; + bool has_message_; + bool can_continue_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_DATA_MATCH_RESULT_H_ // NOLINT diff --git a/includes/image_io/base/data_range.h b/includes/image_io/base/data_range.h new file mode 100644 index 0000000..e2e339a --- /dev/null +++ b/includes/image_io/base/data_range.h @@ -0,0 +1,89 @@ +#ifndef IMAGE_IO_BASE_DATA_RANGE_H_ // NOLINT +#define IMAGE_IO_BASE_DATA_RANGE_H_ // NOLINT + +#include <algorithm> + +namespace photos_editing_formats { +namespace image_io { + +/// A class to specify a range of bytes in some sort of array. The range is +/// defined like others in STL to include the begin value and exclude the end +/// value: [begin,end). 
Invalid ranges where end <= begin are ok - no exceptions +/// are ever thrown - but the IsValid() function will return false, and other +/// functions will behave in an appropriate fashion. +class DataRange { + public: + /// The main constructor to define a range. + /// @param begin The begin location of the range. + /// @param end The end location of the range. + DataRange(size_t begin, size_t end) : begin_(begin), end_(end) {} + + /// The default constructor defines an invalid range in which both begin and + /// end are set to 0. + DataRange() : begin_(0), end_(0) {} + + DataRange(const DataRange& data_range) = default; + DataRange& operator=(const DataRange& data_range) = default; + + /// @return The begin value of the range. + size_t GetBegin() const { return begin_; } + + /// @return The end value of the range. + size_t GetEnd() const { return end_; } + + /// @return Whether the range is valid. + bool IsValid() const { return begin_ < end_; } + + /// @return The length of the range, or 0 if the range is invalid. + size_t GetLength() const { return IsValid() ? end_ - begin_ : 0; } + + /// Determines if the location is in this range or not. + /// @param location The location being considered for this test. + /// @return True if the location is in the range, else false. + bool Contains(size_t location) const { + return location >= begin_ && location < end_; + } + + /// Determines if another DataRange is a subrange of this range or not. + /// @param data_range The DataRange being considered for this test. + /// @return True if data_range is subrange of this range, else not. + bool Contains(const DataRange& data_range) const { + return IsValid() && data_range.IsValid() && data_range.begin_ >= begin_ && + data_range.end_ <= end_; + } + + /// Computes the DataRange that is the intersection of another range with this + /// one. If there is no intersection, the resulting range will be invalid.
+ /// @param data_range The DataRange to use compute the intersection with this + /// one. + /// @return The DataRange that represents the intersection, or one that is + /// is invalid if the ranges do not overlap at all. + DataRange GetIntersection(const DataRange& data_range) const { + return DataRange(std::max(data_range.begin_, begin_), + std::min(data_range.end_, end_)); + } + + /// @param rhs A DataRange to compare with this one. + /// @return True if the two ranges are equal (even if invalid), else false. + bool operator==(const DataRange& rhs) const { + return begin_ == rhs.begin_ && end_ == rhs.end_; + } + + /// @param rhs A DataRange to compare with this one. + /// @return True if the two ranges not equal (even if invalid), else false. + bool operator!=(const DataRange& rhs) const { + return begin_ != rhs.begin_ || end_ != rhs.end_; + } + + private: + /// The begin value of the range. + size_t begin_; + + /// The end value of the range. + size_t end_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_DATA_RANGE_H_ // NOLINT diff --git a/includes/image_io/base/data_range_tracking_destination.h b/includes/image_io/base/data_range_tracking_destination.h new file mode 100644 index 0000000..01fbf15 --- /dev/null +++ b/includes/image_io/base/data_range_tracking_destination.h @@ -0,0 +1,57 @@ +#ifndef IMAGE_IO_BASE_DATA_RANGE_TRACKING_DESTINATION_H_ // NOLINT +#define IMAGE_IO_BASE_DATA_RANGE_TRACKING_DESTINATION_H_ // NOLINT + +#include "image_io/base/data_destination.h" +#include "image_io/base/data_range.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A DataDestination that tracks the transfer_range values as they are passed +/// from the caller of the Transfer() function to next DataDestination. 
+/// Instances of this class can be used to track the number of bytes transferred +/// and/or to ensure that multiple calls to the Transfer() function are called +/// with transfer_range values that join in an end-to-begin fashion. This data +/// can be used to make sure that the data transferred meets the expectations of +/// the client. +class DataRangeTrackingDestination : public DataDestination { + public: + /// @param destination The DataDestination that is next in the chain, or + /// nullptr if there is no destination. + explicit DataRangeTrackingDestination(DataDestination* destination) + : destination_(destination), + bytes_transferred_(0), + has_disjoint_transfer_ranges_(false) {} + + /// @return The number of bytes written to the data destination. Bytes are + /// considered "written" even if the next destination is a nullptr. + size_t GetBytesTransferred() const override { return bytes_transferred_; } + + /// @return The tracked data range (see the class comment for how this value + /// is computed). + const DataRange& GetTrackedDataRange() const { return tracked_data_range_; } + + /// @return Whether disjoint transfer data ranges were detected by the + /// Transfer() function. Disjoint transfer ranges occur when two calls + /// to the Transfer() function occur where first_range.GetEnd() is not + /// equal to the second_range.GetBegin().
+ bool HasDisjointTransferRanges() const { + return has_disjoint_transfer_ranges_; + } + + void StartTransfer() override; + TransferStatus Transfer(const DataRange& transfer_range, + const DataSegment& data_segment) override; + void FinishTransfer() override; + + private: + DataDestination* destination_; + DataRange tracked_data_range_; + size_t bytes_transferred_; + bool has_disjoint_transfer_ranges_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_DATA_RANGE_TRACKING_DESTINATION_H_ // NOLINT diff --git a/includes/image_io/base/data_scanner.h b/includes/image_io/base/data_scanner.h new file mode 100644 index 0000000..0d1af26 --- /dev/null +++ b/includes/image_io/base/data_scanner.h @@ -0,0 +1,188 @@ +#ifndef IMAGE_IO_BASE_DATA_SCANNER_H_ // NOLINT +#define IMAGE_IO_BASE_DATA_SCANNER_H_ // NOLINT + +#include <string> + +#include "image_io/base/data_context.h" +#include "image_io/base/data_match_result.h" +#include "image_io/base/data_range.h" +#include "image_io/base/data_segment.h" + +namespace photos_editing_formats { +namespace image_io { + +/// Provides a means to scan a textual portion of a data segment for a sequence +/// of characters and return the data associated with the resulting match. The +/// scanners also maintain state information for repeated calling in case the +/// text data is split over multipe data segments. The scanners also maintain +/// a data range where the result of the scanner's match can be found. These +/// scanners are written to allow copy semantics to make memory management +/// easier. Several types of scanners are provided. +class DataScanner { + public: + /// The type of scanner. + enum Type { + /// A scanner to look for text that matches exactly one or more characters. + /// The text to look for is given to the CreateLiteralScanner() function. + kLiteral, + + /// A scanner to look for text that matches a name. A name must begin with + /// one of the characters in "[A-Z][a-z]:_". 
Subsequent characters can + /// include "[0-9]-.". + kName, + + /// A scanner to look for a quoted string. A quoted string is delimited by + /// a single (') or double (") quote, and can include any character except the + /// quote mark. + kQuotedString, + + /// A scanner to look for one character from a set of characters. The set of + /// characters is given to the CreateSentinelScanner() function. + kSentinel, + + /// A scanner to accept all text up to and including a literal text value. + /// The text to look for is given to the CreateThroughLiteralScanner() + /// function. + kThroughLiteral, + + /// A scanner to skip white space characters. At least one whitespace + /// character must be scanned. The set of white space characters is given + /// by the GetWhitespaceChars() function. + kWhitespace, + + /// A scanner to skip white space characters, but unlike the kWhitespace + /// scanner, this scanner will not return an error result if there are no + /// whitespace characters scanned. + kOptionalWhitespace, + }; + + /// @return The set of whitespace characters: " \t\n\r". + static std::string GetWhitespaceChars(); + + /// @param literal The literal to use for the scanner. + /// @return A kLiteral type scanner. + static DataScanner CreateLiteralScanner(const std::string& literal); + + /// @return A kName type scanner. + static DataScanner CreateNameScanner(); + + /// @return A kQuotedString type scanner. + static DataScanner CreateQuotedStringScanner(); + + /// @param sentinels The set of sentinels to scan for. The "~" character is + /// used as an "abbreviation" for any of the characters that can make up the + /// first character of a kName type sentinel. + /// @return A kSentinel type scanner. + static DataScanner CreateSentinelScanner(const std::string& sentinels); + + /// @param literal The literal to use for the scanner. + /// @return A kThroughLiteral type scanner. 
+ static DataScanner CreateThroughLiteralScanner(const std::string& literal); + + /// @return A kWhitespace type scanner. + static DataScanner CreateWhitespaceScanner(); + + /// @return A kOptionalWhitespace type scanner. + static DataScanner CreateOptionalWhitespaceScanner(); + + /// @return The type of the scanner. + Type GetType() const { return type_; } + + /// @return A description of the scanner, based on the type. + std::string GetDescription() const; + + /// @return The literal value of a kLiteral or kThroughLiteral type scanner, + /// or an empty string otherwise. + std::string GetLiteral() const; + + /// @return The set of sentinels for a kSentinel type scanner, or an empty + /// string otherwise. + std::string GetSentenels() const; + + /// @return The sentinel character from the set of characters passed to the + /// CreateSentinelScanner() function that was matched by a successful scan + /// operation, or 0 otherwise. + char GetSentinel() const; + + /// @return The range of characters that the scanner found during one or more + /// successful Scan() function operations. + const DataRange& GetTokenRange() const { return token_range_; } + + /// @return The number of times the Scan() function has been called. + size_t GetScanCallCount() const { return scan_call_count_; } + + /// @param context The data context to use for the scan operation. + /// @return The match result of the scan operation. + DataMatchResult Scan(const DataContext& context); + + /// Reset the scanner's token range to an invalid value. + void ResetTokenRange(); + + /// Reset the scanner state to the value it had when it was first constructed. + void Reset(); + + private: + explicit DataScanner(Type type) : DataScanner(type, "") {} + DataScanner(Type type, const std::string& literal_or_sentinels) + : literal_or_sentinels_(literal_or_sentinels), + data_(0), + scan_call_count_(0), + type_(type) {} + + /// @param delta_length The byte count to use to extend the token range end. 
+ /// @return The new length of the token range. + size_t ExtendTokenLength(size_t delta_length); + + /// The worker functions for scanning each type of literal. + /// @param cbytes The pointer value to the buffer at the context's location. + /// @param bytes_available The number of bytes available for the scan. + /// @param context The data context for message generation purposes. + DataMatchResult ScanLiteral(const char* cbytes, size_t bytes_available, + const DataContext& context); + DataMatchResult ScanName(const char* cbytes, size_t bytes_available, + const DataContext& context); + DataMatchResult ScanQuotedString(const char* cbytes, size_t bytes_available, + const DataContext& context); + DataMatchResult ScanSentinel(const char* cbytes, size_t bytes_available, + const DataContext& context); + DataMatchResult ScanThroughLiteral(const char* cbytes, size_t bytes_available, + const DataContext& context); + DataMatchResult ScanWhitespace(const char* cbytes, size_t bytes_available, + const DataContext& context); + + /// Sets the match result to kError and generates an internal error message. + /// @param context The data context for message generation purposes. + /// @param error_description A description of the type of internal error. + /// @param result The result to receive the kError type and message. + void SetInternalError(const DataContext& context, + const std::string& error_description, + DataMatchResult* result); + + /// Sets the match result to kError and generates a syntax error message. + /// @param context The data context for message generation purposes. + /// @param error_description A description of the type of syntax error. + /// @param result The result to receive the kError type and message. + void SetSyntaxError(const DataContext& context, + const std::string& error_description, + DataMatchResult* result); + + /// The string used for kLiteral, kThroughLiteral and kSentinel type scanners. 
+ std::string literal_or_sentinels_; + + /// The token range built by one or more calls to the Scan() function. + DataRange token_range_; + + /// State data used in different ways by different scanner types. + size_t data_; + + /// The number of times the scanner's Scan function has been called. + size_t scan_call_count_; + + /// The type of scanner. + Type type_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_DATA_SCANNER_H_ // NOLINT diff --git a/includes/image_io/base/data_segment.h b/includes/image_io/base/data_segment.h new file mode 100644 index 0000000..ec6d584 --- /dev/null +++ b/includes/image_io/base/data_segment.h @@ -0,0 +1,183 @@ +#ifndef IMAGE_IO_BASE_DATA_SEGMENT_H_ // NOLINT +#define IMAGE_IO_BASE_DATA_SEGMENT_H_ // NOLINT + +#include <memory> + +#include "image_io/base/data_range.h" +#include "image_io/base/types.h" + +namespace photos_editing_formats { +namespace image_io { + +class DataRange; + +/// A DataSegment represents a portion of a larger "array" of byte data. Each +/// data segment knows (via its DataRange member) its location in the larger +/// array. The byte data of each data segment is accessed by its location +/// in that larger array. Instances of DataSegment are typically created or +/// managed by instances of DataSource which offers them up to client code. +/// A shared_ptr is used to control the lifetime of DataSegments. For more +/// information on this, see the comments in DataSource. +class DataSegment { + public: + /// A creation parameter for indicating whether or not, upon destruction, the + /// DataSegment's buffer should be deallocated. + enum BufferDispositionPolicy { + /// Policy to deallocate the buffer upon destruction. + kDelete, + + /// Policy to leave the buffer upon destruction. + kDontDelete + }; + + /// Creates a new DataSegment with the given DataRange and byte buffer. + /// @param data_range The DataRange of the byte data in the buffer. 
+ /// @param buffer The byte data of the data segment. + /// @param buffer_policy The buffer ownership policy (Delete or DontDelete). + /// @return A shared pointer to the data segment. + static std::shared_ptr<DataSegment> Create( + const DataRange& data_range, const Byte* buffer, + BufferDispositionPolicy buffer_policy); + + /// Creates a new DataSegment with the given DataRange and byte buffer. + /// The DataSegment takes ownership of the buffer and will delete the buffer + /// when the DataSegment itself is destroyed. + /// @param data_range The DataRange of the byte data in the buffer. + /// @param buffer The byte data of the data segment; The DataSegment takes + /// ownership of the buffer and will delete it when it is deleted. + /// @return A shared pointer to the data segment. + static std::shared_ptr<DataSegment> Create(const DataRange& data_range, + const Byte* buffer) { + return Create(data_range, buffer, BufferDispositionPolicy::kDelete); + } + + /// @return The DataRange of the data in the segment. + const DataRange& GetDataRange() const { return data_range_; } + + /// @return The begin location of the segment's data range. + size_t GetBegin() const { return data_range_.GetBegin(); } + + /// @return The end location of the segment's data range. + size_t GetEnd() const { return data_range_.GetEnd(); } + + /// @return The length of the segment's data range. + size_t GetLength() const { return data_range_.GetLength(); } + + /// @return Whether the segment's range contains the given location. + bool Contains(size_t location) const { + return data_range_.Contains(location); + } + + /// Gets the validated byte value of the segment at the given location. + /// @param location The location in the segment to get the byte value of. + /// @return The validated byte at the given location or 0/false if the + /// segment's range does not contain the location. + ValidatedByte GetValidatedByte(size_t location) const { + return Contains(location) ? 
ValidatedByte(buffer_[location - GetBegin()]) + : InvalidByte(); + } + + /// Returns a pointer to the byte at the given location in the data segment. + /// @param location The location of the byte to get the buffer pointer of. + /// @return The pointer to the byte in the segment's buffer, or the nullptr + /// if the segment does not contain the location. + const Byte* GetBuffer(size_t location) const { + return Contains(location) ? &buffer_[location - GetBegin()] : nullptr; + } + + /// Finds the location of the string in the data segment. Although a data + /// segment has an array of Bytes (an unsigned quantity), very often the + /// data they contain are strings - a sequence of bytes, none of which have + /// the sign bit set. As an aid in expressing the algorithms for finding such + /// strings, this function allows client code to express the searched-for + /// string as a zero-terminated array of chars. + /// @param start_location The location at which to start looking. + /// @param str The string to find in the segment. The strlen function is + /// used to find the length of the string to search for. + /// @return The location of the start of the string, or the segment's end + /// location value. + size_t Find(size_t start_location, const char* str) const { + return Find(start_location, str, strlen(str)); + } + + /// Finds the location of the string in the data segment. Although a data + /// segment has an array of Bytes (an unsigned quantity), very often the + /// data they contain are strings - a sequence of bytes, none of which have + /// the sign bit set. As an aid in expressing the algorithms for finding such + /// strings, this function allows client code to express the searched-for + /// string as an array of chars and a length. + /// @param location The location at which to start looking. + /// @param str The string to find in the segment. + /// @param str_length The length of the string to find. 
+ /// @return The location of the start of the string, or the segment's end + /// location value. + size_t Find(size_t location, const char* str, size_t str_length) const; + + /// Finds the location of the given byte value in the data segment. + /// @param start_location The location at which to start looking. + /// @param value The byte value to search for. + /// @return The location of the value, or the segment's end location value. + size_t Find(size_t start_location, Byte value) const; + + /// Sometimes the data of concern spans two data segments. For instance, a + /// JPEG data segment marker may start at the end of one data segment and + /// extend into the following segment. This helper function makes it easier to + /// write code to treat two data segments as one entity for the purpose of + /// getting the byte value at the given location. + /// @param location The location in the segment to get the byte value of. + /// @param segment1 The first data segment to use (may be nullptr). + /// @param segment2 The second data segment to use (may be nullptr). + /// @return The validated byte at the given location, or InvalidByte() if + /// neither segment contains the location. + static ValidatedByte GetValidatedByte(size_t location, + const DataSegment* segment1, + const DataSegment* segment2); + + /// Sometimes the data of concern spans two data segments. For instance, a + /// JPEG data segment marker may start at the end of one data segment and + /// extend into the following segment. This helper function makes it easier to + /// write code to treat two data segments as one entity for the purpose of + /// finding a byte value. + /// @param start_location The location at which to start looking. + /// @param value The byte value to search for. + /// @param segment1 The first data segment to use. + /// @param segment2 The second data segment to use. 
+ /// @return The location of the value if it's found and the two segments are + /// contiguous (i.e., if segment1->GetEnd() == segment2->GetBegin()), + /// else the max(segment1->GetEnd(), segment2->GetEnd()). + static size_t Find(size_t start_location, Byte value, + const DataSegment* segment1, const DataSegment* segment2); + + private: + DataSegment(const DataRange& data_range, const Byte* buffer, + BufferDispositionPolicy buffer_policy) + : data_range_(data_range), + buffer_(buffer), + buffer_policy_(buffer_policy) {} + + ~DataSegment() { + // If kDelete is not set (default) the buffer memory will remain allocated. + if (buffer_policy_ == BufferDispositionPolicy::kDelete) { + delete[] buffer_; + } + } + + /// The default_delete needs to be a friend so that the shared pointer can + /// call the private destructor. + friend struct std::default_delete<DataSegment>; + + private: + /// The data range of the data segment. + DataRange data_range_; + + /// The buffer that contains the segment data. + const Byte* buffer_; + + /// The policy that dictates whether or not the buffer will be deallocated. + BufferDispositionPolicy buffer_policy_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_DATA_SEGMENT_H_ // NOLINT diff --git a/includes/image_io/base/data_segment_data_source.h b/includes/image_io/base/data_segment_data_source.h new file mode 100644 index 0000000..6ef6c3e --- /dev/null +++ b/includes/image_io/base/data_segment_data_source.h @@ -0,0 +1,30 @@ +#ifndef IMAGE_IO_BASE_DATA_SEGMENT_DATA_SOURCE_H_ // NOLINT +#define IMAGE_IO_BASE_DATA_SEGMENT_DATA_SOURCE_H_ // NOLINT + +#include "image_io/base/data_source.h" + +namespace photos_editing_formats { +namespace image_io { + +/// DataSegmentDataSource is an implementation of DataSource that provides +/// access to requested DataSegment instances from a single (possibly large) +/// in-memory DataSegment. 
+class DataSegmentDataSource : public DataSource { + public: + explicit DataSegmentDataSource( + const std::shared_ptr<DataSegment>& shared_data_segment) + : shared_data_segment_(shared_data_segment) {} + void Reset() override; + std::shared_ptr<DataSegment> GetDataSegment(size_t begin, + size_t min_size) override; + TransferDataResult TransferData(const DataRange& data_range, size_t best_size, + DataDestination* data_destination) override; + + private: + std::shared_ptr<DataSegment> shared_data_segment_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_DATA_SEGMENT_DATA_SOURCE_H_ // NOLINT diff --git a/includes/image_io/base/data_source.h b/includes/image_io/base/data_source.h new file mode 100644 index 0000000..8b96d4b --- /dev/null +++ b/includes/image_io/base/data_source.h @@ -0,0 +1,104 @@ +#ifndef IMAGE_IO_BASE_DATA_SOURCE_H_ // NOLINT +#define IMAGE_IO_BASE_DATA_SOURCE_H_ // NOLINT + +#include <memory> + +#include "image_io/base/data_destination.h" +#include "image_io/base/data_range.h" +#include "image_io/base/data_segment.h" +#include "image_io/base/types.h" + +namespace photos_editing_formats { +namespace image_io { + +/// DataSource is the abstract base class for implementations that can provide +/// data from a file or memory buffer or some other container. A data source +/// supports both a pull model for obtaining data, via the GetDataSegment() +/// function, and a push model via a collaborating DataDestination and the +/// TransferData() function. +/// +/// Pushing with a DataSource can be a convenient alternative to using a +/// DataDestination directly when there is a large amount of data that is +/// located in a file, or some type of memory structure that can be "wrapped" in +/// a DataSource. The push model provides the most efficient (i.e., least +/// copying of bytes) way to move data from one place to another. 
For usage of +/// this library on mobile devices with limited memory, this mode of operation +/// is the most attractive. Unfortunately, the push model typically assumes the +/// code knows what portion of bytes to push. The discovery of that portion is +/// most often easier to accomplish with a pull model. +/// +/// The pull model, while needed for efficient implementation of objects that +/// scan the contents of a data source, does represent a challenge when managing +/// the lifetime of the DataSegment instances returned by the GetDataSegment() +/// function - depending on the implementation of the DataSource, the segment it +/// returns might represent the entire array of data, or it might represent just +/// a portion of it that was read from a file. In the first case, the DataSource +/// would probably want to keep ownership of the DataSegment, while in the other +/// case, the DataSource might very well want to pass ownership on to the caller +/// of GetDataSegment(). This problem is solved by allowing sharing of the +/// ownership of the DataSegment via a std::shared_ptr. +/// +/// The push model implemented does not have these complications, so the +/// DataDestination class's Transfer() function takes a simple const reference +/// to a DataSegment, with the ownership firmly held by the DataSource. +class DataSource { + public: + /// The result of a TransferData() operation. + enum TransferDataResult { + /// An error occurred while calling DataDestination::Transfer(), or the + /// data destination was a nullptr. + kTransferDataError, + + /// The DataDestination::Transfer() function was not called because the + /// DataRange was empty or the DataSource was not able to supply any data + /// in the range. + kTransferDataNone, + + /// The data transfer was successful. 
+ kTransferDataSuccess + }; + + virtual ~DataSource() = default; + + /// Requests the data source to return a DataSegment with a range starting at + /// the given begin location and extending min_size bytes in length if + /// possible. (If not possible, a shorter range of data may be returned. A + /// larger range may also be returned, depending on the DataSource). + /// If a non-null data segment is returned, its DataRange is guaranteed to have + /// at least some overlap with the requested range. + /// @param begin The begin location of the requested data segment. + /// @param min_size The min size of the requested data segment. The size of + /// the data segment returned may be larger depending on the data source. + /// @return The data segment, or a nullptr if the range of data did not exist + /// in the data source. + virtual std::shared_ptr<DataSegment> GetDataSegment(size_t begin, + size_t min_size) = 0; + + /// Some data sources may need to be reset if they are accessed via repeated + /// calls to GetDataSegment() all the way to the end of the array of bytes. + /// (For example a file-based DataSource might have eof bits that need to be + /// cleared before re-reading data). This function does that kind of thing. + virtual void Reset() = 0; + + /// Requests the data source to transfer data in the given range to the given + /// DataDestination. Callers must call the data destination's StartTransfer() + /// function before calling this function, and call its FinishTransfer() + /// after this call. This function will call the data destination's Transfer() + /// function zero or more times. + /// @param data_range The range of data to transfer from this data source to + /// the destination. + /// @param best_size The "best" size of the requested data segment to be sent + /// to the data destination. 
The size of the data segment that is sent to + /// the data destination may be larger than this value, depending on the + /// data source, or it may be smaller if the requested data range extends + /// past the end of the data source's range. + /// @param data_destination The receiver of the data. + virtual TransferDataResult TransferData( + const DataRange& data_range, size_t best_size, + DataDestination* data_destination) = 0; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_DATA_SOURCE_H_ // NOLINT diff --git a/includes/image_io/base/istream_data_source.h b/includes/image_io/base/istream_data_source.h new file mode 100644 index 0000000..4564e14 --- /dev/null +++ b/includes/image_io/base/istream_data_source.h @@ -0,0 +1,46 @@ +#ifndef IMAGE_IO_BASE_ISTREAM_DATA_SOURCE_H_ // NOLINT +#define IMAGE_IO_BASE_ISTREAM_DATA_SOURCE_H_ // NOLINT + +#include <iostream> +#include <memory> +#include <utility> + +#include "image_io/base/data_source.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A DataSource that obtains data from an istream. +class IStreamDataSource : public DataSource { + public: + /// Constructs an IStreamDataSource using the given istream. + /// @param istream_ptr The istream from which to read. + explicit IStreamDataSource(std::unique_ptr<std::istream> istream_ptr) + : istream_(std::move(istream_ptr)) {} + + void Reset() override; + std::shared_ptr<DataSegment> GetDataSegment(size_t begin, + size_t min_size) override; + TransferDataResult TransferData(const DataRange& data_range, size_t best_size, + DataDestination* data_destination) override; + + private: + /// The worker function to create a DataSegment and fill it with the given + /// number of bytes read from the istream, starting at the given location. + /// @param begin The location in the istream at which to start reading. + /// @param count The number of bytes to read. 
+ /// @return A DataSegment pointer, or nullptr if the read failed. + std::shared_ptr<DataSegment> Read(size_t begin, size_t count); + + private: + /// The istream from which to read. + std::unique_ptr<std::istream> istream_; + + /// The current data segment that was read in the GetDataSegment() function. + std::shared_ptr<DataSegment> current_data_segment_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_ISTREAM_DATA_SOURCE_H_ // NOLINT diff --git a/includes/image_io/base/message.h b/includes/image_io/base/message.h new file mode 100644 index 0000000..8c225d8 --- /dev/null +++ b/includes/image_io/base/message.h @@ -0,0 +1,84 @@ +#ifndef IMAGE_IO_BASE_MESSAGE_H_ // NOLINT +#define IMAGE_IO_BASE_MESSAGE_H_ // NOLINT + +#include <string> + +namespace photos_editing_formats { +namespace image_io { + +/// A message that is reported to and managed by the MessageHandler, and +/// possibly written by a MessageWriter. +class Message { + public: + /// The types of Messages. + enum Type { + /// A Status message. + kStatus, + + /// An error from the stdlib was detected. The std::errno variable can be + /// used to programmatically decide what to do, or use the std::strerror + /// function to get a string description of the error. + kStdLibError, + + /// A premature end of the data being processed was found. + kPrematureEndOfDataError, + + /// An expected string value was not found in the data being processed. + kStringNotFoundError, + + /// An error occurred while decoding the data being processed. + kDecodingError, + + /// An error occurred while parsing the data. + kSyntaxError, + + /// An error occurred while using the data. + kValueError, + + /// An internal error of some sort occurred. + kInternalError + }; + + /// @param type The type of message to create. + /// @param system_errno The errno value to use for kStdLibError type messages. + /// @param text The text of the message. 
+ Message(Type type, int system_errno, const std::string& text) + : type_(type), system_errno_(system_errno), text_(text) {} + + Message() = delete; + + bool operator==(const Message& rhs) const { + return type_ == rhs.type_ && system_errno_ == rhs.system_errno_ && + text_ == rhs.text_; + } + + bool operator!=(const Message& rhs) const { + return type_ != rhs.type_ || system_errno_ != rhs.system_errno_ || + text_ != rhs.text_; + } + + /// @return The type of message. + Type GetType() const { return type_; } + + /// @return The system errno value used for kStdLibError messages. + int GetSystemErrno() const { return system_errno_; } + + /// @return The text of the message. + const std::string& GetText() const { return text_; } + + private: + /// The type of message. + Type type_; + + /// If type == kStdLibError, the system's errno value at the time + /// the error was reported, else its value is 0. + int system_errno_; + + /// The text associated with the message. + std::string text_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_MESSAGE_H_ // NOLINT diff --git a/includes/image_io/base/message_handler.h b/includes/image_io/base/message_handler.h new file mode 100644 index 0000000..dc33679 --- /dev/null +++ b/includes/image_io/base/message_handler.h @@ -0,0 +1,102 @@ +#ifndef IMAGE_IO_BASE_MESSAGE_HANDLER_H_ // NOLINT +#define IMAGE_IO_BASE_MESSAGE_HANDLER_H_ // NOLINT + +#include <memory> +#include <vector> + +#include "image_io/base/message.h" +#include "image_io/base/message_store.h" +#include "image_io/base/message_writer.h" + +namespace photos_editing_formats { +namespace image_io { + +/// MessageHandler provides the functions that all the code in this library uses +/// to report status and error conditions. +class MessageHandler { + public: + /// Initializes the MessageHandler for client use. 
Multithread applications + /// might find this function useful to call in their initialization section, + /// to guarantee that threads will not create race conditions when calling the + /// Get function for the first time. + static void Init(std::unique_ptr<MessageWriter> message_writer, + std::unique_ptr<MessageStore> message_store); + + /// This function is thread-safe as long as the Init() function is called in + /// non-multiple-threaded startup code; if the Init() function was not called + /// there may be race conditions that cause the message handler returned from + /// Get() called in one thread to be different from that returned by the call + /// in a different thread. + /// @return The message handler used by the code in this library. + static MessageHandler* Get(); + + /// Sets the message writer to use when ReportMessage() is called. If client + /// code does not call this function, the MessageHandler returned by the Get() + /// function will have a CoutMessageWriter by default. If client code calls + /// this function with a null, then ReportMessage() will not write messages at + /// all, but just add them to the messages store. + /// @param message_writer The message writer that ReportMessage uses, or null. + void SetMessageWriter(std::unique_ptr<MessageWriter> message_writer); + + /// Sets the message store to use when ReportMessage() is called. If client + /// code does not call this function, the MessageHandler returned by the Get() + /// function will have a VectorMessageStore by default. If client code calls + /// this function with a null, then ReportMessage() will not save messages at + /// all, but just write them to the messages writer. + /// @param message_store The message store that ReportMessage uses, or null. + void SetMessageStore(std::unique_ptr<MessageStore> message_store); + + /// Clears the messages maintained by the message handler's store. 
Client code + /// should call this function before calling any other standalone or class + /// function in this library so as to provide a clean starting point with + /// respect to error and status messages. Once all the calls have been made, + /// client code should examine the messages or call HasErrorMessages() to + /// determine whether the calls succeeded or not. Finally client code + /// should call this function again so that memory is not leaked when it is + /// done using this library. + void ClearMessages() { + if (message_store_) { + message_store_->ClearMessages(); + } + } + + /// @return Whether the message handler's store has error messages or not. + bool HasErrorMessages() const { + return message_store_ ? message_store_->HasErrorMessages() : false; + } + + /// @return The vector of errors maintained by the message handler's store. + std::vector<Message> GetMessages() const { + return message_store_ ? message_store_->GetMessages() + : std::vector<Message>(); + } + + /// Reports an error or a status message. This function is called from library + /// code when it detects an error condition or wants to report status. If the + /// message type is Message::kStdLibError, then the current value of the + /// system's errno variable is used when the message is created. The message + /// is added to the messages vector and if the message writer is not null, its + /// WriteMessage function is called. + /// @param type The type of message. + /// @param text Text associated with the message. + void ReportMessage(Message::Type type, const std::string& text); + + /// @param message The message to report. + void ReportMessage(const Message& message); + + private: + MessageHandler() = default; + ~MessageHandler(); + + private: + /// The message writer used by ReportMessage, or null. + std::unique_ptr<MessageWriter> message_writer_; + + /// The message store for saving messages for later, or null. 
+ std::unique_ptr<MessageStore> message_store_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_MESSAGE_HANDLER_H_ // NOLINT diff --git a/includes/image_io/base/message_store.h b/includes/image_io/base/message_store.h new file mode 100644 index 0000000..1d492f1 --- /dev/null +++ b/includes/image_io/base/message_store.h @@ -0,0 +1,80 @@ +#ifndef IMAGE_IO_BASE_MESSAGE_STORE_H_ // NOLINT +#define IMAGE_IO_BASE_MESSAGE_STORE_H_ // NOLINT + +#include <vector> +#include "image_io/base/message.h" + +namespace photos_editing_formats { +namespace image_io { + +/// An abstract base class for storing and reporting on Messages. +class MessageStore { + public: + virtual ~MessageStore() = default; + + /// Clears the messages maintained by the store. + virtual void ClearMessages() = 0; + + /// @param message The message to add to the store. + virtual void AddMessage(const Message& message) = 0; + + /// @return A vector of messages maintained by the store; this vector may be + /// empty even if the AddMessage function was called, depending on how the + /// concrete subclass is implemented. + virtual std::vector<Message> GetMessages() const = 0; + + /// @return Whether the store has error messages or not. This value is + /// guaranteed to be accurate based on the latest calls to the + /// ClearMessages and AddMessage functions. + virtual bool HasErrorMessages() const = 0; +}; + +/// A MessageStore that saves the messages in a vector. The implementation of +/// this class is not thread safe. 
+class VectorMessageStore : public MessageStore { + public: + void ClearMessages() override { messages_.clear(); } + void AddMessage(const Message& message) override { + messages_.push_back(message); + } + std::vector<Message> GetMessages() const override { return messages_; } + bool HasErrorMessages() const override { + for (const auto& message : messages_) { + if (message.GetType() != Message::kStatus) { + return true; + } + } + return false; + } + + private: + std::vector<Message> messages_; +}; + +/// A MessageStore that simply keeps track of whether error messages have been +/// added or not, but does not store the messages themselves. The implementation +/// of this class should not cause any crashes if run in a multi-threaded +/// environment, though there may be some cases where erroneous results are +/// returned by the HasErrorMessages function. +class ErrorFlagMessageStore : public MessageStore { + public: + ErrorFlagMessageStore() : has_error_(false) {} + void ClearMessages() override { has_error_ = false; } + void AddMessage(const Message& message) override { + if (message.GetType() != Message::kStatus) { + has_error_ = true; + } + } + std::vector<Message> GetMessages() const override { + return std::vector<Message>(); + } + bool HasErrorMessages() const override { return has_error_; } + + private: + bool has_error_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_MESSAGE_STORE_H_ // NOLINT diff --git a/includes/image_io/base/message_writer.h b/includes/image_io/base/message_writer.h new file mode 100644 index 0000000..d3240a0 --- /dev/null +++ b/includes/image_io/base/message_writer.h @@ -0,0 +1,46 @@ +#ifndef IMAGE_IO_BASE_MESSAGE_WRITER_H_ // NOLINT +#define IMAGE_IO_BASE_MESSAGE_WRITER_H_ // NOLINT + +#include "image_io/base/message.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A message writer is used by MessageHandler to write messages as they are +/// reported via the 
ReportMessage function. The main function, WriteMessage +/// must be implemented by subclasses. Subclasses can also override any or all +/// of the other virtual functions, GetFormattedMessage(), GetTypeCategory() +/// and GetTypeDescription() to suit their needs. +class MessageWriter { + public: + virtual ~MessageWriter() = default; + + /// This function is called to write a message. Implementations can call the + /// GetFormattedMessage function and write it wherever it needs to go, or + /// do something else entirely. + /// @param message The message to write. + virtual void WriteMessage(const Message& message) = 0; + + /// Formats the message into a single string suitable for writing. This + /// implementation returns a string that has the format + /// <GetTypeCategory()><GetTypeDescription()>:text + /// @param message The message for which a formatted string is wanted. + /// @return A string describing the message. + virtual std::string GetFormattedMessage(const Message& message) const; + + /// @param type The type of message to get the category of. + /// @return A string describing the type category; this implementation returns + /// (the obviously nonlocalized strings) "STATUS" or "ERROR" + virtual std::string GetTypeCategory(Message::Type type) const; + + /// @param type The type of message to get the description of. + /// @param system_errno Used for kStdLibError type messages. + /// @return A (non-localized) string description of the type. 
+ virtual std::string GetTypeDescription(Message::Type type, + int system_errno) const; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_MESSAGE_WRITER_H_ // NOLINT diff --git a/includes/image_io/base/ostream_data_destination.h b/includes/image_io/base/ostream_data_destination.h new file mode 100644 index 0000000..15a1155 --- /dev/null +++ b/includes/image_io/base/ostream_data_destination.h @@ -0,0 +1,58 @@ +#ifndef IMAGE_IO_BASE_OSTREAM_DATA_DESTINATION_H_ // NOLINT +#define IMAGE_IO_BASE_OSTREAM_DATA_DESTINATION_H_ // NOLINT + +#include <iostream> +#include <memory> +#include <string> +#include <utility> + +#include "image_io/base/data_destination.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A DataDestination that writes its output to an ostream. +class OStreamDataDestination : public DataDestination { + public: + /// Constructs an OStreamDataDestination using the given ostream. + /// @param ostream_ptr The ostream to which data is written. + explicit OStreamDataDestination(std::unique_ptr<std::ostream> ostream_ptr) + : ostream_(std::move(ostream_ptr)), + bytes_transferred_(0), + has_error_(false) {} + + /// @param name A name to associate with the ostream. Used for error messages. + void SetName(const std::string& name) { name_ = name; } + + /// @return The name associated with the ostream. + const std::string& GetName() const { return name_; } + + /// @return The number of bytes written to the ostream. + size_t GetBytesTransferred() const override { return bytes_transferred_; } + + /// @return True if errors were encountered while writing to the ostream. + bool HasError() const { return has_error_; } + + void StartTransfer() override; + TransferStatus Transfer(const DataRange& transfer_range, + const DataSegment& data_segment) override; + void FinishTransfer() override; + + private: + /// The ostream written to. 
+ std::unique_ptr<std::ostream> ostream_; + + /// The number of bytes written so far. + size_t bytes_transferred_; + + /// A (file) name to associate with the ostream, used with error messages. + std::string name_; + + /// If true indicates an error has occurred writing to the ostream. + bool has_error_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_OSTREAM_DATA_DESTINATION_H_ // NOLINT diff --git a/includes/image_io/base/types.h b/includes/image_io/base/types.h new file mode 100644 index 0000000..aaa88b3 --- /dev/null +++ b/includes/image_io/base/types.h @@ -0,0 +1,42 @@ +#ifndef IMAGE_IO_BASE_TYPES_H_ // NOLINT +#define IMAGE_IO_BASE_TYPES_H_ // NOLINT + +#include <cstdint> +#include <cstdlib> + +namespace photos_editing_formats { +namespace image_io { + +/// Byte is the fundamental unit of data. +using Byte = std::uint8_t; + +/// A Byte value and a validity flag. +struct ValidatedByte { + explicit ValidatedByte(Byte value_arg) : value(value_arg), is_valid(true) {} + ValidatedByte(const ValidatedByte&) = default; + ValidatedByte& operator=(const ValidatedByte&) = default; + Byte value; + bool is_valid; +}; + +/// Equality operator for ValidatedByte +inline bool operator==(const ValidatedByte& lhs, const ValidatedByte& rhs) { + return lhs.value == rhs.value && lhs.is_valid == rhs.is_valid; +} + +/// Inequality operator for ValidatedByte +inline bool operator!=(const ValidatedByte& lhs, const ValidatedByte& rhs) { + return lhs.value != rhs.value || lhs.is_valid != rhs.is_valid; +} + +/// @return a validated byte that has a false is_valid value. 
+inline ValidatedByte InvalidByte() { + ValidatedByte invalid_byte(0); + invalid_byte.is_valid = false; + return invalid_byte; +} + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_TYPES_H_ // NOLINT diff --git a/includes/image_io/extras/base64_decoder_data_destination.h b/includes/image_io/extras/base64_decoder_data_destination.h new file mode 100644 index 0000000..cde1dbc --- /dev/null +++ b/includes/image_io/extras/base64_decoder_data_destination.h @@ -0,0 +1,60 @@ +#ifndef IMAGE_IO_EXTRAS_BASE64_DECODER_DATA_DESTINATION_H_ // NOLINT +#define IMAGE_IO_EXTRAS_BASE64_DECODER_DATA_DESTINATION_H_ // NOLINT +#define IMAGE_IO_noumenon_base64_h + +#include <vector> + +#include "image_io/base/data_destination.h" + +namespace photos_editing_formats { +namespace image_io { + +/// Base64DecoderDataDestination is typically used in a chain of DataDestination +/// instances. For example, it can be used to decode base64 encoded JPEG data in +/// APP1/XMP data segments. +class Base64DecoderDataDestination : public DataDestination { + public: + /// @param next_destination The next DataDestination in the chain which will + /// be sent the decoded bytes received by the Transfer() function. + explicit Base64DecoderDataDestination(DataDestination* next_destination) + : next_destination_(next_destination), + next_decoded_location_(0), + has_error_(false) {} + + /// @return True if there was an error in the decoding process. + bool HasError() const { return has_error_; } + + void StartTransfer() override; + TransferStatus Transfer(const DataRange& transfer_range, + const DataSegment& data_segment) override; + void FinishTransfer() override; + + /// @return The number of bytes written not to this decoder destination, but + /// to the next destination. Returns zero if the next destination is null. + size_t GetBytesTransferred() const override { + return next_destination_ ? 
next_destination_->GetBytesTransferred() : 0; + } + + private: + /// The destination that the decoded data is sent to. + DataDestination* next_destination_; + + /// If the transfer_range parameter of the Transfer function does not have a + /// length that is a multiple of 4, then the leftover bytes are placed in this + /// vector and are prepended to the data in the next call to Transfer. + std::vector<Byte> leftover_bytes_; + + /// The DataRanges supplied to the Transfer function can't be sent down the + /// chain to the next destination because the number of bytes differ (by 4/3). + /// This value records the number of bytes decoded so far, and the beginning + /// of the DataRange sent to the destination's Transfer function. + size_t next_decoded_location_; + + /// A true value indicates that an error occurred in the decoding process. + bool has_error_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_EXTRAS_BASE64_DECODER_DATA_DESTINATION_H_ // NOLINT diff --git a/includes/image_io/extras/string_view_data_source.h b/includes/image_io/extras/string_view_data_source.h new file mode 100644 index 0000000..0a90b44 --- /dev/null +++ b/includes/image_io/extras/string_view_data_source.h @@ -0,0 +1,33 @@ +#ifndef IMAGE_IO_EXTRAS_STRING_VIEW_DATA_SOURCE_H_ // NOLINT +#define IMAGE_IO_EXTRAS_STRING_VIEW_DATA_SOURCE_H_ // NOLINT + +#include "image_io/base/data_destination.h" +#include "image_io/base/data_range.h" +#include "image_io/base/data_segment_data_source.h" + +#include "third_party/absl/strings/string_view.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A DataSource that reads bytes from a string_view. The underlying string data +/// must have a lifetime that exceeds the lifetime of this data source, and the +/// string contents must not change while the data source is referencing it. 
+class StringViewDataSource : public DataSegmentDataSource { + public: + /// Constructs a StringViewDataSource using the given string_view. + /// @param string_src The string_view to read from. + explicit StringViewDataSource(absl::string_view string_src); + + /// Returns the string view being used as the data source. + absl::string_view GetStringView() const { return string_src_; } + + private: + /// The string_view to read from. + absl::string_view string_src_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_EXTRAS_STRING_VIEW_DATA_SOURCE_H_ // NOLINT diff --git a/includes/image_io/gcontainer/gcontainer.h b/includes/image_io/gcontainer/gcontainer.h new file mode 100644 index 0000000..c0bd66f --- /dev/null +++ b/includes/image_io/gcontainer/gcontainer.h @@ -0,0 +1,36 @@ +#ifndef IMAGE_IO_GCONTAINER_GCONTAINER_H_ // NOLINT +#define IMAGE_IO_GCONTAINER_GCONTAINER_H_ // NOLINT + +#include <string> +#include <vector> + +namespace photos_editing_formats { +namespace image_io { +namespace gcontainer { + +// Writes an image to output_file_name, appending other_files (if they each +// exist) after the image's EOI marker. +// input_file_name must be a JPEG file. +bool WriteImageAndFiles(const std::string& input_file_name, + const std::vector<std::string>& other_files, + const std::string& output_file_name); + +// Retrieves the bytes (of size file_length) starting at file_start_offset +// bytes after the EOI marker in input_file_name. Returns true if parsing was +// successful, false otherwise. GContainer callers are expected to have +// file_start_offset and file_length from the image metadata. +// +// input_file_name must be a JPEG. +// file_start_offset is the nth byte after (and excluding) the EOI marker in +// input_file_name. file_length is the size (in bytes) of content to parse. +// out_file_contents is populated with the requested contents only if parsing is +// successful. 
+bool ParseFileAfterImage(const std::string& input_file_name, + size_t file_start_offset, size_t file_length, + std::string* out_file_contents); + +} // namespace gcontainer +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_GCONTAINER_GCONTAINER_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_apple_depth_builder.h b/includes/image_io/jpeg/jpeg_apple_depth_builder.h new file mode 100644 index 0000000..7f5c595 --- /dev/null +++ b/includes/image_io/jpeg/jpeg_apple_depth_builder.h @@ -0,0 +1,102 @@ +#ifndef IMAGE_IO_JPEG_JPEG_APPLE_DEPTH_BUILDER_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_APPLE_DEPTH_BUILDER_H_ // NOLINT + +#include <vector> + +#include "image_io/base/data_destination.h" +#include "image_io/base/data_range.h" +#include "image_io/base/data_source.h" + +namespace photos_editing_formats { +namespace image_io { + +/// Builds an Apple depth file containing a (possibly scaled down) primary image +/// and original depth image. +class JpegAppleDepthBuilder { + public: + JpegAppleDepthBuilder() + : primary_image_data_source_(nullptr), + depth_image_data_source_(nullptr), + data_destination_(nullptr) {} + + /// @param primary_image_data_source The data source containing the primary + /// image. The builder uses the first image in this data source. + /// @param depth_image_data_source The data source containing the depth image. + /// The builder finds the depth image using a JpegInfoBuilder and the + /// JpegInfo::GetAppleDepthImageRange() function. Consequently, this + /// image source must refer to a valid Apple depth file. + /// @param data_destination The data destination for the combined primary + /// and depth images. + /// @return Whether the building and transfer was successful. + bool Run(DataSource* primary_image_data_source, + DataSource* depth_image_data_source, + DataDestination* data_destination); + + private: + /// Gets the data associated with the primary image from its data source. 
+ /// @return Whether the primary image data was gotten successfully. + bool GetPrimaryImageData(); + + /// Gets the data associated with the depth image from its data source. + /// @return Whether the depth image data was gotten successfully. + bool GetDepthImageData(); + + /// Transfers the primary image from its data source to the data destination, + /// adding and transforming the jpeg segments it needs to make the resulting + /// data destination a valid Apple depth file. + /// @return Whether the transfer was successful or not. + bool TransferPrimaryImage(); + + /// Transfers the depth image from its data source to the data destination. + /// @return Whether the transfer was successful or not. + bool TransferDepthImage(); + + /// Modifies the existing primary Jfif segment to contain the information + /// needed for a valid Apple depth file, and transfers the result to the data + /// destination. + /// @param jfif_length_delta The increased size of the Jfif segment. + /// @return Whether the transfer was successful or not. + bool TransferNewJfifSegment(size_t *jfif_length_delta); + + /// Creates a new Mpf segment needed for a valid Apple depth file and + /// transfers the result to the data destination. + /// @param jfif_length_delta The increased size of the Jfif segment. + /// @return Whether the transfer was successful or not. + bool TransferNewMpfSegment(size_t jfif_length_delta); + + /// @param data_source The data source from which to transfer bytes to the + /// data destination. + /// @param data_range The data range in the data source to transfer. + bool TransferData(DataSource *data_source, const DataRange& data_range); + + /// The data source containing the primary image. + DataSource* primary_image_data_source_; + + /// The data source representing a valid Apple depth file. + DataSource* depth_image_data_source_; + + /// The final destination of the new Apple depth data. 
+ DataDestination* data_destination_; + + /// The range in the primary image data source containing the primary image. + DataRange primary_image_range_; + + /// The range in the primary image data source containing the primary image's + /// Jfif segment. + DataRange primary_image_jfif_segment_range_; + + /// The bytes of the primary image's Jfif segment. + std::vector<Byte> primary_image_jfif_segment_bytes_; + + /// The range in the primary image data source containing the primary image's + /// Mpf segment, or the location at which a new Mpf segment should be written. + DataRange primary_image_mpf_segment_range_; + + /// The range in the depth image data source containing the depth image. + DataRange depth_image_range_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_APPLE_DEPTH_BUILDER_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_image_extractor.h b/includes/image_io/jpeg/jpeg_image_extractor.h new file mode 100644 index 0000000..91237e5 --- /dev/null +++ b/includes/image_io/jpeg/jpeg_image_extractor.h @@ -0,0 +1,73 @@ +#ifndef IMAGE_IO_JPEG_JPEG_IMAGE_EXTRACTOR_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_IMAGE_EXTRACTOR_H_ // NOLINT + +#include "image_io/base/data_destination.h" +#include "image_io/base/data_source.h" +#include "image_io/jpeg/jpeg_info.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A class that can make use of the data in a JpegInfo instance to transfer +/// Apple depth and GDepth/GImage images from a DataSource and ship them to a +/// DataDestination. +class JpegImageExtractor { + public: + /// @param jpeg_info The JpegInfo instance containing depth/image data. + /// @param data_source The DataSource from which to transfer depth/image data. 
+ JpegImageExtractor(const JpegInfo& jpeg_info, DataSource* data_source) + : jpeg_info_(jpeg_info), data_source_(data_source) {} + + /// This function extracts the Apple depth image from the DataSource and sends + /// the bytes to the DataDestination. + /// @param image_destination The DataDestination to receive the image data. + /// @return True if an image was extracted. + bool ExtractAppleDepthImage(DataDestination* image_destination); + + /// This function extracts the Apple matte image from the DataSource and sends + /// the bytes to the DataDestination. + /// @param image_destination The DataDestination to receive the image data. + /// @return True if an image was extracted. + bool ExtractAppleMatteImage(DataDestination* image_destination); + + /// This function extracts the GDepth type image from the DataSource and + /// sends the bytes to the DataDestination. + /// @param image_destination The DataDestination to receive the image data. + /// @return True if an image was extracted. + bool ExtractGDepthImage(DataDestination* image_destination); + + /// This function extracts the GImage type image from the DataSource and + /// sends the bytes to the DataDestination. + /// @param image_destination The DataDestination to receive the image data. + /// @return True if an image was extracted. + bool ExtractGImageImage(DataDestination* image_destination); + + private: + /// Worker function called for GDepth/GImage type image extraction. + /// @param xmp_info_type The type of image to extract. + /// @param image_destination The DataDestination to receive the image data. + /// @return True if an image was extracted. + bool ExtractImage(JpegXmpInfo::Type xmp_info_type, + DataDestination* image_destination); + + /// Worker function called for Apple depth/matte type image extraction. + /// @param image_range The range of the image data to extract. 
If invalid, + /// the image_destination's StartTransfer/FinishTransfer functions are + /// still called, and this function will return true (i.e., zero bytes + /// "successfully" transferred). + /// @param image_destination The DataDestination to receive the image data. + /// @return True if the transfer succeeded. + bool ExtractImage(const DataRange& image_range, + DataDestination* image_destination); + + /// The jpeg info object contains the location of the Apple and Google images. + JpegInfo jpeg_info_; + + /// The data source from which the images are extracted. + DataSource* data_source_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_IMAGE_EXTRACTOR_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_info.h b/includes/image_io/jpeg/jpeg_info.h new file mode 100644 index 0000000..8aedf9e --- /dev/null +++ b/includes/image_io/jpeg/jpeg_info.h @@ -0,0 +1,153 @@ +#ifndef IMAGE_IO_JPEG_JPEG_INFO_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_INFO_H_ // NOLINT + +#include <vector> + +#include "image_io/base/data_range.h" +#include "image_io/jpeg/jpeg_segment_info.h" +#include "image_io/jpeg/jpeg_xmp_info.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A class to represent interesting depth and image information in a JPEG file, +/// and where it is located so that it can be efficiently extracted. +class JpegInfo { + public: + JpegInfo() { JpegXmpInfo::InitializeVector(&xmp_info_vector_); } + JpegInfo(const JpegInfo&) = default; + JpegInfo& operator=(const JpegInfo&) = default; + + /// @return The vector of data ranges indicating the locations of the images. + const std::vector<DataRange>& GetImageRanges() const { return image_ranges_; } + + /// @return The vector of interesting segment info structures. + const std::vector<JpegSegmentInfo>& GetSegmentInfos() const { + return segment_infos_; + } + + /// @param image_index The image containing the sought after segment info. 
+ /// @param type The type of segment info to get. + /// @return The segment info, or one that is invalid if not found. + JpegSegmentInfo GetSegmentInfo(size_t image_index, + const std::string& type) const { + for (const auto& segment_info : GetSegmentInfos()) { + if (segment_info.GetImageIndex() == image_index && + segment_info.GetType() == type) { + return segment_info; + } + } + return JpegSegmentInfo(0, DataRange(), ""); + } + + /// @return True if there is Apple depth information. + bool HasAppleDepth() const { return apple_depth_image_range_.IsValid(); } + + /// @return True if there is Apple matte information. + bool HasAppleMatte() const { return apple_matte_image_range_.IsValid(); } + + /// @return True if there is GDepth type depth information. + bool HasGDepth() const { + return HasImage(JpegXmpInfo::kGDepthInfoType); + } + + /// @return True if there is GImage information. + bool HasGImage() const { + return HasImage(JpegXmpInfo::kGImageInfoType); + } + + /// @return True if there is either Apple depth or GDepth information. + bool HasDepth() const { return HasAppleDepth() || HasGDepth(); } + + /// @return True if there is an extractable image present. + bool HasExtractableImage() const { + return HasAppleDepth() || HasAppleMatte() || HasGDepth() || HasGImage(); + } + + /// @param xmp_info_type The type of xmp image information desired. + /// @return True if there is information of the given type. + bool HasImage(JpegXmpInfo::Type xmp_info_type) const { + return !GetSegmentDataRanges(xmp_info_type).empty(); + } + + /// @return The DataRange where the Apple depth information is located. + const DataRange& GetAppleDepthImageRange() const { + return apple_depth_image_range_; + } + + /// @return The DataRange where the Apple matte information is located. + const DataRange& GetAppleMatteImageRange() const { + return apple_matte_image_range_; + } + + /// @param type The type of Xmp data to get the mime type of. 
+ /// @return The mime type for the Xmp data of the given type. + std::string GetMimeType(JpegXmpInfo::Type type) const { + return xmp_info_vector_[type].GetMimeType(); + } + + /// @param type The type of Xmp data to get the segment data ranges of. + /// @return The segment data ranges containing the Xmp data of the given type. + const std::vector<DataRange>& GetSegmentDataRanges( + JpegXmpInfo::Type type) const { + return xmp_info_vector_[type].GetSegmentDataRanges(); + } + + /// Adds a DataRange to the vector of image DataRanges. + /// @param image_range The data range of an image. + void AddImageRange(const DataRange& image_range) { + image_ranges_.push_back(image_range); + } + + /// Adds a JpegSegmentInfo to the vector of JpegSegmentInfos. + /// @param segment_info The info structure to add. + void AddSegmentInfo(const JpegSegmentInfo& segment_info) { + segment_infos_.push_back(segment_info); + } + + /// @param data_range The DataRange where Apple depth information is located. + void SetAppleDepthImageRange(const DataRange& data_range) { + apple_depth_image_range_ = data_range; + } + + /// @param data_range The DataRange where Apple matte information is located. + void SetAppleMatteImageRange(const DataRange& data_range) { + apple_matte_image_range_ = data_range; + } + + /// @param type The type of Xmp data to set the mime type of. + /// @param mime_type The mime type of the Xmp data. + void SetMimeType(JpegXmpInfo::Type type, const std::string& mime_type) { + xmp_info_vector_[type].SetMimeType(mime_type); + } + + /// @param type The type of Xmp data to set segment data ranges of. + /// @param segment_data_ranges The segment data ranges that contain the Xmp data. + void SetSegmentDataRanges(JpegXmpInfo::Type type, + const std::vector<DataRange>& segment_data_ranges) { + xmp_info_vector_[type].SetSegmentDataRanges(segment_data_ranges); + } + + private: + /// The DataRanges of all images. + std::vector<DataRange> image_ranges_; + + /// Interesting segment information. 
Currently information about APP0/JFIF, + /// APP1/EXIF and APP2/MPF segments are saved here. + std::vector<JpegSegmentInfo> segment_infos_; + + /// The DataRange of the Apple depth information. + DataRange apple_depth_image_range_; + + /// The DataRange of the Apple matte information. + DataRange apple_matte_image_range_; + + /// A vector holding information about the Xmp segments containing GDepth and + /// GImage data. + std::vector<JpegXmpInfo> xmp_info_vector_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_INFO_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_info_builder.h b/includes/image_io/jpeg/jpeg_info_builder.h new file mode 100644 index 0000000..ee4d611 --- /dev/null +++ b/includes/image_io/jpeg/jpeg_info_builder.h @@ -0,0 +1,133 @@ +#ifndef IMAGE_IO_JPEG_JPEG_INFO_BUILDER_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_INFO_BUILDER_H_ // NOLINT + +#include <set> +#include <string> +#include <vector> + +#include "image_io/base/data_range.h" +#include "image_io/jpeg/jpeg_info.h" +#include "image_io/jpeg/jpeg_segment_processor.h" +#include "image_io/jpeg/jpeg_xmp_info_builder.h" + +namespace photos_editing_formats { +namespace image_io { + +/// JpegInfoBuilder is a JpegSegmentProcessor that collects the location and type +/// of depth information in the JPEG file so that subsequent operations can +/// efficiently manipulate it. +class JpegInfoBuilder : public JpegSegmentProcessor { + public: + JpegInfoBuilder(); + + /// @return The JpegInfo with the depth information obtained from the + /// scanner as a result of processing the segments it processes. + const JpegInfo& GetInfo() const { return jpeg_info_; } + + /// @param image_limit The max number of images to process. By default there + /// is no limit on the number of images processed. 
+ void SetImageLimit(int image_limit) { image_limit_ = image_limit; } + + /// By default the info builder does not capture the value of the segment in + /// the segment infos contained in the @c JpegInfo object. Call this function + /// to capture the bytes of the indicated segment types. + /// @param segment_info_type The type of segment info to capture the value of. + void SetCaptureSegmentBytes(const std::string& segment_info_type); + + void Start(JpegScanner* scanner) override; + void Process(JpegScanner* scanner, const JpegSegment& segment) override; + void Finish(JpegScanner* scanner) override; + + private: + /// @return True if the data members indicate Apple depth is present. + bool HasAppleDepth() const; + + /// @return True if the data members indicate Apple matte is present. + bool HasAppleMatte() const; + + /// @return True if the segment is a primary Xmp segment. + bool IsPrimaryXmpSegment(const JpegSegment& segment) const; + + /// @return True if the segment is an extended Xmp segment. + bool IsExtendedXmpSegment(const JpegSegment& segment) const; + + /// @return True if the segment is an Mpf segment. + bool IsMpfSegment(const JpegSegment& segment) const; + + /// @return True if the segment is an Exif segment. + bool IsExifSegment(const JpegSegment& segment) const; + + /// @return True if the segment is a Jfif segment. + bool IsJfifSegment(const JpegSegment& segment) const; + + /// Captures the segment bytes into a JpegSegmentInfo's byte vector if + /// SetCaptureSegmentBytes() has been called for the segment info type. + /// @param type The type of segment info being processed. + /// @param segment The segment being processed. + /// @param bytes A vector to hold the segment bytes. + void MaybeCaptureSegmentBytes(const std::string& type, + const JpegSegment& segment, + std::vector<Byte>* bytes) const; + + /// @return True if the segment's extended xmp guid matches the one from the + /// primary xmp segment. 
+ bool HasMatchingExtendedXmpGuid(const JpegSegment& segment) const; + + /// @return True if the segment contains the given id. + bool HasId(const JpegSegment& segment, const char* id) const; + + /// Sets the primary segment guid value using properties in the given segment. + /// @param segment The segment from which to obtain the primary xmp guid value. + void SetPrimaryXmpGuid(const JpegSegment& segment); + + /// Sets the Xmp mime type using property values in the given segment. + /// @param segment The segment from which to obtain the mime property value. + /// @param xmp_info_type The type of xmp data that determines the mime + /// property name to look for. + void SetXmpMimeType(const JpegSegment& segment, + JpegXmpInfo::Type xmp_info_type); + + /// The limit on the number of images to process. After this many images have + /// been found, the Process() function will tell the JpegScanner to stop. + int image_limit_; + + /// The number of images encountered in the JPEG file so far. + int image_count_; + + /// The number of APP2/MPF segments encountered per image. One criterion used + /// to determine if Apple depth data is present is that the first image has + /// an APP2/MPF segment. + std::vector<int> image_mpf_count_; + + /// The number of APP1/XMP segments encountered per image. Another criterion + /// used to determine if Apple depth data is present is that the second or + /// following image contains one of these segments. + std::vector<int> image_xmp_apple_depth_count_; + + /// The number of APP1/XMP segments encountered per image. Another criterion + /// used to determine if Apple matte data is present is that the second or + /// following image contains one of these segments. + std::vector<int> image_xmp_apple_matte_count_; + + /// The DataRange of the most recent SOI type segment. This is used to compute + /// the range of the image that represents the Apple depth data. 
+ DataRange most_recent_soi_marker_range_; + + /// The GUID value of the APP1/XMP segments that contain GDepth/GImage data. + std::string primary_xmp_guid_; + + /// Builder helpers for gdepth and gimage xmp type segments. + JpegXmpInfoBuilder gdepth_info_builder_; + JpegXmpInfoBuilder gimage_info_builder_; + + /// The collected data describing the type/location of data in the JPEG file. + JpegInfo jpeg_info_; + + /// The types of the segment info type to capture the bytes of. + std::set<std::string> capture_segment_bytes_types_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_INFO_BUILDER_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_marker.h b/includes/image_io/jpeg/jpeg_marker.h new file mode 100644 index 0000000..507382d --- /dev/null +++ b/includes/image_io/jpeg/jpeg_marker.h @@ -0,0 +1,86 @@ +#ifndef IMAGE_IO_JPEG_JPEG_MARKER_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_MARKER_H_ // NOLINT + +#include <bitset> +#include <string> + +#include "image_io/base/types.h" + +namespace photos_editing_formats { +namespace image_io { + +/// The size of the array that would be needed to reference all marker types. +const size_t kJpegMarkerArraySize = 256; + +/// A JpegMarker begins each JpegSegment in a JPEG file. The first byte of a +/// marker is 0xFF, and the second byte is the marker type value. Bytes with +/// values 0x00 and 0xFF indicate not a JpegMarker, but a zero byte or fill +/// byte, respectively. That is the sequence FF00 must be interpreted as a +/// single byte with a 0 value. The specification says that multiple fill bytes +/// may appear before a valid marker start: FFFFFFDA - the leading FFFF should +/// be ignored. +class JpegMarker { + public: + /// The length of the marker in the JPEG file. One byte for the 0xFF value, + /// and one byte for the marker type. + static const size_t kLength = 2; + + /// The offset from the start of the JpegMarker that contains the marker type. 
+ static const size_t kTypeOffset = 1; + + /// The special byte value that may start a marker. + static const Byte kStart = 0xFF; + + /// Special marker type values referenced elsewhere in the code. + static const Byte kZERO = 0; + static const Byte kSOS = 0xDA; + static const Byte kSOI = 0xD8; + static const Byte kEOI = 0xD9; + static const Byte kAPP0 = 0xE0; + static const Byte kAPP1 = 0xE1; + static const Byte kAPP2 = 0xE2; + static const Byte kFILL = 0xFF; + + /// A set of bits, one for each type of marker. + using Flags = std::bitset<kJpegMarkerArraySize>; + + /// Creates a JpegMarker with the given type value. + explicit JpegMarker(Byte type) : type_(type) {} + + JpegMarker() = delete; + + /// Not all byte values are used to represent markers. Bytes with values 0x00 + /// and 0xFF indicate a zero byte or fill byte, respectively. + /// @return Whether this is a valid marker. + bool IsValid() const { return type_ != kZERO && type_ != kFILL; } + + /// @return The type of the marker. + Byte GetType() const { return type_; } + + /// @return The name of the marker type. + const std::string GetName() const; + + /// @param prefix A prefix for the returned string. + /// @return The <prefix>XX hex string representation of the type. + const std::string GetHexString(const std::string& prefix) const; + + /// Some markers have two extra bytes that indicate the size of the segment's + /// data payload. See https://www.w3.org/Graphics/JPEG/itu-t81.pdf, Table B-2. + /// @return Whether this marker type has such a variable length payload. + bool HasVariablePayloadSize() const; + + /// Some markers are delimiters in an otherwise continuous stream of bytes in + /// the JPEG file. See https://www.w3.org/Graphics/JPEG/itu-t81.pdf, Section + /// B.2.1. + /// @return Whether this is an entropy segment delimiter marker. + bool IsEntropySegmentDelimiter() const; + + private: + /// The type value of the marker. 
+ Byte type_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_MARKER_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_scanner.h b/includes/image_io/jpeg/jpeg_scanner.h new file mode 100644 index 0000000..0ab0488 --- /dev/null +++ b/includes/image_io/jpeg/jpeg_scanner.h @@ -0,0 +1,100 @@ +#ifndef IMAGE_IO_JPEG_JPEG_SCANNER_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_SCANNER_H_ // NOLINT + +#include <memory> + +#include "image_io/base/data_segment.h" +#include "image_io/base/data_source.h" +#include "image_io/jpeg/jpeg_marker.h" +#include "image_io/jpeg/jpeg_segment_processor.h" + +namespace photos_editing_formats { +namespace image_io { + +/// JpegScanner reads DataSegments from a DataSource, finds interesting +/// JpegSegments and passes them on to a JpegSegmentProcessor for further +/// examination. +class JpegScanner { + public: + JpegScanner() + : data_source_(nullptr), + segment_processor_(nullptr), + current_location_(0), + done_(false), + has_error_(false) {} + + /// Called to start and run the scanner. + /// @param data_source The DataSource from which to obtain DataSegments. + /// @param segment_processor The processor of the JpegSegment instances. + void Run(DataSource* data_source, JpegSegmentProcessor* segment_processor); + + /// If the JpegSegmentProcessor determines that it has seen enough JpegSegment + /// instances, it can call this function to terminate the scanner prematurely. + void SetDone() { done_ = true; } + + /// @return True if the done flag was set by SetDone(), else false. + bool IsDone() const { return done_; } + + /// @return True if the scanner encountered errors. + bool HasError() const { return has_error_; } + + /// @return The DataSource from which DataSegments are being read. 
+ DataSource* GetDataSource() const { return data_source_; } + + /// JpegSegmentProcessor instances can call this function to inform the + /// scanner about the types of JpegSegment instances it is interested in. + /// The JpegScanner will not send any uninteresting segments to the processor. + void UpdateInterestingMarkerFlags(const JpegMarker::Flags& marker_flags) { + interesting_marker_flags_ = marker_flags; + } + + private: + /// Called from the Run() function to do the heavy lifting. + void FindAndProcessSegments(); + + /// @param marker The marker of the JpegSegment under construction. + /// @param begin_location The start of the JpegSegment under construction. + /// @return The size of the segment payload of given marker type that starts + /// at the specified location. + size_t GetPayloadSize(const JpegMarker& marker, size_t begin_location); + + /// @return The validated byte value at the given location. + ValidatedByte GetValidatedByte(size_t location); + + /// Calls GetValidatedByte() and returns its value if the byte is valid, else + /// sets the has_error_ flag. + /// @return The byte value at the given location, or 0 if the byte is invalid. + Byte GetByte(size_t location); + + /// Asks the DataSource for the next DataSegment. + void GetNextSegment(); + + private: + /// The DataSource from which DataSegments are obtained. + DataSource* data_source_; + + /// The JpegSegmentProcessor to which JpegSegments are sent. + JpegSegmentProcessor* segment_processor_; + + /// The JpegSegment types of interest to the JpegSegmentProcessor. + JpegMarker::Flags interesting_marker_flags_; + + /// Depending on the DataSource, a given JpegSegment may span up to two + /// DataSegments. These are they. + std::shared_ptr<DataSegment> current_segment_; + std::shared_ptr<DataSegment> next_segment_; + + /// The current location of the scanner in the DataSource. + size_t current_location_; + + /// A flag that indicates the scanner is done, naturally or prematurely. 
+ bool done_; + + /// A flag that indicates an error occurred while getting Byte data. + bool has_error_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_SCANNER_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_segment.h b/includes/image_io/jpeg/jpeg_segment.h new file mode 100644 index 0000000..c4a79fd --- /dev/null +++ b/includes/image_io/jpeg/jpeg_segment.h @@ -0,0 +1,178 @@ +#ifndef IMAGE_IO_JPEG_JPEG_SEGMENT_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_SEGMENT_H_ // NOLINT + +#include "image_io/base/data_range.h" +#include "image_io/base/data_segment.h" +#include "image_io/jpeg/jpeg_marker.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A JpegSegment is an entity in a JPEG file that starts with a JpegMarker and +/// is followed by zero or more payload bytes. The JpegSegment has a DataRange +/// that indicates the position of the segment in the originating DataSource. +/// A JpegScanner obtains DataSegment instances from a DataSource in such a way +/// that it can guarantee that a JpegSegment will span at most two DataSegment +/// instances. Clients of JpegSegment need not be concerned with the number of +/// underlying DataSegments if they use the member functions defined here to +/// access the segment's bytes. +class JpegSegment { + public: + /// If a JpegSegment has a variable length data payload, the payload data is + /// located at this offset from the start of the payload. + static constexpr size_t kVariablePayloadDataOffset = 2; + + /// Constructs a JpegSegment starting and ending at the indicated points in + /// the given DataSegment instances, the second of which may be null. + /// @param begin The start of JpegSegment range. + /// @param end The end of JpegSegment range. + /// @param begin_segment The DataSegment that contains the begin location of + /// the JpegSegment and the end if the end_segment is null. 
+ /// @param end_segment The DataSegment that contains the end location of the + /// JpegSegment if it is not null. + JpegSegment(size_t begin, size_t end, const DataSegment* begin_segment, + const DataSegment* end_segment) + : data_range_(begin, end), + begin_segment_(begin_segment), + end_segment_(end_segment){} + ~JpegSegment() = default; + + /// @return The DataRange of the data in the segment. + const DataRange& GetDataRange() const { return data_range_; } + + /// @return The begin location of the segment's data range. + size_t GetBegin() const { return data_range_.GetBegin(); } + + /// @return The end location of the segment's data range. + size_t GetEnd() const { return data_range_.GetEnd(); } + + /// @return The length of the segment's data range. + size_t GetLength() const { return data_range_.GetLength(); } + + /// @return True if the segment's range contains the location, else false. + bool Contains(size_t location) const { + return data_range_.Contains(location); + } + + /// @return The location of the segment's JpegMarker. + size_t GetMarkerLocation() const { return GetBegin(); } + + /// @return The location of the segment's payload, which includes the payload + /// length if applicable for the type of segment. + size_t GetPayloadLocation() const { return GetBegin() + JpegMarker::kLength; } + + /// @return The location of the segment's payload's data. + size_t GetPayloadDataLocation() const { + return GetMarker().HasVariablePayloadSize() + ? GetPayloadLocation() + kVariablePayloadDataOffset + : GetPayloadLocation(); + } + + /// @param location The location at which to obtain the byte value. + /// @return The validated byte value at the location, or 0/false if the + /// segment's range does not contain the location. 
+ ValidatedByte GetValidatedByte(size_t location) const { + return DataSegment::GetValidatedByte(location, begin_segment_, + end_segment_); + } + + /// @return The payload size or zero if the segment's marker indicates the + /// segment does not have a payload. The payload size includes the two + /// bytes that encode the length of the payload. I.e., the payload data + /// size is two less than the value returned by this function. + size_t GetVariablePayloadSize() const; + + /// @param location The start location of the compare operation. + /// @param str The string to compare the bytes with. + /// @return True if the segment's bytes at the given location equal str. + bool BytesAtLocationStartWith(size_t location, const char* str) const; + + /// @param location The start location of the search operation. + /// @param str The string to search for. + /// @return True if the segment contains the string, starting at location. + bool BytesAtLocationContain(size_t location, const char* str) const; + + /// @param start_location The location at which to start the search. + /// @param value The byte value to search for. + /// @return The location in the segment's bytes of the next occurrence of the + /// given byte value, starting at the indicated location, or the segment's + /// range's GetEnd() location if not found. + size_t Find(size_t start_location, Byte value) const; + + /// @param location The location at which to start the search. + /// @param str The string to search for. + /// @return The location in the segment's bytes of the next occurrence of the + /// given string value, starting at the indicated location, or the + /// segment's range's GetEnd() location if not found. + size_t Find(size_t location, const char* str) const; + + /// XMP property names have the syntax property_name="property_value". + /// Extracts the value of the named property from this segment. + /// @param start_location Where to start looking for the property name. 
+ /// @param property_name The name of the property to look for. + /// @return The string value associated with the xmp property name, or an + /// empty string if the property was not found. + std::string ExtractXmpPropertyValue(size_t start_location, + const char* property_name) const; + + /// XMP property names have the syntax property_name="property_value". + /// @param start_location The location in the segment to begin looking for the + /// property_name=" syntax. + /// @return The location of the next byte following the quote, or GetEnd() if + /// the property_name=" syntax was not found. + size_t FindXmpPropertyValueBegin(size_t start_location, + const char* property_name) const; + + /// XMP property names have the syntax property_name="property_value". + /// @param start_location The location in the segment to begin looking for the + /// final quote of the property value. + /// @return The location of the quote that terminates the property_value, or + /// GetEnd() if the final quote was not found. + size_t FindXmpPropertyValueEnd(size_t start_location) const; + + /// @param data_range The range of bytes to extract as a string. + /// @return The string extracted from the segment at locations indicated by + /// the data_range, or an empty string if the data_range is not contained + /// in the segment's range, or any invalid or zero bytes are encountered. + std::string ExtractString(const DataRange& data_range) const; + + /// @return The JpegMarker of this segment. + JpegMarker GetMarker() const { + size_t marker_type_location = GetMarkerLocation() + 1; + // An invalid ValidatedByte has a value of 0, and a JpegMarker with a 0 + // type value is invalid, so it's ok to just grab the ValidatedByte's value. + return JpegMarker(GetValidatedByte(marker_type_location).value); + } + + /// Fills two strings with byte_count bytes from the start of the segment's + /// payload in a form suitable for creating a "hex dump" of the segment. 
Note + /// that if the jpeg segment has an entropy delimiter type marker, there is + /// technically no payload to dump. However in this case, as long as a valid + /// byte can be obtained from the jpeg segment's underlying data segments, a + /// byte value will be dumped to the strings. + /// @param byte_count The number of bytes to dump from the segment's payload. + /// @param hex_string A string that will be at most 2 * byte_count in length + /// that will contain the hex values of the bytes. + /// @param ascii_string A string that will be at most byte_count in length + /// that will contain the printable character of the bytes, or a '.' for + /// non-printable byte values. + void GetPayloadHexDumpStrings(size_t byte_count, std::string* hex_string, + std::string* ascii_string) const; + + private: + /// The DataRange of the JpegSegment. + DataRange data_range_; + + /// The DataSegment that contains the begin of the range and possibly the + /// end. This DataSegment will never be null. + const DataSegment* begin_segment_; + + /// The DataSegment, that if not null, will contain the end location of the + /// JpegSegment's DataRange. + const DataSegment* end_segment_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_SEGMENT_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_segment_builder.h b/includes/image_io/jpeg/jpeg_segment_builder.h new file mode 100644 index 0000000..e8e714f --- /dev/null +++ b/includes/image_io/jpeg/jpeg_segment_builder.h @@ -0,0 +1,140 @@ +#ifndef IMAGE_IO_JPEG_JPEG_SEGMENT_BUILDER_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_SEGMENT_BUILDER_H_ // NOLINT + +#include <string> +#include <vector> + +#include "image_io/base/byte_buffer.h" +#include "image_io/jpeg/jpeg_xmp_info.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A helper to assemble the data in a JpegSegment. 
Currently this is only used + /// for testing purposes, but in the future may prove useful in the image_io + /// library itself. +class JpegSegmentBuilder { + public: + /// Sets the payload size value of the JpegSegment data in the byte buffer. + /// This function assumes that the byte buffer contains the data for exactly + /// one JpegSegment, and that the segment type has a variable payload size. + /// The byte buffer must have a size in the range [4:65535] for this to work. + /// @param byte_buffer The data defining the JpegSegment. + /// @return Whether the byte buffer's size was valid and the payload size set. + static bool SetPayloadSize(ByteBuffer* byte_buffer); + + /// @return The vector of ByteData. + const std::vector<ByteData>& GetByteData() const { return byte_data_; } + + /// @return The concatenated string values of all byte data, or an empty + /// string if there are invalid byte data entries. Note that the string + /// may have embedded null characters if there are any kAscii0 type + /// byte data elements present. + std::string GetByteDataValues() const; + + /// Adds the byte data to the vector. + /// @param byte_data The byte data to add. + void AddByteData(const ByteData& byte_data) { + byte_data_.push_back(byte_data); + } + + /// Adds a segment marker of the given type and payload size. + /// @param marker_type The type of segment marker to add. + /// @param size The size of the payload if the marker has a variable + /// size payload. This value must be in the range [2:65535], although no + /// check is performed to ensure that is the case. + void AddMarkerAndSize(Byte marker_type, size_t size); + + /// Adds a segment marker of the given type, and "0000" placeholder value if + /// the type has a variable payload size. Call ReplaceSizePlaceholder() later + /// to set the actual size of the segment. + /// @param marker_type The type of segment marker to add. 
+ /// @return The index in the vector of ByteData where the marker was added. + size_t AddMarkerAndSizePlaceholder(Byte marker_type); + + /// Replaces the size of the segment marker that was previously added using + /// the AddMarkerAndSizePlaceholder() function. The first two bytes of the + /// ByteData at the given index must represent a valid JpegMarker that has + /// a variable length payload size. + /// @param index The index in the vector of ByteData to set the size of. + /// @param size The size of the segment, including the size field itself. + /// This value must be in the range [2:65535]. + /// @return Whether the size was set successfully. + bool ReplaceSizePlaceholder(size_t index, size_t size); + + /// Adds the bytes that define an extended XMP header. + /// @param xmp_guid The guid value of the XMP data. If this value is not 32 + /// bytes long, it is either truncated or extended with 0s. + void AddExtendedXmpHeader(const std::string& xmp_guid); + + /// Adds the XMP syntax that appears at the start of an XMP segment. This + /// syntax appears after the XMP header in a segment, so this function should + /// be called after the AddExtendedXmpHeader() function. + void AddXmpMetaPrefix(); + + /// Adds the XMP syntax that appears at the end of an XMP segment. This syntax + /// finishes the XMP data, so it should be the last function called when + /// assembling the data for such a segment. + void AddXmpMetaSuffix(); + + /// Adds the RDF prefix that appears within the body of an XMP segment. This + /// syntax should be added before any XMP property names and values are added. + void AddRdfPrefix(); + + /// Adds the RDF suffix that appears within the body of an XMP segment. This + /// syntax should be added after all XMP property names and values are added. + void AddRdfSuffix(); + + /// Adds the RDF:Description prefix that appears within the body of an XMP + /// segment. 
This syntax should be added after the RDF prefix is added, but + /// before any XMP property names and values are added. + void AddRdfDescriptionPrefix(); + + /// Adds the RDF:Description suffix that appears within the body of an XMP + /// segment. This syntax should be added after all XMP property names + /// and values are added, but before the RDF suffix is added. + void AddRdfDescriptionSuffix(); + + /// Adds the property name, and the '="' string that defines + /// the start of the name="value" string. After this call, you can + /// add the property value to the byte data vector, and then call the + /// AddXmpPropertySuffix() function to finish the definition. + /// @param property_name The name of the property to add. + void AddXmpPropertyPrefix(const std::string& property_name); + + /// Adds a final quote to finish off the definition of a name="value" string. + void AddXmpPropertySuffix(); + + /// Adds the name="value" strings to define the XMP property name and value. + /// @param property_name The name of the property to add. + /// @param property_value The value of the property to add. + void AddXmpPropertyNameAndValue(const std::string& property_name, + const std::string& property_value); + + /// Adds segment marker and the extended XMP header for an APP1/XMP type + /// segment that has extended XMP data. After this call, either call the + /// AddXmpAndRdfPrefixes() function (if this is the first extended segment) + /// or just continue adding the property value contained in this segment. + /// @param xmp_guid The guid value of the XMP data. If this value is not 32 + /// bytes long, it is either truncated or extended with 0s. + void AddApp1XmpMarkerAndXmpExtendedHeader(const std::string& xmp_guid); + + /// Adds segment marker and all the prefixes to start the xmpmeta/rdf section + /// of the segment. 
After this call property names and values can be added, + /// and optionally the section can be completed by calling the + /// AddXmpAndRdfSuffixes() function. + void AddXmpAndRdfPrefixes(); + + /// Adds the suffixes to complete the definition of an APP1/XMP segment. Call + /// this function after the AddXmpAndRdfPrefixes() and after adding property + /// names and values to the byte data. + void AddXmpAndRdfSuffixes(); + + private: + std::vector<ByteData> byte_data_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_SEGMENT_BUILDER_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_segment_info.h b/includes/image_io/jpeg/jpeg_segment_info.h new file mode 100644 index 0000000..6fb25db --- /dev/null +++ b/includes/image_io/jpeg/jpeg_segment_info.h @@ -0,0 +1,85 @@ +#ifndef IMAGE_IO_JPEG_JPEG_SEGMENT_INFO_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_SEGMENT_INFO_H_ // NOLINT + +#include <string> +#include <vector> + +#include "image_io/base/data_range.h" +#include "image_io/base/types.h" + +namespace photos_editing_formats { +namespace image_io { + +/// Interesting segment types. +const char kExif[] = "Exif"; +const char kJfif[] = "JFIF"; +const char kMpf[] = "MPF"; + +/// A class that holds interesting information about a JpegSegment. +class JpegSegmentInfo { + public: + /// @param image_index The index of the image in a @c DataSource that contains + /// the segment. + /// @param data_range The range of the segment in the @c DataSource. + /// @param type The type of segment. + JpegSegmentInfo(size_t image_index, const DataRange& data_range, + const std::string& type) + : image_index_(image_index), data_range_(data_range), type_(type) {} + + /// Constructs an empty, invalid segment info. + JpegSegmentInfo() : image_index_(0) {} + + JpegSegmentInfo(const JpegSegmentInfo&) = default; + JpegSegmentInfo& operator=(const JpegSegmentInfo&) = default; + + /// @param rhs The segment info to compare with this one. 
+ /// @return Whether the segment infos are equal, including their bytes. + bool operator==(const JpegSegmentInfo& rhs) const { + return image_index_ == rhs.image_index_ && data_range_ == rhs.data_range_ && + type_ == rhs.type_ && bytes_ == rhs.bytes_; + } + + /// @param rhs The segment info to compare with this one. + /// @return Whether the segment infos are not equal. + bool operator!=(const JpegSegmentInfo& rhs) const { + return !(*this == rhs); + } + + /// @return Whether the segment info is valid (non-empty type, valid range). + bool IsValid() const { return !type_.empty() && data_range_.IsValid(); } + + /// @return The image index of the segment info. + size_t GetImageIndex() const { return image_index_; } + + /// @return The data range of the segment info. + const DataRange& GetDataRange() const { return data_range_; } + + /// @return The type of the segment info. + const std::string& GetType() const { return type_; } + + /// @return The (optional) bytes of the segment to which the info refers. The + /// vector will be empty unless the GetMutableBytes() function has been + /// called and the vector filled with the segment contents. + const std::vector<Byte>& GetBytes() const { return bytes_; } + + /// @return A non-const pointer to the bytes vector. + std::vector<Byte>* GetMutableBytes() { return &bytes_; } + + private: + // The image index where the segment is located. + size_t image_index_; + + // The data range of the segment. + DataRange data_range_; + + // The type of segment. + std::string type_; + + // The (optional) bytes of the segment. 
+ std::vector<Byte> bytes_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_SEGMENT_INFO_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_segment_lister.h b/includes/image_io/jpeg/jpeg_segment_lister.h new file mode 100644 index 0000000..ca2a19d --- /dev/null +++ b/includes/image_io/jpeg/jpeg_segment_lister.h @@ -0,0 +1,35 @@ +#ifndef IMAGE_IO_JPEG_JPEG_SEGMENT_LISTER_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_SEGMENT_LISTER_H_ // NOLINT + +#include <string> +#include <vector> + +#include "image_io/jpeg/jpeg_segment_processor.h" + +namespace photos_editing_formats { +namespace image_io { + +/// JpegSegmentLister is an implementation of JpegSegmentProcessor that creates +/// a listing (in the form of a vector of strings) describing the segments. +class JpegSegmentLister : public JpegSegmentProcessor { + public: + JpegSegmentLister(); + void Start(JpegScanner* scanner) override; + void Process(JpegScanner* scanner, const JpegSegment& segment) override; + void Finish(JpegScanner* scanner) override; + + /// @return The lines representing the listing of the segments. + const std::vector<std::string>& GetLines() const { return lines_; } + + private: + /// The number of occurrences of the various segment types. + std::vector<int> marker_type_counts_; + + /// The lines representing the listing output. 
+ std::vector<std::string> lines_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_SEGMENT_LISTER_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_segment_processor.h b/includes/image_io/jpeg/jpeg_segment_processor.h new file mode 100644 index 0000000..a193797 --- /dev/null +++ b/includes/image_io/jpeg/jpeg_segment_processor.h @@ -0,0 +1,44 @@ +#ifndef IMAGE_IO_JPEG_JPEG_SEGMENT_PROCESSOR_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_SEGMENT_PROCESSOR_H_ // NOLINT + +#include "image_io/jpeg/jpeg_segment.h" + +namespace photos_editing_formats { +namespace image_io { + +class JpegScanner; + +/// JpegSegmentProcessor is the abstract base class for implementations that do +/// something with the JPEG segments that the JpegScanner identifies. +class JpegSegmentProcessor { + public: + virtual ~JpegSegmentProcessor() = default; + + /// This function is called at the start of the JpegScanner::Run() function to + /// allow this processor to initialize its data structures. It can also + /// inform the JpegScanner about preferences for the types of segments it is + /// interested in by calling the JpegScanner::UpdateInterestingMarkerFlags() + /// function. + /// @param scanner The scanner that is starting the processor. + virtual void Start(JpegScanner* scanner) = 0; + + /// This function is called repeatedly by the JpegScanner as it identifies + /// segments in the JPEG file. The processor can access the data in the + /// segment to do interesting things, or can update the scanner's preferences + /// like in the Start() function. + /// @param scanner The scanner that is providing the segment to the processor. + /// @param segment The segment provided by the scanner to the processor. 
+ virtual void Process(JpegScanner* scanner, const JpegSegment& segment) = 0; + + /// This function is called after the JpegScanner has provided all the + /// segments to the processor, to allow the processor to finish its work + /// processing the segments. + /// @param scanner The scanner that is informing the processor that it is done + /// finding segments. + virtual void Finish(JpegScanner* scanner) = 0; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_SEGMENT_PROCESSOR_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_xmp_data_extractor.h b/includes/image_io/jpeg/jpeg_xmp_data_extractor.h new file mode 100644 index 0000000..30d62a1 --- /dev/null +++ b/includes/image_io/jpeg/jpeg_xmp_data_extractor.h @@ -0,0 +1,66 @@ +#ifndef IMAGE_IO_JPEG_JPEG_XMP_DATA_EXTRACTOR_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_XMP_DATA_EXTRACTOR_H_ // NOLINT + +#include "image_io/base/data_destination.h" +#include "image_io/jpeg/jpeg_info.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A class that can make use of the data in a JpegInfo instance to extract +/// the xmp data JpegSegments passed to it and forward it to a DataDestination. +class JpegXmpDataExtractor : public DataDestination { + public: + /// @param xmp_info_type The type of xmp data being extracted. + /// @param segment_count The number of segment ranges over which the xmp + /// data is spread. NOTE(review): 0 wraps last_segment_index_. + /// @param data_destination The destination to which the extracted xmp data + /// is to be sent. + JpegXmpDataExtractor(JpegXmpInfo::Type xmp_info_type, size_t segment_count, + DataDestination* data_destination) + : xmp_info_type_(xmp_info_type), + last_segment_index_(segment_count - 1), + data_destination_(data_destination), + has_error_(false) {} + + /// Sets the current segment index; the constructor leaves it uninitialized. + /// @param segment_index The index of the segment currently being processed. 
+ void SetSegmentIndex(size_t segment_index) { segment_index_ = segment_index; } + + /// @return True if there was an error in the extraction process. + bool HasError() const { return has_error_; } + + void StartTransfer() override; + TransferStatus Transfer(const DataRange& transfer_range, + const DataSegment& data_segment) override; + void FinishTransfer() override; + + /// @return The number of bytes written not to this extractor destination, but + /// to the next destination. Returns zero if the next destination is null. + size_t GetBytesTransferred() const override { + return data_destination_ ? data_destination_->GetBytesTransferred() : 0; + } + + private: + /// The type of xmp data being extracted. + JpegXmpInfo::Type xmp_info_type_; + + /// The xmp data require special processing when the last segment is being + /// transferred. This value is the index of the last segment. + size_t last_segment_index_; + + /// The DataDestination that the extracted xmp data is sent to. + DataDestination* data_destination_; + + /// The xmp data is spread over one or more segments in the DataSource. This + /// index tracks which one is being transferred. + size_t segment_index_; + + /// A true value indicates that an error occurred in the decoding process. 
+ bool has_error_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_XMP_DATA_EXTRACTOR_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_xmp_info.h b/includes/image_io/jpeg/jpeg_xmp_info.h new file mode 100644 index 0000000..2bda3f5 --- /dev/null +++ b/includes/image_io/jpeg/jpeg_xmp_info.h @@ -0,0 +1,92 @@ +#ifndef IMAGE_IO_JPEG_JPEG_XMP_INFO_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_XMP_INFO_H_ // NOLINT + +#include <string> +#include <vector> + +#include "image_io/base/data_range.h" + +namespace photos_editing_formats { +namespace image_io { + +const size_t kXmpGuidSize = 32; +const char kXmpId[] = "http://ns.adobe.com/xap/1.0/"; +const char kXmpExtendedId[] = "http://ns.adobe.com/xmp/extension/"; +const size_t kXmpExtendedHeaderSize = + sizeof(kXmpExtendedId) + kXmpGuidSize + 2 * sizeof(std::uint32_t); + +/// Constants used to find and process information in APP1/XMP type segments. +const char kXmpAppleDepthId[] = "http://ns.apple.com/depthData/1.0"; +const char kXmpAppleMatteId[] = "http://ns.apple.com/portraitEffectsMatte/1.0/"; +const char kXmpGDepthV1Id[] = "http://ns.google.com/photos/1.0/depthmap/"; +const char kXmpGImageV1Id[] = "http://ns.google.com/photos/1.0/image/"; +const char kXmpHasExtendedId[] = "xmpNote:HasExtendedXMP"; + +/// JpegXmpInfo maintains information about the data in an Xmp property, such as +/// are used to store the GDepth and GImage data. +class JpegXmpInfo { + public: + /// The possible types of Xmp information. + enum Type { + /// GDepth:Data type information. + kGDepthInfoType, + + /// GImage:Data type information. + kGImageInfoType, + }; + + /// Initializes a vector of JpegXmpinfo instances, indexed by their type. + /// @param xmp_info_vector The vector to initialize. + static void InitializeVector(std::vector<JpegXmpInfo>* xmp_info_vector); + + /// @param xmp_info_type The type to get the identifier of. 
+ /// @return The identifier that appears at the start of the Xmp segment. + static std::string GetIdentifier(Type jpeg_xmp_info_type); + + /// @param jpeg_xmp_info_type The type to get the data property name of. + /// @return The name of the data property that appears in the Xmp segment. + static std::string GetDataPropertyName(Type jpeg_xmp_info_type); + + /// @param jpeg_xmp_info_type The type to get the mime property name of. + /// @return The name of the mime property that appears in the primary + /// Xmp segment. + static std::string GetMimePropertyName(Type jpeg_xmp_info_type); + + explicit JpegXmpInfo(Type type) : type_(type) {} + JpegXmpInfo(const JpegXmpInfo&) = default; + JpegXmpInfo& operator=(const JpegXmpInfo&) = default; + + /// @return The type of the Xmp property information. + Type GetType() const { return type_; } + + /// @return The mime type of the Xmp data. + std::string GetMimeType() const { return mime_type_; } + + /// @param mime_type The mime type to assign to this instance. + void SetMimeType(const std::string& mime_type) { mime_type_ = mime_type; } + + /// @return The segment's data ranges where this Xmp data occurs. + const std::vector<DataRange>& GetSegmentDataRanges() const { + return segment_data_ranges_; + } + + /// @param segment_data_ranges The segment data ranges to assign to this + /// instance. + void SetSegmentDataRanges(const std::vector<DataRange>& segment_data_ranges) { + segment_data_ranges_ = segment_data_ranges; + } + + private: + /// The type of the Xmp information. + Type type_; + + /// The mime type of the Xmp data. + std::string mime_type_; + + /// The segment data ranges that contain the Xmp data. 
+ std::vector<DataRange> segment_data_ranges_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_XMP_INFO_H_ // NOLINT diff --git a/includes/image_io/jpeg/jpeg_xmp_info_builder.h b/includes/image_io/jpeg/jpeg_xmp_info_builder.h new file mode 100644 index 0000000..62b3ac4 --- /dev/null +++ b/includes/image_io/jpeg/jpeg_xmp_info_builder.h @@ -0,0 +1,42 @@ +#ifndef IMAGE_IO_JPEG_JPEG_XMP_INFO_BUILDER_H_ // NOLINT +#define IMAGE_IO_JPEG_JPEG_XMP_INFO_BUILDER_H_ // NOLINT + +#include <vector> + +#include "image_io/jpeg/jpeg_segment.h" +#include "image_io/jpeg/jpeg_xmp_info.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A helper class for building information about the segments that contain +/// extended xmp data of various types. +class JpegXmpInfoBuilder { + public: + /// @param xmp_info_type The type of xmp information to build. + explicit JpegXmpInfoBuilder(JpegXmpInfo::Type xmp_info_type) + : xmp_info_type_(xmp_info_type) {} + + /// @param segment The segment to examine for xmp data. + void ProcessSegment(const JpegSegment& segment); + + /// @return The vector of segment data ranges that contains xmp property data. + const std::vector<DataRange>& GetPropertySegmentRanges() const { + return property_segment_ranges_; + } + + private: + /// The type of xmp data to collect. + JpegXmpInfo::Type xmp_info_type_; + + /// The vector of segment data ranges that contains xmp property data. + std::vector<DataRange> property_segment_ranges_; + + /// The segment data range that contains the xmp property data end. 
+ DataRange property_end_segment_range_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_JPEG_JPEG_XMP_INFO_BUILDER_H_ // NOLINT diff --git a/includes/image_io/tools/image_tool_function.h b/includes/image_io/tools/image_tool_function.h new file mode 100644 index 0000000..f730359 --- /dev/null +++ b/includes/image_io/tools/image_tool_function.h @@ -0,0 +1,28 @@ +#ifndef IMAGE_IO_TOOLS_IMAGE_TOOL_FUNCTION_H_ // NOLINT +#define IMAGE_IO_TOOLS_IMAGE_TOOL_FUNCTION_H_ // NOLINT + +#include <functional> +#include <string> + +namespace photos_editing_formats { +namespace image_io { + +/// All output of the ImageTool() function and the underlying image_io functions +/// are sent to this type of function that is passed to ImageTool(). Client code +/// can use a function that writes the line to stdout or to a log file. The +/// str parameter may have embedded new line characters in it. The function +/// should not write its own new line at the end of the str. +using ImageToolOutputter = std::function<void(const std::string& str)>; + +/// The ImageTool entry point, easily callable from a main() type function. +/// @param argc The number of strings in the argv array. +/// @param argv The options and values used in the command line. +/// @param outputter A function to output the strings produced by ImageTool(). +/// @return A zero value for successful, non-zero for an error. 
+int ImageTool(int argc, const char* argv[], + const ImageToolOutputter& outputter); + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_TOOLS_IMAGE_TOOL_FUNCTION_H_ // NOLINT diff --git a/includes/image_io/utils/file_utils.h b/includes/image_io/utils/file_utils.h new file mode 100644 index 0000000..d1a469d --- /dev/null +++ b/includes/image_io/utils/file_utils.h @@ -0,0 +1,41 @@ +#ifndef IMAGE_IO_UTILS_FILE_UTILS_H_ // NOLINT +#define IMAGE_IO_UTILS_FILE_UTILS_H_ // NOLINT + +#include <iostream> +#include <memory> +#include <string> + +#include "image_io/base/data_segment.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A policy that controls whether an error is reported or not. +enum class ReportErrorPolicy { kDontReportError, kReportError }; + +/// @param file_name The name of the file to get the size in bytes of. +/// @param size A pointer to a variable to receive the size. +/// @return Whether file size was obtained properly. +bool GetFileSize(const std::string& file_name, size_t* size); + +/// @param file_name The name of the file to open for output. +/// @return An ostream pointer or nullptr if the open failed. +std::unique_ptr<std::ostream> OpenOutputFile( + const std::string& file_name, ReportErrorPolicy report_error_policy); + +/// @param file_name The name of the file to open for input. +/// @return An istream pointer or nullptr if the open failed. +std::unique_ptr<std::istream> OpenInputFile( + const std::string& file_name, ReportErrorPolicy report_error_policy); + +/// Opens the named file for input, gets its size, and reads the entire contents +/// into a data segment that is returned to the caller. +/// @param file_name The name of the file to open for input. +/// @return A DataSegment pointer or nullptr if the open and reading failed. 
+std::shared_ptr<DataSegment> ReadEntireFile( + const std::string& file_name, ReportErrorPolicy report_error_policy); + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_UTILS_FILE_UTILS_H_ // NOLINT diff --git a/includes/image_io/xml/xml_action.h b/includes/image_io/xml/xml_action.h new file mode 100644 index 0000000..dce6c7d --- /dev/null +++ b/includes/image_io/xml/xml_action.h @@ -0,0 +1,57 @@ +#ifndef IMAGE_IO_XML_XML_ACTION_H_ // NOLINT +#define IMAGE_IO_XML_XML_ACTION_H_ // NOLINT + +#include <functional> + +#include "image_io/base/data_match_result.h" +#include "image_io/xml/xml_handler_context.h" + +namespace photos_editing_formats { +namespace image_io { + +class XmlActionContext; +class XmlTerminal; + +/// The definition for an action function associated with an XmlTerminal. +/// If the action does not need to change the result of the terminal, it can +/// simply return the value from XmlActionContext::GetResult(). +using XmlAction = + std::function<DataMatchResult(const XmlActionContext& context)>; + +/// The data context passed from an XmlTerminal to its action function. +class XmlActionContext : public XmlHandlerContext { + public: + XmlActionContext(const XmlHandlerContext& context, XmlTerminal* terminal, + const DataMatchResult& result) + : XmlHandlerContext(context), terminal_(terminal), result_(result) {} + XmlActionContext(size_t location, const DataRange& range, + const DataSegment& segment, const DataLineMap& data_line_map, + XmlHandler* handler, XmlTerminal* terminal, + const DataMatchResult& result) + : XmlHandlerContext(location, range, segment, data_line_map, handler), + terminal_(terminal), + result_(result) {} + + /// @return The terminal associated with the context. + XmlTerminal* GetTerminal() const { return terminal_; } + + /// @return The result associated with the context. 
+ const DataMatchResult& GetResult() const { return result_; } + + /// @param bytes_consumed The value to set in the returned result. + /// @return A result based on the context's action, but with its bytes + /// consumed value set to the given value. + DataMatchResult GetResultWithBytesConsumed(size_t bytes_consumed) const { + auto result = result_; + return result.SetBytesConsumed(bytes_consumed); + } + + private: + XmlTerminal* terminal_; + DataMatchResult result_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_ACTION_H_ // NOLINT diff --git a/includes/image_io/xml/xml_attribute_rule.h b/includes/image_io/xml/xml_attribute_rule.h new file mode 100644 index 0000000..564af07 --- /dev/null +++ b/includes/image_io/xml/xml_attribute_rule.h @@ -0,0 +1,33 @@ +#ifndef IMAGE_IO_XML_XML_ATTRIBUTE_RULE_H_ // NOLINT +#define IMAGE_IO_XML_XML_ATTRIBUTE_RULE_H_ // NOLINT + +#include "image_io/xml/xml_rule.h" + +namespace photos_editing_formats { +namespace image_io { + +/// The XmlAttributeRule parses the following syntax: +/// S? Name S? = S? 'Value' +/// S? Name S? = S? "Value" +class XmlAttributeRule : public XmlRule { + public: + XmlAttributeRule(); + + private: + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's AttributeName() function. + /// @param context The action context from the name terminal. + /// @return The result value from the handler's function. + DataMatchResult HandleName(const XmlActionContext& context); + + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's AttributeValue() function. + /// @param context The action context from the quoted string terminal. + /// @return The result value from the handler's function. 
+ DataMatchResult HandleValue(const XmlActionContext& context); +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_ATTRIBUTE_RULE_H_ // NOLINT diff --git a/includes/image_io/xml/xml_cdata_and_comment_rules.h b/includes/image_io/xml/xml_cdata_and_comment_rules.h new file mode 100644 index 0000000..0cc2e50 --- /dev/null +++ b/includes/image_io/xml/xml_cdata_and_comment_rules.h @@ -0,0 +1,69 @@ +#ifndef IMAGE_IO_XML_XML_CDATA_AND_COMMENT_RULES_H_ // NOLINT +#define IMAGE_IO_XML_XML_CDATA_AND_COMMENT_RULES_H_ // NOLINT + +#include "image_io/xml/xml_rule.h" + +namespace photos_editing_formats { +namespace image_io { + +/// The XmlCdataRule parses the following syntax "<![CDATA[ ... ]]>". +/// As mentioned in the comments for the XmlHandler::Cdata() function, the token +/// value that is passed to the handler never includes the leading "<![CDATA[" +/// syntax and always includes the trailing "]]>" syntax. This considerably +/// simplifies the parsing task. The alternate start point constructor is used +/// by the XmlCdataOrCommentRule. +class XmlCdataRule : public XmlRule { + public: + XmlCdataRule(); + explicit XmlCdataRule(StartPoint start_point); + + private: + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's Cdata() function. + /// @param context The action context from the rule's terminal. + /// @return The result value from the handler's function. + DataMatchResult HandleCdataValue(const XmlActionContext& context); +}; + +/// The XmlCommentRule parses the following syntax "<!-- ... -->". +/// As mentioned in the comments for the XmlHandler::Comment() function, the +/// token value that is passed to the handler never includes the leading "<!--" +/// syntax and always includes the trailing "-->" syntax. This considerably +/// simplifies the parsing task. The alternate start point constructor is used +/// by the XmlCdataOrCommentRule. 
+class XmlCommentRule : public XmlRule { + public: + XmlCommentRule(); + explicit XmlCommentRule(StartPoint start_point); + + private: + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's Comment() function. + /// @param context The action context from the rule's terminal. + /// @return The result value from the handler's function. + DataMatchResult HandleCommentValue(const XmlActionContext& context); +}; + +/// This rule will use chain delegation to start either the XmlCdataRule or the +/// XmlCommentRule, depending on the text being parsed. The syntax for XML is +/// pretty poor here - the parser needs to look ahead two characters from the < +/// character to determine what to do. The alternate start point constructor is +/// used by the XmlElementContentRule. +class XmlCdataOrCommentRule : public XmlRule { + public: + XmlCdataOrCommentRule(); + explicit XmlCdataOrCommentRule(StartPoint start_point); + + private: + /// Builds an XmlTokenContext from the XmlActionContext and creates the + /// XmlCdataRule or XmlCommentRule to chain to depending on what character + /// follows the exclamation point of the "<!" syntax. + /// @param context The action context from the rule's terminal. + /// @return The result value from the action context. 
+ DataMatchResult HandlePostBangChar(const XmlActionContext& context); +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_CDATA_AND_COMMENT_RULES_H_ // NOLINT diff --git a/includes/image_io/xml/xml_element_rules.h b/includes/image_io/xml/xml_element_rules.h new file mode 100644 index 0000000..f40f370 --- /dev/null +++ b/includes/image_io/xml/xml_element_rules.h @@ -0,0 +1,92 @@ +#ifndef IMAGE_IO_XML_XML_ELEMENT_RULES_H_ // NOLINT +#define IMAGE_IO_XML_XML_ELEMENT_RULES_H_ // NOLINT + +#include "image_io/xml/xml_rule.h" + +namespace photos_editing_formats { +namespace image_io { + +/// The XmlElementRule parses the following syntax: +/// Element ::= EmptyElemTag | STag content ETag +/// EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' +/// STag ::= '<' Name (S Attribute)* S? '>' +/// ETag ::= '</' Name S? '>' +/// The Attribute syntax is parsed by XmlAttributeRule, which this rule +/// delegates to as a child rule. The EmptyElemTag type syntax is handled by +/// this rule. The STag part of the syntax is handled by this rule, but the +/// element contents and the ETag syntax is handled by the XmlElementContentRule +/// that is chained to by this rule. +class XmlElementRule : public XmlRule { + public: + XmlElementRule(); + explicit XmlElementRule(StartPoint start_point); + + private: + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's StartElement() function. + /// @param context The action context from the rule's terminal. + /// @return The result value from the handler's function. + DataMatchResult HandleName(const XmlActionContext& context); + + /// Handles the book keeping after parsing the whitespace following the name + /// of the element, basically looking ahead to see if an XmlAttributeRule has + /// to be delegated to as a child rule, or if the element ends. + /// @param context The action context from the rule's terminal. + /// @return The result value action context. 
+ DataMatchResult HandlePostWhitespaceChar(const XmlActionContext& context); + + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's FinishElement() function in response to the final literal in + /// the EmptyElemTag type syntax. As written in the comment for the XmlHandler + /// FinishElement() function, the token context passed to the handler in this + /// case will have an invalid range and an XmlPortion value of kNone - i.e., + /// the element name is not available for this form of the element syntax. + /// @param context The action context from the rule's terminal. + /// @return The result value from the handler's function. + DataMatchResult HandleEmptyElemTagEnd(const XmlActionContext& context); + + /// Handles the book keeping after parsing the final ">" literal of the STag + /// syntax of the rule, creating an XmlElementContentRule for use as a + /// chained-to rule. + /// @param context The action context from the rule's terminal. + /// @return The result value from the action context. + DataMatchResult HandleSTagEnd(const XmlActionContext& context); +}; + +/// The XmlElementContentRule parses the following syntax: +/// (c? Element | PI | CDATA | Comment )+ ETag +/// The "c?" syntax represents the character data passed to the XmlHandler's +/// ElementContent() function. The syntax for Element, PI, CDATA and Comment +/// all cause a child rule to be created and delegated to. The ETag syntax will +/// cause this element to be finished with a DataMatchResult type of kFull. +class XmlElementContentRule : public XmlRule { + public: + XmlElementContentRule(); + + private: + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's ElementContent() function. + /// @param context The action context from the rule's terminal. + /// @return The result value from the handler's function. 
+ DataMatchResult HandleContent(const XmlActionContext& context); + + /// Handles the book keeping after parsing the element's content characters, + /// and the first character literal ("<") of the Element, PI, CDATA or Comment + /// syntax, creating an appropriate child rule to delegate the processing to. + /// @param context The action context from the rule's terminal. + /// @return The result value action context. + DataMatchResult HandlePostOpenChar(const XmlActionContext& context); + + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's FinishElement() function. No check is done by the rule to verify + /// that the element name matches the one that was passed to the handler's + /// StartElement. + /// @param context The action context from the rule's terminal. + /// @return The result value from the handler's function. + DataMatchResult HandleEndTag(const XmlActionContext& context); +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_ELEMENT_RULES_H_ // NOLINT diff --git a/includes/image_io/xml/xml_handler.h b/includes/image_io/xml/xml_handler.h new file mode 100644 index 0000000..cdf3d30 --- /dev/null +++ b/includes/image_io/xml/xml_handler.h @@ -0,0 +1,107 @@ +#ifndef IMAGE_IO_XML_XML_HANDLER_H_ // NOLINT +#define IMAGE_IO_XML_XML_HANDLER_H_ // NOLINT + +#include "image_io/base/data_match_result.h" +#include "image_io/xml/xml_token_context.h" + +namespace photos_editing_formats { +namespace image_io { + +/// The handler that is called by XmlRule instances as they parse XML syntax +/// and produce tokens defined in the XmlTokenContext. Each handler function +/// may be called multiple times with different XmlPortion values. The first +/// time the XmlPortion::kBegin bit will be set. The last time, XmlPortion::kEnd +/// will be set. In between, XmlPortion::kMiddle will be set. If the entire +/// token value is available for the handler, all three bits will be set. 
+/// The implementation of each function in this base class returns the +/// DataMatchResult value that the context provides. The function overrides in +/// subclasses can return the same context value, or a copy that is modified +/// with a different result type, message and "can continue" flag. +class XmlHandler { + public: + virtual ~XmlHandler() = default; + + /// This function is called to start an XML element. Once started, any of + /// the other handler functions may be called. + /// @param context The token context used to specify the element name. + /// @return The match result from the context, or one that is modified to + /// contain an error message if needed. + virtual DataMatchResult StartElement(const XmlTokenContext& context); + + /// This function is called to finish an XML element. Each call to this + /// function should be paired with a call to a StartElement function. + /// @param context The token context used to obtain the match result for + /// returning. For this function, the context might not have a valid token + /// value: the XmlPortion will always be kNone and the token range invalid. + /// This is the case if the syntax parsed is an empty element like this: + /// "<SomeElement [Attribute=Name]... />". For non empty elements with syntax: + /// "<SomeElement>...</SomeElement>", the value will be the element name. + /// @return The match result from the context, or one that is modified to + /// contain an error message if needed. + virtual DataMatchResult FinishElement(const XmlTokenContext& context); + + /// This function is called to define an attribute name. This function will + /// never be called unless an element has been started with a prior call to + /// the StartElement() function. + /// @param context The token context used to specify the attribute name. + /// @return The match result from the context, or one that is modified to + /// contain an error message if needed. 
+ virtual DataMatchResult AttributeName(const XmlTokenContext& context); + + /// This function is called to define an attribute value. The token value + /// passed to this function always includes the quote marks at the begin and + /// end of the token value. The quote marks always match and may be either a + /// single quote (') or a double quote ("). Sometimes attribute values can be + /// very long, so implementations of this function should use care if they + /// retain the value as a string for later processing. This function will + /// never be called unless an element has been started with a prior call to + /// the StartElement() and AttributeName() functions. + /// @param context The token context used to specify the attribute value. + /// @return The match result from the context, or one that is modified to + /// contain an error message if needed. + virtual DataMatchResult AttributeValue(const XmlTokenContext& context); + + /// This function is called to define a block of characters in the body of + /// an element. This function may be called multiple times for a given + /// element. Handlers that are interested in the character content for an + /// element should concatenate the token values from all calls to obtain the + /// full value for the element. + /// @param context The token context used to specify the content value. + /// @return The match result from the context, or one that is modified to + /// contain an error message if needed. + virtual DataMatchResult ElementContent(const XmlTokenContext& context); + + /// This function is called to inform the handler of a comment. A comment in + /// XML has the syntax "<!--...-->". In order to simplify the XML parsing + /// task, the tokens passed to this function never include the leading "<!--" + /// characters, but always include the trailing "-->". + /// @param context The token context used to specify the comment. 
+ /// @return The match result from the context, or one that is modified to + /// contain an error message if needed. + virtual DataMatchResult Comment(const XmlTokenContext& context); + + /// This function is called to inform the handler of a CDATA block. A CDATA + /// block in XML has the syntax "<![CDATA[...]]>". In order to simplify the XML + /// parsing task, the tokens passed to this function never include the leading + /// "<![CDATA[" characters, but always include the trailing "]]>". + /// @param context The token context used to specify the CDATA block. + /// @return The match result from the context, or one that is modified to + /// contain an error message if needed. + virtual DataMatchResult Cdata(const XmlTokenContext& context); + + /// This function is called to define a processing instruction. Processing + /// instructions have an XML syntax "<?...?>". In order to simplify the XML + /// parsing task, no parsing of the processing instruction is done: handlers + /// that need the contents parsed are on their own. Also, again to simplify + /// the XML parsing task, the tokens passed to this function never include the + /// leading "<?" characters, but always include the trailing "?>". + /// @param context The token context used to specify the processing data. + /// @return The match result from the context, or one that is modified to + /// contain an error message if needed. 
+ virtual DataMatchResult Pi(const XmlTokenContext& context); +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_HANDLER_H_ // NOLINT diff --git a/includes/image_io/xml/xml_handler_context.h b/includes/image_io/xml/xml_handler_context.h new file mode 100644 index 0000000..5595118 --- /dev/null +++ b/includes/image_io/xml/xml_handler_context.h @@ -0,0 +1,31 @@ +#ifndef IMAGE_IO_XML_XML_HANDLER_CONTEXT_H_ // NOLINT +#define IMAGE_IO_XML_XML_HANDLER_CONTEXT_H_ // NOLINT + +#include "image_io/base/data_context.h" + +namespace photos_editing_formats { +namespace image_io { + +class XmlHandler; + +class XmlHandlerContext : public DataContext { + public: + XmlHandlerContext(const DataContext& context, XmlHandler* handler) + : DataContext(context), handler_(handler) {} + + XmlHandlerContext(size_t location, const DataRange& range, + const DataSegment& segment, + const DataLineMap& data_line_map, XmlHandler* handler) + : DataContext(location, range, segment, data_line_map), + handler_(handler) {} + + XmlHandler* GetHandler() const { return handler_; } + + private: + XmlHandler* handler_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_HANDLER_CONTEXT_H_ // NOLINT diff --git a/includes/image_io/xml/xml_pi_rule.h b/includes/image_io/xml/xml_pi_rule.h new file mode 100644 index 0000000..674a3fa --- /dev/null +++ b/includes/image_io/xml/xml_pi_rule.h @@ -0,0 +1,32 @@ +#ifndef IMAGE_IO_XML_XML_PI_RULE_H_ // NOLINT +#define IMAGE_IO_XML_XML_PI_RULE_H_ // NOLINT + +#include "image_io/xml/xml_rule.h" + +namespace photos_editing_formats { +namespace image_io { + +/// The XmlPiRule parses the processing information syntax: "<?...?>". This +/// syntax is considerably simplified from the official XML specification. As +/// documented in the comments for the XmlHandler Pi() function, The leading +/// "<?" 
syntax is never sent to the handler, while the trailing "?>" literal +/// is always sent as part of the processing content token. This approach makes +/// it much easier to parse XML syntax. The alternate start point constructor +/// is used by the XmlElementContentRule. +class XmlPiRule : public XmlRule { + public: + XmlPiRule(); + explicit XmlPiRule(StartPoint start_point); + + private: + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's Pi() function. + /// @param context The action context from the rule's terminal. + /// @return The result value from the handler's function. + DataMatchResult HandlePiValue(const XmlActionContext& context); +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_PI_RULE_H_ // NOLINT diff --git a/includes/image_io/xml/xml_portion.h b/includes/image_io/xml/xml_portion.h new file mode 100644 index 0000000..673c958 --- /dev/null +++ b/includes/image_io/xml/xml_portion.h @@ -0,0 +1,48 @@ +#ifndef IMAGE_IO_XML_XML_PORTION_H_ // NOLINT +#define IMAGE_IO_XML_XML_PORTION_H_ // NOLINT + +namespace photos_editing_formats { +namespace image_io { + +/// A bit-type enum for indicating what part of an entity is defined: the +/// begin, middle and/or end. Bitwise "and" and "or" operators are defined to +/// combine and test values. +enum class XmlPortion { + kNone = 0, + kBegin = 1, + kMiddle = 2, + kEnd = 4, +}; + +/// @return The value that results from the bitwise "and" of given portions. +inline XmlPortion operator&(XmlPortion lhs, XmlPortion rhs) { + int lhs_value = static_cast<int>(lhs); + int rhs_value = static_cast<int>(rhs); + return static_cast<XmlPortion>(lhs_value & rhs_value); +} + +/// @return The value that results from the bitwise "or" of given portions. 
+inline XmlPortion operator|(XmlPortion lhs, XmlPortion rhs) { + int lhs_value = static_cast<int>(lhs); + int rhs_value = static_cast<int>(rhs); + return static_cast<XmlPortion>(lhs_value | rhs_value); +} + +/// @param value The value to use for the test. +/// @param mask The mask to use for the test. +/// @return Whether any of the bits in the mask are set in the value. +inline bool ContainsAny(XmlPortion value, XmlPortion mask) { + return (value & mask) != XmlPortion::kNone; +} + +/// @param value The value to use for the test. +/// @param mask The mask to use for the test. +/// @return Whether all of the bits in the mask are set in the value. +inline bool ContainsAll(XmlPortion value, XmlPortion mask) { + return (value & mask) == mask; +} + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_PORTION_H_ // NOLINT diff --git a/includes/image_io/xml/xml_reader.h b/includes/image_io/xml/xml_reader.h new file mode 100644 index 0000000..9a42b74 --- /dev/null +++ b/includes/image_io/xml/xml_reader.h @@ -0,0 +1,105 @@ +#ifndef IMAGE_IO_XML_XML_READER_H_ // NOLINT +#define IMAGE_IO_XML_XML_READER_H_ // NOLINT + +#include <memory> +#include <string> +#include <vector> + +#include "image_io/base/data_line_map.h" +#include "image_io/base/data_match_result.h" +#include "image_io/base/message.h" +#include "image_io/xml/xml_handler_context.h" +#include "image_io/xml/xml_rule.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A class for reading and parsing the text of a data segment, resulting in the +/// functions of an XmlHandler to be called. This reader's Parse() function can +/// be called multiple times for text that spans multiple data segments. Errors +/// are reported to the message handler as they are encountered. In general, +/// there will be three types of errors: internal (programming), syntax, and +/// value errors. 
Internal errors can come from anywhere in this code base; +/// only one such error is permitted per StartParse/Parse... sequence. Syntax +/// errors are usually issued by XmlRule instances; like internal errors, only +/// one such error is tolerated per StartParse/Parse... sequence. XmlHandler +/// functions may issue value errors; multiple such value errors are tolerated. +class XmlReader { + public: + explicit XmlReader(XmlHandler* handler) + : handler_(handler), + bytes_parsed_(0), + has_internal_or_syntax_error_(false), + has_errors_(false) {} + + /// Sets up the reader for parsing data segment text using the given XmlRule. + /// @param rule The top level rule to use when parsing the data segment text. + /// @return Whether the reader was set up properly. + bool StartParse(std::unique_ptr<XmlRule> rule); + + /// Parses the text portion of the data segment starting at a location. This + /// function may be called multiple times for text that spans multiple data + /// segments. + /// @param start_location The location at which to start reading/parsing. + /// This location must be contained in the range parameter. + /// @param range The portion of the data segment to parse. This range value + /// must be contained in the range returned by DataSegment::GetRange(). + /// @param segment The segment containing the text to parse. + /// @return Whether the parsing was successful. + bool Parse(size_t start_location, const DataRange& range, + const DataSegment& segment); + + /// Finishes up the reading/parsing process. The rule passed to StartParse() + /// must have consumed all the text of the segments and be "done", otherwise + /// this function will issue an error message. + /// @return Whether the reading/parsing operation was completed successfully. + bool FinishParse(); + + /// @return The total number of bytes of text that have been read/parsed. 
+ size_t GetBytesParsed() const { return bytes_parsed_; } + + /// @return Whether errors have been encountered in reading/parsing the text. + bool HasErrors() const { return has_errors_; } + + /// @return The handler that handles the output of the parsing operations. + XmlHandler* GetHandler() const { return handler_; } + + private: + /// Sets up the context's name list that is used when creating error messages. + /// @param context The context to set up. + void InitializeContextNameList(XmlHandlerContext* context); + + /// Reports the message indicated in the result to the message handler and + /// updates the boolean data members indicating errors. + /// @param result The result value for an XmlRule::Parse function. + /// @param context The context for generating an error message if needed. + void ReportError(const DataMatchResult& result, const DataContext& context); + + /// Reports the message to the message handler and updates the boolean + /// data members indicating errors. + /// @param message The message to send to the message handler. + void ReportError(const Message& message); + + /// The reader's handler. + XmlHandler* handler_; + + /// A data line map used for error message creation. + DataLineMap data_line_map_; + + /// The pending and active rules. + std::vector<std::unique_ptr<XmlRule>> rule_stack_; + + /// The total number of bytes that have been parsed. + size_t bytes_parsed_; + + /// Whether an internal or syntax error has occurred. + bool has_internal_or_syntax_error_; + + /// Whether any type of error has occurred.
+ bool has_errors_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_READER_H_ // NOLINT diff --git a/includes/image_io/xml/xml_rule.h b/includes/image_io/xml/xml_rule.h new file mode 100644 index 0000000..c76f87c --- /dev/null +++ b/includes/image_io/xml/xml_rule.h @@ -0,0 +1,175 @@ +#ifndef IMAGE_IO_XML_XML_RULE_H_ // NOLINT +#define IMAGE_IO_XML_XML_RULE_H_ // NOLINT + +#include <memory> +#include <string> +#include <vector> + +#include "image_io/base/data_match_result.h" +#include "image_io/xml/xml_handler_context.h" +#include "image_io/xml/xml_terminal.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A rule represents a sequence of terminals to match text from a DataSource, +/// and the state needed to keep track of the parsing operation in case the text +/// is split across multiple DataSegments. XmlRules collaborate with an instance +/// of XmlHandler to process the token values the terminals produce. +/// +/// Terminals are added in the constructors of the rule subclasses, and are +/// not typically accessed directly from the clients of an XmlRule. Instead, +/// XmlRule clients normally just call the rule's Parse function and take action +/// based on the DataMatchResult value that is returned. The functions of the +/// XmlHandler are called internally by the rule's terminals as they parse the +/// text in the data segment. +/// +/// Normally, the terminals are parsed by the Parse() function in a sequential +/// manner until they are exhausted, at which time the Parse function returns +/// with a DataMatchResult that has a type equal to kFull. If the DataSegment +/// runs out of data before the end of the final terminal, the result type will +/// be kPartialOutOfData. Of course, if any of the terminals' scanners detect an +/// error, the result type will be kError. +/// +/// Rules may decide to delegate the parsing process to another rule. There are +/// two types of delegation: +/// 1.
Rule chaining - in this case a rule decides that another rule should +/// be used instead to continue the parsing process. This situation is +/// indicated when the result type is kFull and the rule's HasNextRule() +/// function returns true. The chained-to rule is obtained by calling the +/// rule's ReleaseNextRule() function. The current rule can be discarded. +/// 2. Child rules - in this case a "parent" rule decides that the next set of +/// syntax should be parsed by another "child" rule, and after that rule +/// completes, the parsing task should be returned to the parent rule. This +/// situation is indicated when the result type is kPartial and the rule's +/// HasNextRule() returns true. The child rule is obtained by calling the +/// rule's ReleaseNextRule() function. The current parent rule should be placed +/// on a stack until the child rule is done, and then the child discarded and +/// the parent rule used for the next Parse operation. +/// The action functions associated with a terminal are typically used to create +/// the next rule and set the result type and thus initiate the delegation +/// process. When the XmlRule::Parse function detects a delegation has been +/// requested, it returns to its caller so that the caller can handle the +/// delegation in the appropriate fashion. For an example, see the XmlReader's +/// Parse() function. +/// +/// In addition to delegation, the action functions associated with a terminal +/// can change the order of the terminals processed from a strictly sequential +/// order to whatever the rule so desires. This is done by calling the rule's +/// SetTerminalIndex() function. Terminals can be identified by name using the +/// GetTerminalIndexFromName() function if the rule's terminals were +/// constructed with names. If the terminal index of a rule is set to a +/// terminal that has already been used, the terminal's scanner's state must be +/// reset in order for it to parse successfully again.
Sometimes the entire +/// rule is "restarted", in which case the ResetTerminalScanners() function can +/// be called to reset the scanners of all the rule's terminals. +/// +/// Finally, because of the look-ahead needs of the XML grammar, some rules +/// support alternate "starting points", allowing them to skip some set of +/// initial terminals when the rule's Parse() function is called. Rules that +/// support this feature will have a constructor with a StartPoint parameter. +class XmlRule { + public: + /// For rules that support alternate starting points, this enum provides the + /// values at which a rule's Parse() function can begin. + enum StartPoint { + /// Start parsing at the first terminal position. + kFirstStartPoint, + + /// Start parsing at a second (alternative) position. + kSecondStartPoint, + }; + + virtual ~XmlRule() = default; + explicit XmlRule(const std::string& name); + + /// @return The name of the rule. + const std::string& GetName() const { return name_; } + + /// Parse the text indicated in the context's data segment and range and call + /// the context's XmlHandler functions as needed. The implementation of this + /// function makes use of the terminals contained by the rule, but it is + /// declared virtual so that subclasses can customize as needed. + /// @param context The context describing the text to parse and the handler + /// to call. + /// @return A result that indicates the type of match that occurred, the number + /// of bytes consumed and an error message if needed. + virtual DataMatchResult Parse(XmlHandlerContext context); + + /// Adds a literal terminal to the rule. + /// @param literal The literal value to scan for. + /// @return The terminal, enabling direct calls to WithName()/WithAction(). + XmlTerminal& AddLiteralTerminal(const std::string& literal); + + /// Adds a name terminal to the rule. + /// @return The terminal, enabling direct calls to WithName()/WithAction().
+ XmlTerminal& AddNameTerminal(); + + /// Adds a quoted string terminal to the rule. + /// @return The terminal, enabling direct calls to WithName()/WithAction(). + XmlTerminal& AddQuotedStringTerminal(); + + /// Adds a sentinel terminal to the rule. + /// @param sentinels The sentinel values to scan for. + /// @return The terminal, enabling direct calls to WithName()/WithAction(). + XmlTerminal& AddSentinelTerminal(const std::string& sentinels); + + /// Adds a scan through literal terminal to the rule. + /// @param literal The literal value to scan through. + /// @return The terminal, enabling direct calls to WithName()/WithAction(). + XmlTerminal& AddThroughLiteralTerminal(const std::string& literal); + + /// Adds a whitespace terminal to the rule. + /// @return The terminal, enabling direct calls to WithName()/WithAction(). + XmlTerminal& AddWhitespaceTerminal(); + + /// Adds an optional whitespace terminal to the rule. + /// @return The terminal, enabling direct calls to WithName()/WithAction(). + XmlTerminal& AddOptionalWhitespaceTerminal(); + + /// @return The number of terminals in the rule. + size_t GetTerminalCount() const { return terminals_.size(); } + + /// @return The index of the terminal currently parsing text. + size_t GetTerminalIndex() const { return terminal_index_; } + + /// @param name The name of the terminal to look for. + /// @return The index of the terminal with the given name, or the value + /// returned by the rule's GetTerminalCount() if not found. + size_t GetTerminalIndexFromName(const std::string name) const; + + /// @param terminal_index The index of the terminal that should next be used + /// for parsing the input text. + void SetTerminalIndex(size_t terminal_index); + + /// @return The terminal currently parsing text, or nullptr if there is none. + XmlTerminal* GetCurrentTerminal(); + + /// @param index The index of the terminal to get. + /// @return The terminal at the given index, or nullptr if index is invalid. 
+ XmlTerminal* GetTerminal(size_t index); + + /// Resets the scanner state of all the terminals in the rule. + void ResetTerminalScanners(); + + /// @return Whether the rule has a next rule for delegation. + bool HasNextRule() const; + + /// @return The next rule, with ownership passed to the caller. If there is no next rule, + /// the get function of the returned unique_ptr will return nullptr. + std::unique_ptr<XmlRule> ReleaseNextRule(); + + /// @param next_rule The new rule to use for delegation purposes. + void SetNextRule(std::unique_ptr<XmlRule> next_rule); + + private: + std::string name_; + std::vector<XmlTerminal> terminals_; + std::unique_ptr<XmlRule> next_rule_; + size_t terminal_index_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_RULE_H_ // NOLINT diff --git a/includes/image_io/xml/xml_terminal.h b/includes/image_io/xml/xml_terminal.h new file mode 100644 index 0000000..7d999f0 --- /dev/null +++ b/includes/image_io/xml/xml_terminal.h @@ -0,0 +1,61 @@ +#ifndef IMAGE_IO_XML_XML_TERMINAL_H_ // NOLINT +#define IMAGE_IO_XML_XML_TERMINAL_H_ // NOLINT + +#include <string> + +#include "image_io/base/data_scanner.h" +#include "image_io/xml/xml_action.h" +#include "image_io/xml/xml_token_context.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A terminal represents a part of a rule that uses a DataScanner to match +/// zero or more characters from a DataSource. A terminal can also have a name +/// that can be used in error messages and also used to identify it in a +/// rule. A terminal can also have an action function associated with it that it +/// can use to validate the token produced by the terminal/scanner, and do +/// further processing with the token. Finally, the terminal's action function +/// can manipulate the DataMatchResult that was produced by the terminal's +/// scanner and is accessible via the action function's XmlActionContext param.
+class XmlTerminal { + public: + explicit XmlTerminal(const DataScanner& scanner) : scanner_(scanner){} + + /// Sets the name of the terminal. Looks best with an XmlRule::AddTerminal + /// function: AddWhitespaceTerminal().WithName("SomeName"); + /// @param name The name to give to the terminal. + /// @return A reference to the terminal. + XmlTerminal& WithName(const std::string& name) { + name_ = name; + return *this; + } + + /// Sets the action of the terminal. Looks best with an XmlRule::AddTerminal + /// function: AddWhitespaceTerminal().WithAction(SomeAction); + /// @param action The action to give to the terminal. + /// @return A reference to the terminal. + XmlTerminal& WithAction(const XmlAction& action) { + action_ = action; + return *this; + } + + /// @return The terminal's scanner. + DataScanner* GetScanner() { return &scanner_; } + + /// @return The terminal's name. + const std::string& GetName() const { return name_; } + + /// @return The terminal's action function. + const XmlAction& GetAction() const { return action_; } + + private: + DataScanner scanner_; + XmlAction action_; + std::string name_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_TERMINAL_H_ // NOLINT diff --git a/includes/image_io/xml/xml_token_context.h b/includes/image_io/xml/xml_token_context.h new file mode 100644 index 0000000..cceca5c --- /dev/null +++ b/includes/image_io/xml/xml_token_context.h @@ -0,0 +1,62 @@ +#ifndef IMAGE_IO_XML_XML_TOKEN_CONTEXT_H_ // NOLINT +#define IMAGE_IO_XML_XML_TOKEN_CONTEXT_H_ // NOLINT + +#include <string> + +#include "image_io/base/data_context.h" +#include "image_io/base/data_match_result.h" +#include "image_io/base/data_range.h" +#include "image_io/xml/xml_portion.h" + +namespace photos_editing_formats { +namespace image_io { + +class XmlActionContext; + +/// A token context is passed from the action of an XmlTerminal to an XmlHandler +/// associated with the XmlActionContext used to call the 
action function. +class XmlTokenContext : public DataContext { + public: + explicit XmlTokenContext(const XmlActionContext& context); + XmlTokenContext(size_t location, const DataRange& range, + const DataSegment& segment, const DataLineMap& data_line_map, + const DataMatchResult& result, const DataRange& token_range, + const XmlPortion& token_portion); + + /// @return The result associated with the context. + const DataMatchResult& GetResult() const { return result_; } + + /// @return The token range for the token. Note that the token range may not + /// be a subrange of the context's GetRange() or even the context's segment's + /// data range. Such would be the case when a token's value is split across + /// two or more data segments. + const DataRange& GetTokenRange() const { return token_range_; } + + /// @return The portion of the token that this context represents. This + /// portion value can be the bitwise OR of any of the XmlPortion bit values. + const XmlPortion& GetTokenPortion() const { return token_portion_; } + + /// Builds the string value of the token. If the context's token portion has + /// the XmlPortion::kBegin bit set, the string value is first cleared. Then + /// the string is extracted from the context's data source and appended onto + /// the value. Remember that some token values (especially attribute values) + /// can be quite long, so care should be exercised when obtaining values with + /// this function. + /// @param value The value of the token being built. + /// @return Whether the token value is complete (i.e., the context's portion + /// had the XmlPortion::kEnd bit set).
+ bool BuildTokenValue(std::string* value) const; + + static XmlPortion ComputeTokenPortion(size_t token_scan_count, + DataMatchResult::Type result_type); + + private: + DataMatchResult result_; + DataRange token_range_; + XmlPortion token_portion_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_TOKEN_CONTEXT_H_ // NOLINT |