diff options
Diffstat (limited to 'src/xml')
-rw-r--r-- | src/xml/xml_attribute_rule.cc | 32 | ||||
-rw-r--r-- | src/xml/xml_cdata_and_comment_rules.cc | 83 | ||||
-rw-r--r-- | src/xml/xml_element_rules.cc | 182 | ||||
-rw-r--r-- | src/xml/xml_handler.cc | 39 | ||||
-rw-r--r-- | src/xml/xml_pi_rule.cc | 28 | ||||
-rw-r--r-- | src/xml/xml_reader.cc | 189 | ||||
-rw-r--r-- | src/xml/xml_rule.cc | 187 | ||||
-rw-r--r-- | src/xml/xml_token_context.cc | 119 | ||||
-rw-r--r-- | src/xml/xml_writer.cc | 141 |
9 files changed, 1000 insertions, 0 deletions
diff --git a/src/xml/xml_attribute_rule.cc b/src/xml/xml_attribute_rule.cc new file mode 100644 index 0000000..955e60c --- /dev/null +++ b/src/xml/xml_attribute_rule.cc @@ -0,0 +1,32 @@ +#include "image_io/xml/xml_attribute_rule.h" + +#include "image_io/xml/xml_handler.h" +#include "image_io/xml/xml_token_context.h" + +namespace photos_editing_formats { +namespace image_io { + +XmlAttributeRule::XmlAttributeRule() : XmlRule("Attribute") { + // S? Name S? = S? 'Value' + AddOptionalWhitespaceTerminal(); + AddNameTerminal().WithAction( + [&](const XmlActionContext& context) { return HandleName(context); }); + AddOptionalWhitespaceTerminal(); + AddLiteralTerminal("="); + AddOptionalWhitespaceTerminal(); + AddQuotedStringTerminal().WithAction( + [&](const XmlActionContext& context) { return HandleValue(context); }); +} + +DataMatchResult XmlAttributeRule::HandleName(const XmlActionContext& context) { + XmlTokenContext token_context(context); + return context.GetHandler()->AttributeName(token_context); +} + +DataMatchResult XmlAttributeRule::HandleValue(const XmlActionContext& context) { + XmlTokenContext token_context(context); + return context.GetHandler()->AttributeValue(token_context); +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/xml/xml_cdata_and_comment_rules.cc b/src/xml/xml_cdata_and_comment_rules.cc new file mode 100644 index 0000000..d3a4d50 --- /dev/null +++ b/src/xml/xml_cdata_and_comment_rules.cc @@ -0,0 +1,83 @@ +#include "image_io/xml/xml_cdata_and_comment_rules.h" + +#include <utility> + +#include "image_io/xml/xml_handler.h" +#include "image_io/xml/xml_token_context.h" + +namespace photos_editing_formats { +namespace image_io { + +XmlCdataRule::XmlCdataRule() : XmlCdataRule(kFirstStartPoint) {} + +XmlCdataRule::XmlCdataRule(StartPoint start_point) : XmlRule("CDATA") { + // <![CDATA[ ... 
]]> + AddLiteralTerminal("<!"); + AddLiteralTerminal("[CDATA["); + AddThroughLiteralTerminal("]]>").WithAction( + [&](const XmlActionContext& context) { + return HandleCdataValue(context); + }); + if (start_point == kSecondStartPoint) { + SetTerminalIndex(1); + } +} + +DataMatchResult XmlCdataRule::HandleCdataValue( + const XmlActionContext& context) { + XmlTokenContext token_context(context); + return context.GetHandler()->Cdata(token_context); +} + +XmlCommentRule::XmlCommentRule() : XmlCommentRule(kFirstStartPoint) {} + +XmlCommentRule::XmlCommentRule(StartPoint start_point) : XmlRule("Comment") { + // <!-- ... --> + AddLiteralTerminal("<!"); + AddLiteralTerminal("--"); + AddThroughLiteralTerminal("-->").WithAction( + [&](const XmlActionContext& context) { + return HandleCommentValue(context); + }); + if (start_point == kSecondStartPoint) { + SetTerminalIndex(1); + } +} + +DataMatchResult XmlCommentRule::HandleCommentValue( + const XmlActionContext& context) { + XmlTokenContext token_context(context); + return context.GetHandler()->Comment(token_context); +} + +XmlCdataOrCommentRule::XmlCdataOrCommentRule() + : XmlCdataOrCommentRule(kFirstStartPoint) {} + +XmlCdataOrCommentRule::XmlCdataOrCommentRule(StartPoint start_point) + : XmlRule("CdataOrComment") { + // <![CDATA[ ... ]]> or <!-- ... --> + // So after the initial "<!" literal can come a "[" or a "-". 
+ AddLiteralTerminal("<!"); + AddSentinelTerminal("[-").WithAction([&](const XmlActionContext& context) { + return HandlePostBangChar(context); + }); + if (start_point == kSecondStartPoint) { + SetTerminalIndex(1); + } +} + +DataMatchResult XmlCdataOrCommentRule::HandlePostBangChar( + const XmlActionContext& context) { + char sentinel = context.GetTerminal()->GetScanner()->GetSentinel(); + if (sentinel == '[') { + std::unique_ptr<XmlRule> rule(new XmlCdataRule(kSecondStartPoint)); + SetNextRule(std::move(rule)); + } else if (sentinel == '-') { + std::unique_ptr<XmlRule> rule(new XmlCommentRule(kSecondStartPoint)); + SetNextRule(std::move(rule)); + } + return context.GetResultWithBytesConsumed(0); +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/xml/xml_element_rules.cc b/src/xml/xml_element_rules.cc new file mode 100644 index 0000000..53feb87 --- /dev/null +++ b/src/xml/xml_element_rules.cc @@ -0,0 +1,182 @@ +#include "image_io/xml/xml_element_rules.h" + +#include <utility> + +#include "image_io/xml/xml_attribute_rule.h" +#include "image_io/xml/xml_cdata_and_comment_rules.h" +#include "image_io/xml/xml_handler.h" +#include "image_io/xml/xml_pi_rule.h" +#include "image_io/xml/xml_token_context.h" + +namespace photos_editing_formats { +namespace image_io { + +namespace { + +/// Some names of terminals used by these rules. +const char kWhitespace[] = "Whitespace"; +const char kEmptyElementEnd[] = "EmptyElementEnd"; +const char kElementEnd[] = "ElementEnd"; +const char kElementSentinalDescription[] = + "The start of an attribute name or the end of the element ('>' or '/>')"; + +/// A shortcut for referring to all XmlPortion bits. +const XmlPortion kAllPortions = + XmlPortion::kBegin | XmlPortion::kMiddle | XmlPortion::kEnd; + +/// @param context The action context passed to an action handler. +/// @param token_range The token range to use when building the token context. 
+/// @param portion The token portion to use when building the token context. +/// @return A token context for use in calling an XmlHandler function. +XmlTokenContext GetTokenContext(const XmlActionContext& context, + const DataRange& token_range, + XmlPortion portion) { + return XmlTokenContext(context.GetLocation(), context.GetRange(), + context.GetSegment(), context.GetDataLineMap(), + context.GetResult(), token_range, portion); +} + +} // namespace + +XmlElementRule::XmlElementRule() : XmlElementRule(kFirstStartPoint) {} + +XmlElementRule::XmlElementRule(XmlRule::StartPoint start_point) + : XmlRule("Element") { + AddLiteralTerminal("<"); + AddNameTerminal().WithAction( + [&](const XmlActionContext& context) { return HandleName(context); }); + AddOptionalWhitespaceTerminal().WithName(kWhitespace); + AddSentinelTerminal("~/>") + .WithDescription(kElementSentinalDescription) + .WithAction([&](const XmlActionContext& context) { + return HandlePostWhitespaceChar(context); + }); + AddLiteralTerminal("/>") + .WithName(kEmptyElementEnd) + .WithAction([&](const XmlActionContext& context) { + return HandleEmptyElemTagEnd(context); + }); + AddLiteralTerminal(">") + .WithName(kElementEnd) + .WithAction([&](const XmlActionContext& context) { + return HandleSTagEnd(context); + }); + if (start_point == kSecondStartPoint) { + SetTerminalIndex(1); + } +} + +DataMatchResult XmlElementRule::HandleName(const XmlActionContext& context) { + XmlTokenContext token_context(context); + return context.GetHandler()->StartElement(token_context); +} + +DataMatchResult XmlElementRule::HandlePostWhitespaceChar( + const XmlActionContext& context) { + DataMatchResult result = context.GetResultWithBytesConsumed(0); + char sentinel = context.GetTerminal()->GetScanner()->GetSentinel(); + if (sentinel == '/') { + size_t index = GetTerminalIndexFromName(kEmptyElementEnd); + SetTerminalIndex(index); + } else if (sentinel == '>') { + size_t index = GetTerminalIndexFromName(kElementEnd); + 
SetTerminalIndex(index); + } else if (sentinel == '~') { + std::unique_ptr<XmlRule> rule(new XmlAttributeRule); + SetNextRule(std::move(rule)); + ResetTerminalScanners(); + size_t index = GetTerminalIndexFromName(kWhitespace); + SetTerminalIndex(index); + result.SetType(DataMatchResult::kPartial); + } + return result; +} + +DataMatchResult XmlElementRule::HandleEmptyElemTagEnd( + const XmlActionContext& context) { + SetTerminalIndex(GetTerminalCount()); + return context.GetHandler()->FinishElement( + GetTokenContext(context, DataRange(), XmlPortion::kNone)); +} + +DataMatchResult XmlElementRule::HandleSTagEnd(const XmlActionContext& context) { + DataMatchResult result = context.GetResult(); + std::unique_ptr<XmlRule> rule(new XmlElementContentRule); + SetNextRule(std::move(rule)); + return result; +} + +XmlElementContentRule::XmlElementContentRule() : XmlRule("ElementContent") { + // ElementContent until + // <N... Element + // <?N ... ?> PI + // <!-- ... --> Comment + // <![CDATA[ ... ]]> CDATA + // </Nws> Element Etag + // &...; EntityRef or CharRef (Don't care about this) + AddThroughLiteralTerminal("<").WithAction( + [&](const XmlActionContext& context) { return HandleContent(context); }); + AddSentinelTerminal("~?!/").WithAction([&](const XmlActionContext& context) { + return HandlePostOpenChar(context); + }); + AddNameTerminal().WithAction( + [&](const XmlActionContext& context) { return HandleEndTag(context); }); + AddLiteralTerminal(">"); +} + +DataMatchResult XmlElementContentRule::HandleContent( + const XmlActionContext& context) { + const auto& range = context.GetTerminal()->GetScanner()->GetTokenRange(); + if (range.IsValid()) { + size_t end = context.GetResult().GetType() == DataMatchResult::kFull + ? 
range.GetEnd() - 1 + : range.GetEnd(); + DataRange token_range(range.GetBegin(), end); + if (token_range.GetLength() > 0) { + XmlTokenContext token_context = + GetTokenContext(context, token_range, kAllPortions); + DataMatchResult result = + context.GetHandler()->ElementContent(token_context); + context.GetTerminal()->GetScanner()->ResetTokenRange(); + return result; + } + } + context.GetTerminal()->GetScanner()->ResetTokenRange(); + return context.GetResult(); +} + +DataMatchResult XmlElementContentRule::HandlePostOpenChar( + const XmlActionContext& context) { + DataMatchResult result = context.GetResult(); + char sentinel = context.GetTerminal()->GetScanner()->GetSentinel(); + if (sentinel == '~') { + result.SetBytesConsumed(0); + result.SetType(DataMatchResult::kPartial); + std::unique_ptr<XmlRule> rule(new XmlElementRule(kSecondStartPoint)); + SetNextRule(std::move(rule)); + } else if (sentinel == '?') { + result.SetType(DataMatchResult::kPartial); + std::unique_ptr<XmlRule> rule(new XmlPiRule(kSecondStartPoint)); + SetNextRule(std::move(rule)); + } else if (sentinel == '!') { + result.SetType(DataMatchResult::kPartial); + std::unique_ptr<XmlRule> rule(new XmlCdataOrCommentRule(kSecondStartPoint)); + SetNextRule(std::move(rule)); + } else if (sentinel == '/') { + // Do nothing so that the next terminals (the 'name>' part of '</name>') + // will be activated and scanned. 
+ return context.GetResult(); + } + ResetTerminalScanners(); + SetTerminalIndex(0); + return result; +} + +DataMatchResult XmlElementContentRule::HandleEndTag( + const XmlActionContext& context) { + XmlTokenContext token_context(context); + return context.GetHandler()->FinishElement(token_context); +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/xml/xml_handler.cc b/src/xml/xml_handler.cc new file mode 100644 index 0000000..591d43c --- /dev/null +++ b/src/xml/xml_handler.cc @@ -0,0 +1,39 @@ +#include "image_io/xml/xml_handler.h" + +namespace photos_editing_formats { +namespace image_io { + +DataMatchResult XmlHandler::AttributeName(const XmlTokenContext& context) { + return context.GetResult(); +} + +DataMatchResult XmlHandler::AttributeValue(const XmlTokenContext& context) { + return context.GetResult(); +} + +DataMatchResult XmlHandler::StartElement(const XmlTokenContext& context) { + return context.GetResult(); +} + +DataMatchResult XmlHandler::FinishElement(const XmlTokenContext& context) { + return context.GetResult(); +} + +DataMatchResult XmlHandler::ElementContent(const XmlTokenContext& context) { + return context.GetResult(); +} + +DataMatchResult XmlHandler::Comment(const XmlTokenContext& context) { + return context.GetResult(); +} + +DataMatchResult XmlHandler::Cdata(const XmlTokenContext& context) { + return context.GetResult(); +} + +DataMatchResult XmlHandler::Pi(const XmlTokenContext& context) { + return context.GetResult(); +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/xml/xml_pi_rule.cc b/src/xml/xml_pi_rule.cc new file mode 100644 index 0000000..071b8fd --- /dev/null +++ b/src/xml/xml_pi_rule.cc @@ -0,0 +1,28 @@ +#include "image_io/xml/xml_pi_rule.h" + +#include "image_io/xml/xml_handler.h" +#include "image_io/xml/xml_token_context.h" + +namespace photos_editing_formats { +namespace image_io { + +XmlPiRule::XmlPiRule() : XmlPiRule(kFirstStartPoint) {} + 
+XmlPiRule::XmlPiRule(XmlRule::StartPoint start_point) : XmlRule("PI") { + // <? ... ?> + AddLiteralTerminal("<?"); + AddThroughLiteralTerminal("?>").WithAction( + [&](const XmlActionContext& context) { return HandlePiValue(context); }); + if (start_point == kSecondStartPoint) { + SetTerminalIndex(1); + } +} + +DataMatchResult XmlPiRule::HandlePiValue(const XmlActionContext& context) { + XmlTokenContext token_context(context); + DataMatchResult result = context.GetHandler()->Pi(token_context); + return result; +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/xml/xml_reader.cc b/src/xml/xml_reader.cc new file mode 100644 index 0000000..467cb82 --- /dev/null +++ b/src/xml/xml_reader.cc @@ -0,0 +1,189 @@ +#include "image_io/xml/xml_reader.h" + +#include <iomanip> +#include <sstream> +#include <string> +#include <utility> + +#include "image_io/base/message.h" +#include "image_io/base/message_handler.h" + +namespace photos_editing_formats { +namespace image_io { + +namespace { + +/// The reader name used for error messages. 
+const char kReaderName[] = "XmlReader"; + +} // namespace + +bool XmlReader::StartParse(std::unique_ptr<XmlRule> rule) { + bytes_parsed_ = 0; + rule_stack_.clear(); + if (!rule) { + std::string text = std::string(kReaderName) + ":StartParse:NoTopLevelRule"; + Message message(Message::kInternalError, 0, text); + ReportError(message); + return false; + } + rule_stack_.push_back(std::move(rule)); + has_internal_or_syntax_error_ = false; + has_errors_ = false; + return true; +} + +bool XmlReader::FinishParse() { + if (has_internal_or_syntax_error_) { + return false; + } + std::string error_text; + if (rule_stack_.empty() || + (rule_stack_.size() == 1 && + rule_stack_.back()->IsPermissibleToFinish(&error_text))) { + return true; + } + std::stringstream ss; + ss << kReaderName << ":"; + if (error_text.empty()) { + ss << "While parsing text with rule:"; + ss << rule_stack_.back()->GetName(); + XmlTerminal* terminal = rule_stack_.back()->GetCurrentTerminal(); + if (terminal) { + if (!terminal->GetName().empty()) { + ss << ":" << terminal->GetName(); + } + ss << ":" << terminal->GetScanner()->GetDescription(); + } + } else { + ss << error_text; + } + Message message(Message::kPrematureEndOfDataError, 0, ss.str()); + has_internal_or_syntax_error_ = true; + ReportError(message); + return false; +} + +bool XmlReader::Parse(const std::string& value) { + size_t location = GetBytesParsed(); + DataRange range(location, location + value.length()); + const Byte* bytes = reinterpret_cast<const Byte*>(value.c_str()); + auto segment = DataSegment::Create(range, bytes, DataSegment::kDontDelete); + return Parse(location, range, *segment); +} + +bool XmlReader::Parse(size_t start_location, const DataRange& range, + const DataSegment& segment) { + if (has_internal_or_syntax_error_) { + return false; + } + XmlHandlerContext context(start_location, range, segment, *data_line_map_, + handler_); + InitializeContextNameList(&context); + if (!context.IsValidLocationAndRange()) { + 
DataMatchResult result; + result.SetMessage(Message::kInternalError, + context.GetInvalidLocationAndRangeErrorText()); + ReportError(result, context); + return false; + } + if (rule_stack_.empty()) { + DataMatchResult result; + result.SetMessage(Message::kInternalError, "NoActiveRule"); + ReportError(result, context); + return false; + } + if (data_line_map_ == &internal_data_line_map_) { + internal_data_line_map_.FindDataLines(range, segment); + } + size_t bytes_remaining = range.GetEnd() - start_location; + while (bytes_remaining > 0 && !rule_stack_.empty() && + !has_internal_or_syntax_error_) { + auto& rule = rule_stack_.back(); + InitializeContextNameList(&context); + DataMatchResult result = rule->Parse(context); + switch (result.GetType()) { + case DataMatchResult::kError: + case DataMatchResult::kNone: + ReportError(result, context); + break; + case DataMatchResult::kPartial: + ReportMessageIfNeeded(result); + bytes_parsed_ += result.GetBytesConsumed(); + bytes_remaining -= result.GetBytesConsumed(); + context.IncrementLocation(result.GetBytesConsumed()); + if (rule->HasNextRule()) { + // Delegation by child rule: push the next. + rule_stack_.push_back(rule->ReleaseNextRule()); + } + break; + case DataMatchResult::kPartialOutOfData: + ReportMessageIfNeeded(result); + bytes_parsed_ += result.GetBytesConsumed(); + return true; + case DataMatchResult::kFull: + ReportMessageIfNeeded(result); + bytes_parsed_ += result.GetBytesConsumed(); + bytes_remaining -= result.GetBytesConsumed(); + context.IncrementLocation(result.GetBytesConsumed()); + if (rule->HasNextRule()) { + // Delegation by chaining: pop the current rule and push the next. 
+ auto next_rule = rule->ReleaseNextRule(); + rule_stack_.pop_back(); + rule_stack_.push_back(std::move(next_rule)); + } else { + rule_stack_.pop_back(); + } + break; + } + } + if (bytes_remaining > 0 && rule_stack_.empty()) { + InitializeContextNameList(&context); + std::string text = context.GetErrorText("NoActiveRule", ""); + Message message(Message::kSyntaxError, 0, text); + ReportError(message); + return false; + } + return !has_internal_or_syntax_error_; +} + +void XmlReader::InitializeContextNameList(XmlHandlerContext* context) { + auto name_list = context->GetNameList(); + name_list.clear(); + name_list.push_back(kReaderName); + if (!rule_stack_.empty()) { + name_list.push_back(rule_stack_.back()->GetName()); + } +} + +void XmlReader::ReportMessageIfNeeded(const DataMatchResult& result) { + if (result.HasMessage()) { + ReportError(result.GetMessage()); + } +} + +void XmlReader::ReportError(const DataMatchResult& result, + const DataContext& context) { + if (!result.HasMessage()) { + Message message(Message::kInternalError, 0, + context.GetErrorText("Rule had error but no message", "")); + ReportError(message); + } + ReportError(result.GetMessage()); +} + +void XmlReader::ReportError(const Message& message) { + if (message_handler_) { + message_handler_->ReportMessage(message); + } + if (message.GetType() == Message::kInternalError || + message.GetType() == Message::kSyntaxError) { + has_internal_or_syntax_error_ = true; + } + if (message.IsError()) { + has_errors_ = true; + } +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/xml/xml_rule.cc b/src/xml/xml_rule.cc new file mode 100644 index 0000000..793381c --- /dev/null +++ b/src/xml/xml_rule.cc @@ -0,0 +1,187 @@ +#include "image_io/xml/xml_rule.h" + +#include <string> +#include <utility> + +#include "image_io/base/data_scanner.h" + +namespace photos_editing_formats { +namespace image_io { + +using std::string; +using std::unique_ptr; + +namespace { + +/// A scanner is 
reentrant if it ran out of data. In these cases, the next data +/// segment sent into the rule for parsing may be non-contiguous with the +/// previous one. If that is the case, update the scanner's token length to +/// account for the missing bytes. (Scanner token ranges represent a bounding +/// box around the token value - in these cases the actual token value is really +/// a vector of ranges. Client handlers are responsible for dealing with that +/// reality, not the scanner or rule). +/// @param scanner The current possibly reentrant scanner. +/// @param context_range The new data range that is to be parsed. +void MaybeUpdateTokenLengthForReentrantScanner(DataScanner* scanner, + const DataRange& context_range) { + const auto& token_range = scanner->GetTokenRange(); + if (scanner->GetScanCallCount() > 0 && token_range.IsValid() && + context_range.GetBegin() > token_range.GetEnd()) { + size_t skipped_byte_count = context_range.GetBegin() - token_range.GetEnd(); + scanner->ExtendTokenLength(skipped_byte_count); + } +} + +} // namespace + +XmlRule::XmlRule(const std::string& name) : name_(name), terminal_index_(0) {} + +XmlTerminal& XmlRule::AddLiteralTerminal(const std::string& literal) { + terminals_.emplace_back(DataScanner::CreateLiteralScanner(literal)); + return terminals_.back(); +} + +XmlTerminal& XmlRule::AddNameTerminal() { + terminals_.emplace_back(DataScanner::CreateNameScanner()); + return terminals_.back(); +} + +XmlTerminal& XmlRule::AddQuotedStringTerminal() { + terminals_.emplace_back(DataScanner::CreateQuotedStringScanner()); + return terminals_.back(); +} + +XmlTerminal& XmlRule::AddSentinelTerminal(const std::string& sentinels) { + terminals_.emplace_back(DataScanner::CreateSentinelScanner(sentinels)); + return terminals_.back(); +} + +XmlTerminal& XmlRule::AddThroughLiteralTerminal(const std::string& literal) { + terminals_.emplace_back(DataScanner::CreateThroughLiteralScanner(literal)); + return terminals_.back(); +} + +XmlTerminal& 
XmlRule::AddWhitespaceTerminal() { + terminals_.emplace_back(DataScanner::CreateWhitespaceScanner()); + return terminals_.back(); +} + +XmlTerminal& XmlRule::AddOptionalWhitespaceTerminal() { + terminals_.emplace_back(DataScanner::CreateOptionalWhitespaceScanner()); + return terminals_.back(); +} + +size_t XmlRule::GetTerminalIndexFromName(const std::string name) const { + if (!name.empty()) { + for (size_t index = 0; index < terminals_.size(); ++index) { + if (terminals_[index].GetName() == name) { + return index; + } + } + } + return terminals_.size(); +} + +void XmlRule::SetTerminalIndex(size_t terminal_index) { + terminal_index_ = terminal_index; +} + +XmlTerminal* XmlRule::GetCurrentTerminal() { + return terminal_index_ < terminals_.size() ? &terminals_[terminal_index_] + : nullptr; +} + +XmlTerminal* XmlRule::GetTerminal(size_t index) { + return index < terminals_.size() ? &terminals_[index] : nullptr; +} + +void XmlRule::ResetTerminalScanners() { + for (auto& terminal : terminals_) { + terminal.GetScanner()->Reset(); + } +} + +bool XmlRule::IsPermissibleToFinish(std::string*) const { + return false; +} + +DataMatchResult XmlRule::Parse(XmlHandlerContext context) { + DataMatchResult result; + if (!context.IsValidLocationAndRange()) { + result.SetType(DataMatchResult::kError); + result.SetMessage(Message::kInternalError, + context.GetInvalidLocationAndRangeErrorText()); + return result; + } + bool force_parse_return = false; + size_t bytes_available = context.GetBytesAvailable(); + size_t current_terminal_index = GetTerminalIndex(); + if (current_terminal_index < terminals_.size()) { + MaybeUpdateTokenLengthForReentrantScanner( + terminals_[current_terminal_index].GetScanner(), context.GetRange()); + } + while (!force_parse_return && current_terminal_index < terminals_.size() && + bytes_available > 0) { + SetTerminalIndex(current_terminal_index); + auto& terminal = terminals_[current_terminal_index]; + DataMatchResult scanner_result = 
terminal.GetScanner()->Scan(context); + if (terminal.GetAction() && + (scanner_result.GetType() == DataMatchResult::kFull || + scanner_result.GetType() == DataMatchResult::kPartialOutOfData)) { + XmlActionContext action_context(context, &terminal, scanner_result); + scanner_result = terminal.GetAction()(action_context); + } + result.SetType(scanner_result.GetType()); + result.IncrementBytesConsumed(scanner_result.GetBytesConsumed()); + context.IncrementLocation(scanner_result.GetBytesConsumed()); + bytes_available -= scanner_result.GetBytesConsumed(); + switch (scanner_result.GetType()) { + case DataMatchResult::kError: + result.SetMessage(scanner_result.GetMessage()); + force_parse_return = true; + break; + case DataMatchResult::kNone: + result.SetType(DataMatchResult::kError); + result.SetMessage( + Message::kInternalError, + context.GetErrorText("Invalid scanner match result", + terminal.GetScanner()->GetDescription())); + force_parse_return = true; + break; + case DataMatchResult::kPartial: + case DataMatchResult::kPartialOutOfData: + if (scanner_result.HasMessage()) { + result.SetMessage(scanner_result.GetMessage()); + } + force_parse_return = true; + break; + case DataMatchResult::kFull: + if (scanner_result.HasMessage() && !result.HasMessage()) { + result.SetMessage(scanner_result.GetMessage()); + } + current_terminal_index = current_terminal_index == GetTerminalIndex() + ? 
current_terminal_index + 1 + : GetTerminalIndex(); + SetTerminalIndex(current_terminal_index); + if (current_terminal_index < GetTerminalCount()) { + result.SetType(DataMatchResult::kPartial); + } + force_parse_return = HasNextRule(); + break; + } + } + return result; +} + +bool XmlRule::HasNextRule() const { return next_rule_ != nullptr; } + +std::unique_ptr<XmlRule> XmlRule::ReleaseNextRule() { + return std::move(next_rule_); +} + +void XmlRule::SetNextRule(std::unique_ptr<XmlRule> next_rule) { + next_rule_ = std::move(next_rule); +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/xml/xml_token_context.cc b/src/xml/xml_token_context.cc new file mode 100644 index 0000000..4ffea3f --- /dev/null +++ b/src/xml/xml_token_context.cc @@ -0,0 +1,119 @@ +#include "image_io/xml/xml_token_context.h" + +#include <string> + +#include "image_io/xml/xml_action.h" +#include "image_io/xml/xml_terminal.h" + +namespace photos_editing_formats { +namespace image_io { + +using std::vector; + +namespace { + +const XmlPortion kAllPortions = + XmlPortion::kBegin | XmlPortion::kMiddle | XmlPortion::kEnd; + +XmlPortion GetPortion(const XmlActionContext& context) { + return XmlTokenContext::ComputeTokenPortion( + context.GetTerminal()->GetScanner()->GetScanCallCount(), + context.GetResult().GetType()); +} + +} // namespace + +XmlTokenContext::XmlTokenContext(const XmlActionContext& context) + : DataContext(context), + result_(context.GetResult()), + token_range_(context.GetTerminal()->GetScanner()->GetTokenRange()), + token_portion_(GetPortion(context)) {} + +XmlTokenContext::XmlTokenContext(size_t location, const DataRange& range, + const DataSegment& segment, + const DataLineMap& data_line_map, + const DataMatchResult& result, + const DataRange& token_range, + const XmlPortion& token_portion) + : DataContext(location, range, segment, data_line_map), + result_(result), + token_range_(token_range), + token_portion_(token_portion) {} + +bool 
XmlTokenContext::BuildTokenValue(std::string* value, + bool trim_first_and_last_chars) const { + bool contains_end = ContainsAny(token_portion_, XmlPortion::kEnd); + size_t end_delta = trim_first_and_last_chars && contains_end ? 1 : 0; + size_t begin_delta = 0; + if (ContainsAny(token_portion_, XmlPortion::kBegin)) { + begin_delta = trim_first_and_last_chars ? 1 : 0; + value->clear(); + } + if (ContainsAny(token_portion_, kAllPortions)) { + const auto& segment = GetSegment(); + DataRange range_with_deltas(token_range_.GetBegin() + begin_delta, + token_range_.GetEnd() - end_delta); + auto clipped_range = GetRange().GetIntersection(range_with_deltas); + if (clipped_range.IsValid()) { + const char* cbytes = reinterpret_cast<const char*>( + segment.GetBuffer(clipped_range.GetBegin())); + value->append(cbytes, clipped_range.GetLength()); + } + } + return contains_end; +} + +bool XmlTokenContext::BuildTokenValueRanges( + vector<DataRange>* value_ranges, bool trim_first_and_last_chars) const { + size_t delta = trim_first_and_last_chars ? 
1 : 0; + auto clipped_range = GetRange().GetIntersection(token_range_); + if (ContainsAny(token_portion_, XmlPortion::kBegin)) { + value_ranges->clear(); + if (clipped_range.IsValid()) { + value_ranges->push_back( + DataRange(clipped_range.GetBegin() + delta, clipped_range.GetEnd())); + } + + } else if (ContainsAny(token_portion_, kAllPortions)) { + if (clipped_range.IsValid()) { + if (!value_ranges->empty() && + value_ranges->back().GetEnd() == clipped_range.GetBegin()) { + value_ranges->back() = + DataRange(value_ranges->back().GetBegin(), clipped_range.GetEnd()); + } else { + value_ranges->push_back(clipped_range); + } + } + } + bool has_end = ContainsAny(token_portion_, XmlPortion::kEnd); + if (has_end && !value_ranges->empty() && clipped_range.IsValid() && + trim_first_and_last_chars) { + auto& back_range = value_ranges->back(); + back_range = DataRange(back_range.GetBegin(), back_range.GetEnd() - delta); + } + return has_end; +} + +XmlPortion XmlTokenContext::ComputeTokenPortion( + size_t token_scan_count, DataMatchResult::Type result_type) { + const bool first_scan = token_scan_count == 1; + const bool subsequent_scan = token_scan_count > 1; + const bool full_match = result_type == DataMatchResult::kFull; + const bool partial_match = + result_type == DataMatchResult::kPartialOutOfData || + result_type == DataMatchResult::kPartial; + XmlPortion portion = XmlPortion::kNone; + if (first_scan && full_match) { + portion = kAllPortions; + } else if (first_scan && partial_match) { + portion = XmlPortion::kBegin | XmlPortion::kMiddle; + } else if (subsequent_scan && full_match) { + portion = XmlPortion::kMiddle | XmlPortion::kEnd; + } else if (subsequent_scan && partial_match) { + portion = XmlPortion::kMiddle; + } + return portion; +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/xml/xml_writer.cc b/src/xml/xml_writer.cc new file mode 100644 index 0000000..e280b66 --- /dev/null +++ b/src/xml/xml_writer.cc @@ -0,0 +1,141 @@ 
+#include "image_io/xml/xml_writer.h" + +#include <iomanip> +#include <string> + +namespace photos_editing_formats { +namespace image_io { + +using std::ostream; +using std::string; +using std::vector; + +namespace { + +const char kXmlnsColon[] = "xmlns:"; + +} // namespace + +XmlWriter::XmlWriter(std::ostream& os) + : os_(os), element_count_(0), quote_mark_('"') {} + +void XmlWriter::WriteXmlns(const string& prefix, const string& uri) { + string name = string(kXmlnsColon) + prefix; + WriteAttributeNameAndValue(name, uri, true); +} + +size_t XmlWriter::StartWritingElement(const string& element_name) { + MaybeWriteCloseBracket(true); + size_t current_depth = element_data_.size(); + if (current_depth > 0) { + element_data_.back().has_children = true; + } + element_data_.emplace_back(element_name); + os_ << indent_ << "<" << element_name; + indent_ += " "; + element_count_ += 1; + return current_depth; +} + +void XmlWriter::FinishWritingElement() { + if (!element_data_.empty()) { + if (indent_.size() >= 2) { + indent_.resize(indent_.size() - 2); + } + auto& data = element_data_.back(); + if (!data.has_content && !data.has_children) { + if (!data.has_attributes || data.has_children) { + os_ << indent_; + } + os_ << "/>" << std::endl; + } else { + if (!data.has_content) { + os_ << indent_; + } + os_ << "</" << data.name << ">" << std::endl; + } + element_data_.pop_back(); + } +} + +void XmlWriter::FinishWritingElementsToDepth(size_t depth) { + if (!element_data_.empty()) { + for (size_t index = element_data_.size(); index > depth; --index) { + FinishWritingElement(); + } + } +} + +size_t XmlWriter::StartWritingElements(const vector<string>& element_names) { + size_t current_depth = element_data_.size(); + for (const auto& element_name : element_names) { + StartWritingElement(element_name); + } + return current_depth; +} + +void XmlWriter::WriteElementAndContent(const string& element_name, + const string& content) { + StartWritingElement(element_name); + 
WriteContent(content); + FinishWritingElement(); +} + +void XmlWriter::WriteContent(const string& content) { + MaybeWriteCloseBracket(false); + if (!element_data_.empty()) { + auto& data = element_data_.back(); + data.has_content = true; + os_ << content; + } +} + +void XmlWriter::WriteAttributeNameAndValue(const string& name, + const string& value, + bool add_quote_marks) { + WriteAttributeName(name); + WriteAttributeValue(add_quote_marks, value, add_quote_marks); +} + +void XmlWriter::WriteAttributeName(const string& name) { + if (!element_data_.empty()) { + os_ << std::endl << indent_ << name << "="; + element_data_.back().has_attributes = true; + } +} + +void XmlWriter::WriteAttributeValue(bool add_leading_quote_mark, + const string& value, + bool add_trailing_quote_mark) { + if (!element_data_.empty()) { + if (add_leading_quote_mark) os_ << quote_mark_; + os_ << value; + if (add_trailing_quote_mark) os_ << quote_mark_; + } +} + +void XmlWriter::WriteComment(const std::string& comment) { + MaybeWriteCloseBracket(true); + os_ << indent_ << "<!-- " << comment << " -->" << std::endl; + if (!element_data_.empty()) { + auto& data = element_data_.back(); + data.has_children = true; + } +} + +bool XmlWriter::MaybeWriteCloseBracket(bool with_trailing_newline) { + if (!element_data_.empty()) { + auto& data = element_data_.back(); + if (!data.has_content && !data.has_children) { + os_ << ">"; + if (with_trailing_newline) { + os_ << std::endl; + } + return true; + } + } + return false; +} + +} // namespace image_io +} // namespace photos_editing_formats |