#include "image_io/base/data_scanner.h"

#include <algorithm>
#include <cstring>
#include <string>

namespace photos_editing_formats {
namespace image_io {

namespace {

/// The characters that the whitespace scanners treat as whitespace.
const char kWhitespaceChars[] = " \t\n\r";

/// This function is like strspn but does not assume a null-terminated string.
/// The length is checked before each byte is read, so no byte at or beyond
/// s + slen is ever accessed.
/// @param s The bytes to examine; need not be null-terminated.
/// @param slen The number of bytes that may be read from s.
/// @param accept A null-terminated string of the characters to accept. Its
///     terminating null is not part of the set, so a null byte in s always
///     ends the span.
/// @return The length of the initial run of bytes in s that are all in accept.
size_t memspn(const char* s, size_t slen, const char* accept) {
  size_t span = 0;
  for (; span < slen; ++span) {
    const char c = s[span];
    const char* candidate = accept;
    while (*candidate != '\0' && *candidate != c) {
      ++candidate;
    }
    if (*candidate == '\0') {
      break;  // c is not one of the accepted characters.
    }
  }
  return span;
}

/// @return Whether value is in the range [lo:hi].
bool InRange(char value, char lo, char hi) {
  return value >= lo && value <= hi;
}

/// @return Whether the value is the first character of a kName type scanner.
bool IsFirstNameChar(char value) {
  return InRange(value, 'A', 'Z') || InRange(value, 'a', 'z') ||
         value == '_' || value == ':';
}

/// Scans the characters in the s string, where the characters can be any legal
/// character in the name.
/// @param s The bytes to scan; need not be null-terminated.
/// @param slen The number of bytes available in s.
/// @return The number of name characters scanned.
size_t ScanOptionalNameChars(const char* s, size_t slen) {
  const char* kOptionalChars =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.-_:";
  return memspn(s, slen, kOptionalChars);
}

/// Scans the whitespace characters in the s string.
/// @param s The bytes to scan; need not be null-terminated.
/// @param slen The number of bytes available in s.
/// @return The number of whitespace characters scanned.
size_t ScanWhitespaceChars(const char* s, size_t slen) {
  return memspn(s, slen, kWhitespaceChars);
}

}  // namespace

/// @return The set of characters the whitespace scanners accept.
std::string DataScanner::GetWhitespaceChars() { return kWhitespaceChars; }

/// @return A scanner of type kLiteral for the given literal.
DataScanner DataScanner::CreateLiteralScanner(const std::string& literal) {
  return DataScanner(DataScanner::kLiteral, literal);
}

/// @return A scanner of type kName.
DataScanner DataScanner::CreateNameScanner() {
  return DataScanner(DataScanner::kName);
}

/// @return A scanner of type kQuotedString.
DataScanner DataScanner::CreateQuotedStringScanner() {
  return DataScanner(DataScanner::kQuotedString);
}

/// @return A scanner of type kSentinel for the given sentinel characters.
DataScanner DataScanner::CreateSentinelScanner(const std::string& sentinels) {
  return DataScanner(DataScanner::kSentinel, sentinels);
}

/// @return A scanner of type kThroughLiteral for the given literal.
DataScanner DataScanner::CreateThroughLiteralScanner(
    const std::string& literal) {
  return DataScanner(DataScanner::kThroughLiteral, literal);
}

/// @return A scanner of type kWhitespace.
DataScanner DataScanner::CreateWhitespaceScanner() {
  return DataScanner(DataScanner::kWhitespace);
}

/// @return A scanner of type kOptionalWhitespace.
DataScanner DataScanner::CreateOptionalWhitespaceScanner() {
  return DataScanner(DataScanner::kOptionalWhitespace);
}

/// Moves the end of the token range forward, keeping its beginning fixed.
/// @param delta_length The number of bytes to add to the token range.
/// @return The length of the token range after it has been extended.
size_t DataScanner::ExtendTokenLength(size_t delta_length) {
  token_range_ = DataRange(token_range_.GetBegin(),
                           token_range_.GetEnd() + delta_length);
  return token_range_.GetLength();
}

/// Marks the result as an error and attaches a Message::kInternalError message
/// built from the context, this scanner's description and the given text.
void DataScanner::SetInternalError(const DataContext& context,
                                   const std::string& error_description,
                                   DataMatchResult* result) {
  result->SetType(DataMatchResult::kError);
  result->SetMessage(
      Message::kInternalError,
      context.GetErrorText({}, {GetDescription()}, error_description, ""));
}

/// Marks the result as an error and attaches a Message::kSyntaxError message
/// built from the context, the given text and this scanner's description.
void DataScanner::SetSyntaxError(const DataContext& context,
                                 const std::string& error_description,
                                 DataMatchResult* result) {
  result->SetType(DataMatchResult::kError);
  result->SetMessage(Message::kSyntaxError,
                     context.GetErrorText(error_description, GetDescription()));
}

/// Matches the next portion of the literal against the available bytes. The
/// length of the token range records how much of the literal has already been
/// matched, so the literal may be matched across several calls.
/// @return kFull once the whole literal has been matched, kPartialOutOfData
///     if every available byte matched but more are needed, or an error.
DataMatchResult DataScanner::ScanLiteral(const char* cbytes,
                                         size_t bytes_available,
                                         const DataContext& context) {
  DataMatchResult result;
  size_t token_length = token_range_.GetLength();
  if (token_length >= literal_or_sentinels_.length()) {
    SetInternalError(context, "Literal already scanned", &result);
    return result;
  }
  const size_t bytes_still_needed =
      literal_or_sentinels_.length() - token_length;
  const size_t bytes_to_compare = std::min(bytes_still_needed, bytes_available);
  if (std::strncmp(&literal_or_sentinels_[token_length], cbytes,
                   bytes_to_compare) == 0) {
    token_length = ExtendTokenLength(bytes_to_compare);
    result.SetBytesConsumed(bytes_to_compare);
    result.SetType(token_length == literal_or_sentinels_.length()
                       ? DataMatchResult::kFull
                       : DataMatchResult::kPartialOutOfData);
  } else {
    SetSyntaxError(context, "Expected literal", &result);
  }
  return result;
}

/// Matches a name: one first-name character followed by any number of
/// optional name characters. The first character is only required while the
/// token range is still empty.
/// NOTE(review): the first-character branch reads *cbytes and decrements
/// bytes_available without checking for zero; this assumes the caller always
/// supplies at least one byte -- confirm against DataContext's validity check.
/// @return kFull when a non-name byte ends the name (or nothing more could be
///     consumed for an already started name), kPartialOutOfData when every
///     available byte was a name character, or a syntax error.
DataMatchResult DataScanner::ScanName(const char* cbytes,
                                      size_t bytes_available,
                                      const DataContext& context) {
  DataMatchResult result;
  size_t token_length = token_range_.GetLength();
  if (token_length == 0) {
    if (!IsFirstNameChar(*cbytes)) {
      SetSyntaxError(context, "Expected first character of a name", &result);
      return result;
    }
    token_length = ExtendTokenLength(1);
    result.SetBytesConsumed(1);
    bytes_available -= 1;
    cbytes += 1;
  }
  const size_t optional_bytes_consumed =
      ScanOptionalNameChars(cbytes, bytes_available);
  token_length = ExtendTokenLength(optional_bytes_consumed);
  result.IncrementBytesConsumed(optional_bytes_consumed);
  if (result.GetBytesConsumed() == 0 && token_length > 0) {
    result.SetType(DataMatchResult::kFull);
  } else if (optional_bytes_consumed < bytes_available) {
    result.SetType(DataMatchResult::kFull);
  } else {
    result.SetType(DataMatchResult::kPartialOutOfData);
  }
  return result;
}

/// Matches a string delimited by a pair of single or a pair of double quotes.
/// The data_ member holds the scan state: kStart before the opening quote has
/// been seen, the opening quote character while looking for its match, and
/// kDone after the closing quote has been consumed.
/// NOTE(review): the kStart branch reads *cbytes and decrements
/// bytes_available without checking for zero; this assumes the caller always
/// supplies at least one byte -- confirm.
/// @return kFull when the closing quote is consumed, kPartialOutOfData when
///     the available bytes ran out first, or an error.
DataMatchResult DataScanner::ScanQuotedString(const char* cbytes,
                                              size_t bytes_available,
                                              const DataContext& context) {
  const size_t kStart = 0;
  const size_t kDone = '.';
  const size_t kSquote = '\'';
  const size_t kDquote = '"';
  DataMatchResult result;
  const size_t token_length = token_range_.GetLength();
  if ((data_ == kStart && token_length != 0) ||
      (data_ != kStart && data_ != kSquote && data_ != kDquote)) {
    SetInternalError(context, "Inconsistent state", &result);
    return result;
  }
  if (data_ == kStart) {
    if (*cbytes != kSquote && *cbytes != kDquote) {
      SetSyntaxError(context, "Expected start of a quoted string", &result);
      return result;
    }
    // Remember which quote opened the string; only that one can close it.
    data_ = *cbytes++;
    bytes_available--;
    result.SetBytesConsumed(1);
    ExtendTokenLength(1);
  }
  const char* ebytes = static_cast<const char*>(
      std::memchr(cbytes, static_cast<int>(data_), bytes_available));
  const size_t bytes_scanned =
      ebytes ? static_cast<size_t>(ebytes - cbytes) : bytes_available;
  result.IncrementBytesConsumed(bytes_scanned);
  ExtendTokenLength(bytes_scanned);
  if (bytes_scanned == bytes_available) {
    result.SetType(DataMatchResult::kPartialOutOfData);
  } else {
    // The closing quote was found; consume it as part of the token.
    result.SetType(DataMatchResult::kFull);
    result.IncrementBytesConsumed(1);
    ExtendTokenLength(1);
    data_ = kDone;
  }
  return result;
}

/// Matches exactly one byte against the set of sentinel characters. A '~' in
/// the sentinel set matches any byte that can start a name. The sentinel that
/// matched (the '~' itself in that case) is stored in data_.
/// NOTE(review): reads *cbytes without consulting bytes_available; this
/// assumes the caller always supplies at least one byte -- confirm.
/// @return kFull with one byte consumed on a match, otherwise an error.
DataMatchResult DataScanner::ScanSentinel(const char* cbytes,
                                          size_t bytes_available,
                                          const DataContext& context) {
  DataMatchResult result;
  if (data_ != 0) {
    SetInternalError(context, "Sentinel already scanned", &result);
    return result;
  }
  const char cbyte = *cbytes;
  for (size_t index = 0; index < literal_or_sentinels_.size(); ++index) {
    const char sentinel = literal_or_sentinels_[index];
    if ((sentinel == '~' && IsFirstNameChar(cbyte)) || cbyte == sentinel) {
      ExtendTokenLength(1);
      result.SetBytesConsumed(1).SetType(DataMatchResult::kFull);
      data_ = sentinel;
      break;
    }
  }
  if (result.GetBytesConsumed() == 0) {
    SetSyntaxError(context, "Expected sentinal character", &result);
  }
  return result;
}

/// Consumes bytes up to and including the first occurrence of the literal.
/// The data_ member holds the number of leading literal characters matched so
/// far, which lets a match continue across calls.
/// @return kFull once the whole literal has been consumed, kPartialOutOfData
///     when the available bytes ran out first, or an internal error if the
///     literal had already been fully scanned.
DataMatchResult DataScanner::ScanThroughLiteral(const char* cbytes,
                                                size_t bytes_available,
                                                const DataContext& context) {
  DataMatchResult result;
  size_t& scanned_literal_length = data_;
  if (scanned_literal_length >= literal_or_sentinels_.length()) {
    SetInternalError(context, "Literal already scanned", &result);
    return result;
  }
  while (bytes_available > 0) {
    if (scanned_literal_length == 0) {
      // Literal scan not in progress. Find the first char of the literal.
      const char* matched_byte = static_cast<const char*>(
          std::memchr(cbytes, literal_or_sentinels_[0], bytes_available));
      if (matched_byte == nullptr) {
        // First char not found and chars exhausted.
        ExtendTokenLength(bytes_available);
        result.IncrementBytesConsumed(bytes_available);
        result.SetType(DataMatchResult::kPartialOutOfData);
        break;
      } else {
        // Found the first char of the literal.
        const size_t bytes_scanned =
            static_cast<size_t>(matched_byte - cbytes) + 1;
        result.IncrementBytesConsumed(bytes_scanned);
        bytes_available -= bytes_scanned;
        cbytes += bytes_scanned;
        ExtendTokenLength(bytes_scanned);
        scanned_literal_length = 1;
      }
    }
    // Check if the rest of the literal is there.
    const size_t bytes_still_needed =
        literal_or_sentinels_.length() - scanned_literal_length;
    const size_t bytes_to_compare =
        std::min(bytes_still_needed, bytes_available);
    if (std::strncmp(&literal_or_sentinels_[scanned_literal_length], cbytes,
                     bytes_to_compare) == 0) {
      // Yes, the whole literal is there or chars are exhausted.
      ExtendTokenLength(bytes_to_compare);
      scanned_literal_length += bytes_to_compare;
      result.IncrementBytesConsumed(bytes_to_compare);
      result.SetType(scanned_literal_length == literal_or_sentinels_.length()
                         ? DataMatchResult::kFull
                         : DataMatchResult::kPartialOutOfData);
      break;
    }
    // False alarm: the first chars of the literal were found, but not the
    // whole literal. Keep searching at one past the first char of the match.
    scanned_literal_length = 0;
  }
  return result;
}

/// Matches a run of whitespace characters. Finding no whitespace is a syntax
/// error only for a kWhitespace scanner whose token range is still empty;
/// otherwise an empty match is reported as kFull.
/// @return kFull when a non-whitespace byte ends the run, kPartialOutOfData
///     when every available byte was whitespace, or a syntax error.
DataMatchResult DataScanner::ScanWhitespace(const char* cbytes,
                                            size_t bytes_available,
                                            const DataContext& context) {
  DataMatchResult result;
  result.SetBytesConsumed(ScanWhitespaceChars(cbytes, bytes_available));
  const size_t token_length = ExtendTokenLength(result.GetBytesConsumed());
  if (result.GetBytesConsumed() == 0) {
    if (token_length == 0 && type_ == kWhitespace) {
      SetSyntaxError(context, "Expected whitespace", &result);
    } else {
      result.SetType(DataMatchResult::kFull);
    }
  } else {
    result.SetType((result.GetBytesConsumed() < bytes_available)
                       ? DataMatchResult::kFull
                       : DataMatchResult::kPartialOutOfData);
  }
  return result;
}

/// Scans the bytes between the context's location and the end of its range,
/// dispatching on the scanner type. Every call increments scan_call_count_,
/// and an invalid token range is first reset to an empty range at the
/// context's location.
/// @return The result of the type-specific scan, or an internal error if the
///     context's location and range are invalid or the type is not handled.
DataMatchResult DataScanner::Scan(const DataContext& context) {
  scan_call_count_ += 1;
  DataMatchResult result;
  if (!context.IsValidLocationAndRange()) {
    SetInternalError(context, context.GetInvalidLocationAndRangeErrorText(),
                     &result);
    return result;
  }
  if (!token_range_.IsValid()) {
    token_range_ = DataRange(context.GetLocation(), context.GetLocation());
  }
  const size_t bytes_available =
      context.GetRange().GetEnd() - context.GetLocation();
  const char* cbytes = context.GetCharBytes();
  switch (type_) {
    case kLiteral:
      result = ScanLiteral(cbytes, bytes_available, context);
      break;
    case kName:
      result = ScanName(cbytes, bytes_available, context);
      break;
    case kQuotedString:
      result = ScanQuotedString(cbytes, bytes_available, context);
      break;
    case kSentinel:
      result = ScanSentinel(cbytes, bytes_available, context);
      break;
    case kThroughLiteral:
      result = ScanThroughLiteral(cbytes, bytes_available, context);
      break;
    case kWhitespace:
    case kOptionalWhitespace:
      result = ScanWhitespace(cbytes, bytes_available, context);
      break;
    default:
      SetInternalError(context, "Undefined scanner type", &result);
      break;
  }
  return result;
}

/// Replaces the token range with a default-constructed DataRange.
void DataScanner::ResetTokenRange() { token_range_ = DataRange(); }

/// Clears the scan state (data_ and scan_call_count_) and the token range.
void DataScanner::Reset() {
  data_ = 0;
  scan_call_count_ = 0;
  ResetTokenRange();
}

/// @return A short description of the scanner built from its type and, for
///     the literal and sentinel types, its literal or sentinel characters.
///     The description is empty for a type the switch does not handle.
std::string DataScanner::GetDescription() const {
  std::string description;
  switch (type_) {
    case kLiteral:
      description = "Literal:'";
      description += literal_or_sentinels_;
      description += "'";
      break;
    case kName:
      description = "Name";
      break;
    case kQuotedString:
      description = "QuotedString";
      break;
    case kSentinel:
      description = "OneOf:'";
      description += literal_or_sentinels_;
      description += "'";
      break;
    case kThroughLiteral:
      description = "ThruLiteral:'";
      description += literal_or_sentinels_;
      description += "'";
      break;
    case kWhitespace:
      description = "Whitespace";
      break;
    case kOptionalWhitespace:
      description = "OptionalWhitespace";
      break;
  }
  return description;
}

/// @return The literal for kLiteral and kThroughLiteral scanners, otherwise
///     an empty string.
std::string DataScanner::GetLiteral() const {
  return type_ == kLiteral || type_ == kThroughLiteral ? literal_or_sentinels_
                                                       : "";
}

/// @return The sentinel characters for kSentinel scanners, otherwise an empty
///     string.
std::string DataScanner::GetSentenels() const {
  return type_ == kSentinel ? literal_or_sentinels_ : "";
}

/// @return For kSentinel scanners, the value of data_ as a char (the matched
///     sentinel once ScanSentinel has succeeded); otherwise 0.
char DataScanner::GetSentinel() const {
  return type_ == kSentinel ? static_cast<char>(data_) : '\0';
}

}  // namespace image_io
}  // namespace photos_editing_formats