diff options
Diffstat (limited to 'src/lib_json/json_reader.cpp')
-rw-r--r-- | src/lib_json/json_reader.cpp | 1590 |
1 files changed, 1348 insertions, 242 deletions
diff --git a/src/lib_json/json_reader.cpp b/src/lib_json/json_reader.cpp index 6136230..19922a8 100644 --- a/src/lib_json/json_reader.cpp +++ b/src/lib_json/json_reader.cpp @@ -1,41 +1,69 @@ -// Copyright 2007-2011 Baptiste Lepilleur +// Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors +// Copyright (C) 2016 InfoTeCS JSC. All rights reserved. // Distributed under MIT license, or public domain if desired and // recognized in your jurisdiction. // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE #if !defined(JSON_IS_AMALGAMATION) +#include "json_tool.h" #include <json/assertions.h> #include <json/reader.h> #include <json/value.h> -#include "json_tool.h" #endif // if !defined(JSON_IS_AMALGAMATION) -#include <iostream> -#include <fstream> -#include <utility> -#include <cstdio> +#include <algorithm> #include <cassert> #include <cstring> +#include <iostream> #include <istream> +#include <limits> +#include <memory> +#include <set> +#include <sstream> +#include <utility> + +#include <cstdio> +#if __cplusplus >= 201103L -#if defined(_MSC_VER) && _MSC_VER < 1500 // VC++ 8.0 and below -#define snprintf _snprintf +#if !defined(sscanf) +#define sscanf std::sscanf #endif -#if defined(_MSC_VER) && _MSC_VER >= 1400 // VC++ 8.0 +#endif //__cplusplus + +#if defined(_MSC_VER) +#if !defined(_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES) +#define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1 +#endif //_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES +#endif //_MSC_VER + +#if defined(_MSC_VER) // Disable warning about strdup being deprecated. #pragma warning(disable : 4996) #endif +// Define JSONCPP_DEPRECATED_STACK_LIMIT as an appropriate integer at compile +// time to change the stack limit +#if !defined(JSONCPP_DEPRECATED_STACK_LIMIT) +#define JSONCPP_DEPRECATED_STACK_LIMIT 1000 +#endif + +static size_t const stackLimit_g = + JSONCPP_DEPRECATED_STACK_LIMIT; // see readValue() + namespace Json { +#if __cplusplus >= 201103L || (defined(_CPPLIB_VER) && _CPPLIB_VER >= 520) +using CharReaderPtr = std::unique_ptr<CharReader>; +#else +using CharReaderPtr = std::auto_ptr<CharReader>; +#endif + // Implementation of class Features // //////////////////////////////// -Features::Features() - : allowComments_(true), strictRoot_(false), - allowDroppedNullPlaceholders_(false), allowNumericKeys_(false) {} +Features::Features() = default; -Features Features::all() { return Features(); } +Features Features::all() { return {}; } Features Features::strictMode() { Features features; @@ -49,67 +77,39 @@ Features Features::strictMode() { // Implementation of class Reader // //////////////////////////////// -static inline bool in(Reader::Char c, - Reader::Char c1, - Reader::Char c2, - Reader::Char c3, - Reader::Char c4) { - return c == c1 || c == c2 || c == c3 || c == c4; -} - -static inline bool in(Reader::Char c, - Reader::Char c1, - Reader::Char c2, - Reader::Char c3, - Reader::Char c4, - Reader::Char c5) { - return c == c1 || c == c2 || c == c3 || c == c4 || c == c5; -} - -static bool containsNewLine(Reader::Location begin, Reader::Location end) { - for (; begin < end; ++begin) - if (*begin == '\n' || *begin == '\r') - return true; - return false; +bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) { + return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; }); } // Class Reader // ////////////////////////////////////////////////////////////////// -Reader::Reader() - : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(), - lastValue_(), commentsBefore_(), features_(Features::all()), - collectComments_() {} +Reader::Reader() : features_(Features::all()) {} -Reader::Reader(const Features& features) - : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(), - lastValue_(), commentsBefore_(), features_(features), collectComments_() { -} +Reader::Reader(const Features& features) : features_(features) {} -bool -Reader::parse(const std::string& document, Value& root, bool collectComments) { - document_ = document; +bool Reader::parse(const std::string& document, Value& root, + bool collectComments) { + document_.assign(document.begin(), document.end()); const char* begin = document_.c_str(); const char* end = begin + document_.length(); return parse(begin, end, root, collectComments); } -bool Reader::parse(std::istream& sin, Value& root, bool collectComments) { - // std::istream_iterator<char> begin(sin); +bool Reader::parse(std::istream& is, Value& root, bool collectComments) { + // std::istream_iterator<char> begin(is); // std::istream_iterator<char> end; // Those would allow streamed input from a file, if parse() were a // template function. - // Since std::string is reference-counted, this at least does not + // Since String is reference-counted, this at least does not // create an extra copy. - std::string doc; - std::getline(sin, doc, (char)EOF); - return parse(doc, root, collectComments); + String doc; + std::getline(is, doc, static_cast<char> EOF); + return parse(doc.data(), doc.data() + doc.size(), root, collectComments); } -bool Reader::parse(const char* beginDoc, - const char* endDoc, - Value& root, +bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root, bool collectComments) { if (!features_.allowComments_) { collectComments = false; @@ -119,9 +119,9 @@ bool Reader::parse(const char* beginDoc, end_ = endDoc; collectComments_ = collectComments; current_ = begin_; - lastValueEnd_ = 0; - lastValue_ = 0; - commentsBefore_ = ""; + lastValueEnd_ = nullptr; + lastValue_ = nullptr; + commentsBefore_.clear(); errors_.clear(); while (!nodes_.empty()) nodes_.pop(); @@ -149,21 +149,20 @@ bool Reader::parse(const char* beginDoc, } bool Reader::readValue() { + // readValue() may call itself only if it calls readObject() or ReadArray(). + // These methods execute nodes_.push() just before and nodes_.pop)() just + // after calling readValue(). parse() executes one nodes_.push(), so > instead + // of >=. + if (nodes_.size() > stackLimit_g) + throwRuntimeError("Exceeded stackLimit in readValue()."); + Token token; skipCommentTokens(token); bool successful = true; if (collectComments_ && !commentsBefore_.empty()) { - // Remove newline characters at the end of the comments - size_t lastNonNewline = commentsBefore_.find_last_not_of("\r\n"); - if (lastNonNewline != std::string::npos) { - commentsBefore_.erase(lastNonNewline + 1); - } else { - commentsBefore_.clear(); - } - currentValue().setComment(commentsBefore_, commentBefore); - commentsBefore_ = ""; + commentsBefore_.clear(); } switch (token.type_) { @@ -181,32 +180,37 @@ bool Reader::readValue() { case tokenString: successful = decodeString(token); break; - case tokenTrue: - currentValue() = true; + case tokenTrue: { + Value v(true); + currentValue().swapPayload(v); currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); - break; - case tokenFalse: - currentValue() = false; + } break; + case tokenFalse: { + Value v(false); + currentValue().swapPayload(v); currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); - break; - case tokenNull: - currentValue() = Value(); + } break; + case tokenNull: { + Value v; + currentValue().swapPayload(v); currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); - break; + } break; case tokenArraySeparator: + case tokenObjectEnd: + case tokenArrayEnd: if (features_.allowDroppedNullPlaceholders_) { // "Un-read" the current token and mark the current value as a null // token. current_--; - currentValue() = Value(); + Value v; + currentValue().swapPayload(v); currentValue().setOffsetStart(current_ - begin_ - 1); currentValue().setOffsetLimit(current_ - begin_); break; - } - // Else, fall through... + } // Else, fall through... default: currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); @@ -231,13 +235,6 @@ void Reader::skipCommentTokens(Token& token) { } } -bool Reader::expectToken(TokenType type, Token& token, const char* message) { - readToken(token); - if (token.type_ != type) - return addError(message, token); - return true; -} - bool Reader::readToken(Token& token) { skipSpaces(); token.start_ = current_; @@ -306,7 +303,7 @@ bool Reader::readToken(Token& token) { if (!ok) token.type_ = tokenError; token.end_ = current_; - return true; + return ok; } void Reader::skipSpaces() { @@ -319,7 +316,7 @@ void Reader::skipSpaces() { } } -bool Reader::match(Location pattern, int patternLength) { +bool Reader::match(const Char* pattern, int patternLength) { if (end_ - current_ < patternLength) return false; int index = patternLength; @@ -353,19 +350,39 @@ bool Reader::readComment() { return true; } -void -Reader::addComment(Location begin, Location end, CommentPlacement placement) { +String Reader::normalizeEOL(Reader::Location begin, Reader::Location end) { + String normalized; + normalized.reserve(static_cast<size_t>(end - begin)); + Reader::Location current = begin; + while (current != end) { + char c = *current++; + if (c == '\r') { + if (current != end && *current == '\n') + // convert dos EOL + ++current; + // convert Mac EOL + normalized += '\n'; + } else { + normalized += c; + } + } + return normalized; +} + +void Reader::addComment(Location begin, Location end, + CommentPlacement placement) { assert(collectComments_); + const String& normalized = normalizeEOL(begin, end); if (placement == commentAfterOnSameLine) { - assert(lastValue_ != 0); - lastValue_->setComment(std::string(begin, end), placement); + assert(lastValue_ != nullptr); + lastValue_->setComment(normalized, placement); } else { - commentsBefore_ += std::string(begin, end); + commentsBefore_ += normalized; } } bool Reader::readCStyleComment() { - while (current_ != end_) { + while ((current_ + 1) < end_) { Char c = getNextChar(); if (c == '*' && *current_ == '/') break; @@ -376,23 +393,43 @@ bool Reader::readCStyleComment() { bool Reader::readCppStyleComment() { while (current_ != end_) { Char c = getNextChar(); - if (c == '\r' || c == '\n') + if (c == '\n') + break; + if (c == '\r') { + // Consume DOS EOL. It will be normalized in addComment. + if (current_ != end_ && *current_ == '\n') + getNextChar(); + // Break on Moc OS 9 EOL. break; + } } return true; } void Reader::readNumber() { - while (current_ != end_) { - if (!(*current_ >= '0' && *current_ <= '9') && - !in(*current_, '.', 'e', 'E', '+', '-')) - break; - ++current_; + Location p = current_; + char c = '0'; // stopgap for already consumed character + // integral part + while (c >= '0' && c <= '9') + c = (current_ = p) < end_ ? *p++ : '\0'; + // fractional part + if (c == '.') { + c = (current_ = p) < end_ ? *p++ : '\0'; + while (c >= '0' && c <= '9') + c = (current_ = p) < end_ ? *p++ : '\0'; + } + // exponential part + if (c == 'e' || c == 'E') { + c = (current_ = p) < end_ ? *p++ : '\0'; + if (c == '+' || c == '-') + c = (current_ = p) < end_ ? *p++ : '\0'; + while (c >= '0' && c <= '9') + c = (current_ = p) < end_ ? *p++ : '\0'; } } bool Reader::readString() { - Char c = 0; + Char c = '\0'; while (current_ != end_) { c = getNextChar(); if (c == '\\') @@ -403,11 +440,12 @@ bool Reader::readString() { return c == '"'; } -bool Reader::readObject(Token& tokenStart) { +bool Reader::readObject(Token& token) { Token tokenName; - std::string name; - currentValue() = Value(objectValue); - currentValue().setOffsetStart(tokenStart.start_ - begin_); + String name; + Value init(objectValue); + currentValue().swapPayload(init); + currentValue().setOffsetStart(token.start_ - begin_); while (readToken(tokenName)) { bool initialTokenOk = true; while (tokenName.type_ == tokenComment && initialTokenOk) @@ -416,7 +454,7 @@ bool Reader::readObject(Token& tokenStart) { break; if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object return true; - name = ""; + name.clear(); if (tokenName.type_ == tokenString) { if (!decodeString(tokenName, name)) return recoverFromError(tokenObjectEnd); @@ -431,8 +469,8 @@ bool Reader::readObject(Token& tokenStart) { Token colon; if (!readToken(colon) || colon.type_ != tokenMemberSeparator) { - return addErrorAndRecover( - "Missing ':' after object member name", colon, tokenObjectEnd); + return addErrorAndRecover("Missing ':' after object member name", colon, + tokenObjectEnd); } Value& value = currentValue()[name]; nodes_.push(&value); @@ -445,8 +483,8 @@ bool Reader::readObject(Token& tokenStart) { if (!readToken(comma) || (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator && comma.type_ != tokenComment)) { - return addErrorAndRecover( - "Missing ',' or '}' in object declaration", comma, tokenObjectEnd); + return addErrorAndRecover("Missing ',' or '}' in object declaration", + comma, tokenObjectEnd); } bool finalizeTokenOk = true; while (comma.type_ == tokenComment && finalizeTokenOk) @@ -454,15 +492,16 @@ bool Reader::readObject(Token& tokenStart) { if (comma.type_ == tokenObjectEnd) return true; } - return addErrorAndRecover( - "Missing '}' or object member name", tokenName, tokenObjectEnd); + return addErrorAndRecover("Missing '}' or object member name", tokenName, + tokenObjectEnd); } -bool Reader::readArray(Token& tokenStart) { - currentValue() = Value(arrayValue); - currentValue().setOffsetStart(tokenStart.start_ - begin_); +bool Reader::readArray(Token& token) { + Value init(arrayValue); + currentValue().swapPayload(init); + currentValue().setOffsetStart(token.start_ - begin_); skipSpaces(); - if (*current_ == ']') // empty array + if (current_ != end_ && *current_ == ']') // empty array { Token endArray; readToken(endArray); @@ -477,19 +516,19 @@ bool Reader::readArray(Token& tokenStart) { if (!ok) // error already set return recoverFromError(tokenArrayEnd); - Token token; + Token currentToken; // Accept Comment after last item in the array. - ok = readToken(token); - while (token.type_ == tokenComment && ok) { - ok = readToken(token); + ok = readToken(currentToken); + while (currentToken.type_ == tokenComment && ok) { + ok = readToken(currentToken); } - bool badTokenType = - (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd); + bool badTokenType = (currentToken.type_ != tokenArraySeparator && + currentToken.type_ != tokenArrayEnd); if (!ok || badTokenType) { - return addErrorAndRecover( - "Missing ',' or ']' in array declaration", token, tokenArrayEnd); + return addErrorAndRecover("Missing ',' or ']' in array declaration", + currentToken, tokenArrayEnd); } - if (token.type_ == tokenArrayEnd) + if (currentToken.type_ == tokenArrayEnd) break; } return true; @@ -499,20 +538,13 @@ bool Reader::decodeNumber(Token& token) { Value decoded; if (!decodeNumber(token, decoded)) return false; - currentValue() = decoded; + currentValue().swapPayload(decoded); currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); return true; } bool Reader::decodeNumber(Token& token, Value& decoded) { - bool isDouble = false; - for (Location inspect = token.start_; inspect != token.end_; ++inspect) { - isDouble = isDouble || in(*inspect, '.', 'e', 'E', '+') || - (*inspect == '-' && inspect != token.start_); - } - if (isDouble) - return decodeDouble(token, decoded); // Attempts to parse the number as an integer. If the number is // larger than the maximum supported value of an integer then // we decode the number as a double. @@ -520,18 +552,18 @@ bool Reader::decodeNumber(Token& token, Value& decoded) { bool isNegative = *current == '-'; if (isNegative) ++current; + // TODO: Help the compiler do the div and mod at compile time or get rid of + // them. Value::LargestUInt maxIntegerValue = - isNegative ? Value::LargestUInt(-Value::minLargestInt) + isNegative ? Value::LargestUInt(Value::maxLargestInt) + 1 : Value::maxLargestUInt; Value::LargestUInt threshold = maxIntegerValue / 10; Value::LargestUInt value = 0; while (current < token.end_) { Char c = *current++; if (c < '0' || c > '9') - return addError("'" + std::string(token.start_, token.end_) + - "' is not a number.", - token); - Value::UInt digit(c - '0'); + return decodeDouble(token, decoded); + auto digit(static_cast<Value::UInt>(c - '0')); if (value >= threshold) { // We've hit or exceeded the max value divided by 10 (rounded down). If // a) we've only just touched the limit, b) this is the last digit, and @@ -544,7 +576,9 @@ bool Reader::decodeNumber(Token& token, Value& decoded) { } value = value * 10 + digit; } - if (isNegative) + if (isNegative && value == maxIntegerValue) + decoded = Value::minLargestInt; + else if (isNegative) decoded = -Value::LargestInt(value); else if (value <= Value::LargestUInt(Value::maxInt)) decoded = Value::LargestInt(value); @@ -557,7 +591,7 @@ bool Reader::decodeDouble(Token& token) { Value decoded; if (!decodeDouble(token, decoded)) return false; - currentValue() = decoded; + currentValue().swapPayload(decoded); currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); return true; @@ -565,59 +599,35 @@ bool Reader::decodeDouble(Token& token) { bool Reader::decodeDouble(Token& token, Value& decoded) { double value = 0; - const int bufferSize = 32; - int count; - int length = int(token.end_ - token.start_); - - // Sanity check to avoid buffer overflow exploits. - if (length < 0) { - return addError("Unable to parse token length", token); - } - - // Avoid using a string constant for the format control string given to - // sscanf, as this can cause hard to debug crashes on OS X. See here for more - // info: - // - // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html - char format[] = "%lf"; - - if (length <= bufferSize) { - Char buffer[bufferSize + 1]; - memcpy(buffer, token.start_, length); - buffer[length] = 0; - count = sscanf(buffer, format, &value); - } else { - std::string buffer(token.start_, token.end_); - count = sscanf(buffer.c_str(), format, &value); - } - - if (count != 1) - return addError("'" + std::string(token.start_, token.end_) + - "' is not a number.", - token); + String buffer(token.start_, token.end_); + IStringStream is(buffer); + if (!(is >> value)) + return addError( + "'" + String(token.start_, token.end_) + "' is not a number.", token); decoded = value; return true; } bool Reader::decodeString(Token& token) { - std::string decoded; - if (!decodeString(token, decoded)) + String decoded_string; + if (!decodeString(token, decoded_string)) return false; - currentValue() = decoded; + Value decoded(decoded_string); + currentValue().swapPayload(decoded); currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); return true; } -bool Reader::decodeString(Token& token, std::string& decoded) { - decoded.reserve(token.end_ - token.start_ - 2); +bool Reader::decodeString(Token& token, String& decoded) { + decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2)); Location current = token.start_ + 1; // skip '"' Location end = token.end_ - 1; // do not include '"' while (current != end) { Char c = *current++; if (c == '"') break; - else if (c == '\\') { + if (c == '\\') { if (current == end) return addError("Empty escape sequence in string", token, current); Char escape = *current++; @@ -662,10 +672,8 @@ bool Reader::decodeString(Token& token, std::string& decoded) { return true; } -bool Reader::decodeUnicodeCodePoint(Token& token, - Location& current, - Location end, - unsigned int& unicode) { +bool Reader::decodeUnicodeCodePoint(Token& token, Location& current, + Location end, unsigned int& unicode) { if (!decodeUnicodeEscapeSequence(token, current, end, unicode)) return false; @@ -674,10 +682,9 @@ bool Reader::decodeUnicodeCodePoint(Token& token, if (end - current < 6) return addError( "additional six characters expected to parse unicode surrogate pair.", - token, - current); - unsigned int surrogatePair; + token, current); if (*(current++) == '\\' && *(current++) == 'u') { + unsigned int surrogatePair; if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) { unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF); } else @@ -685,22 +692,19 @@ bool Reader::decodeUnicodeCodePoint(Token& token, } else return addError("expecting another \\u token to begin the second half of " "a unicode surrogate pair", - token, - current); + token, current); } return true; } -bool Reader::decodeUnicodeEscapeSequence(Token& token, - Location& current, +bool Reader::decodeUnicodeEscapeSequence(Token& token, Location& current, Location end, - unsigned int& unicode) { + unsigned int& ret_unicode) { if (end - current < 4) return addError( - "Bad unicode escape sequence in string: four digits expected.", - token, + "Bad unicode escape sequence in string: four digits expected.", token, current); - unicode = 0; + int unicode = 0; for (int index = 0; index < 4; ++index) { Char c = *current++; unicode *= 16; @@ -713,14 +717,13 @@ bool Reader::decodeUnicodeEscapeSequence(Token& token, else return addError( "Bad unicode escape sequence in string: hexadecimal digit expected.", - token, - current); + token, current); } + ret_unicode = static_cast<unsigned int>(unicode); return true; } -bool -Reader::addError(const std::string& message, Token& token, Location extra) { +bool Reader::addError(const String& message, Token& token, Location extra) { ErrorInfo info; info.token_ = token; info.message_ = message; @@ -730,7 +733,7 @@ Reader::addError(const std::string& message, Token& token, Location extra) { } bool Reader::recoverFromError(TokenType skipUntilToken) { - int errorCount = int(errors_.size()); + size_t const errorCount = errors_.size(); Token skip; for (;;) { if (!readToken(skip)) @@ -742,8 +745,7 @@ bool Reader::recoverFromError(TokenType skipUntilToken) { return false; } -bool Reader::addErrorAndRecover(const std::string& message, - Token& token, +bool Reader::addErrorAndRecover(const String& message, Token& token, TokenType skipUntilToken) { addError(message, token); return recoverFromError(skipUntilToken); @@ -757,8 +759,7 @@ Reader::Char Reader::getNextChar() { return *current_++; } -void Reader::getLocationLineAndColumn(Location location, - int& line, +void Reader::getLocationLineAndColumn(Location location, int& line, int& column) const { Location current = begin_; Location lastLineStart = current; @@ -780,33 +781,22 @@ void Reader::getLocationLineAndColumn(Location location, ++line; } -std::string Reader::getLocationLineAndColumn(Location location) const { +String Reader::getLocationLineAndColumn(Location location) const { int line, column; getLocationLineAndColumn(location, line, column); char buffer[18 + 16 + 16 + 1]; -#if defined(_MSC_VER) && defined(__STDC_SECURE_LIB__) -#if defined(WINCE) - _snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column); -#else - sprintf_s(buffer, sizeof(buffer), "Line %d, Column %d", line, column); -#endif -#else - snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column); -#endif + jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column); return buffer; } // Deprecated. Preserved for backward compatibility -std::string Reader::getFormatedErrorMessages() const { +String Reader::getFormatedErrorMessages() const { return getFormattedErrorMessages(); } -std::string Reader::getFormattedErrorMessages() const { - std::string formattedMessage; - for (Errors::const_iterator itError = errors_.begin(); - itError != errors_.end(); - ++itError) { - const ErrorInfo& error = *itError; +String Reader::getFormattedErrorMessages() const { + String formattedMessage; + for (const auto& error : errors_) { formattedMessage += "* " + getLocationLineAndColumn(error.token_.start_) + "\n"; formattedMessage += " " + error.message_ + "\n"; @@ -819,10 +809,7 @@ std::string Reader::getFormattedErrorMessages() const { std::vector<Reader::StructuredError> Reader::getStructuredErrors() const { std::vector<Reader::StructuredError> allErrors; - for (Errors::const_iterator itError = errors_.begin(); - itError != errors_.end(); - ++itError) { - const ErrorInfo& error = *itError; + for (const auto& error : errors_) { Reader::StructuredError structured; structured.offset_start = error.token_.start_ - begin_; structured.offset_limit = error.token_.end_ - begin_; @@ -832,28 +819,27 @@ std::vector<Reader::StructuredError> Reader::getStructuredErrors() const { return allErrors; } -bool Reader::pushError(const Value& value, const std::string& message) { - size_t length = end_ - begin_; - if(value.getOffsetStart() > length - || value.getOffsetLimit() > length) +bool Reader::pushError(const Value& value, const String& message) { + ptrdiff_t const length = end_ - begin_; + if (value.getOffsetStart() > length || value.getOffsetLimit() > length) return false; Token token; token.type_ = tokenError; token.start_ = begin_ + value.getOffsetStart(); - token.end_ = end_ + value.getOffsetLimit(); + token.end_ = begin_ + value.getOffsetLimit(); ErrorInfo info; info.token_ = token; info.message_ = message; - info.extra_ = 0; + info.extra_ = nullptr; errors_.push_back(info); return true; } -bool Reader::pushError(const Value& value, const std::string& message, const Value& extra) { - size_t length = end_ - begin_; - if(value.getOffsetStart() > length - || value.getOffsetLimit() > length - || extra.getOffsetLimit() > length) +bool Reader::pushError(const Value& value, const String& message, + const Value& extra) { + ptrdiff_t const length = end_ - begin_; + if (value.getOffsetStart() > length || value.getOffsetLimit() > length || + extra.getOffsetLimit() > length) return false; Token token; token.type_ = tokenError; @@ -867,19 +853,1139 @@ bool Reader::pushError(const Value& value, const std::string& message, const Val return true; } -bool Reader::good() const { - return !errors_.size(); +bool Reader::good() const { return errors_.empty(); } + +// Originally copied from the Features class (now deprecated), used internally +// for features implementation. +class OurFeatures { +public: + static OurFeatures all(); + bool allowComments_; + bool allowTrailingCommas_; + bool strictRoot_; + bool allowDroppedNullPlaceholders_; + bool allowNumericKeys_; + bool allowSingleQuotes_; + bool failIfExtra_; + bool rejectDupKeys_; + bool allowSpecialFloats_; + bool skipBom_; + size_t stackLimit_; +}; // OurFeatures + +OurFeatures OurFeatures::all() { return {}; } + +// Implementation of class Reader +// //////////////////////////////// + +// Originally copied from the Reader class (now deprecated), used internally +// for implementing JSON reading. +class OurReader { +public: + using Char = char; + using Location = const Char*; + struct StructuredError { + ptrdiff_t offset_start; + ptrdiff_t offset_limit; + String message; + }; + + explicit OurReader(OurFeatures const& features); + bool parse(const char* beginDoc, const char* endDoc, Value& root, + bool collectComments = true); + String getFormattedErrorMessages() const; + std::vector<StructuredError> getStructuredErrors() const; + +private: + OurReader(OurReader const&); // no impl + void operator=(OurReader const&); // no impl + + enum TokenType { + tokenEndOfStream = 0, + tokenObjectBegin, + tokenObjectEnd, + tokenArrayBegin, + tokenArrayEnd, + tokenString, + tokenNumber, + tokenTrue, + tokenFalse, + tokenNull, + tokenNaN, + tokenPosInf, + tokenNegInf, + tokenArraySeparator, + tokenMemberSeparator, + tokenComment, + tokenError + }; + + class Token { + public: + TokenType type_; + Location start_; + Location end_; + }; + + class ErrorInfo { + public: + Token token_; + String message_; + Location extra_; + }; + + using Errors = std::deque<ErrorInfo>; + + bool readToken(Token& token); + void skipSpaces(); + void skipBom(bool skipBom); + bool match(const Char* pattern, int patternLength); + bool readComment(); + bool readCStyleComment(bool* containsNewLineResult); + bool readCppStyleComment(); + bool readString(); + bool readStringSingleQuote(); + bool readNumber(bool checkInf); + bool readValue(); + bool readObject(Token& token); + bool readArray(Token& token); + bool decodeNumber(Token& token); + bool decodeNumber(Token& token, Value& decoded); + bool decodeString(Token& token); + bool decodeString(Token& token, String& decoded); + bool decodeDouble(Token& token); + bool decodeDouble(Token& token, Value& decoded); + bool decodeUnicodeCodePoint(Token& token, Location& current, Location end, + unsigned int& unicode); + bool decodeUnicodeEscapeSequence(Token& token, Location& current, + Location end, unsigned int& unicode); + bool addError(const String& message, Token& token, Location extra = nullptr); + bool recoverFromError(TokenType skipUntilToken); + bool addErrorAndRecover(const String& message, Token& token, + TokenType skipUntilToken); + void skipUntilSpace(); + Value& currentValue(); + Char getNextChar(); + void getLocationLineAndColumn(Location location, int& line, + int& column) const; + String getLocationLineAndColumn(Location location) const; + void addComment(Location begin, Location end, CommentPlacement placement); + void skipCommentTokens(Token& token); + + static String normalizeEOL(Location begin, Location end); + static bool containsNewLine(Location begin, Location end); + + using Nodes = std::stack<Value*>; + + Nodes nodes_{}; + Errors errors_{}; + String document_{}; + Location begin_ = nullptr; + Location end_ = nullptr; + Location current_ = nullptr; + Location lastValueEnd_ = nullptr; + Value* lastValue_ = nullptr; + bool lastValueHasAComment_ = false; + String commentsBefore_{}; + + OurFeatures const features_; + bool collectComments_ = false; +}; // OurReader + +// complete copy of Read impl, for OurReader + +bool OurReader::containsNewLine(OurReader::Location begin, + OurReader::Location end) { + return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; }); } -std::istream& operator>>(std::istream& sin, Value& root) { - Json::Reader reader; - bool ok = reader.parse(sin, root, true); - if (!ok) { - fprintf(stderr, - "Error from reader: %s", - reader.getFormattedErrorMessages().c_str()); +OurReader::OurReader(OurFeatures const& features) : features_(features) {} + +bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root, + bool collectComments) { + if (!features_.allowComments_) { + collectComments = false; + } + + begin_ = beginDoc; + end_ = endDoc; + collectComments_ = collectComments; + current_ = begin_; + lastValueEnd_ = nullptr; + lastValue_ = nullptr; + commentsBefore_.clear(); + errors_.clear(); + while (!nodes_.empty()) + nodes_.pop(); + nodes_.push(&root); + + // skip byte order mark if it exists at the beginning of the UTF-8 text. + skipBom(features_.skipBom_); + bool successful = readValue(); + nodes_.pop(); + Token token; + skipCommentTokens(token); + if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) { + addError("Extra non-whitespace after JSON value.", token); + return false; + } + if (collectComments_ && !commentsBefore_.empty()) + root.setComment(commentsBefore_, commentAfter); + if (features_.strictRoot_) { + if (!root.isArray() && !root.isObject()) { + // Set error location to start of doc, ideally should be first token found + // in doc + token.type_ = tokenError; + token.start_ = beginDoc; + token.end_ = endDoc; + addError( + "A valid JSON document must be either an array or an object value.", + token); + return false; + } + } + return successful; +} + +bool OurReader::readValue() { + // To preserve the old behaviour we cast size_t to int. + if (nodes_.size() > features_.stackLimit_) + throwRuntimeError("Exceeded stackLimit in readValue()."); + Token token; + skipCommentTokens(token); + bool successful = true; + + if (collectComments_ && !commentsBefore_.empty()) { + currentValue().setComment(commentsBefore_, commentBefore); + commentsBefore_.clear(); + } + + switch (token.type_) { + case tokenObjectBegin: + successful = readObject(token); + currentValue().setOffsetLimit(current_ - begin_); + break; + case tokenArrayBegin: + successful = readArray(token); + currentValue().setOffsetLimit(current_ - begin_); + break; + case tokenNumber: + successful = decodeNumber(token); + break; + case tokenString: + successful = decodeString(token); + break; + case tokenTrue: { + Value v(true); + currentValue().swapPayload(v); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + } break; + case tokenFalse: { + Value v(false); + currentValue().swapPayload(v); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + } break; + case tokenNull: { + Value v; + currentValue().swapPayload(v); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + } break; + case tokenNaN: { + Value v(std::numeric_limits<double>::quiet_NaN()); + currentValue().swapPayload(v); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + } break; + case tokenPosInf: { + Value v(std::numeric_limits<double>::infinity()); + currentValue().swapPayload(v); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + } break; + case tokenNegInf: { + Value v(-std::numeric_limits<double>::infinity()); + currentValue().swapPayload(v); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + } break; + case tokenArraySeparator: + case tokenObjectEnd: + case tokenArrayEnd: + if (features_.allowDroppedNullPlaceholders_) { + // "Un-read" the current token and mark the current value as a null + // token. + current_--; + Value v; + currentValue().swapPayload(v); + currentValue().setOffsetStart(current_ - begin_ - 1); + currentValue().setOffsetLimit(current_ - begin_); + break; + } // else, fall through ... + default: + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + return addError("Syntax error: value, object or array expected.", token); + } + + if (collectComments_) { + lastValueEnd_ = current_; + lastValueHasAComment_ = false; + lastValue_ = ¤tValue(); + } + + return successful; +} + +void OurReader::skipCommentTokens(Token& token) { + if (features_.allowComments_) { + do { + readToken(token); + } while (token.type_ == tokenComment); + } else { + readToken(token); + } +} + +bool OurReader::readToken(Token& token) { + skipSpaces(); + token.start_ = current_; + Char c = getNextChar(); + bool ok = true; + switch (c) { + case '{': + token.type_ = tokenObjectBegin; + break; + case '}': + token.type_ = tokenObjectEnd; + break; + case '[': + token.type_ = tokenArrayBegin; + break; + case ']': + token.type_ = tokenArrayEnd; + break; + case '"': + token.type_ = tokenString; + ok = readString(); + break; + case '\'': + if (features_.allowSingleQuotes_) { + token.type_ = tokenString; + ok = readStringSingleQuote(); + } else { + // If we don't allow single quotes, this is a failure case. + ok = false; + } + break; + case '/': + token.type_ = tokenComment; + ok = readComment(); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + token.type_ = tokenNumber; + readNumber(false); + break; + case '-': + if (readNumber(true)) { + token.type_ = tokenNumber; + } else { + token.type_ = tokenNegInf; + ok = features_.allowSpecialFloats_ && match("nfinity", 7); + } + break; + case '+': + if (readNumber(true)) { + token.type_ = tokenNumber; + } else { + token.type_ = tokenPosInf; + ok = features_.allowSpecialFloats_ && match("nfinity", 7); + } + break; + case 't': + token.type_ = tokenTrue; + ok = match("rue", 3); + break; + case 'f': + token.type_ = tokenFalse; + ok = match("alse", 4); + break; + case 'n': + token.type_ = tokenNull; + ok = match("ull", 3); + break; + case 'N': + if (features_.allowSpecialFloats_) { + token.type_ = tokenNaN; + ok = match("aN", 2); + } else { + ok = false; + } + break; + case 'I': + if (features_.allowSpecialFloats_) { + token.type_ = tokenPosInf; + ok = match("nfinity", 7); + } else { + ok = false; + } + break; + case ',': + token.type_ = tokenArraySeparator; + break; + case ':': + token.type_ = tokenMemberSeparator; + break; + case 0: + token.type_ = tokenEndOfStream; + break; + default: + ok = false; + break; + } + if (!ok) + token.type_ = tokenError; + token.end_ = current_; + return ok; +} + +void OurReader::skipSpaces() { + while (current_ != end_) { + Char c = *current_; + if (c == ' ' || c == '\t' || c == '\r' || c == '\n') + ++current_; + else + break; + } +} + +void OurReader::skipBom(bool skipBom) { + // The default behavior is to skip BOM. + if (skipBom) { + if ((end_ - begin_) >= 3 && strncmp(begin_, "\xEF\xBB\xBF", 3) == 0) { + begin_ += 3; + current_ = begin_; + } + } +} + +bool OurReader::match(const Char* pattern, int patternLength) { + if (end_ - current_ < patternLength) + return false; + int index = patternLength; + while (index--) + if (current_[index] != pattern[index]) + return false; + current_ += patternLength; + return true; +} + +bool OurReader::readComment() { + const Location commentBegin = current_ - 1; + const Char c = getNextChar(); + bool successful = false; + bool cStyleWithEmbeddedNewline = false; + + const bool isCStyleComment = (c == '*'); + const bool isCppStyleComment = (c == '/'); + if (isCStyleComment) { + successful = readCStyleComment(&cStyleWithEmbeddedNewline); + } else if (isCppStyleComment) { + successful = readCppStyleComment(); + } + + if (!successful) + return false; + + if (collectComments_) { + CommentPlacement placement = commentBefore; + + if (!lastValueHasAComment_) { + if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) { + if (isCppStyleComment || !cStyleWithEmbeddedNewline) { + placement = commentAfterOnSameLine; + lastValueHasAComment_ = true; + } + } + } + + addComment(commentBegin, current_, placement); + } + return true; +} + +String OurReader::normalizeEOL(OurReader::Location begin, + OurReader::Location end) { + String normalized; + normalized.reserve(static_cast<size_t>(end - begin)); + OurReader::Location current = begin; + while (current != end) { + char c = *current++; + if (c == '\r') { + if (current != end && *current == '\n') + // convert dos EOL + ++current; + // convert Mac EOL + normalized += '\n'; + } else { + normalized += c; + } + } + return normalized; +} + +void OurReader::addComment(Location begin, Location end, + CommentPlacement placement) { + assert(collectComments_); + const String& normalized = normalizeEOL(begin, end); + if (placement == commentAfterOnSameLine) { + assert(lastValue_ != nullptr); + lastValue_->setComment(normalized, placement); + } else { + commentsBefore_ += normalized; + } +} + +bool OurReader::readCStyleComment(bool* containsNewLineResult) { + *containsNewLineResult = false; + + while ((current_ + 1) < end_) { + Char c = getNextChar(); + if (c == '*' && *current_ == '/') + break; + if (c == '\n') + *containsNewLineResult = true; + } + + return getNextChar() == '/'; +} + +bool OurReader::readCppStyleComment() { + while (current_ != end_) { + Char c = getNextChar(); + if (c == '\n') + break; + if (c == '\r') { + // Consume DOS EOL. It will be normalized in addComment. + if (current_ != end_ && *current_ == '\n') + getNextChar(); + // Break on Moc OS 9 EOL. + break; + } + } + return true; +} - JSON_FAIL_MESSAGE("reader error"); +bool OurReader::readNumber(bool checkInf) { + Location p = current_; + if (checkInf && p != end_ && *p == 'I') { + current_ = ++p; + return false; + } + char c = '0'; // stopgap for already consumed character + // integral part + while (c >= '0' && c <= '9') + c = (current_ = p) < end_ ? *p++ : '\0'; + // fractional part + if (c == '.') { + c = (current_ = p) < end_ ? *p++ : '\0'; + while (c >= '0' && c <= '9') + c = (current_ = p) < end_ ? *p++ : '\0'; + } + // exponential part + if (c == 'e' || c == 'E') { + c = (current_ = p) < end_ ? *p++ : '\0'; + if (c == '+' || c == '-') + c = (current_ = p) < end_ ? *p++ : '\0'; + while (c >= '0' && c <= '9') + c = (current_ = p) < end_ ? *p++ : '\0'; + } + return true; +} +bool OurReader::readString() { + Char c = 0; + while (current_ != end_) { + c = getNextChar(); + if (c == '\\') + getNextChar(); + else if (c == '"') + break; + } + return c == '"'; +} + +bool OurReader::readStringSingleQuote() { + Char c = 0; + while (current_ != end_) { + c = getNextChar(); + if (c == '\\') + getNextChar(); + else if (c == '\'') + break; + } + return c == '\''; +} + +bool OurReader::readObject(Token& token) { + Token tokenName; + String name; + Value init(objectValue); + currentValue().swapPayload(init); + currentValue().setOffsetStart(token.start_ - begin_); + while (readToken(tokenName)) { + bool initialTokenOk = true; + while (tokenName.type_ == tokenComment && initialTokenOk) + initialTokenOk = readToken(tokenName); + if (!initialTokenOk) + break; + if (tokenName.type_ == tokenObjectEnd && + (name.empty() || + features_.allowTrailingCommas_)) // empty object or trailing comma + return true; + name.clear(); + if (tokenName.type_ == tokenString) { + if (!decodeString(tokenName, name)) + return recoverFromError(tokenObjectEnd); + } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) { + Value numberName; + if (!decodeNumber(tokenName, numberName)) + return recoverFromError(tokenObjectEnd); + name = numberName.asString(); + } else { + break; + } + if (name.length() >= (1U << 30)) + throwRuntimeError("keylength >= 2^30"); + if (features_.rejectDupKeys_ && currentValue().isMember(name)) { + String msg = "Duplicate key: '" + name + "'"; + return addErrorAndRecover(msg, tokenName, tokenObjectEnd); + } + + Token colon; + if (!readToken(colon) || colon.type_ != tokenMemberSeparator) { + return addErrorAndRecover("Missing ':' after object member name", colon, + tokenObjectEnd); + } + Value& value = currentValue()[name]; + nodes_.push(&value); + bool ok = readValue(); + nodes_.pop(); + if (!ok) // error already set + return recoverFromError(tokenObjectEnd); + + Token comma; + if (!readToken(comma) || + (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator && + comma.type_ != tokenComment)) { + return addErrorAndRecover("Missing ',' or '}' in object declaration", + comma, tokenObjectEnd); + } + bool finalizeTokenOk = true; + while (comma.type_ == tokenComment && finalizeTokenOk) + finalizeTokenOk = readToken(comma); + if (comma.type_ == tokenObjectEnd) + return true; + } + return addErrorAndRecover("Missing '}' or object member name", tokenName, + tokenObjectEnd); +} + +bool OurReader::readArray(Token& token) { + Value init(arrayValue); + currentValue().swapPayload(init); + currentValue().setOffsetStart(token.start_ - begin_); + int index = 0; + for (;;) { + skipSpaces(); + if (current_ != end_ && *current_ == ']' && + (index == 0 || + (features_.allowTrailingCommas_ && + !features_.allowDroppedNullPlaceholders_))) // empty array or trailing + // comma + { + Token endArray; + readToken(endArray); + return true; + } + Value& value = currentValue()[index++]; + nodes_.push(&value); + bool ok = readValue(); + nodes_.pop(); + if (!ok) // error already set + return recoverFromError(tokenArrayEnd); + + Token currentToken; + // Accept Comment after last item in the array. + ok = readToken(currentToken); + while (currentToken.type_ == tokenComment && ok) { + ok = readToken(currentToken); + } + bool badTokenType = (currentToken.type_ != tokenArraySeparator && + currentToken.type_ != tokenArrayEnd); + if (!ok || badTokenType) { + return addErrorAndRecover("Missing ',' or ']' in array declaration", + currentToken, tokenArrayEnd); + } + if (currentToken.type_ == tokenArrayEnd) + break; + } + return true; +} + +bool OurReader::decodeNumber(Token& token) { + Value decoded; + if (!decodeNumber(token, decoded)) + return false; + currentValue().swapPayload(decoded); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + return true; +} + +bool OurReader::decodeNumber(Token& token, Value& decoded) { + // Attempts to parse the number as an integer. If the number is + // larger than the maximum supported value of an integer then + // we decode the number as a double. + Location current = token.start_; + const bool isNegative = *current == '-'; + if (isNegative) { + ++current; + } + + // We assume we can represent the largest and smallest integer types as + // unsigned integers with separate sign. This is only true if they can fit + // into an unsigned integer. + static_assert(Value::maxLargestInt <= Value::maxLargestUInt, + "Int must be smaller than UInt"); + + // We need to convert minLargestInt into a positive number. The easiest way + // to do this conversion is to assume our "threshold" value of minLargestInt + // divided by 10 can fit in maxLargestInt when absolute valued. This should + // be a safe assumption. + static_assert(Value::minLargestInt <= -Value::maxLargestInt, + "The absolute value of minLargestInt must be greater than or " + "equal to maxLargestInt"); + static_assert(Value::minLargestInt / 10 >= -Value::maxLargestInt, + "The absolute value of minLargestInt must be only 1 magnitude " + "larger than maxLargest Int"); + + static constexpr Value::LargestUInt positive_threshold = + Value::maxLargestUInt / 10; + static constexpr Value::UInt positive_last_digit = Value::maxLargestUInt % 10; + + // For the negative values, we have to be more careful. Since typically + // -Value::minLargestInt will cause an overflow, we first divide by 10 and + // then take the inverse. This assumes that minLargestInt is only a single + // power of 10 different in magnitude, which we check above. For the last + // digit, we take the modulus before negating for the same reason. + static constexpr auto negative_threshold = + Value::LargestUInt(-(Value::minLargestInt / 10)); + static constexpr auto negative_last_digit = + Value::UInt(-(Value::minLargestInt % 10)); + + const Value::LargestUInt threshold = + isNegative ? negative_threshold : positive_threshold; + const Value::UInt max_last_digit = + isNegative ? negative_last_digit : positive_last_digit; + + Value::LargestUInt value = 0; + while (current < token.end_) { + Char c = *current++; + if (c < '0' || c > '9') + return decodeDouble(token, decoded); + + const auto digit(static_cast<Value::UInt>(c - '0')); + if (value >= threshold) { + // We've hit or exceeded the max value divided by 10 (rounded down). If + // a) we've only just touched the limit, meaing value == threshold, + // b) this is the last digit, or + // c) it's small enough to fit in that rounding delta, we're okay. + // Otherwise treat this number as a double to avoid overflow. + if (value > threshold || current != token.end_ || + digit > max_last_digit) { + return decodeDouble(token, decoded); + } + } + value = value * 10 + digit; + } + + if (isNegative) { + // We use the same magnitude assumption here, just in case. + const auto last_digit = static_cast<Value::UInt>(value % 10); + decoded = -Value::LargestInt(value / 10) * 10 - last_digit; + } else if (value <= Value::LargestUInt(Value::maxLargestInt)) { + decoded = Value::LargestInt(value); + } else { + decoded = value; + } + + return true; +} + +bool OurReader::decodeDouble(Token& token) { + Value decoded; + if (!decodeDouble(token, decoded)) + return false; + currentValue().swapPayload(decoded); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + return true; +} + +bool OurReader::decodeDouble(Token& token, Value& decoded) { + double value = 0; + const String buffer(token.start_, token.end_); + IStringStream is(buffer); + if (!(is >> value)) { + return addError( + "'" + String(token.start_, token.end_) + "' is not a number.", token); + } + decoded = value; + return true; +} + +bool OurReader::decodeString(Token& token) { + String decoded_string; + if (!decodeString(token, decoded_string)) + return false; + Value decoded(decoded_string); + currentValue().swapPayload(decoded); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + return true; +} + +bool OurReader::decodeString(Token& token, String& decoded) { + decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2)); + Location current = token.start_ + 1; // skip '"' + Location end = token.end_ - 1; // do not include '"' + while (current != end) { + Char c = *current++; + if (c == '"') + break; + if (c == '\\') { + if (current == end) + return addError("Empty escape sequence in string", token, current); + Char escape = *current++; + switch (escape) { + case '"': + decoded += '"'; + break; + case '/': + decoded += '/'; + break; + case '\\': + decoded += '\\'; + break; + case 'b': + decoded += '\b'; + break; + case 'f': + decoded += '\f'; + break; + case 'n': + decoded += '\n'; + break; + case 'r': + decoded += '\r'; + break; + case 't': + decoded += '\t'; + break; + case 'u': { + unsigned int unicode; + if (!decodeUnicodeCodePoint(token, current, end, unicode)) + return false; + decoded += codePointToUTF8(unicode); + } break; + default: + return addError("Bad escape sequence in string", token, current); + } + } else { + decoded += c; + } + } + return true; +} + +bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current, + Location end, unsigned int& unicode) { + + if (!decodeUnicodeEscapeSequence(token, current, end, unicode)) + return false; + if (unicode >= 0xD800 && unicode <= 0xDBFF) { + // surrogate pairs + if (end - current < 6) + return addError( + "additional six characters expected to parse unicode surrogate pair.", + token, current); + if (*(current++) == '\\' && *(current++) == 'u') { + unsigned int surrogatePair; + if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) { + unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF); + } else + return false; + } else + return addError("expecting another \\u token to begin the second half of " + "a unicode surrogate pair", + token, current); + } + return true; +} + +bool OurReader::decodeUnicodeEscapeSequence(Token& token, Location& current, + Location end, + unsigned int& ret_unicode) { + if (end - current < 4) + return addError( + "Bad unicode escape sequence in string: four digits expected.", token, + current); + int unicode = 0; + for (int index = 0; index < 4; ++index) { + Char c = *current++; + unicode *= 16; + if (c >= '0' && c <= '9') + unicode += c - '0'; + else if (c >= 'a' && c <= 'f') + unicode += c - 'a' + 10; + else if (c >= 'A' && c <= 'F') + unicode += c - 'A' + 10; + else + return addError( + "Bad unicode escape sequence in string: hexadecimal digit expected.", + token, current); + } + ret_unicode = static_cast<unsigned int>(unicode); + return true; +} + +bool OurReader::addError(const String& message, Token& token, Location extra) { + ErrorInfo info; + info.token_ = token; + info.message_ = message; + info.extra_ = extra; + errors_.push_back(info); + return false; +} + +bool OurReader::recoverFromError(TokenType skipUntilToken) { + size_t errorCount = errors_.size(); + Token skip; + for (;;) { + if (!readToken(skip)) + errors_.resize(errorCount); // discard errors caused by recovery + if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream) + break; + } + errors_.resize(errorCount); + return false; +} + +bool OurReader::addErrorAndRecover(const String& message, Token& token, + TokenType skipUntilToken) { + addError(message, token); + return recoverFromError(skipUntilToken); +} + +Value& OurReader::currentValue() { return *(nodes_.top()); } + +OurReader::Char OurReader::getNextChar() { + if (current_ == end_) + return 0; + return *current_++; +} + +void OurReader::getLocationLineAndColumn(Location location, int& line, + int& column) const { + Location current = begin_; + Location lastLineStart = current; + line = 0; + while (current < location && current != end_) { + Char c = *current++; + if (c == '\r') { + if (*current == '\n') + ++current; + lastLineStart = current; + ++line; + } else if (c == '\n') { + lastLineStart = current; + ++line; + } + } + // column & line start at 1 + column = int(location - lastLineStart) + 1; + ++line; +} + +String OurReader::getLocationLineAndColumn(Location location) const { + int line, column; + getLocationLineAndColumn(location, line, column); + char buffer[18 + 16 + 16 + 1]; + jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column); + return buffer; +} + +String OurReader::getFormattedErrorMessages() const { + String formattedMessage; + for (const auto& error : errors_) { + formattedMessage += + "* " + getLocationLineAndColumn(error.token_.start_) + "\n"; + formattedMessage += " " + error.message_ + "\n"; + if (error.extra_) + formattedMessage += + "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n"; + } + return formattedMessage; +} + +std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const { + std::vector<OurReader::StructuredError> allErrors; + for (const auto& error : errors_) { + OurReader::StructuredError structured; + structured.offset_start = error.token_.start_ - begin_; + structured.offset_limit = error.token_.end_ - begin_; + structured.message = error.message_; + allErrors.push_back(structured); + } + return allErrors; +} + +class OurCharReader : public CharReader { + bool const collectComments_; + OurReader reader_; + +public: + OurCharReader(bool collectComments, OurFeatures const& features) + : collectComments_(collectComments), reader_(features) {} + bool parse(char const* beginDoc, char const* endDoc, Value* root, + String* errs) override { + bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_); + if (errs) { + *errs = reader_.getFormattedErrorMessages(); + } + return ok; + } +}; + +CharReaderBuilder::CharReaderBuilder() { setDefaults(&settings_); } +CharReaderBuilder::~CharReaderBuilder() = default; +CharReader* CharReaderBuilder::newCharReader() const { + bool collectComments = settings_["collectComments"].asBool(); + OurFeatures features = OurFeatures::all(); + features.allowComments_ = settings_["allowComments"].asBool(); + features.allowTrailingCommas_ = settings_["allowTrailingCommas"].asBool(); + features.strictRoot_ = settings_["strictRoot"].asBool(); + features.allowDroppedNullPlaceholders_ = + settings_["allowDroppedNullPlaceholders"].asBool(); + features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool(); + features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool(); + + // Stack limit is always a size_t, so we get this as an unsigned int + // regardless of it we have 64-bit integer support enabled. + features.stackLimit_ = static_cast<size_t>(settings_["stackLimit"].asUInt()); + features.failIfExtra_ = settings_["failIfExtra"].asBool(); + features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool(); + features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool(); + features.skipBom_ = settings_["skipBom"].asBool(); + return new OurCharReader(collectComments, features); +} + +bool CharReaderBuilder::validate(Json::Value* invalid) const { + static const auto& valid_keys = *new std::set<String>{ + "collectComments", + "allowComments", + "allowTrailingCommas", + "strictRoot", + "allowDroppedNullPlaceholders", + "allowNumericKeys", + "allowSingleQuotes", + "stackLimit", + "failIfExtra", + "rejectDupKeys", + "allowSpecialFloats", + "skipBom", + }; + for (auto si = settings_.begin(); si != settings_.end(); ++si) { + auto key = si.name(); + if (valid_keys.count(key)) + continue; + if (invalid) + (*invalid)[std::move(key)] = *si; + else + return false; + } + return invalid ? invalid->empty() : true; +} + +Value& CharReaderBuilder::operator[](const String& key) { + return settings_[key]; +} +// static +void CharReaderBuilder::strictMode(Json::Value* settings) { + //! [CharReaderBuilderStrictMode] + (*settings)["allowComments"] = false; + (*settings)["allowTrailingCommas"] = false; + (*settings)["strictRoot"] = true; + (*settings)["allowDroppedNullPlaceholders"] = false; + (*settings)["allowNumericKeys"] = false; + (*settings)["allowSingleQuotes"] = false; + (*settings)["stackLimit"] = 1000; + (*settings)["failIfExtra"] = true; + (*settings)["rejectDupKeys"] = true; + (*settings)["allowSpecialFloats"] = false; + (*settings)["skipBom"] = true; + //! [CharReaderBuilderStrictMode] +} +// static +void CharReaderBuilder::setDefaults(Json::Value* settings) { + //! [CharReaderBuilderDefaults] + (*settings)["collectComments"] = true; + (*settings)["allowComments"] = true; + (*settings)["allowTrailingCommas"] = true; + (*settings)["strictRoot"] = false; + (*settings)["allowDroppedNullPlaceholders"] = false; + (*settings)["allowNumericKeys"] = false; + (*settings)["allowSingleQuotes"] = false; + (*settings)["stackLimit"] = 1000; + (*settings)["failIfExtra"] = false; + (*settings)["rejectDupKeys"] = false; + (*settings)["allowSpecialFloats"] = false; + (*settings)["skipBom"] = true; + //! [CharReaderBuilderDefaults] +} + +////////////////////////////////// +// global functions + +bool parseFromStream(CharReader::Factory const& fact, IStream& sin, Value* root, + String* errs) { + OStringStream ssin; + ssin << sin.rdbuf(); + String doc = ssin.str(); + char const* begin = doc.data(); + char const* end = begin + doc.size(); + // Note that we do not actually need a null-terminator. + CharReaderPtr const reader(fact.newCharReader()); + return reader->parse(begin, end, root, errs); +} + +IStream& operator>>(IStream& sin, Value& root) { + CharReaderBuilder b; + String errs; + bool ok = parseFromStream(b, sin, &root, &errs); + if (!ok) { + throwRuntimeError(errs); } return sin; } |