diff options
Diffstat (limited to 'tools/bookmaker/textParser.h')
-rw-r--r-- | tools/bookmaker/textParser.h | 733 |
1 files changed, 733 insertions, 0 deletions
diff --git a/tools/bookmaker/textParser.h b/tools/bookmaker/textParser.h new file mode 100644 index 0000000000..eee1f7cfad --- /dev/null +++ b/tools/bookmaker/textParser.h @@ -0,0 +1,733 @@ +/* + * Copyright 2018 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef textParser_DEFINED +#define textParser_DEFINED + +#include <functional> + +#include "bookmaker.h" + +class BmhParser; +class Definition; + +class TextParser : public NonAssignable { + TextParser() {} // only for ParserCommon, TextParserSave + friend class ParserCommon; + friend class TextParserSave; +public: + virtual ~TextParser() {} + + TextParser(string fileName, const char* start, const char* end, int lineCount) + : fFileName(fileName) + , fStart(start) + , fLine(start) + , fChar(start) + , fEnd(end) + , fLineCount(lineCount) + { + } + + TextParser(const Definition* ); + + const char* anyOf(const char* str) const { + const char* ptr = fChar; + while (ptr < fEnd) { + if (strchr(str, ptr[0])) { + return ptr; + } + ++ptr; + } + return nullptr; + } + + const char* anyOf(const char* wordStart, const char* wordList[], size_t wordListCount) const { + const char** wordPtr = wordList; + const char** wordEnd = wordPtr + wordListCount; + const size_t matchLen = fChar - wordStart; + while (wordPtr < wordEnd) { + const char* word = *wordPtr++; + if (strlen(word) == matchLen && !strncmp(wordStart, word, matchLen)) { + return word; + } + } + return nullptr; + } + + bool back(const char* pattern) { + size_t len = strlen(pattern); + const char* start = fChar - len; + if (start <= fStart) { + return false; + } + if (strncmp(start, pattern, len)) { + return false; + } + fChar = start; + return true; + } + + char backup(const char* pattern) const { + size_t len = strlen(pattern); + const char* start = fChar - len; + if (start <= fStart) { + return '\0'; + } + if (strncmp(start, pattern, len)) { + return '\0'; + } + return start[-1]; + } + + void backupWord() { + while (fChar > fStart && isalpha(fChar[-1])) { + --fChar; + } + } + + bool contains(const char* match, const char* lineEnd, const char** loc) const { + const char* result = this->strnstr(match, lineEnd); + if (loc) { + *loc = result; + } + return result; + } + + bool containsWord(const char* match, const char* lineEnd, const char** loc) { + size_t len = strlen(match); + do { + const char* result = this->strnstr(match, lineEnd); + if (!result) { + return false; + } + if ((result > fStart && isalnum(result[-1])) || (result + len < fEnd + && isalnum(result[len]))) { + fChar = result + len; + continue; + } + if (loc) { + *loc = result; + } + return true; + } while (true); + } + + // either /n/n or /n# will stop parsing a typedef + const char* doubleLF() const { + const char* ptr = fChar - 1; + const char* doubleStart = nullptr; + while (++ptr < fEnd) { + if (!doubleStart) { + if ('\n' == ptr[0]) { + doubleStart = ptr; + } + continue; + } + if ('\n' == ptr[0] || '#' == ptr[0]) { + return doubleStart; + } + if (' ' < ptr[0]) { + doubleStart = nullptr; + } + } + return nullptr; + } + + bool endsWith(const char* match) { + int matchLen = strlen(match); + if (matchLen > fChar - fLine) { + return false; + } + return !strncmp(fChar - matchLen, match, matchLen); + } + + bool eof() const { return fChar >= fEnd; } + + const char* lineEnd() const { + const char* ptr = fChar; + do { + if (ptr >= fEnd) { + return ptr; + } + char test = *ptr++; + if (test == '\n' || test == '\0') { + break; + } + } while (true); + return ptr; + } + + ptrdiff_t lineLength() const { + return this->lineEnd() - fLine; + } + + bool match(TextParser* ); + + char next() { + SkASSERT(fChar < fEnd); + char result = *fChar++; + if ('\n' == result) { + ++fLineCount; + fLine = fChar; + } + return result; + } + + char peek() const { SkASSERT(fChar < fEnd); return *fChar; } + + void restorePlace(const TextParser& save) { + fChar = save.fChar; + fLine = save.fLine; + fLineCount = save.fLineCount; + } + + void savePlace(TextParser* save) { + save->fChar = fChar; + save->fLine = fLine; + save->fLineCount = fLineCount; + } + + void reportError(const char* errorStr) const; + static string ReportFilename(string file); + void reportWarning(const char* errorStr) const; + + template <typename T> T reportError(const char* errorStr) const { + this->reportError(errorStr); + return T(); + } + + bool sentenceEnd(const char* check) const { + while (check > fStart) { + --check; + if (' ' < check[0] && '.' != check[0]) { + return false; + } + if ('.' == check[0]) { + return ' ' >= check[1]; + } + if ('\n' == check[0] && '\n' == check[1]) { + return true; + } + } + return true; + } + + void setForErrorReporting(const Definition* , const char* ); + + bool skipToBalancedEndBracket(char startB, char endB) { + SkASSERT(fChar < fEnd); + SkASSERT(startB == fChar[0]); + int startCount = 0; + do { + char test = this->next(); + startCount += startB == test; + startCount -= endB == test; + } while (startCount && fChar < fEnd); + return !startCount; + } + + bool skipToEndBracket(char endBracket, const char* end = nullptr) { + if (nullptr == end) { + end = fEnd; + } + while (fChar[0] != endBracket) { + if (fChar >= end) { + return false; + } + (void) this->next(); + } + return true; + } + + bool skipToEndBracket(const char* endBracket) { + size_t len = strlen(endBracket); + while (strncmp(fChar, endBracket, len)) { + if (fChar >= fEnd) { + return false; + } + (void) this->next(); + } + return true; + } + + bool skipLine() { + return skipToEndBracket('\n'); + } + + void skipTo(const char* skip) { + while (fChar < skip) { + this->next(); + } + } + + void skipToAlpha() { + while (fChar < fEnd && !isalpha(fChar[0])) { + fChar++; + } + } + + // returns true if saw close brace + bool skipToAlphaNum() { + bool sawCloseBrace = false; + while (fChar < fEnd && !isalnum(fChar[0])) { + sawCloseBrace |= '}' == *fChar++; + } + return sawCloseBrace; + } + + bool skipExact(const char* pattern) { + if (!this->startsWith(pattern)) { + return false; + } + this->skipName(pattern); + return true; + } + + // differs from skipToNonAlphaNum in that a.b isn't considered a full name, + // since a.b can't be found as a named definition + void skipFullName() { + do { + char last = '\0'; + while (fChar < fEnd && (isalnum(fChar[0]) + || '_' == fChar[0] /* || '-' == fChar[0] */ + || (':' == fChar[0] && fChar + 1 < fEnd && ':' == fChar[1]))) { + if (':' == fChar[0] && fChar + 1 < fEnd && ':' == fChar[1]) { + fChar++; + } + last = fChar[0]; + fChar++; + } + if (fChar + 1 >= fEnd || '/' != fChar[0] || !isalpha(last) || !isalpha(fChar[1])) { + break; // stop unless pattern is xxx/xxx as in I/O + } + fChar++; // skip slash + } while (true); + } + + int skipToLineBalance(char open, char close) { + int match = 0; + while (!this->eof() && '\n' != fChar[0]) { + match += open == this->peek(); + match -= close == this->next(); + } + return match; + } + + bool skipToLineStart() { + if (!this->skipLine()) { + return false; + } + if (!this->eof()) { + return this->skipWhiteSpace(); + } + return true; + } + + void skipToLineStart(int* indent, bool* sawReturn) { + SkAssertResult(this->skipLine()); + this->skipWhiteSpace(indent, sawReturn); + } + + void skipLower() { + while (fChar < fEnd && (islower(fChar[0]) || '_' == fChar[0])) { + fChar++; + } + } + + void skipToNonAlphaNum() { + while (fChar < fEnd && (isalnum(fChar[0]) || '_' == fChar[0])) { + fChar++; + } + } + + void skipToNonName() { + while (fChar < fEnd && (isalnum(fChar[0]) + || '_' == fChar[0] || '-' == fChar[0] + || (':' == fChar[0] && fChar + 1 < fEnd && ':' == fChar[1]) + || ('.' == fChar[0] && fChar + 1 < fEnd && isalpha(fChar[1])))) { + if (':' == fChar[0] && fChar +1 < fEnd && ':' == fChar[1]) { + fChar++; + } + fChar++; + } + } + + void skipPhraseName() { + while (fChar < fEnd && (islower(fChar[0]) || '_' == fChar[0])) { + fChar++; + } + } + + void skipToSpace() { + while (fChar < fEnd && ' ' != fChar[0]) { + fChar++; + } + } + + void skipToWhiteSpace() { + while (fChar < fEnd && ' ' < fChar[0]) { + fChar++; + } + } + + bool skipName(const char* word) { + size_t len = strlen(word); + if (len <= (size_t) (fEnd - fChar) && !strncmp(word, fChar, len)) { + for (size_t i = 0; i < len; ++i) { + this->next(); + } + } + return this->eof() || ' ' >= fChar[0]; + } + + bool skipSpace() { + while (' ' == this->peek()) { + (void) this->next(); + if (fChar >= fEnd) { + return false; + } + } + return true; + } + + bool skipWord(const char* word) { + if (!this->skipWhiteSpace()) { + return false; + } + const char* save = fChar; + if (!this->skipName(word)) { + fChar = save; + return false; + } + if (!this->skipWhiteSpace()) { + return false; + } + return true; + } + + bool skipWhiteSpace() { + while (' ' >= this->peek()) { + (void) this->next(); + if (fChar >= fEnd) { + return false; + } + } + return true; + } + + void skipWhiteSpace(int* indent, bool* skippedReturn) { + while (' ' >= this->peek()) { + *indent = *skippedReturn ? *indent + 1 : 1; + if ('\n' == this->peek()) { + *skippedReturn |= true; + *indent = 0; + } + (void) this->next(); + SkASSERT(fChar < fEnd); + } + } + + bool startsWith(const char* str) const { + size_t len = strlen(str); + ptrdiff_t lineLen = fEnd - fChar; + return len <= (size_t) lineLen && 0 == strncmp(str, fChar, len); + } + + // ignores minor white space differences + bool startsWith(const char* str, size_t oLen) const { + size_t tIndex = 0; + size_t tLen = fEnd - fChar; + size_t oIndex = 0; + while (oIndex < oLen && tIndex < tLen) { + bool tSpace = ' ' >= fChar[tIndex]; + bool oSpace = ' ' >= str[oIndex]; + if (tSpace != oSpace) { + break; + } + if (tSpace) { + do { + ++tIndex; + } while (tIndex < tLen && ' ' >= fChar[tIndex]); + do { + ++oIndex; + } while (oIndex < oLen && ' ' >= str[oIndex]); + continue; + } + if (fChar[tIndex] != str[oIndex]) { + break; + } + ++tIndex; + ++oIndex; + } + return oIndex >= oLen; + } + + const char* strnchr(char ch, const char* end) const { + const char* ptr = fChar; + while (ptr < end) { + if (ptr[0] == ch) { + return ptr; + } + ++ptr; + } + return nullptr; + } + + const char* strnstr(const char *match, const char* end) const { + size_t matchLen = strlen(match); + SkASSERT(matchLen > 0); + ptrdiff_t len = end - fChar; + SkASSERT(len >= 0); + if ((size_t) len < matchLen ) { + return nullptr; + } + size_t count = len - matchLen; + for (size_t index = 0; index <= count; index++) { + if (0 == strncmp(&fChar[index], match, matchLen)) { + return &fChar[index]; + } + } + return nullptr; + } + + const char* trimmedBracketEnd(const char bracket) const { + int max = (int) (this->lineLength()); + int index = 0; + while (index < max && bracket != fChar[index]) { + ++index; + } + SkASSERT(index < max); + while (index > 0 && ' ' >= fChar[index - 1]) { + --index; + } + return fChar + index; + } + + const char* trimmedBracketEnd(string bracket) const { + size_t max = (size_t) (this->lineLength()); + string line(fChar, max); + size_t index = line.find(bracket); + SkASSERT(index < max); + while (index > 0 && ' ' >= fChar[index - 1]) { + --index; + } + return fChar + index; + } + + const char* trimmedBracketNoEnd(const char bracket) const { + int max = (int) (fEnd - fChar); + int index = 0; + while (index < max && bracket != fChar[index]) { + ++index; + } + SkASSERT(index < max); + while (index > 0 && ' ' >= fChar[index - 1]) { + --index; + } + return fChar + index; + } + + const char* trimmedLineEnd() const { + const char* result = this->lineEnd(); + while (result > fChar && ' ' >= result[-1]) { + --result; + } + return result; + } + + void trimEnd() { + while (fEnd > fStart && ' ' >= fEnd[-1]) { + --fEnd; + } + } + + // FIXME: nothing else in TextParser knows from C++ -- + // there could be a class between TextParser and ParserCommon + virtual string typedefName(); + + const char* wordEnd() const { + const char* end = fChar; + while (isalnum(end[0]) || '_' == end[0] || '-' == end[0]) { + ++end; + } + return end; + } + + string fFileName; + const char* fStart; + const char* fLine; + const char* fChar; + const char* fEnd; + size_t fLineCount; +}; + +class TextParserSave { +public: + TextParserSave(TextParser* parser) { + fParser = parser; + fSave.fFileName = parser->fFileName; + fSave.fStart = parser->fStart; + fSave.fLine = parser->fLine; + fSave.fChar = parser->fChar; + fSave.fEnd = parser->fEnd; + fSave.fLineCount = parser->fLineCount; + } + + void restore() const { + fParser->fFileName = fSave.fFileName; + fParser->fStart = fSave.fStart; + fParser->fLine = fSave.fLine; + fParser->fChar = fSave.fChar; + fParser->fEnd = fSave.fEnd; + fParser->fLineCount = fSave.fLineCount; + } + +private: + TextParser* fParser; + TextParser fSave; +}; + +static inline bool has_nonwhitespace(string s) { + bool nonwhite = false; + for (const char& c : s) { + if (' ' < c) { + nonwhite = true; + break; + } + } + return nonwhite; +} + +static inline void trim_end(string &s) { + s.erase(std::find_if(s.rbegin(), s.rend(), + std::not1(std::ptr_fun<int, int>(std::isspace))).base(), s.end()); +} + +static inline void trim_end_spaces(string &s) { + while (s.length() > 0 && ' ' == s.back()) { + s.pop_back(); + } +} + +static inline void trim_start(string &s) { + s.erase(s.begin(), std::find_if(s.begin(), s.end(), + std::not1(std::ptr_fun<int, int>(std::isspace)))); +} + +static inline void trim_start_end(string& s) { + trim_start(s); + trim_end(s); +} + +class EscapeParser : public TextParser { +public: + EscapeParser(const char* start, const char* end) : + TextParser("", start, end, 0) { + const char* reader = fStart; + fStorage = new char[end - start]; + char* writer = fStorage; + while (reader < fEnd) { + char ch = *reader++; + if (ch != '\\') { + *writer++ = ch; + } else { + char ctrl = *reader++; + if (ctrl == 'u') { + unsigned unicode = 0; + for (int i = 0; i < 4; ++i) { + unicode <<= 4; + SkASSERT((reader[0] >= '0' && reader[0] <= '9') || + (reader[0] >= 'A' && reader[0] <= 'F') || + (reader[0] >= 'a' && reader[0] <= 'f')); + int nibble = *reader++ - '0'; + if (nibble > 9) { + nibble = (nibble & ~('a' - 'A')) - 'A' + '9' + 1; + } + unicode |= nibble; + } + SkASSERT(unicode < 256); + *writer++ = (unsigned char) unicode; + } else { + SkASSERT(ctrl == 'n'); + *writer++ = '\n'; + } + } + } + fStart = fLine = fChar = fStorage; + fEnd = writer; + } + + ~EscapeParser() override { + delete fStorage; + } +private: + char* fStorage; +}; + +// some methods cannot be trivially parsed; look for class-name / ~ / operator +class MethodParser : public TextParser { +public: + MethodParser(string className, string fileName, + const char* start, const char* end, int lineCount) + : TextParser(fileName, start, end, lineCount) + , fClassName(className) { + size_t doubleColons = className.find_last_of("::"); + if (string::npos != doubleColons) { + fLocalName = className.substr(doubleColons + 1); + SkASSERT(fLocalName.length() > 0); + } + } + + ~MethodParser() override {} + + string localName() const { + return fLocalName; + } + + void setLocalName(string name) { + if (name == fClassName) { + fLocalName = ""; + } else { + fLocalName = name; + } + } + + // returns true if close brace was skipped + int skipToMethodStart() { + if (!fClassName.length()) { + return this->skipToAlphaNum(); + } + int braceCount = 0; + while (!this->eof() && !isalnum(this->peek()) && '~' != this->peek()) { + braceCount += '{' == this->peek(); + braceCount -= '}' == this->peek(); + this->next(); + } + return braceCount; + } + + void skipToMethodEnd(Resolvable resolvable); + + bool wordEndsWith(const char* str) const { + const char* space = this->strnchr(' ', fEnd); + if (!space) { + return false; + } + size_t len = strlen(str); + if (space < fChar + len) { + return false; + } + return !strncmp(str, space - len, len); + } + +private: + string fClassName; + string fLocalName; + typedef TextParser INHERITED; +}; + +#endif |