// Copyright 2018 The Amber Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "src/tokenizer.h" #include #include #include #include #include "src/make_unique.h" namespace amber { Token::Token(TokenType type) : type_(type) {} Token::~Token() = default; Result Token::ConvertToDouble() { if (IsDouble()) return {}; if (IsString() || IsEOL() || IsEOS()) return Result("Invalid conversion to double"); if (IsInteger()) { if (is_negative_ || uint_value_ <= static_cast(std::numeric_limits::max())) { double_value_ = static_cast(AsInt64()); } else { return Result("uint64_t value too big to fit in double"); } uint_value_ = 0; } else if (IsHex()) { double_value_ = static_cast(AsHex()); string_value_ = ""; } type_ = TokenType::kDouble; return {}; } Tokenizer::Tokenizer(const std::string& data) : data_(data) {} Tokenizer::~Tokenizer() = default; std::unique_ptr Tokenizer::NextToken() { SkipWhitespace(); if (current_position_ >= data_.length()) return MakeUnique(TokenType::kEOS); if (data_[current_position_] == '#') { SkipComment(); SkipWhitespace(); } if (current_position_ >= data_.length()) return MakeUnique(TokenType::kEOS); if (data_[current_position_] == '\n') { ++current_line_; ++current_position_; return MakeUnique(TokenType::kEOL); } // If the current position is a , ( or ) then handle it specially as we don't // want to consume any other characters. if (data_[current_position_] == ',' || data_[current_position_] == '(' || data_[current_position_] == ')') { auto tok = MakeUnique(TokenType::kString); std::string str(1, data_[current_position_]); tok->SetStringValue(str); ++current_position_; return tok; } size_t end_pos = current_position_; while (end_pos < data_.length()) { if (data_[end_pos] == ' ' || data_[end_pos] == '\r' || data_[end_pos] == '\n' || data_[end_pos] == ')' || data_[end_pos] == ',' || data_[end_pos] == '(') { break; } ++end_pos; } std::string tok_str = data_.substr(current_position_, end_pos - current_position_); current_position_ = end_pos; // Starts with an alpha is a string. if (!std::isdigit(tok_str[0]) && !(tok_str[0] == '-' && std::isdigit(tok_str[1])) && !(tok_str[0] == '.' && std::isdigit(tok_str[1]))) { // If we've got a continuation, skip over the end of line and get the next // token. if (tok_str == "\\") { if ((current_position_ < data_.length() && data_[current_position_] == '\n')) { ++current_line_; ++current_position_; return NextToken(); } else if (current_position_ + 1 < data_.length() && data_[current_position_] == '\r' && data_[current_position_ + 1] == '\n') { ++current_line_; current_position_ += 2; return NextToken(); } } auto tok = MakeUnique(TokenType::kString); tok->SetStringValue(tok_str); return tok; } // Handle hex strings if (tok_str.size() > 2 && tok_str[0] == '0' && tok_str[1] == 'x') { auto tok = MakeUnique(TokenType::kHex); tok->SetStringValue(tok_str); return tok; } bool is_double = false; for (const char ch : tok_str) { if (ch == '.') { is_double = true; break; } } std::unique_ptr tok; char* final_pos = nullptr; if (is_double) { tok = MakeUnique(TokenType::kDouble); double val = strtod(tok_str.c_str(), &final_pos); tok->SetDoubleValue(val); } else { tok = MakeUnique(TokenType::kInteger); uint64_t val = uint64_t(std::strtoull(tok_str.c_str(), &final_pos, 10)); tok->SetUint64Value(static_cast(val)); } if (tok_str.size() > 1 && tok_str[0] == '-') tok->SetNegative(); tok->SetOriginalString( tok_str.substr(0, static_cast(final_pos - tok_str.c_str()))); // If the number isn't the whole token then move back so we can then parse // the string portion. auto diff = size_t(final_pos - tok_str.c_str()); if (diff > 0) current_position_ -= tok_str.length() - diff; return tok; } std::string Tokenizer::ExtractToNext(const std::string& str) { size_t pos = data_.find(str, current_position_); std::string ret; if (pos == std::string::npos) { ret = data_.substr(current_position_); current_position_ = data_.length(); } else { ret = data_.substr(current_position_, pos - current_position_); current_position_ = pos; } // Account for any new lines in the extracted text so our current line // number stays correct. for (const char c : ret) { if (c == '\n') ++current_line_; } return ret; } bool Tokenizer::IsWhitespace(char ch) { return ch == '\0' || ch == '\t' || ch == '\r' || ch == 0x0c /* ff */ || ch == ' '; } void Tokenizer::SkipWhitespace() { while (current_position_ < data_.size() && IsWhitespace(data_[current_position_])) { ++current_position_; } } void Tokenizer::SkipComment() { while (current_position_ < data_.length() && data_[current_position_] != '\n') { ++current_position_; } } } // namespace amber