aboutsummaryrefslogtreecommitdiff
path: root/src/tokenizer.h
blob: 7349c68eb9d31ec7384d962174d096ecbd24fe70 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
// Copyright 2018 The Amber Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef SRC_TOKENIZER_H_
#define SRC_TOKENIZER_H_

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <memory>
#include <string>

#include "amber/result.h"

namespace amber {

enum class TokenType : uint8_t {
  kEOS = 0,
  kEOL,
  kString,
  kInteger,
  kDouble,
  kHex,
};

/// A token read from the input source.
class Token {
 public:
  explicit Token(TokenType type);
  ~Token();

  bool IsHex() const { return type_ == TokenType::kHex; }
  bool IsInteger() const { return type_ == TokenType::kInteger; }
  bool IsDouble() const { return type_ == TokenType::kDouble; }
  bool IsString() const { return type_ == TokenType::kString; }
  bool IsEOS() const { return type_ == TokenType::kEOS; }
  bool IsEOL() const { return type_ == TokenType::kEOL; }

  bool IsComma() const {
    return type_ == TokenType::kString && string_value_ == ",";
  }
  bool IsOpenBracket() const {
    return type_ == TokenType::kString && string_value_ == "(";
  }
  bool IsCloseBracket() const {
    return type_ == TokenType::kString && string_value_ == ")";
  }

  void SetNegative() { is_negative_ = true; }
  void SetStringValue(const std::string& val) { string_value_ = val; }
  void SetUint64Value(uint64_t val) { uint_value_ = val; }
  void SetDoubleValue(double val) { double_value_ = val; }

  const std::string& AsString() const { return string_value_; }

  uint8_t AsUint8() const { return static_cast<uint8_t>(uint_value_); }
  uint16_t AsUint16() const { return static_cast<uint16_t>(uint_value_); }
  uint32_t AsUint32() const { return static_cast<uint32_t>(uint_value_); }
  uint64_t AsUint64() const { return static_cast<uint64_t>(uint_value_); }

  int8_t AsInt8() const { return static_cast<int8_t>(uint_value_); }
  int16_t AsInt16() const { return static_cast<int16_t>(uint_value_); }
  int32_t AsInt32() const { return static_cast<int32_t>(uint_value_); }
  int64_t AsInt64() const { return static_cast<int64_t>(uint_value_); }

  Result ConvertToDouble();

  float AsFloat() const { return static_cast<float>(double_value_); }
  double AsDouble() const { return double_value_; }

  uint64_t AsHex() const {
    return uint64_t(std::strtoull(string_value_.c_str(), nullptr, 16));
  }

  /// The OriginalString is set for integer and double values to store the
  /// unparsed number which we can return in error messages.
  void SetOriginalString(const std::string& orig_string) {
    string_value_ = orig_string;
  }
  std::string ToOriginalString() const { return string_value_; }

 private:
  TokenType type_;
  std::string string_value_;
  uint64_t uint_value_ = 0;
  double double_value_ = 0.0;
  bool is_negative_ = false;
};

/// Splits the provided input into a stream of tokens.
///
/// The input is held in a std::string member together with a read position
/// (initially 0) and a line counter (initially 1). All non-trivial member
/// functions are defined out of line, so their exact behaviour is not visible
/// from this header.
class Tokenizer {
 public:
  /// Creates a tokenizer over |data|.
  /// NOTE(review): |data_| is stored by value, so the input is presumably
  /// copied — confirm in the implementation file.
  explicit Tokenizer(const std::string& data);
  ~Tokenizer();

  /// Returns the next token; ownership passes to the caller.
  std::unique_ptr<Token> NextToken();
  /// NOTE(review): presumably returns the input text up to the next
  /// occurrence of |str| — confirm against the implementation.
  std::string ExtractToNext(const std::string& str);

  /// Overwrites the stored line counter with |line|.
  void SetCurrentLine(size_t line) { current_line_ = line; }
  /// Returns the stored line counter.
  size_t GetCurrentLine() const { return current_line_; }

 private:
  // Helpers defined out of line.
  bool IsWhitespace(char ch);
  void SkipWhitespace();
  void SkipComment();

  // The text being tokenized.
  std::string data_;
  // Read position; starts at 0.
  // NOTE(review): presumably an index into |data_| — confirm.
  size_t current_position_ = 0;
  // Line counter; starts at 1 and can be overridden via SetCurrentLine().
  size_t current_line_ = 1;
};

}  // namespace amber

#endif  // SRC_TOKENIZER_H_