summaryrefslogtreecommitdiff
path: root/includes/image_io/base/data_scanner.h
blob: 0d1af2642247171f1bc62e7d667d7043f54734ac (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
#ifndef IMAGE_IO_BASE_DATA_SCANNER_H_  // NOLINT
#define IMAGE_IO_BASE_DATA_SCANNER_H_  // NOLINT

#include <string>

#include "image_io/base/data_context.h"
#include "image_io/base/data_match_result.h"
#include "image_io/base/data_range.h"
#include "image_io/base/data_segment.h"

namespace photos_editing_formats {
namespace image_io {

/// Provides a means to scan a textual portion of a data segment for a sequence
/// of characters and return the data associated with the resulting match. The
/// scanners also maintain state information for repeated calling in case the
/// text data is split over multiple data segments. The scanners also maintain
/// a data range where the result of the scanner's match can be found. These
/// scanners are written to allow copy semantics to make memory management
/// easier. Several types of scanners are provided.
class DataScanner {
 public:
  /// Identifies the kind of text a scanner looks for.
  enum Type {
    /// Matches an exact sequence of one or more characters. The sequence is
    /// the text handed to the CreateLiteralScanner() function.
    kLiteral,

    /// Matches a name. The first character of a name must be one of the
    /// characters in "[A-Z][a-z]:_"; the characters that follow may also
    /// include "[0-9]-.".
    kName,

    /// Matches a quoted string, i.e. text delimited by a single (') or
    /// double (") quote that may include any character except the quote mark.
    kQuotedString,

    /// Matches one character out of a set of characters. The set is the text
    /// handed to the CreateSentinelScanner() function.
    kSentinel,

    /// Accepts all text up to and including a literal text value. The literal
    /// is the text handed to the CreateThroughLiteralScanner() function.
    kThroughLiteral,

    /// Skips whitespace characters; at least one whitespace character must be
    /// scanned. The whitespace character set is the one returned by the
    /// GetWhitespaceChars() function.
    kWhitespace,

    /// Skips whitespace characters like kWhitespace, except that scanning no
    /// whitespace characters at all does not produce an error result.
    kOptionalWhitespace,
  };

  /// @return The set of whitespace characters: " \t\n\r".
  static std::string GetWhitespaceChars();

  /// @param literal The literal the scanner should look for.
  /// @return A kLiteral type scanner.
  static DataScanner CreateLiteralScanner(const std::string& literal);

  /// @return A kName type scanner.
  static DataScanner CreateNameScanner();

  /// @return A kQuotedString type scanner.
  static DataScanner CreateQuotedStringScanner();

  /// @param sentinels The set of sentinels to scan for. Within this set the
  /// "~" character acts as an "abbreviation" for any of the characters that
  /// can make up the first character of a kName type sentinel.
  /// @return A kSentinel type scanner.
  static DataScanner CreateSentinelScanner(const std::string& sentinels);

  /// @param literal The literal the scanner should scan through.
  /// @return A kThroughLiteral type scanner.
  static DataScanner CreateThroughLiteralScanner(const std::string& literal);

  /// @return A kWhitespace type scanner.
  static DataScanner CreateWhitespaceScanner();

  /// @return A kOptionalWhitespace type scanner.
  static DataScanner CreateOptionalWhitespaceScanner();

  /// @return The type of this scanner.
  Type GetType() const { return type_; }

  /// @return A description of the scanner, based on its type.
  std::string GetDescription() const;

  /// @return The literal value if this is a kLiteral or kThroughLiteral type
  /// scanner, or an empty string otherwise.
  std::string GetLiteral() const;

  /// @return The set of sentinels if this is a kSentinel type scanner, or an
  /// empty string otherwise.
  std::string GetSentenels() const;

  /// @return The sentinel character, taken from the set of characters passed
  /// to the CreateSentinelScanner() function, that was matched by a successful
  /// scan operation, or 0 otherwise.
  char GetSentinel() const;

  /// @return The range of characters that the scanner found during one or more
  /// successful Scan() function operations.
  const DataRange& GetTokenRange() const { return token_range_; }

  /// @return The number of times the Scan() function has been called.
  size_t GetScanCallCount() const { return scan_call_count_; }

  /// @param context The data context to use for the scan operation.
  /// @return The match result of the scan operation.
  DataMatchResult Scan(const DataContext& context);

  /// Reset the scanner's token range to an invalid value.
  void ResetTokenRange();

  /// Reset the scanner state to the value it had when it was first constructed.
  void Reset();

 private:
  /// Creates a scanner of the given type with an empty literal/sentinel string.
  /// @param type The type of scanner to create.
  explicit DataScanner(Type type) : DataScanner(type, std::string()) {}

  /// Creates a scanner of the given type. The state data and the scan call
  /// count start out at zero via their default member initializers.
  /// @param type The type of scanner to create.
  /// @param literal_or_sentinels The string stored for use by the scanner.
  DataScanner(Type type, const std::string& literal_or_sentinels)
      : literal_or_sentinels_(literal_or_sentinels), type_(type) {}

  /// @param delta_length The byte count to use to extend the token range end.
  /// @return The new length of the token range.
  size_t ExtendTokenLength(size_t delta_length);

  /// The worker functions for scanning each type of literal. They all share
  /// the same parameters:
  /// @param cbytes The pointer value to the buffer at the context's location.
  /// @param bytes_available The number of bytes available for the scan.
  /// @param context The data context for message generation purposes.
  DataMatchResult ScanLiteral(const char* cbytes,
                              size_t bytes_available,
                              const DataContext& context);
  DataMatchResult ScanName(const char* cbytes,
                           size_t bytes_available,
                           const DataContext& context);
  DataMatchResult ScanQuotedString(const char* cbytes,
                                   size_t bytes_available,
                                   const DataContext& context);
  DataMatchResult ScanSentinel(const char* cbytes,
                               size_t bytes_available,
                               const DataContext& context);
  DataMatchResult ScanThroughLiteral(const char* cbytes,
                                     size_t bytes_available,
                                     const DataContext& context);
  DataMatchResult ScanWhitespace(const char* cbytes,
                                 size_t bytes_available,
                                 const DataContext& context);

  /// Sets the match result to kError and generates an internal error message.
  /// @param context The data context for message generation purposes.
  /// @param error_description A description of the type of internal error.
  /// @param result The result to receive the kError type and message.
  void SetInternalError(const DataContext& context,
                        const std::string& error_description,
                        DataMatchResult* result);

  /// Sets the match result to kError and generates a syntax error message.
  /// @param context The data context for message generation purposes.
  /// @param error_description A description of the type of syntax error.
  /// @param result The result to receive the kError type and message.
  void SetSyntaxError(const DataContext& context,
                      const std::string& error_description,
                      DataMatchResult* result);

  /// The string used by kLiteral, kThroughLiteral and kSentinel type scanners.
  std::string literal_or_sentinels_;

  /// The token range built by one or more calls to the Scan() function.
  DataRange token_range_;

  /// State data used in different ways by different scanner types.
  size_t data_ = 0;

  /// The number of times the scanner's Scan() function has been called.
  size_t scan_call_count_ = 0;

  /// The type of scanner.
  Type type_;
};

}  // namespace image_io
}  // namespace photos_editing_formats

#endif // IMAGE_IO_BASE_DATA_SCANNER_H_  // NOLINT