summaryrefslogtreecommitdiff
path: root/base/i18n/streaming_utf8_validator.cc
diff options
context:
space:
mode:
Diffstat (limited to 'base/i18n/streaming_utf8_validator.cc')
-rw-r--r--base/i18n/streaming_utf8_validator.cc59
1 files changed, 59 insertions, 0 deletions
diff --git a/base/i18n/streaming_utf8_validator.cc b/base/i18n/streaming_utf8_validator.cc
new file mode 100644
index 0000000000..19c86a37a4
--- /dev/null
+++ b/base/i18n/streaming_utf8_validator.cc
@@ -0,0 +1,59 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// This implementation doesn't use ICU. The ICU macros are oriented towards
+// character-at-a-time processing, whereas byte-at-a-time processing is easier
+// with streaming input.
+
+#include "base/i18n/streaming_utf8_validator.h"
+
+#include "base/i18n/utf8_validator_tables.h"
+#include "base/logging.h"
+
+namespace base {
+namespace {
+
+uint8_t StateTableLookup(uint8_t offset) {
+ DCHECK_LT(offset, internal::kUtf8ValidatorTablesSize);
+ return internal::kUtf8ValidatorTables[offset];
+}
+
+} // namespace
+
+StreamingUtf8Validator::State StreamingUtf8Validator::AddBytes(const char* data,
+ size_t size) {
+ // Copy |state_| into a local variable so that the compiler doesn't have to be
+ // careful of aliasing.
+ uint8_t state = state_;
+ for (const char* p = data; p != data + size; ++p) {
+ if ((*p & 0x80) == 0) {
+ if (state == 0)
+ continue;
+ state = internal::I18N_UTF8_VALIDATOR_INVALID_INDEX;
+ break;
+ }
+ const uint8_t shift_amount = StateTableLookup(state);
+ const uint8_t shifted_char = (*p & 0x7F) >> shift_amount;
+ state = StateTableLookup(state + shifted_char + 1);
+ // State may be INVALID here, but this code is optimised for the case of
+ // valid UTF-8 and it is more efficient (by about 2%) to not attempt an
+ // early loop exit unless we hit an ASCII character.
+ }
+ state_ = state;
+ return state == 0 ? VALID_ENDPOINT
+ : state == internal::I18N_UTF8_VALIDATOR_INVALID_INDEX
+ ? INVALID
+ : VALID_MIDPOINT;
+}
+
+void StreamingUtf8Validator::Reset() {
+ state_ = 0u;
+}
+
+bool StreamingUtf8Validator::Validate(const std::string& string) {
+ return StreamingUtf8Validator().AddBytes(string.data(), string.size()) ==
+ VALID_ENDPOINT;
+}
+
+} // namespace base