aboutsummaryrefslogtreecommitdiff
path: root/absl/strings/charset.h
diff options
context:
space:
mode:
Diffstat (limited to 'absl/strings/charset.h')
-rw-r--r--absl/strings/charset.h164
1 files changed, 164 insertions, 0 deletions
diff --git a/absl/strings/charset.h b/absl/strings/charset.h
new file mode 100644
index 00000000..ff4e81a4
--- /dev/null
+++ b/absl/strings/charset.h
@@ -0,0 +1,164 @@
+// Copyright 2022 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// -----------------------------------------------------------------------------
+// File: charset.h
+// -----------------------------------------------------------------------------
+//
+// This file contains absl::CharSet, a fast, bit-vector set of 8-bit unsigned
+// characters.
+//
+// Instances can be initialized as constexpr constants. For example:
+//
+// constexpr absl::CharSet kJustX = absl::CharSet::Char('x');
+// constexpr absl::CharSet kMySymbols = absl::CharSet("$@!");
+// constexpr absl::CharSet kLetters = absl::CharSet::Range('a', 'z');
+//
+// Multiple instances can be combined that still forms a constexpr expression.
+// For example:
+//
+// constexpr absl::CharSet kLettersAndNumbers =
+// absl::CharSet::Range('a', 'z') | absl::CharSet::Range('0', '9');
+//
+// Several pre-defined character classes are available that mirror the methods
+// from <cctype>. For example:
+//
+// constexpr absl::CharSet kLettersAndWhitespace =
+// absl::CharSet::AsciiAlphabet() | absl::CharSet::AsciiWhitespace();
+//
+// To check membership, use the .contains method, e.g.
+//
+// absl::CharSet hex_letters("abcdef");
+// hex_letters.contains('a'); // true
+// hex_letters.contains('g'); // false
+
+#ifndef ABSL_STRINGS_CHARSET_H_
+#define ABSL_STRINGS_CHARSET_H_
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+
+#include "absl/base/macros.h"
+#include "absl/base/port.h"
+#include "absl/strings/string_view.h"
+
+namespace absl {
+
+class CharSet {
+ public:
+ constexpr CharSet() : m_() {}
+
+ // Initializes with a given string_view.
+ constexpr explicit CharSet(absl::string_view str) : m_() {
+ for (char c : str) {
+ SetChar(static_cast<unsigned char>(c));
+ }
+ }
+
+ constexpr bool contains(char c) const {
+ return ((m_[static_cast<unsigned char>(c) / 64] >>
+ (static_cast<unsigned char>(c) % 64)) &
+ 0x1) == 0x1;
+ }
+
+ constexpr bool empty() const {
+ for (uint64_t c : m_) {
+ if (c != 0) return false;
+ }
+ return true;
+ }
+
+ // Containing only a single specified char.
+ static constexpr CharSet Char(char x) {
+ return CharSet(CharMaskForWord(x, 0), CharMaskForWord(x, 1),
+ CharMaskForWord(x, 2), CharMaskForWord(x, 3));
+ }
+
+ // Containing all the chars in the closed interval [lo,hi].
+ static constexpr CharSet Range(char lo, char hi) {
+ return CharSet(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1),
+ RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3));
+ }
+
+ friend constexpr CharSet operator&(const CharSet& a, const CharSet& b) {
+ return CharSet(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2],
+ a.m_[3] & b.m_[3]);
+ }
+
+ friend constexpr CharSet operator|(const CharSet& a, const CharSet& b) {
+ return CharSet(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2],
+ a.m_[3] | b.m_[3]);
+ }
+
+ friend constexpr CharSet operator~(const CharSet& a) {
+ return CharSet(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]);
+ }
+
+ // Mirrors the char-classifying predicates in <cctype>.
+ static constexpr CharSet AsciiUppercase() { return CharSet::Range('A', 'Z'); }
+ static constexpr CharSet AsciiLowercase() { return CharSet::Range('a', 'z'); }
+ static constexpr CharSet AsciiDigits() { return CharSet::Range('0', '9'); }
+ static constexpr CharSet AsciiAlphabet() {
+ return AsciiLowercase() | AsciiUppercase();
+ }
+ static constexpr CharSet AsciiAlphanumerics() {
+ return AsciiDigits() | AsciiAlphabet();
+ }
+ static constexpr CharSet AsciiHexDigits() {
+ return AsciiDigits() | CharSet::Range('A', 'F') | CharSet::Range('a', 'f');
+ }
+ static constexpr CharSet AsciiPrintable() {
+ return CharSet::Range(0x20, 0x7e);
+ }
+ static constexpr CharSet AsciiWhitespace() { return CharSet("\t\n\v\f\r "); }
+ static constexpr CharSet AsciiPunctuation() {
+ return AsciiPrintable() & ~AsciiWhitespace() & ~AsciiAlphanumerics();
+ }
+
+ private:
+ constexpr CharSet(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3)
+ : m_{b0, b1, b2, b3} {}
+
+ static constexpr uint64_t RangeForWord(char lo, char hi, uint64_t word) {
+ return OpenRangeFromZeroForWord(static_cast<unsigned char>(hi) + 1, word) &
+ ~OpenRangeFromZeroForWord(static_cast<unsigned char>(lo), word);
+ }
+
+ // All the chars in the specified word of the range [0, upper).
+ static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper,
+ uint64_t word) {
+ return (upper <= 64 * word) ? 0
+ : (upper >= 64 * (word + 1))
+ ? ~static_cast<uint64_t>(0)
+ : (~static_cast<uint64_t>(0) >> (64 - upper % 64));
+ }
+
+ static constexpr uint64_t CharMaskForWord(char x, uint64_t word) {
+ return (static_cast<unsigned char>(x) / 64 == word)
+ ? (static_cast<uint64_t>(1)
+ << (static_cast<unsigned char>(x) % 64))
+ : 0;
+ }
+
+ constexpr void SetChar(unsigned char c) {
+ m_[c / 64] |= static_cast<uint64_t>(1) << (c % 64);
+ }
+
+ uint64_t m_[4];
+};
+
+} // namespace absl
+
+#endif // ABSL_STRINGS_CHARSET_H_