diff options
Diffstat (limited to 'third_party/abseil-cpp/absl/strings/internal/cord_internal.h')
-rw-r--r-- | third_party/abseil-cpp/absl/strings/internal/cord_internal.h | 314 |
1 files changed, 189 insertions, 125 deletions
diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_internal.h b/third_party/abseil-cpp/absl/strings/internal/cord_internal.h index bfe5564e46..fcca3a28cd 100644 --- a/third_party/abseil-cpp/absl/strings/internal/cord_internal.h +++ b/third_party/abseil-cpp/absl/strings/internal/cord_internal.h @@ -21,6 +21,7 @@ #include <cstdint> #include <type_traits> +#include "absl/base/attributes.h" #include "absl/base/config.h" #include "absl/base/internal/endian.h" #include "absl/base/internal/invoke.h" @@ -33,16 +34,27 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace cord_internal { +// The overhead of a vtable is too much for Cord, so we roll our own subclasses +// using only a single byte to differentiate classes from each other - the "tag" +// byte. Define the subclasses first so we can provide downcasting helper +// functions in the base class. +struct CordRep; +struct CordRepConcat; +struct CordRepExternal; +struct CordRepFlat; +struct CordRepSubstring; +struct CordRepCrc; +class CordRepRing; +class CordRepBtree; + class CordzInfo; // Default feature enable states for cord ring buffers enum CordFeatureDefaults { - kCordEnableBtreeDefault = true, kCordEnableRingBufferDefault = false, kCordShallowSubcordsDefault = false }; -extern std::atomic<bool> cord_btree_enabled; extern std::atomic<bool> cord_ring_buffer_enabled; extern std::atomic<bool> shallow_subcords_enabled; @@ -52,10 +64,6 @@ extern std::atomic<bool> shallow_subcords_enabled; // O(n^2) complexity as recursive / full tree validation is O(n). extern std::atomic<bool> cord_btree_exhaustive_validation; -inline void enable_cord_btree(bool enable) { - cord_btree_enabled.store(enable, std::memory_order_relaxed); -} - inline void enable_cord_ring_buffer(bool enable) { cord_ring_buffer_enabled.store(enable, std::memory_order_relaxed); } @@ -80,6 +88,9 @@ enum Constants { kMaxBytesToCopy = 511 }; +// Emits a fatal error "Unexpected node type: xyz" and aborts the program. +ABSL_ATTRIBUTE_NORETURN void LogFatalNodeType(CordRep* rep); + // Compact class for tracking the reference count and state flags for CordRep // instances. Data is stored in an atomic int32_t for compactness and speed. class RefcountAndFlags { @@ -87,9 +98,6 @@ class RefcountAndFlags { constexpr RefcountAndFlags() : count_{kRefIncrement} {} struct Immortal {}; explicit constexpr RefcountAndFlags(Immortal) : count_(kImmortalFlag) {} - struct WithCrc {}; - explicit constexpr RefcountAndFlags(WithCrc) - : count_(kCrcFlag | kRefIncrement) {} // Increments the reference count. Imposes no memory ordering. inline void Increment() { @@ -121,36 +129,19 @@ class RefcountAndFlags { } // Returns the current reference count using acquire semantics. - inline int32_t Get() const { - return count_.load(std::memory_order_acquire) >> kNumFlags; - } - - // Returns true if the referenced object carries a CRC value. - bool HasCrc() const { - return (count_.load(std::memory_order_relaxed) & kCrcFlag) != 0; + inline size_t Get() const { + return static_cast<size_t>(count_.load(std::memory_order_acquire) >> + kNumFlags); } - // Returns true iff the atomic integer is 1 and this node does not store - // a CRC. When both these conditions are met, the current thread owns - // the reference and no other thread shares it, so its contents may be - // safely mutated. - // - // If the referenced item is shared, carries a CRC, or is immortal, - // it should not be modified in-place, and this function returns false. - // - // This call performs the memory barrier needed for the owning thread - // to act on the object, so that if it returns true, it may safely - // assume exclusive access to the object. - inline bool IsMutable() { - return (count_.load(std::memory_order_acquire)) == kRefIncrement; - } - - // Returns whether the atomic integer is 1. Similar to IsMutable(), - // but does not check for a stored CRC. (An unshared node with a CRC is not - // mutable, because changing its data would invalidate the CRC.) - // - // When this returns true, there are no other references, and data sinks - // may safely adopt the children of the CordRep. + // Returns whether the atomic integer is 1. + // If the reference count is used in the conventional way, a + // reference count of 1 implies that the current thread owns the + // reference and no other thread shares it. + // This call performs the test for a reference count of one, and + // performs the memory barrier needed for the owning thread + // to act on the object, knowing that it has exclusive access to the + // object. Always returns false when the immortal bit is set. inline bool IsOne() { return (count_.load(std::memory_order_acquire) & kRefcountMask) == kRefIncrement; @@ -166,51 +157,43 @@ class RefcountAndFlags { // used for the StringConstant constructor to avoid collecting immutable // constant cords. // kReservedFlag is reserved for future use. - enum { + enum Flags { kNumFlags = 2, kImmortalFlag = 0x1, - kCrcFlag = 0x2, + kReservedFlag = 0x2, kRefIncrement = (1 << kNumFlags), // Bitmask to use when checking refcount by equality. This masks out // all flags except kImmortalFlag, which is part of the refcount for // purposes of equality. (A refcount of 0 or 1 does not count as 0 or 1 // if the immortal bit is set.) - kRefcountMask = ~kCrcFlag, + kRefcountMask = ~kReservedFlag, }; std::atomic<int32_t> count_; }; -// The overhead of a vtable is too much for Cord, so we roll our own subclasses -// using only a single byte to differentiate classes from each other - the "tag" -// byte. Define the subclasses first so we can provide downcasting helper -// functions in the base class. - -struct CordRepConcat; -struct CordRepExternal; -struct CordRepFlat; -struct CordRepSubstring; -class CordRepRing; -class CordRepBtree; - // Various representations that we allow enum CordRepKind { - CONCAT = 0, + UNUSED_0 = 0, SUBSTRING = 1, - BTREE = 2, - RING = 3, - EXTERNAL = 4, + CRC = 2, + BTREE = 3, + RING = 4, + EXTERNAL = 5, // We have different tags for different sized flat arrays, - // starting with FLAT, and limited to MAX_FLAT_TAG. The 225 value is based on - // the current 'size to tag' encoding of 8 / 32 bytes. If a new tag is needed - // in the future, then 'FLAT' and 'MAX_FLAT_TAG' should be adjusted as well - // as the Tag <---> Size logic so that FLAT stil represents the minimum flat - // allocation size. (32 bytes as of now). - FLAT = 5, - MAX_FLAT_TAG = 225 + // starting with FLAT, and limited to MAX_FLAT_TAG. The below values map to an + // allocated range of 32 bytes to 256 KB. The current granularity is: + // - 8 byte granularity for flat sizes in [32 - 512] + // - 64 byte granularity for flat sizes in (512 - 8KiB] + // - 4KiB byte granularity for flat sizes in (8KiB, 256 KiB] + // If a new tag is needed in the future, then 'FLAT' and 'MAX_FLAT_TAG' should + // be adjusted as well as the Tag <---> Size mapping logic so that FLAT still + // represents the minimum flat allocation size. (32 bytes as of now). + FLAT = 6, + MAX_FLAT_TAG = 248 }; // There are various locations where we want to check if some rep is a 'plain' @@ -225,6 +208,18 @@ static_assert(EXTERNAL == RING + 1, "BTREE and EXTERNAL not consecutive"); static_assert(FLAT == EXTERNAL + 1, "EXTERNAL and FLAT not consecutive"); struct CordRep { + // Result from an `extract edge` operation. Contains the (possibly changed) + // tree node as well as the extracted edge, or {tree, nullptr} if no edge + // could be extracted. + // On success, the returned `tree` value is null if `extracted` was the only + // data edge inside the tree, a data edge if there were only two data edges in + // the tree, or the (possibly new / smaller) remaining tree with the extracted + // data edge removed. + struct ExtractResult { + CordRep* tree; + CordRep* extracted; + }; + CordRep() = default; constexpr CordRep(RefcountAndFlags::Immortal immortal, size_t l) : length(l), refcount(immortal), tag(EXTERNAL), storage{} {} @@ -249,18 +244,18 @@ struct CordRep { // Returns true if this instance's tag matches the requested type. constexpr bool IsRing() const { return tag == RING; } - constexpr bool IsConcat() const { return tag == CONCAT; } constexpr bool IsSubstring() const { return tag == SUBSTRING; } + constexpr bool IsCrc() const { return tag == CRC; } constexpr bool IsExternal() const { return tag == EXTERNAL; } constexpr bool IsFlat() const { return tag >= FLAT; } constexpr bool IsBtree() const { return tag == BTREE; } inline CordRepRing* ring(); inline const CordRepRing* ring() const; - inline CordRepConcat* concat(); - inline const CordRepConcat* concat() const; inline CordRepSubstring* substring(); inline const CordRepSubstring* substring() const; + inline CordRepCrc* crc(); + inline const CordRepCrc* crc() const; inline CordRepExternal* external(); inline const CordRepExternal* external() const; inline CordRepFlat* flat(); @@ -283,17 +278,23 @@ struct CordRep { static inline void Unref(CordRep* rep); }; -struct CordRepConcat : public CordRep { - CordRep* left; - CordRep* right; - - uint8_t depth() const { return storage[0]; } - void set_depth(uint8_t depth) { storage[0] = depth; } -}; - struct CordRepSubstring : public CordRep { size_t start; // Starting offset of substring in child CordRep* child; + + // Creates a substring on `child`, adopting a reference on `child`. + // Requires `child` to be either a flat or external node, and `pos` and `n` to + // form a non-empty partial sub range of `'child`, i.e.: + // `n > 0 && n < length && n + pos <= length` + static inline CordRepSubstring* Create(CordRep* child, size_t pos, size_t n); + + // Creates a substring of `rep`. Does not adopt a reference on `rep`. + // Requires `IsDataEdge(rep) && n > 0 && pos + n <= rep->length`. + // If `n == rep->length` then this method returns `CordRep::Ref(rep)` + // If `rep` is a substring of a flat or external node, then this method will + // return a new substring of that flat or external node with `pos` adjusted + // with the original `start` position. + static inline CordRep* Substring(CordRep* rep, size_t pos, size_t n); }; // Type for function pointer that will invoke the releaser function and also @@ -357,6 +358,47 @@ struct CordRepExternalImpl } }; +inline CordRepSubstring* CordRepSubstring::Create(CordRep* child, size_t pos, + size_t n) { + assert(child != nullptr); + assert(n > 0); + assert(n < child->length); + assert(pos < child->length); + assert(n <= child->length - pos); + + // TODO(b/217376272): Harden internal logic. + // Move to strategical places inside the Cord logic and make this an assert. + if (ABSL_PREDICT_FALSE(!(child->IsExternal() || child->IsFlat()))) { + LogFatalNodeType(child); + } + + CordRepSubstring* rep = new CordRepSubstring(); + rep->length = n; + rep->tag = SUBSTRING; + rep->start = pos; + rep->child = child; + return rep; +} + +inline CordRep* CordRepSubstring::Substring(CordRep* rep, size_t pos, + size_t n) { + assert(rep != nullptr); + assert(n != 0); + assert(pos < rep->length); + assert(n <= rep->length - pos); + if (n == rep->length) return CordRep::Ref(rep); + if (rep->IsSubstring()) { + pos += rep->substring()->start; + rep = rep->substring()->child; + } + CordRepSubstring* substr = new CordRepSubstring(); + substr->length = n; + substr->tag = SUBSTRING; + substr->start = pos; + substr->child = CordRep::Ref(rep); + return substr; +} + inline void CordRepExternal::Delete(CordRep* rep) { assert(rep != nullptr && rep->IsExternal()); auto* rep_external = static_cast<CordRepExternal*>(rep); @@ -370,7 +412,8 @@ struct ConstInitExternalStorage { }; template <typename Str> -CordRepExternal ConstInitExternalStorage<Str>::value(Str::value); +ABSL_CONST_INIT CordRepExternal + ConstInitExternalStorage<Str>::value(Str::value); enum { kMaxInline = 15, @@ -380,8 +423,8 @@ constexpr char GetOrNull(absl::string_view data, size_t pos) { return pos < data.size() ? data[pos] : '\0'; } -// We store cordz_info as 64 bit pointer value in big endian format. This -// guarantees that the least significant byte of cordz_info matches the last +// We store cordz_info as 64 bit pointer value in little endian format. This +// guarantees that the least significant byte of cordz_info matches the first // byte of the inline data representation in as_chars_, which holds the inlined // size or the 'is_tree' bit. using cordz_info_t = int64_t; @@ -391,14 +434,14 @@ using cordz_info_t = int64_t; static_assert(sizeof(cordz_info_t) * 2 == kMaxInline + 1, ""); static_assert(sizeof(cordz_info_t) >= sizeof(intptr_t), ""); -// BigEndianByte() creates a big endian representation of 'value', i.e.: a big -// endian value where the last byte in the host's representation holds 'value`, -// with all other bytes being 0. -static constexpr cordz_info_t BigEndianByte(unsigned char value) { +// LittleEndianByte() creates a little endian representation of 'value', i.e.: +// a little endian value where the first byte in the host's representation +// holds 'value`, with all other bytes being 0. +static constexpr cordz_info_t LittleEndianByte(unsigned char value) { #if defined(ABSL_IS_BIG_ENDIAN) - return value; -#else return static_cast<cordz_info_t>(value) << ((sizeof(cordz_info_t) - 1) * 8); +#else + return value; #endif } @@ -407,25 +450,37 @@ class InlineData { // DefaultInitType forces the use of the default initialization constructor. enum DefaultInitType { kDefaultInit }; - // kNullCordzInfo holds the big endian representation of intptr_t(1) + // kNullCordzInfo holds the little endian representation of intptr_t(1) // This is the 'null' / initial value of 'cordz_info'. The null value // is specifically big endian 1 as with 64-bit pointers, the last // byte of cordz_info overlaps with the last byte holding the tag. - static constexpr cordz_info_t kNullCordzInfo = BigEndianByte(1); + static constexpr cordz_info_t kNullCordzInfo = LittleEndianByte(1); + + // kTagOffset contains the offset of the control byte / tag. This constant is + // intended mostly for debugging purposes: do not remove this constant as it + // is actively inspected and used by gdb pretty printing code. + static constexpr size_t kTagOffset = 0; constexpr InlineData() : as_chars_{0} {} explicit InlineData(DefaultInitType) {} explicit constexpr InlineData(CordRep* rep) : as_tree_(rep) {} explicit constexpr InlineData(absl::string_view chars) - : as_chars_{ - GetOrNull(chars, 0), GetOrNull(chars, 1), - GetOrNull(chars, 2), GetOrNull(chars, 3), - GetOrNull(chars, 4), GetOrNull(chars, 5), - GetOrNull(chars, 6), GetOrNull(chars, 7), - GetOrNull(chars, 8), GetOrNull(chars, 9), - GetOrNull(chars, 10), GetOrNull(chars, 11), - GetOrNull(chars, 12), GetOrNull(chars, 13), - GetOrNull(chars, 14), static_cast<char>((chars.size() << 1))} {} + : as_chars_{static_cast<char>((chars.size() << 1)), + GetOrNull(chars, 0), + GetOrNull(chars, 1), + GetOrNull(chars, 2), + GetOrNull(chars, 3), + GetOrNull(chars, 4), + GetOrNull(chars, 5), + GetOrNull(chars, 6), + GetOrNull(chars, 7), + GetOrNull(chars, 8), + GetOrNull(chars, 9), + GetOrNull(chars, 10), + GetOrNull(chars, 11), + GetOrNull(chars, 12), + GetOrNull(chars, 13), + GetOrNull(chars, 14)} {} // Returns true if the current instance is empty. // The 'empty value' is an inlined data value of zero length. @@ -456,8 +511,8 @@ class InlineData { // Requires the current instance to hold a tree value. CordzInfo* cordz_info() const { assert(is_tree()); - intptr_t info = - static_cast<intptr_t>(absl::big_endian::ToHost64(as_tree_.cordz_info)); + intptr_t info = static_cast<intptr_t>(absl::little_endian::ToHost64( + static_cast<uint64_t>(as_tree_.cordz_info))); assert(info & 1); return reinterpret_cast<CordzInfo*>(info - 1); } @@ -467,8 +522,9 @@ class InlineData { // Requires the current instance to hold a tree value. void set_cordz_info(CordzInfo* cordz_info) { assert(is_tree()); - intptr_t info = reinterpret_cast<intptr_t>(cordz_info) | 1; - as_tree_.cordz_info = absl::big_endian::FromHost64(info); + uintptr_t info = reinterpret_cast<uintptr_t>(cordz_info) | 1; + as_tree_.cordz_info = + static_cast<cordz_info_t>(absl::little_endian::FromHost64(info)); } // Resets the current cordz_info to null / empty. @@ -481,7 +537,7 @@ class InlineData { // Requires the current instance to hold inline data. const char* as_chars() const { assert(!is_tree()); - return as_chars_; + return &as_chars_[1]; } // Returns a mutable pointer to the character data inside this instance. @@ -499,7 +555,7 @@ class InlineData { // // It's an error to read from the returned pointer without a preceding write // if the current instance does not hold inline data, i.e.: is_tree() == true. - char* as_chars() { return as_chars_; } + char* as_chars() { return &as_chars_[1]; } // Returns the tree value of this value. // Requires the current instance to hold a tree value. @@ -527,7 +583,7 @@ class InlineData { // Requires the current instance to hold inline data. size_t inline_size() const { assert(!is_tree()); - return tag() >> 1; + return static_cast<size_t>(tag()) >> 1; } // Sets the size of the inlined character data inside this instance. @@ -535,26 +591,42 @@ class InlineData { // See the documentation on 'as_chars()' for more information and examples. void set_inline_size(size_t size) { ABSL_ASSERT(size <= kMaxInline); - tag() = static_cast<char>(size << 1); + tag() = static_cast<int8_t>(size << 1); + } + + // Compares 'this' inlined data with rhs. The comparison is a straightforward + // lexicographic comparison. `Compare()` returns values as follows: + // + // -1 'this' InlineData instance is smaller + // 0 the InlineData instances are equal + // 1 'this' InlineData instance larger + int Compare(const InlineData& rhs) const { + uint64_t x, y; + memcpy(&x, as_chars(), sizeof(x)); + memcpy(&y, rhs.as_chars(), sizeof(y)); + if (x == y) { + memcpy(&x, as_chars() + 7, sizeof(x)); + memcpy(&y, rhs.as_chars() + 7, sizeof(y)); + if (x == y) { + if (inline_size() == rhs.inline_size()) return 0; + return inline_size() < rhs.inline_size() ? -1 : 1; + } + } + x = absl::big_endian::FromHost64(x); + y = absl::big_endian::FromHost64(y); + return x < y ? -1 : 1; } private: // See cordz_info_t for forced alignment and size of `cordz_info` details. struct AsTree { - explicit constexpr AsTree(absl::cord_internal::CordRep* tree) - : rep(tree), cordz_info(kNullCordzInfo) {} - // This union uses up extra space so that whether rep is 32 or 64 bits, - // cordz_info will still start at the eighth byte, and the last - // byte of cordz_info will still be the last byte of InlineData. - union { - absl::cord_internal::CordRep* rep; - cordz_info_t unused_aligner; - }; - cordz_info_t cordz_info; + explicit constexpr AsTree(absl::cord_internal::CordRep* tree) : rep(tree) {} + cordz_info_t cordz_info = kNullCordzInfo; + absl::cord_internal::CordRep* rep; }; - char& tag() { return reinterpret_cast<char*>(this)[kMaxInline]; } - char tag() const { return reinterpret_cast<const char*>(this)[kMaxInline]; } + int8_t& tag() { return reinterpret_cast<int8_t*>(this)[0]; } + int8_t tag() const { return reinterpret_cast<const int8_t*>(this)[0]; } // If the data has length <= kMaxInline, we store it in `as_chars_`, and // store the size in the last char of `as_chars_` shifted left + 1. @@ -568,16 +640,6 @@ class InlineData { static_assert(sizeof(InlineData) == kMaxInline + 1, ""); -inline CordRepConcat* CordRep::concat() { - assert(IsConcat()); - return static_cast<CordRepConcat*>(this); -} - -inline const CordRepConcat* CordRep::concat() const { - assert(IsConcat()); - return static_cast<const CordRepConcat*>(this); -} - inline CordRepSubstring* CordRep::substring() { assert(IsSubstring()); return static_cast<CordRepSubstring*>(this); @@ -599,7 +661,9 @@ inline const CordRepExternal* CordRep::external() const { } inline CordRep* CordRep::Ref(CordRep* rep) { - assert(rep != nullptr); + // ABSL_ASSUME is a workaround for + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105585 + ABSL_ASSUME(rep != nullptr); rep->refcount.Increment(); return rep; } |