diff options
Diffstat (limited to 'third_party/abseil-cpp/absl/strings/internal')
69 files changed, 14251 insertions, 851 deletions
diff --git a/third_party/abseil-cpp/absl/strings/internal/char_map.h b/third_party/abseil-cpp/absl/strings/internal/char_map.h index a76e60362b..61484de0b7 100644 --- a/third_party/abseil-cpp/absl/strings/internal/char_map.h +++ b/third_party/abseil-cpp/absl/strings/internal/char_map.h @@ -72,7 +72,7 @@ class Charmap { CharMaskForWord(x, 2), CharMaskForWord(x, 3)); } - // Containing all the chars in the C-std::string 's'. + // Containing all the chars in the C-string 's'. // Note that this is expensively recursive because of the C++11 constexpr // formulation. Use only in constexpr initializers. static constexpr Charmap FromString(const char* s) { diff --git a/third_party/abseil-cpp/absl/strings/internal/charconv_bigint.cc b/third_party/abseil-cpp/absl/strings/internal/charconv_bigint.cc index 66f33e7207..ebf8c0791a 100644 --- a/third_party/abseil-cpp/absl/strings/internal/charconv_bigint.cc +++ b/third_party/abseil-cpp/absl/strings/internal/charconv_bigint.cc @@ -208,7 +208,7 @@ int BigUnsigned<max_words>::ReadDigits(const char* begin, const char* end, ++dropped_digits; } if (begin < end && *std::prev(end) == '.') { - // If the std::string ends in '.', either before or after dropping zeroes, then + // If the string ends in '.', either before or after dropping zeroes, then // drop the decimal point and look for more digits to drop. dropped_digits = 0; --end; diff --git a/third_party/abseil-cpp/absl/strings/internal/charconv_bigint.h b/third_party/abseil-cpp/absl/strings/internal/charconv_bigint.h index 999e9ae3a2..8f702976a8 100644 --- a/third_party/abseil-cpp/absl/strings/internal/charconv_bigint.h +++ b/third_party/abseil-cpp/absl/strings/internal/charconv_bigint.h @@ -66,7 +66,7 @@ class BigUnsigned { static_cast<uint32_t>(v >> 32)} {} // Constructs a BigUnsigned from the given string_view containing a decimal - // value. If the input std::string is not a decimal integer, constructs a 0 + // value. 
If the input string is not a decimal integer, constructs a 0 // instead. explicit BigUnsigned(absl::string_view sv) : size_(0), words_{} { // Check for valid input, returning a 0 otherwise. This is reasonable @@ -210,7 +210,7 @@ class BigUnsigned { return words_[index]; } - // Returns this integer as a decimal std::string. This is not used in the decimal- + // Returns this integer as a decimal string. This is not used in the decimal- // to-binary conversion; it is intended to aid in testing. std::string ToString() const; diff --git a/third_party/abseil-cpp/absl/strings/internal/charconv_bigint_test.cc b/third_party/abseil-cpp/absl/strings/internal/charconv_bigint_test.cc index 363bcb03d9..a8b9945829 100644 --- a/third_party/abseil-cpp/absl/strings/internal/charconv_bigint_test.cc +++ b/third_party/abseil-cpp/absl/strings/internal/charconv_bigint_test.cc @@ -69,6 +69,61 @@ TEST(BigUnsigned, ShiftLeft) { // And we should have fully rotated all bits off by now: EXPECT_EQ(a, BigUnsigned<84>(0u)); } + { + // Bit shifting large and small numbers by large and small offsets. + // Intended to exercise bounds-checking corner on ShiftLeft() (directly + // and under asan). 
+ + // 2**(32*84)-1 + const BigUnsigned<84> all_bits_one( + "1474444211396924248063325089479706787923460402125687709454567433186613" + "6228083464060749874845919674257665016359189106695900028098437021384227" + "3285029708032466536084583113729486015826557532750465299832071590813090" + "2011853039837649252477307070509704043541368002938784757296893793903797" + "8180292336310543540677175225040919704702800559606097685920595947397024" + "8303316808753252115729411497720357971050627997031988036134171378490368" + "6008000778741115399296162550786288457245180872759047016734959330367829" + "5235612397427686310674725251378116268607113017720538636924549612987647" + "5767411074510311386444547332882472126067840027882117834454260409440463" + "9345147252664893456053258463203120637089916304618696601333953616715125" + "2115882482473279040772264257431663818610405673876655957323083702713344" + "4201105427930770976052393421467136557055"); + const BigUnsigned<84> zero(0u); + const BigUnsigned<84> one(1u); + // in bounds shifts + for (int i = 1; i < 84*32; ++i) { + // shifting all_bits_one to the left should result in a smaller number, + // since the high bits rotate off and the low bits are replaced with + // zeroes. + BigUnsigned<84> big_shifted = all_bits_one; + big_shifted.ShiftLeft(i); + EXPECT_GT(all_bits_one, big_shifted); + // Shifting 1 to the left should instead result in a larger number. + BigUnsigned<84> small_shifted = one; + small_shifted.ShiftLeft(i); + EXPECT_LT(one, small_shifted); + } + // Shifting by zero or a negative number has no effect + for (int no_op_shift : {0, -1, -84 * 32, std::numeric_limits<int>::min()}) { + BigUnsigned<84> big_shifted = all_bits_one; + big_shifted.ShiftLeft(no_op_shift); + EXPECT_EQ(all_bits_one, big_shifted); + BigUnsigned<84> small_shifted = one; + big_shifted.ShiftLeft(no_op_shift); + EXPECT_EQ(one, small_shifted); + } + // Shifting by an amount greater than the number of bits should result in + // zero. 
+ for (int out_of_bounds_shift : + {84 * 32, 84 * 32 + 1, std::numeric_limits<int>::max()}) { + BigUnsigned<84> big_shifted = all_bits_one; + big_shifted.ShiftLeft(out_of_bounds_shift); + EXPECT_EQ(zero, big_shifted); + BigUnsigned<84> small_shifted = one; + small_shifted.ShiftLeft(out_of_bounds_shift); + EXPECT_EQ(zero, small_shifted); + } + } } TEST(BigUnsigned, MultiplyByUint32) { diff --git a/third_party/abseil-cpp/absl/strings/internal/charconv_parse.cc b/third_party/abseil-cpp/absl/strings/internal/charconv_parse.cc index d9a57a7822..d29acaf462 100644 --- a/third_party/abseil-cpp/absl/strings/internal/charconv_parse.cc +++ b/third_party/abseil-cpp/absl/strings/internal/charconv_parse.cc @@ -52,7 +52,7 @@ static_assert(std::numeric_limits<double>::digits == 53, "IEEE double fact"); // The lowest valued 19-digit decimal mantissa we can read still contains // sufficient information to reconstruct a binary mantissa. -static_assert(1000000000000000000u > (uint64_t(1) << (53 + 3)), "(b) above"); +static_assert(1000000000000000000u > (uint64_t{1} << (53 + 3)), "(b) above"); // ParseFloat<16> will read the first 15 significant digits of the mantissa. // @@ -246,8 +246,8 @@ constexpr int DigitMagnitude<16>() { // ConsumeDigits does not protect against overflow on *out; max_digits must // be chosen with respect to type T to avoid the possibility of overflow. 
template <int base, typename T> -std::size_t ConsumeDigits(const char* begin, const char* end, int max_digits, - T* out, bool* dropped_nonzero_digit) { +int ConsumeDigits(const char* begin, const char* end, int max_digits, T* out, + bool* dropped_nonzero_digit) { if (base == 10) { assert(max_digits <= std::numeric_limits<T>::digits10); } else if (base == 16) { @@ -282,7 +282,7 @@ std::size_t ConsumeDigits(const char* begin, const char* end, int max_digits, *dropped_nonzero_digit = true; } *out = accumulator; - return begin - original_begin; + return static_cast<int>(begin - original_begin); } // Returns true if `v` is one of the chars allowed inside parentheses following @@ -302,7 +302,7 @@ bool ParseInfinityOrNan(const char* begin, const char* end, switch (*begin) { case 'i': case 'I': { - // An infinity std::string consists of the characters "inf" or "infinity", + // An infinity string consists of the characters "inf" or "infinity", // case insensitive. if (strings_internal::memcasecmp(begin + 1, "nf", 2) != 0) { return false; @@ -326,7 +326,7 @@ bool ParseInfinityOrNan(const char* begin, const char* end, } out->type = strings_internal::FloatType::kNan; out->end = begin + 3; - // NaN is allowed to be followed by a parenthesized std::string, consisting of + // NaN is allowed to be followed by a parenthesized string, consisting of // only the characters [a-zA-Z0-9_]. Match that if it's present. 
begin += 3; if (begin < end && *begin == '(') { @@ -372,7 +372,7 @@ strings_internal::ParsedFloat ParseFloat(const char* begin, const char* end, int exponent_adjustment = 0; bool mantissa_is_inexact = false; - std::size_t pre_decimal_digits = ConsumeDigits<base>( + int pre_decimal_digits = ConsumeDigits<base>( begin, end, MantissaDigitsMax<base>(), &mantissa, &mantissa_is_inexact); begin += pre_decimal_digits; int digits_left; @@ -398,14 +398,14 @@ strings_internal::ParsedFloat ParseFloat(const char* begin, const char* end, while (begin < end && *begin == '0') { ++begin; } - std::size_t zeros_skipped = begin - begin_zeros; + int zeros_skipped = static_cast<int>(begin - begin_zeros); if (zeros_skipped >= DigitLimit<base>()) { // refuse to parse pathological inputs return result; } exponent_adjustment -= static_cast<int>(zeros_skipped); } - std::size_t post_decimal_digits = ConsumeDigits<base>( + int post_decimal_digits = ConsumeDigits<base>( begin, end, digits_left, &mantissa, &mantissa_is_inexact); begin += post_decimal_digits; diff --git a/third_party/abseil-cpp/absl/strings/internal/charconv_parse_test.cc b/third_party/abseil-cpp/absl/strings/internal/charconv_parse_test.cc index 9511c98745..bc2d111876 100644 --- a/third_party/abseil-cpp/absl/strings/internal/charconv_parse_test.cc +++ b/third_party/abseil-cpp/absl/strings/internal/charconv_parse_test.cc @@ -63,7 +63,7 @@ void ExpectParsedFloat(std::string s, absl::chars_format format_flags, } const std::string::size_type expected_characters_matched = s.find('$'); ABSL_RAW_CHECK(expected_characters_matched != std::string::npos, - "Input std::string must contain $"); + "Input string must contain $"); s.replace(expected_characters_matched, 1, ""); ParsedFloat parsed = diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_internal.cc b/third_party/abseil-cpp/absl/strings/internal/cord_internal.cc new file mode 100644 index 0000000000..1767e6fcc5 --- /dev/null +++ 
b/third_party/abseil-cpp/absl/strings/internal/cord_internal.cc @@ -0,0 +1,89 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "absl/strings/internal/cord_internal.h" + +#include <atomic> +#include <cassert> +#include <memory> + +#include "absl/container/inlined_vector.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/internal/cord_rep_ring.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +ABSL_CONST_INIT std::atomic<bool> cord_btree_enabled(kCordEnableBtreeDefault); +ABSL_CONST_INIT std::atomic<bool> cord_ring_buffer_enabled( + kCordEnableRingBufferDefault); +ABSL_CONST_INIT std::atomic<bool> shallow_subcords_enabled( + kCordShallowSubcordsDefault); +ABSL_CONST_INIT std::atomic<bool> cord_btree_exhaustive_validation(false); + +void CordRep::Destroy(CordRep* rep) { + assert(rep != nullptr); + + absl::InlinedVector<CordRep*, Constants::kInlinedVectorSize> pending; + while (true) { + assert(!rep->refcount.IsImmortal()); + if (rep->tag == CONCAT) { + CordRepConcat* rep_concat = rep->concat(); + CordRep* right = rep_concat->right; + if (!right->refcount.Decrement()) { + pending.push_back(right); + } + CordRep* left = rep_concat->left; + delete rep_concat; + rep = nullptr; + if (!left->refcount.Decrement()) { + rep = left; + continue; + } + } else if (rep->tag == BTREE) { + CordRepBtree::Destroy(rep->btree()); 
+ rep = nullptr; + } else if (rep->tag == RING) { + CordRepRing::Destroy(rep->ring()); + rep = nullptr; + } else if (rep->tag == EXTERNAL) { + CordRepExternal::Delete(rep); + rep = nullptr; + } else if (rep->tag == SUBSTRING) { + CordRepSubstring* rep_substring = rep->substring(); + CordRep* child = rep_substring->child; + delete rep_substring; + rep = nullptr; + if (!child->refcount.Decrement()) { + rep = child; + continue; + } + } else { + CordRepFlat::Delete(rep); + rep = nullptr; + } + + if (!pending.empty()) { + rep = pending.back(); + pending.pop_back(); + } else { + break; + } + } +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_internal.h b/third_party/abseil-cpp/absl/strings/internal/cord_internal.h index 5b5d108308..bfe5564e46 100644 --- a/third_party/abseil-cpp/absl/strings/internal/cord_internal.h +++ b/third_party/abseil-cpp/absl/strings/internal/cord_internal.h @@ -1,4 +1,4 @@ -// Copyright 2020 The Abseil Authors. +// Copyright 2021 The Abseil Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -21,6 +21,11 @@ #include <cstdint> #include <type_traits> +#include "absl/base/config.h" +#include "absl/base/internal/endian.h" +#include "absl/base/internal/invoke.h" +#include "absl/base/optimization.h" +#include "absl/container/internal/compressed_tuple.h" #include "absl/meta/type_traits.h" #include "absl/strings/string_view.h" @@ -28,49 +33,153 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace cord_internal { -// Wraps std::atomic for reference counting. 
-class Refcount { +class CordzInfo; + +// Default feature enable states for cord ring buffers +enum CordFeatureDefaults { + kCordEnableBtreeDefault = true, + kCordEnableRingBufferDefault = false, + kCordShallowSubcordsDefault = false +}; + +extern std::atomic<bool> cord_btree_enabled; +extern std::atomic<bool> cord_ring_buffer_enabled; +extern std::atomic<bool> shallow_subcords_enabled; + +// `cord_btree_exhaustive_validation` can be set to force exhaustive validation +// in debug assertions, and code that calls `IsValid()` explicitly. By default, +// assertions should be relatively cheap and AssertValid() can easily lead to +// O(n^2) complexity as recursive / full tree validation is O(n). +extern std::atomic<bool> cord_btree_exhaustive_validation; + +inline void enable_cord_btree(bool enable) { + cord_btree_enabled.store(enable, std::memory_order_relaxed); +} + +inline void enable_cord_ring_buffer(bool enable) { + cord_ring_buffer_enabled.store(enable, std::memory_order_relaxed); +} + +inline void enable_shallow_subcords(bool enable) { + shallow_subcords_enabled.store(enable, std::memory_order_relaxed); +} + +enum Constants { + // The inlined size to use with absl::InlinedVector. + // + // Note: The InlinedVectors in this file (and in cord.h) do not need to use + // the same value for their inlined size. The fact that they do is historical. + // It may be desirable for each to use a different inlined size optimized for + // that InlinedVector's usage. + // + // TODO(jgm): Benchmark to see if there's a more optimal value than 47 for + // the inlined vector size (47 exists for backward compatibility). + kInlinedVectorSize = 47, + + // Prefer copying blocks of at most this size, otherwise reference count. + kMaxBytesToCopy = 511 +}; + +// Compact class for tracking the reference count and state flags for CordRep +// instances. Data is stored in an atomic int32_t for compactness and speed. 
+class RefcountAndFlags { public: - Refcount() : count_{1} {} - ~Refcount() {} + constexpr RefcountAndFlags() : count_{kRefIncrement} {} + struct Immortal {}; + explicit constexpr RefcountAndFlags(Immortal) : count_(kImmortalFlag) {} + struct WithCrc {}; + explicit constexpr RefcountAndFlags(WithCrc) + : count_(kCrcFlag | kRefIncrement) {} - // Increments the reference count by 1. Imposes no memory ordering. - inline void Increment() { count_.fetch_add(1, std::memory_order_relaxed); } + // Increments the reference count. Imposes no memory ordering. + inline void Increment() { + count_.fetch_add(kRefIncrement, std::memory_order_relaxed); + } // Asserts that the current refcount is greater than 0. If the refcount is - // greater than 1, decrements the reference count by 1. + // greater than 1, decrements the reference count. // // Returns false if there are no references outstanding; true otherwise. // Inserts barriers to ensure that state written before this method returns // false will be visible to a thread that just observed this method returning - // false. + // false. Always returns true when the immortal bit is set. inline bool Decrement() { - int32_t refcount = count_.load(std::memory_order_acquire); - assert(refcount > 0); - return refcount != 1 && count_.fetch_sub(1, std::memory_order_acq_rel) != 1; + int32_t refcount = count_.load(std::memory_order_acquire) & kRefcountMask; + assert(refcount > 0 || refcount & kImmortalFlag); + return refcount != kRefIncrement && + (count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) & + kRefcountMask) != kRefIncrement; } // Same as Decrement but expect that refcount is greater than 1. 
inline bool DecrementExpectHighRefcount() { - int32_t refcount = count_.fetch_sub(1, std::memory_order_acq_rel); - assert(refcount > 0); - return refcount != 1; + int32_t refcount = + count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) & + kRefcountMask; + assert(refcount > 0 || refcount & kImmortalFlag); + return refcount != kRefIncrement; } // Returns the current reference count using acquire semantics. - inline int32_t Get() const { return count_.load(std::memory_order_acquire); } - - // Returns whether the atomic integer is 1. - // If the reference count is used in the conventional way, a - // reference count of 1 implies that the current thread owns the - // reference and no other thread shares it. - // This call performs the test for a reference count of one, and - // performs the memory barrier needed for the owning thread - // to act on the object, knowing that it has exclusive access to the - // object. - inline bool IsOne() { return count_.load(std::memory_order_acquire) == 1; } + inline int32_t Get() const { + return count_.load(std::memory_order_acquire) >> kNumFlags; + } + + // Returns true if the referenced object carries a CRC value. + bool HasCrc() const { + return (count_.load(std::memory_order_relaxed) & kCrcFlag) != 0; + } + + // Returns true iff the atomic integer is 1 and this node does not store + // a CRC. When both these conditions are met, the current thread owns + // the reference and no other thread shares it, so its contents may be + // safely mutated. + // + // If the referenced item is shared, carries a CRC, or is immortal, + // it should not be modified in-place, and this function returns false. + // + // This call performs the memory barrier needed for the owning thread + // to act on the object, so that if it returns true, it may safely + // assume exclusive access to the object. + inline bool IsMutable() { + return (count_.load(std::memory_order_acquire)) == kRefIncrement; + } + + // Returns whether the atomic integer is 1. 
Similar to IsMutable(), + // but does not check for a stored CRC. (An unshared node with a CRC is not + // mutable, because changing its data would invalidate the CRC.) + // + // When this returns true, there are no other references, and data sinks + // may safely adopt the children of the CordRep. + inline bool IsOne() { + return (count_.load(std::memory_order_acquire) & kRefcountMask) == + kRefIncrement; + } + + bool IsImmortal() const { + return (count_.load(std::memory_order_relaxed) & kImmortalFlag) != 0; + } private: + // We reserve the bottom bits for flags. + // kImmortalFlag indicates that this entity should never be collected; it is + // used for the StringConstant constructor to avoid collecting immutable + // constant cords. + // kCrcFlag indicates that the referenced object carries a CRC value. + enum { + kNumFlags = 2, + + kImmortalFlag = 0x1, + kCrcFlag = 0x2, + kRefIncrement = (1 << kNumFlags), + + // Bitmask to use when checking refcount by equality. This masks out + // all flags except kImmortalFlag, which is part of the refcount for + // purposes of equality. (A refcount of 0 or 1 does not count as 0 or 1 + // if the immortal bit is set.) + kRefcountMask = ~kCrcFlag, + }; + std::atomic<int32_t> count_; }; @@ -80,34 +189,106 @@ class Refcount { // functions in the base class. struct CordRepConcat; -struct CordRepSubstring; struct CordRepExternal; +struct CordRepFlat; +struct CordRepSubstring; +class CordRepRing; +class CordRepBtree; + +// Various representations that we allow +enum CordRepKind { + CONCAT = 0, + SUBSTRING = 1, + BTREE = 2, + RING = 3, + EXTERNAL = 4, + + // We have different tags for different sized flat arrays, + // starting with FLAT, and limited to MAX_FLAT_TAG. The 225 value is based on + // the current 'size to tag' encoding of 8 / 32 bytes. If a new tag is needed + // in the future, then 'FLAT' and 'MAX_FLAT_TAG' should be adjusted as well + // as the Tag <---> Size logic so that FLAT still represents the minimum flat + // allocation size. 
(32 bytes as of now). + FLAT = 5, + MAX_FLAT_TAG = 225 +}; + +// There are various locations where we want to check if some rep is a 'plain' +// data edge, i.e. an external or flat rep. By having FLAT == EXTERNAL + 1, we +// can perform this check in a single branch as 'tag >= EXTERNAL' +// Likewise, we have some locations where we check for 'ring or external/flat', +// so likewise align RING to EXTERNAL. +// Note that we can leave this optimization to the compiler. The compiler will +// DTRT when it sees a condition like `tag == EXTERNAL || tag >= FLAT`. +static_assert(RING == BTREE + 1, "BTREE and RING not consecutive"); +static_assert(EXTERNAL == RING + 1, "BTREE and EXTERNAL not consecutive"); +static_assert(FLAT == EXTERNAL + 1, "EXTERNAL and FLAT not consecutive"); struct CordRep { + CordRep() = default; + constexpr CordRep(RefcountAndFlags::Immortal immortal, size_t l) + : length(l), refcount(immortal), tag(EXTERNAL), storage{} {} + // The following three fields have to be less than 32 bytes since // that is the smallest supported flat node size. - // We use uint64_t for the length even in 32-bit binaries. - uint64_t length; - Refcount refcount; + size_t length; + RefcountAndFlags refcount; // If tag < FLAT, it represents CordRepKind and indicates the type of node. // Otherwise, the node type is CordRepFlat and the tag is the encoded size. uint8_t tag; - char data[1]; // Starting point for flat array: MUST BE LAST FIELD of CordRep + // `storage` serves two main purposes: + // - the starting point for CordRepFlat::Data() [flexible-array-member] + // - 3 bytes of additional storage for use by derived classes. + // The latter is used by CordRepConcat and CordRepBtree. CordRepConcat stores + // a 'depth' value in storage[0], and the (future) CordRepBtree class stores + // `height`, `begin` and `end` in the 3 entries. 
Otherwise we would need to + // allocate room for these in the derived class, as not all compilers reuse + // padding space from the base class (clang and gcc do, MSVC does not, etc) + uint8_t storage[3]; + + // Returns true if this instance's tag matches the requested type. + constexpr bool IsRing() const { return tag == RING; } + constexpr bool IsConcat() const { return tag == CONCAT; } + constexpr bool IsSubstring() const { return tag == SUBSTRING; } + constexpr bool IsExternal() const { return tag == EXTERNAL; } + constexpr bool IsFlat() const { return tag >= FLAT; } + constexpr bool IsBtree() const { return tag == BTREE; } + + inline CordRepRing* ring(); + inline const CordRepRing* ring() const; inline CordRepConcat* concat(); inline const CordRepConcat* concat() const; inline CordRepSubstring* substring(); inline const CordRepSubstring* substring() const; inline CordRepExternal* external(); inline const CordRepExternal* external() const; + inline CordRepFlat* flat(); + inline const CordRepFlat* flat() const; + inline CordRepBtree* btree(); + inline const CordRepBtree* btree() const; + + // -------------------------------------------------------------------- + // Memory management + + // Destroys the provided `rep`. + static void Destroy(CordRep* rep); + + // Increments the reference count of `rep`. + // Requires `rep` to be a non-null pointer value. + static inline CordRep* Ref(CordRep* rep); + + // Decrements the reference count of `rep`. Destroys rep if count reaches + // zero. Requires `rep` to be a non-null pointer value. 
+ static inline void Unref(CordRep* rep); }; struct CordRepConcat : public CordRep { CordRep* left; CordRep* right; - uint8_t depth() const { return static_cast<uint8_t>(data[0]); } - void set_depth(uint8_t depth) { data[0] = static_cast<char>(depth); } + uint8_t depth() const { return storage[0]; } + void set_depth(uint8_t depth) { storage[0] = depth; } }; struct CordRepSubstring : public CordRep { @@ -115,37 +296,325 @@ struct CordRepSubstring : public CordRep { CordRep* child; }; -// TODO(strel): replace the following logic (and related functions in cord.cc) -// with container_internal::Layout. - -// Alignment requirement for CordRepExternal so that the type erased releaser -// will be stored at a suitably aligned address. -constexpr size_t ExternalRepAlignment() { -#if defined(__STDCPP_DEFAULT_NEW_ALIGNMENT__) - return __STDCPP_DEFAULT_NEW_ALIGNMENT__; -#else - return alignof(max_align_t); -#endif -} - -// Type for function pointer that will invoke and destroy the type-erased -// releaser function object. Accepts a pointer to the releaser and the -// `string_view` that were passed in to `NewExternalRep` below. The return value -// is the size of the `Releaser` type. -using ExternalReleaserInvoker = size_t (*)(void*, absl::string_view); +// Type for function pointer that will invoke the releaser function and also +// delete the `CordRepExternalImpl` corresponding to the passed in +// `CordRepExternal`. +using ExternalReleaserInvoker = void (*)(CordRepExternal*); // External CordReps are allocated together with a type erased releaser. The // releaser is stored in the memory directly following the CordRepExternal. 
-struct alignas(ExternalRepAlignment()) CordRepExternal : public CordRep { +struct CordRepExternal : public CordRep { + CordRepExternal() = default; + explicit constexpr CordRepExternal(absl::string_view str) + : CordRep(RefcountAndFlags::Immortal{}, str.size()), + base(str.data()), + releaser_invoker(nullptr) {} + const char* base; // Pointer to function that knows how to call and destroy the releaser. ExternalReleaserInvoker releaser_invoker; + + // Deletes (releases) the external rep. + // Requires rep != nullptr and rep->IsExternal() + static void Delete(CordRep* rep); +}; + +struct Rank1 {}; +struct Rank0 : Rank1 {}; + +template <typename Releaser, typename = ::absl::base_internal::invoke_result_t< + Releaser, absl::string_view>> +void InvokeReleaser(Rank0, Releaser&& releaser, absl::string_view data) { + ::absl::base_internal::invoke(std::forward<Releaser>(releaser), data); +} + +template <typename Releaser, + typename = ::absl::base_internal::invoke_result_t<Releaser>> +void InvokeReleaser(Rank1, Releaser&& releaser, absl::string_view) { + ::absl::base_internal::invoke(std::forward<Releaser>(releaser)); +} + +// We use CompressedTuple so that we can benefit from EBCO. +template <typename Releaser> +struct CordRepExternalImpl + : public CordRepExternal, + public ::absl::container_internal::CompressedTuple<Releaser> { + // The extra int arg is so that we can avoid interfering with copy/move + // constructors while still benefitting from perfect forwarding. 
+ template <typename T> + CordRepExternalImpl(T&& releaser, int) + : CordRepExternalImpl::CompressedTuple(std::forward<T>(releaser)) { + this->releaser_invoker = &Release; + } + + ~CordRepExternalImpl() { + InvokeReleaser(Rank0{}, std::move(this->template get<0>()), + absl::string_view(base, length)); + } + + static void Release(CordRepExternal* rep) { + delete static_cast<CordRepExternalImpl*>(rep); + } +}; + +inline void CordRepExternal::Delete(CordRep* rep) { + assert(rep != nullptr && rep->IsExternal()); + auto* rep_external = static_cast<CordRepExternal*>(rep); + assert(rep_external->releaser_invoker != nullptr); + rep_external->releaser_invoker(rep_external); +} + +template <typename Str> +struct ConstInitExternalStorage { + ABSL_CONST_INIT static CordRepExternal value; }; -// TODO(strel): look into removing, it doesn't seem like anything relies on this -static_assert(sizeof(CordRepConcat) == sizeof(CordRepSubstring), ""); +template <typename Str> +CordRepExternal ConstInitExternalStorage<Str>::value(Str::value); + +enum { + kMaxInline = 15, +}; + +constexpr char GetOrNull(absl::string_view data, size_t pos) { + return pos < data.size() ? data[pos] : '\0'; +} + +// We store cordz_info as 64 bit pointer value in big endian format. This +// guarantees that the least significant byte of cordz_info matches the last +// byte of the inline data representation in as_chars_, which holds the inlined +// size or the 'is_tree' bit. +using cordz_info_t = int64_t; + +// Assert that the `cordz_info` pointer value perfectly overlaps the last half +// of `as_chars_` and can hold a pointer value. +static_assert(sizeof(cordz_info_t) * 2 == kMaxInline + 1, ""); +static_assert(sizeof(cordz_info_t) >= sizeof(intptr_t), ""); + +// BigEndianByte() creates a big endian representation of 'value', i.e.: a big +// endian value where the last byte in the host's representation holds 'value`, +// with all other bytes being 0. 
+static constexpr cordz_info_t BigEndianByte(unsigned char value) { +#if defined(ABSL_IS_BIG_ENDIAN) + return value; +#else + return static_cast<cordz_info_t>(value) << ((sizeof(cordz_info_t) - 1) * 8); +#endif +} + +class InlineData { + public: + // DefaultInitType forces the use of the default initialization constructor. + enum DefaultInitType { kDefaultInit }; + + // kNullCordzInfo holds the big endian representation of intptr_t(1) + // This is the 'null' / initial value of 'cordz_info'. The null value + // is specifically big endian 1 as with 64-bit pointers, the last + // byte of cordz_info overlaps with the last byte holding the tag. + static constexpr cordz_info_t kNullCordzInfo = BigEndianByte(1); + + constexpr InlineData() : as_chars_{0} {} + explicit InlineData(DefaultInitType) {} + explicit constexpr InlineData(CordRep* rep) : as_tree_(rep) {} + explicit constexpr InlineData(absl::string_view chars) + : as_chars_{ + GetOrNull(chars, 0), GetOrNull(chars, 1), + GetOrNull(chars, 2), GetOrNull(chars, 3), + GetOrNull(chars, 4), GetOrNull(chars, 5), + GetOrNull(chars, 6), GetOrNull(chars, 7), + GetOrNull(chars, 8), GetOrNull(chars, 9), + GetOrNull(chars, 10), GetOrNull(chars, 11), + GetOrNull(chars, 12), GetOrNull(chars, 13), + GetOrNull(chars, 14), static_cast<char>((chars.size() << 1))} {} + + // Returns true if the current instance is empty. + // The 'empty value' is an inlined data value of zero length. + bool is_empty() const { return tag() == 0; } + + // Returns true if the current instance holds a tree value. + bool is_tree() const { return (tag() & 1) != 0; } + + // Returns true if the current instance holds a cordz_info value. + // Requires the current instance to hold a tree value. + bool is_profiled() const { + assert(is_tree()); + return as_tree_.cordz_info != kNullCordzInfo; + } + + // Returns true if either of the provided instances hold a cordz_info value. 
+ // This method is more efficient than the equivalent `data1.is_profiled() || + // data2.is_profiled()`. Requires both arguments to hold a tree. + static bool is_either_profiled(const InlineData& data1, + const InlineData& data2) { + assert(data1.is_tree() && data2.is_tree()); + return (data1.as_tree_.cordz_info | data2.as_tree_.cordz_info) != + kNullCordzInfo; + } + + // Returns the cordz_info sampling instance for this instance, or nullptr + // if the current instance is not sampled and does not have CordzInfo data. + // Requires the current instance to hold a tree value. + CordzInfo* cordz_info() const { + assert(is_tree()); + intptr_t info = + static_cast<intptr_t>(absl::big_endian::ToHost64(as_tree_.cordz_info)); + assert(info & 1); + return reinterpret_cast<CordzInfo*>(info - 1); + } + + // Sets the current cordz_info sampling instance for this instance, or nullptr + // if the current instance is not sampled and does not have CordzInfo data. + // Requires the current instance to hold a tree value. + void set_cordz_info(CordzInfo* cordz_info) { + assert(is_tree()); + intptr_t info = reinterpret_cast<intptr_t>(cordz_info) | 1; + as_tree_.cordz_info = absl::big_endian::FromHost64(info); + } + + // Resets the current cordz_info to null / empty. + void clear_cordz_info() { + assert(is_tree()); + as_tree_.cordz_info = kNullCordzInfo; + } + + // Returns a read only pointer to the character data inside this instance. + // Requires the current instance to hold inline data. + const char* as_chars() const { + assert(!is_tree()); + return as_chars_; + } + + // Returns a mutable pointer to the character data inside this instance. + // Should be used for 'write only' operations setting an inlined value. 
+ // Applications can set the value of inlined data either before or after + // setting the inlined size, i.e., both of the below are valid: + // + // // Set inlined data and inline size + // memcpy(data_.as_chars(), data, size); + // data_.set_inline_size(size); + // + // // Set inlined size and inline data + // data_.set_inline_size(size); + // memcpy(data_.as_chars(), data, size); + // + // It's an error to read from the returned pointer without a preceding write + // if the current instance does not hold inline data, i.e.: is_tree() == true. + char* as_chars() { return as_chars_; } + + // Returns the tree value of this value. + // Requires the current instance to hold a tree value. + CordRep* as_tree() const { + assert(is_tree()); + return as_tree_.rep; + } + + // Initialize this instance to holding the tree value `rep`, + // initializing the cordz_info to null, i.e.: 'not profiled'. + void make_tree(CordRep* rep) { + as_tree_.rep = rep; + as_tree_.cordz_info = kNullCordzInfo; + } + + // Set the tree value of this instance to 'rep`. + // Requires the current instance to already hold a tree value. + // Does not affect the value of cordz_info. + void set_tree(CordRep* rep) { + assert(is_tree()); + as_tree_.rep = rep; + } + + // Returns the size of the inlined character data inside this instance. + // Requires the current instance to hold inline data. + size_t inline_size() const { + assert(!is_tree()); + return tag() >> 1; + } + + // Sets the size of the inlined character data inside this instance. + // Requires `size` to be <= kMaxInline. + // See the documentation on 'as_chars()' for more information and examples. + void set_inline_size(size_t size) { + ABSL_ASSERT(size <= kMaxInline); + tag() = static_cast<char>(size << 1); + } + + private: + // See cordz_info_t for forced alignment and size of `cordz_info` details. 
+ struct AsTree { + explicit constexpr AsTree(absl::cord_internal::CordRep* tree) + : rep(tree), cordz_info(kNullCordzInfo) {} + // This union uses up extra space so that whether rep is 32 or 64 bits, + // cordz_info will still start at the eighth byte, and the last + // byte of cordz_info will still be the last byte of InlineData. + union { + absl::cord_internal::CordRep* rep; + cordz_info_t unused_aligner; + }; + cordz_info_t cordz_info; + }; + + char& tag() { return reinterpret_cast<char*>(this)[kMaxInline]; } + char tag() const { return reinterpret_cast<const char*>(this)[kMaxInline]; } + + // If the data has length <= kMaxInline, we store it in `as_chars_`, and + // store the size in the last char of `as_chars_` shifted left by one bit. + // Else we store it in a tree and store a pointer to that tree in + // `as_tree_.rep`, with the low bit of the last byte set via `cordz_info`. + union { + char as_chars_[kMaxInline + 1]; + AsTree as_tree_; + }; +}; + +static_assert(sizeof(InlineData) == kMaxInline + 1, ""); + +inline CordRepConcat* CordRep::concat() { + assert(IsConcat()); + return static_cast<CordRepConcat*>(this); +} + +inline const CordRepConcat* CordRep::concat() const { + assert(IsConcat()); + return static_cast<const CordRepConcat*>(this); +} + +inline CordRepSubstring* CordRep::substring() { + assert(IsSubstring()); + return static_cast<CordRepSubstring*>(this); +} + +inline const CordRepSubstring* CordRep::substring() const { + assert(IsSubstring()); + return static_cast<const CordRepSubstring*>(this); +} + +inline CordRepExternal* CordRep::external() { + assert(IsExternal()); + return static_cast<CordRepExternal*>(this); +} + +inline const CordRepExternal* CordRep::external() const { + assert(IsExternal()); + return static_cast<const CordRepExternal*>(this); +} + +inline CordRep* CordRep::Ref(CordRep* rep) { + assert(rep != nullptr); + rep->refcount.Increment(); + return rep; +} + +inline void CordRep::Unref(CordRep* rep) { + assert(rep != nullptr); + // Expect refcount to be 0.
Avoiding the cost of an atomic decrement should + // typically outweigh the cost of an extra branch checking for ref == 1. + if (ABSL_PREDICT_FALSE(!rep->refcount.DecrementExpectHighRefcount())) { + Destroy(rep); + } +} } // namespace cord_internal + ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_internal_test.cc b/third_party/abseil-cpp/absl/strings/internal/cord_internal_test.cc new file mode 100644 index 0000000000..0758dfef38 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_internal_test.cc @@ -0,0 +1,116 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_internal.h" + +#include "gmock/gmock.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +TEST(RefcountAndFlags, NormalRefcount) { + for (bool expect_high_refcount : {false, true}) { + SCOPED_TRACE(expect_high_refcount); + RefcountAndFlags refcount; + // count = 1 + + EXPECT_FALSE(refcount.HasCrc()); + EXPECT_TRUE(refcount.IsMutable()); + EXPECT_TRUE(refcount.IsOne()); + + refcount.Increment(); + // count = 2 + + EXPECT_FALSE(refcount.HasCrc()); + EXPECT_FALSE(refcount.IsMutable()); + EXPECT_FALSE(refcount.IsOne()); + + // Decrementing should return true, since a reference is outstanding. 
+ if (expect_high_refcount) { + EXPECT_TRUE(refcount.DecrementExpectHighRefcount()); + } else { + EXPECT_TRUE(refcount.Decrement()); + } + // count = 1 + + EXPECT_FALSE(refcount.HasCrc()); + EXPECT_TRUE(refcount.IsMutable()); + EXPECT_TRUE(refcount.IsOne()); + + // One more decrement will return false, as no references remain. + if (expect_high_refcount) { + EXPECT_FALSE(refcount.DecrementExpectHighRefcount()); + } else { + EXPECT_FALSE(refcount.Decrement()); + } + } +} + +TEST(RefcountAndFlags, CrcRefcount) { + for (bool expect_high_refcount : {false, true}) { + SCOPED_TRACE(expect_high_refcount); + RefcountAndFlags refcount(RefcountAndFlags::WithCrc{}); + // count = 1 + + // A CRC-carrying node is never mutable, but can be unshared + EXPECT_TRUE(refcount.HasCrc()); + EXPECT_FALSE(refcount.IsMutable()); + EXPECT_TRUE(refcount.IsOne()); + + refcount.Increment(); + // count = 2 + + EXPECT_TRUE(refcount.HasCrc()); + EXPECT_FALSE(refcount.IsMutable()); + EXPECT_FALSE(refcount.IsOne()); + + // Decrementing should return true, since a reference is outstanding. + if (expect_high_refcount) { + EXPECT_TRUE(refcount.DecrementExpectHighRefcount()); + } else { + EXPECT_TRUE(refcount.Decrement()); + } + // count = 1 + + EXPECT_TRUE(refcount.HasCrc()); + EXPECT_FALSE(refcount.IsMutable()); + EXPECT_TRUE(refcount.IsOne()); + + // One more decrement will return false, as no references remain. + if (expect_high_refcount) { + EXPECT_FALSE(refcount.DecrementExpectHighRefcount()); + } else { + EXPECT_FALSE(refcount.Decrement()); + } + } +} + +TEST(RefcountAndFlags, ImmortalRefcount) { + RefcountAndFlags immortal_refcount(RefcountAndFlags::Immortal{}); + + for (int i = 0; i < 100; ++i) { + // An immortal refcount is never unshared, and decrementing never causes + // a collection.
+ EXPECT_FALSE(immortal_refcount.HasCrc()); + EXPECT_FALSE(immortal_refcount.IsMutable()); + EXPECT_FALSE(immortal_refcount.IsOne()); + EXPECT_TRUE(immortal_refcount.Decrement()); + EXPECT_TRUE(immortal_refcount.DecrementExpectHighRefcount()); + } +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree.cc b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree.cc new file mode 100644 index 0000000000..4404f33a12 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree.cc @@ -0,0 +1,1128 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/internal/cord_rep_btree.h" + +#include <cassert> +#include <cstdint> +#include <iostream> +#include <string> + +#include "absl/base/attributes.h" +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_consume.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +constexpr size_t CordRepBtree::kMaxCapacity; // NOLINT: needed for c++ < c++17 + +namespace { + +using NodeStack = CordRepBtree * [CordRepBtree::kMaxDepth]; +using EdgeType = CordRepBtree::EdgeType; +using OpResult = CordRepBtree::OpResult; +using CopyResult = CordRepBtree::CopyResult; + +constexpr auto kFront = CordRepBtree::kFront; +constexpr auto kBack = CordRepBtree::kBack; + +inline bool exhaustive_validation() { + return cord_btree_exhaustive_validation.load(std::memory_order_relaxed); +} + +// Implementation of the various 'Dump' functions. +// Prints the entire tree structure of 'rep'. External callers should +// not specify 'depth' and leave it to its default (0) value. +// Rep may be a CordRepBtree tree, or a SUBSTRING / EXTERNAL / FLAT node. +void DumpAll(const CordRep* rep, bool include_contents, std::ostream& stream, + int depth = 0) { + // Allow for full height trees + substring -> flat / external nodes. + assert(depth <= CordRepBtree::kMaxDepth + 2); + std::string sharing = const_cast<CordRep*>(rep)->refcount.IsOne() + ? std::string("Private") + : absl::StrCat("Shared(", rep->refcount.Get(), ")"); + std::string sptr = absl::StrCat("0x", absl::Hex(rep)); + + // Dumps the data contents of `rep` if `include_contents` is true. + // Always emits a new line character.
+ auto maybe_dump_data = [&stream, include_contents](const CordRep* r) { + if (include_contents) { + // Allow for up to 60 wide display of content data, which with some + // indentation and prefix / labels keeps us within roughly 80-100 wide. + constexpr size_t kMaxDataLength = 60; + stream << ", data = \"" + << CordRepBtree::EdgeData(r).substr(0, kMaxDataLength) + << (r->length > kMaxDataLength ? "\"..." : "\""); + } + stream << '\n'; + }; + + // For each level, we print the 'shared/private' state and the rep pointer, + // indented by two spaces per recursive depth. + stream << std::string(depth * 2, ' ') << sharing << " (" << sptr << ") "; + + if (rep->IsBtree()) { + const CordRepBtree* node = rep->btree(); + std::string label = + node->height() ? absl::StrCat("Node(", node->height(), ")") : "Leaf"; + stream << label << ", len = " << node->length + << ", begin = " << node->begin() << ", end = " << node->end() + << "\n"; + for (CordRep* edge : node->Edges()) { + DumpAll(edge, include_contents, stream, depth + 1); + } + } else if (rep->tag == SUBSTRING) { + const CordRepSubstring* substring = rep->substring(); + stream << "Substring, len = " << rep->length + << ", start = " << substring->start; + maybe_dump_data(rep); + DumpAll(substring->child, include_contents, stream, depth + 1); + } else if (rep->tag >= FLAT) { + stream << "Flat, len = " << rep->length + << ", cap = " << rep->flat()->Capacity(); + maybe_dump_data(rep); + } else if (rep->tag == EXTERNAL) { + stream << "Extn, len = " << rep->length; + maybe_dump_data(rep); + } +} + +// TODO(b/192061034): add 'bytes to copy' logic to avoid large slop on substring +// small data out of large reps, and general efficiency of 'always copy small +// data'. Consider making this a cord rep internal library function. 
+CordRepSubstring* CreateSubstring(CordRep* rep, size_t offset, size_t n) { + assert(n != 0); + assert(offset + n <= rep->length); + assert(offset != 0 || n != rep->length); + + if (rep->tag == SUBSTRING) { + CordRepSubstring* substring = rep->substring(); + offset += substring->start; + rep = CordRep::Ref(substring->child); + CordRep::Unref(substring); + } + CordRepSubstring* substring = new CordRepSubstring(); + substring->length = n; + substring->tag = SUBSTRING; + substring->start = offset; + substring->child = rep; + return substring; +} + +// TODO(b/192061034): consider making this a cord rep library function. +inline CordRep* MakeSubstring(CordRep* rep, size_t offset, size_t n) { + if (n == rep->length) return rep; + if (n == 0) return CordRep::Unref(rep), nullptr; + return CreateSubstring(rep, offset, n); +} + +// TODO(b/192061034): consider making this a cord rep library function. +inline CordRep* MakeSubstring(CordRep* rep, size_t offset) { + if (offset == 0) return rep; + return CreateSubstring(rep, offset, rep->length - offset); +} + +// Resizes `edge` to the provided `length`. Adopts a reference on `edge`. +// This method directly returns `edge` if `length` equals `edge->length`. +// If `is_mutable` is set to true, this function may return `edge` with +// `edge->length` set to the new length depending on the type and size of +// `edge`. Otherwise, this function returns a new CordRepSubstring value. +// Requires `length > 0 && length <= edge->length`. +CordRep* ResizeEdge(CordRep* edge, size_t length, bool is_mutable) { + assert(length > 0); + assert(length <= edge->length); + assert(CordRepBtree::IsDataEdge(edge)); + if (length >= edge->length) return edge; + + if (is_mutable && (edge->tag >= FLAT || edge->tag == SUBSTRING)) { + edge->length = length; + return edge; + } + + return CreateSubstring(edge, 0, length); +} + +template <EdgeType edge_type> +inline absl::string_view Consume(absl::string_view s, size_t n) { + return edge_type == kBack ? 
s.substr(n) : s.substr(0, s.size() - n); +} + +template <EdgeType edge_type> +inline absl::string_view Consume(char* dst, absl::string_view s, size_t n) { + if (edge_type == kBack) { + memcpy(dst, s.data(), n); + return s.substr(n); + } else { + const size_t offset = s.size() - n; + memcpy(dst, s.data() + offset, n); + return s.substr(0, offset); + } +} + +// Known issue / optimization weirdness: the store associated with the +// decrement introduces traffic between cpus (even if the result of that +// traffic does nothing), making this faster than a single call to +// refcount.Decrement() checking the zero refcount condition. +template <typename R, typename Fn> +inline void FastUnref(R* r, Fn&& fn) { + if (r->refcount.IsOne()) { + fn(r); + } else if (!r->refcount.DecrementExpectHighRefcount()) { + fn(r); + } +} + +// Deletes a leaf node data edge. Requires `rep` to be an EXTERNAL or FLAT +// node, or a SUBSTRING of an EXTERNAL or FLAT node. +void DeleteLeafEdge(CordRep* rep) { + for (;;) { + if (rep->tag >= FLAT) { + CordRepFlat::Delete(rep->flat()); + return; + } + if (rep->tag == EXTERNAL) { + CordRepExternal::Delete(rep->external()); + return; + } + assert(rep->tag == SUBSTRING); + CordRepSubstring* substring = rep->substring(); + rep = substring->child; + assert(rep->tag == EXTERNAL || rep->tag >= FLAT); + delete substring; + if (rep->refcount.Decrement()) return; + } +} + +// StackOperations contains the logic to build a left-most or right-most stack +// (leg) down to the leaf level of a btree, and 'unwind' / 'Finalize' methods to +// propagate node changes up the stack. +template <EdgeType edge_type> +struct StackOperations { + // Returns true if the node at 'depth' is mutable, i.e. has a refcount + // of one, carries no CRC, and all of its parent nodes have a refcount of one. + inline bool owned(int depth) const { return depth < share_depth; } + + // Returns the node at 'depth'. 
+ inline CordRepBtree* node(int depth) const { return stack[depth]; } + + // Builds a `depth` levels deep stack starting at `tree` recording which nodes + // are private in the form of the 'share depth' where nodes are shared. + inline CordRepBtree* BuildStack(CordRepBtree* tree, int depth) { + assert(depth <= tree->height()); + int current_depth = 0; + while (current_depth < depth && tree->refcount.IsMutable()) { + stack[current_depth++] = tree; + tree = tree->Edge(edge_type)->btree(); + } + share_depth = current_depth + (tree->refcount.IsMutable() ? 1 : 0); + while (current_depth < depth) { + stack[current_depth++] = tree; + tree = tree->Edge(edge_type)->btree(); + } + return tree; + } + + // Builds a stack with the invariant that all nodes are private owned / not + // shared and carry no CRC data. This is used in iterative updates where a + // previous propagation guaranteed all nodes have this property. + inline void BuildOwnedStack(CordRepBtree* tree, int height) { + assert(height <= CordRepBtree::kMaxHeight); + int depth = 0; + while (depth < height) { + assert(tree->refcount.IsMutable()); + stack[depth++] = tree; + tree = tree->Edge(edge_type)->btree(); + } + assert(tree->refcount.IsMutable()); + share_depth = depth + 1; + } + + // Processes the final 'top level' result action for the tree. + // See the 'Action' enum for the various action implications. + static inline CordRepBtree* Finalize(CordRepBtree* tree, OpResult result) { + switch (result.action) { + case CordRepBtree::kPopped: + tree = edge_type == kBack ? 
CordRepBtree::New(tree, result.tree) + : CordRepBtree::New(result.tree, tree); + if (ABSL_PREDICT_FALSE(tree->height() > CordRepBtree::kMaxHeight)) { + tree = CordRepBtree::Rebuild(tree); + ABSL_RAW_CHECK(tree->height() <= CordRepBtree::kMaxHeight, + "Max height exceeded"); + } + return tree; + case CordRepBtree::kCopied: + CordRep::Unref(tree); + ABSL_FALLTHROUGH_INTENDED; + case CordRepBtree::kSelf: + return result.tree; + } + ABSL_INTERNAL_UNREACHABLE; + return result.tree; + } + + // Propagate the action result in 'result' up into all nodes of the stack + // starting at depth 'depth'. 'length' contains the extra length of data that + // was added at the lowest level, and is updated into all nodes of the stack. + // See the 'Action' enum for the various action implications. + // If 'propagate' is true, then any copied node values are updated into the + // stack, which is used for iterative processing on the same stack. + template <bool propagate = false> + inline CordRepBtree* Unwind(CordRepBtree* tree, int depth, size_t length, + OpResult result) { + // TODO(mvels): revisit the below code to check if 3 loops with 3 + // (incremental) conditions is faster than 1 loop with a switch. + // Benchmarking and perf recordings indicate the loop with switch is + // fastest, likely because of indirect jumps on the tight case values and + // dense branches. But it's worth considering 3 loops, as the `action` + // transitions are mono directional. E.g.: + // while (action == kPopped) { + // ... + // } + // while (action == kCopied) { + // ... + // } + // ... + // We also found that an "if () do {}" loop here seems faster, possibly + // because it allows the branch predictor more granular heuristics on + // 'single leaf' (`depth` == 0) and 'single depth' (`depth` == 1) cases + // which appear to be the most common use cases. 
+ if (depth != 0) { + do { + CordRepBtree* node = stack[--depth]; + const bool owned = depth < share_depth; + switch (result.action) { + case CordRepBtree::kPopped: + assert(!propagate); + result = node->AddEdge<edge_type>(owned, result.tree, length); + break; + case CordRepBtree::kCopied: + result = node->SetEdge<edge_type>(owned, result.tree, length); + if (propagate) stack[depth] = result.tree; + break; + case CordRepBtree::kSelf: + node->length += length; + while (depth > 0) { + node = stack[--depth]; + node->length += length; + } + return node; + } + } while (depth > 0); + } + return Finalize(tree, result); + } + + // Invokes `Unwind` with `propagate=true` to update the stack node values. + inline CordRepBtree* Propagate(CordRepBtree* tree, int depth, size_t length, + OpResult result) { + return Unwind</*propagate=*/true>(tree, depth, length, result); + } + + // `share_depth` contains the depth at which the nodes in the stack cannot + // be mutated. I.e., if the top most level is shared (i.e.: + // `!refcount.IsMutable()`), then `share_depth` is 0. If the 2nd node + // is shared (and implicitly all nodes below that) then `share_depth` is 1, + // etc. A `share_depth` greater than the depth of the stack indicates that + // none of the nodes in the stack are shared. 
+ int share_depth; + + NodeStack stack; +}; + +} // namespace + +void CordRepBtree::Dump(const CordRep* rep, absl::string_view label, + bool include_contents, std::ostream& stream) { + stream << "===================================\n"; + if (!label.empty()) { + stream << label << '\n'; + stream << "-----------------------------------\n"; + } + if (rep) { + DumpAll(rep, include_contents, stream); + } else { + stream << "NULL\n"; + } +} + +void CordRepBtree::Dump(const CordRep* rep, absl::string_view label, + std::ostream& stream) { + Dump(rep, label, false, stream); +} + +void CordRepBtree::Dump(const CordRep* rep, std::ostream& stream) { + Dump(rep, absl::string_view(), false, stream); +} + +void CordRepBtree::DestroyLeaf(CordRepBtree* tree, size_t begin, size_t end) { + for (CordRep* edge : tree->Edges(begin, end)) { + FastUnref(edge, DeleteLeafEdge); + } + Delete(tree); +} + +void CordRepBtree::DestroyNonLeaf(CordRepBtree* tree, size_t begin, + size_t end) { + for (CordRep* edge : tree->Edges(begin, end)) { + FastUnref(edge->btree(), Destroy); + } + Delete(tree); +} + +bool CordRepBtree::IsValid(const CordRepBtree* tree, bool shallow) { +#define NODE_CHECK_VALID(x) \ + if (!(x)) { \ + ABSL_RAW_LOG(ERROR, "CordRepBtree::CheckValid() FAILED: %s", #x); \ + return false; \ + } +#define NODE_CHECK_EQ(x, y) \ + if ((x) != (y)) { \ + ABSL_RAW_LOG(ERROR, \ + "CordRepBtree::CheckValid() FAILED: %s != %s (%s vs %s)", #x, \ + #y, absl::StrCat(x).c_str(), absl::StrCat(y).c_str()); \ + return false; \ + } + + NODE_CHECK_VALID(tree != nullptr); + NODE_CHECK_VALID(tree->IsBtree()); + NODE_CHECK_VALID(tree->height() <= kMaxHeight); + NODE_CHECK_VALID(tree->begin() < tree->capacity()); + NODE_CHECK_VALID(tree->end() <= tree->capacity()); + NODE_CHECK_VALID(tree->begin() <= tree->end()); + size_t child_length = 0; + for (CordRep* edge : tree->Edges()) { + NODE_CHECK_VALID(edge != nullptr); + if (tree->height() > 0) { + NODE_CHECK_VALID(edge->IsBtree()); + 
NODE_CHECK_VALID(edge->btree()->height() == tree->height() - 1); + } else { + NODE_CHECK_VALID(IsDataEdge(edge)); + } + child_length += edge->length; + } + NODE_CHECK_EQ(child_length, tree->length); + if ((!shallow || exhaustive_validation()) && tree->height() > 0) { + for (CordRep* edge : tree->Edges()) { + if (!IsValid(edge->btree(), shallow)) return false; + } + } + return true; + +#undef NODE_CHECK_VALID +#undef NODE_CHECK_EQ +} + +#ifndef NDEBUG + +CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree, bool shallow) { + if (!IsValid(tree, shallow)) { + Dump(tree, "CordRepBtree validation failed:", false, std::cout); + ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED"); + } + return tree; +} + +const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree, + bool shallow) { + if (!IsValid(tree, shallow)) { + Dump(tree, "CordRepBtree validation failed:", false, std::cout); + ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED"); + } + return tree; +} + +#endif // NDEBUG + +template <EdgeType edge_type> +inline OpResult CordRepBtree::AddEdge(bool owned, CordRep* edge, size_t delta) { + if (size() >= kMaxCapacity) return {New(edge), kPopped}; + OpResult result = ToOpResult(owned); + result.tree->Add<edge_type>(edge); + result.tree->length += delta; + return result; +} + +template <EdgeType edge_type> +OpResult CordRepBtree::SetEdge(bool owned, CordRep* edge, size_t delta) { + OpResult result; + const size_t idx = index(edge_type); + if (owned) { + result = {this, kSelf}; + CordRep::Unref(edges_[idx]); + } else { + // Create a copy containing all unchanged edges. Unchanged edges are the + // half-open interval [begin, back) or [begin + 1, end) depending on + // `edge_type`. We conveniently cover both cases using a constexpr `shift` + // being 0 or 1 as `end :== back + 1`. + result = {CopyRaw(), kCopied}; + constexpr int shift = edge_type == kFront ?
1 : 0; + for (CordRep* r : Edges(begin() + shift, back() + shift)) { + CordRep::Ref(r); + } + } + result.tree->edges_[idx] = edge; + result.tree->length += delta; + return result; +} + +template <EdgeType edge_type> +CordRepBtree* CordRepBtree::AddCordRep(CordRepBtree* tree, CordRep* rep) { + const int depth = tree->height(); + const size_t length = rep->length; + StackOperations<edge_type> ops; + CordRepBtree* leaf = ops.BuildStack(tree, depth); + const OpResult result = + leaf->AddEdge<edge_type>(ops.owned(depth), rep, length); + return ops.Unwind(tree, depth, length, result); +} + +template <> +CordRepBtree* CordRepBtree::NewLeaf<kBack>(absl::string_view data, + size_t extra) { + CordRepBtree* leaf = CordRepBtree::New(0); + size_t length = 0; + size_t end = 0; + const size_t cap = leaf->capacity(); + while (!data.empty() && end != cap) { + auto* flat = CordRepFlat::New(data.length() + extra); + flat->length = (std::min)(data.length(), flat->Capacity()); + length += flat->length; + leaf->edges_[end++] = flat; + data = Consume<kBack>(flat->Data(), data, flat->length); + } + leaf->length = length; + leaf->set_end(end); + return leaf; +} + +template <> +CordRepBtree* CordRepBtree::NewLeaf<kFront>(absl::string_view data, + size_t extra) { + CordRepBtree* leaf = CordRepBtree::New(0); + size_t length = 0; + size_t begin = leaf->capacity(); + leaf->set_end(leaf->capacity()); + while (!data.empty() && begin != 0) { + auto* flat = CordRepFlat::New(data.length() + extra); + flat->length = (std::min)(data.length(), flat->Capacity()); + length += flat->length; + leaf->edges_[--begin] = flat; + data = Consume<kFront>(flat->Data(), data, flat->length); + } + leaf->length = length; + leaf->set_begin(begin); + return leaf; +} + +template <> +absl::string_view CordRepBtree::AddData<kBack>(absl::string_view data, + size_t extra) { + assert(!data.empty()); + assert(size() < capacity()); + AlignBegin(); + const size_t cap = capacity(); + do { + CordRepFlat* flat = 
CordRepFlat::New(data.length() + extra); + const size_t n = (std::min)(data.length(), flat->Capacity()); + flat->length = n; + edges_[fetch_add_end(1)] = flat; + data = Consume<kBack>(flat->Data(), data, n); + } while (!data.empty() && end() != cap); + return data; +} + +template <> +absl::string_view CordRepBtree::AddData<kFront>(absl::string_view data, + size_t extra) { + assert(!data.empty()); + assert(size() < capacity()); + AlignEnd(); + do { + CordRepFlat* flat = CordRepFlat::New(data.length() + extra); + const size_t n = (std::min)(data.length(), flat->Capacity()); + flat->length = n; + edges_[sub_fetch_begin(1)] = flat; + data = Consume<kFront>(flat->Data(), data, n); + } while (!data.empty() && begin() != 0); + return data; +} + +template <EdgeType edge_type> +CordRepBtree* CordRepBtree::AddData(CordRepBtree* tree, absl::string_view data, + size_t extra) { + if (ABSL_PREDICT_FALSE(data.empty())) return tree; + + const size_t original_data_size = data.size(); + int depth = tree->height(); + StackOperations<edge_type> ops; + CordRepBtree* leaf = ops.BuildStack(tree, depth); + + // If there is capacity in the last edge, append as much data + // as possible into this last edge. + if (leaf->size() < leaf->capacity()) { + OpResult result = leaf->ToOpResult(ops.owned(depth)); + data = result.tree->AddData<edge_type>(data, extra); + if (data.empty()) { + result.tree->length += original_data_size; + return ops.Unwind(tree, depth, original_data_size, result); + } + + // We added some data into this leaf, but not all. Propagate the added + // length to the top most node, and rebuild the stack with any newly copied + // or updated nodes. From this point on, the path (leg) from the top most + // node to the right-most node towards the leaf node is privately owned. 
+ size_t delta = original_data_size - data.size(); + assert(delta > 0); + result.tree->length += delta; + tree = ops.Propagate(tree, depth, delta, result); + ops.share_depth = depth + 1; + } + + // We were unable to append all data into the existing right-most leaf node. + // This means all remaining data must be put into (a) new leaf node(s) which + // we append to the tree. To make this efficient, we iteratively build full + // leaf nodes from `data` until the created leaf contains all remaining data. + // We utilize the `Unwind` method to merge the created leaf into the first + // level towards root that has capacity. On each iteration with remaining + // data, we rebuild the stack in the knowledge that right-most nodes are + // privately owned after the first `Unwind` completes. + for (;;) { + OpResult result = {CordRepBtree::NewLeaf<edge_type>(data, extra), kPopped}; + if (result.tree->length == data.size()) { + return ops.Unwind(tree, depth, result.tree->length, result); + } + data = Consume<edge_type>(data, result.tree->length); + tree = ops.Unwind(tree, depth, result.tree->length, result); + depth = tree->height(); + ops.BuildOwnedStack(tree, depth); + } +} + +template <EdgeType edge_type> +CordRepBtree* CordRepBtree::Merge(CordRepBtree* dst, CordRepBtree* src) { + assert(dst->height() >= src->height()); + + // Capture source length as we may consume / destroy `src`. + const size_t length = src->length; + + // We attempt to merge `src` at its corresponding height in `dst`. + const int depth = dst->height() - src->height(); + StackOperations<edge_type> ops; + CordRepBtree* merge_node = ops.BuildStack(dst, depth); + + // If there is enough space in `merge_node` for all edges from `src`, add all + // edges to this node, making a fresh copy as needed if not privately owned. 
+ // If `merge_node` does not have capacity for `src`, we rely on `Unwind` and + // `Finalize` to merge `src` into the first level towards `root` where there + // is capacity for another edge, or create a new top level node. + OpResult result; + if (merge_node->size() + src->size() <= kMaxCapacity) { + result = merge_node->ToOpResult(ops.owned(depth)); + result.tree->Add<edge_type>(src->Edges()); + result.tree->length += src->length; + if (src->refcount.IsOne()) { + Delete(src); + } else { + for (CordRep* edge : src->Edges()) CordRep::Ref(edge); + CordRepBtree::Unref(src); + } + } else { + result = {src, kPopped}; + } + + // Unless we merged at the top level (i.e.: src and dst are equal height), + // unwind the result towards the top level, and finalize the result. + if (depth) { + return ops.Unwind(dst, depth, length, result); + } + return ops.Finalize(dst, result); +} + +CopyResult CordRepBtree::CopySuffix(size_t offset) { + assert(offset < this->length); + + // As long as `offset` starts inside the last edge, we can 'drop' the current + // depth. For the most extreme example: if offset references the last data + // edge in the tree, there is only a single edge / path from the top of the + // tree to that last edge, so we can drop all the nodes except that edge. + // The fast path check for this is `back->length >= length - offset`. + int height = this->height(); + CordRepBtree* node = this; + size_t len = node->length - offset; + CordRep* back = node->Edge(kBack); + while (back->length >= len) { + offset = back->length - len; + if (--height < 0) { + return {MakeSubstring(CordRep::Ref(back), offset), height}; + } + node = back->btree(); + back = node->Edge(kBack); + } + if (offset == 0) return {CordRep::Ref(node), height}; + + // Offset does not point into the last edge, so we span at least two edges. + // Find the index of offset with `IndexBeyond` which provides us the edge + // 'beyond' the offset if offset is not a clean starting point of an edge. 
+ Position pos = node->IndexBeyond(offset); + CordRepBtree* sub = node->CopyToEndFrom(pos.index, len); + const CopyResult result = {sub, height}; + + // `pos.n` contains a non zero value if the offset is not an exact starting + // point of an edge. In this case, `pos.n` contains the 'trailing' amount of + // bytes of the edge preceding that in `pos.index`. We need to iteratively + // adjust the preceding edge with the 'broken' offset until we have a perfect + // start of the edge. + while (pos.n != 0) { + assert(pos.index >= 1); + const size_t begin = pos.index - 1; + sub->set_begin(begin); + CordRep* const edge = node->Edge(begin); + + len = pos.n; + offset = edge->length - len; + + if (--height < 0) { + sub->edges_[begin] = MakeSubstring(CordRep::Ref(edge), offset, len); + return result; + } + + node = edge->btree(); + pos = node->IndexBeyond(offset); + + CordRepBtree* nsub = node->CopyToEndFrom(pos.index, len); + sub->edges_[begin] = nsub; + sub = nsub; + } + sub->set_begin(pos.index); + return result; +} + +CopyResult CordRepBtree::CopyPrefix(size_t n, bool allow_folding) { + assert(n > 0); + assert(n <= this->length); + + // As long as `n` does not exceed the length of the first edge, we can 'drop' + // the current depth. For the most extreme example: if we'd copy a 1 byte + // prefix from a tree, there is only a single edge / path from the top of the + // tree to the single data edge containing this byte, so we can drop all the + // nodes except the data node. + int height = this->height(); + CordRepBtree* node = this; + CordRep* front = node->Edge(kFront); + if (allow_folding) { + while (front->length >= n) { + if (--height < 0) return {MakeSubstring(CordRep::Ref(front), 0, n), -1}; + node = front->btree(); + front = node->Edge(kFront); + } + } + if (node->length == n) return {CordRep::Ref(node), height}; + + // `n` spans at least two nodes, find the end point of the span. 
+ Position pos = node->IndexOf(n); + + // Create a partial copy of the node up to `pos.index`, with a defined length + // of `n`. Any 'partial last edge' is added further below as needed. + CordRepBtree* sub = node->CopyBeginTo(pos.index, n); + const CopyResult result = {sub, height}; + + // `pos.n` contains the 'offset inside the edge for IndexOf(n)'. As long as + // this is not zero, we don't have a 'clean cut', so we need to make a + // (partial) copy of that last edge, and repeat this until pos.n is zero. + while (pos.n != 0) { + size_t end = pos.index; + n = pos.n; + + CordRep* edge = node->Edge(pos.index); + if (--height < 0) { + sub->edges_[end++] = MakeSubstring(CordRep::Ref(edge), 0, n); + sub->set_end(end); + AssertValid(result.edge->btree()); + return result; + } + + node = edge->btree(); + pos = node->IndexOf(n); + CordRepBtree* nsub = node->CopyBeginTo(pos.index, n); + sub->edges_[end++] = nsub; + sub->set_end(end); + sub = nsub; + } + sub->set_end(pos.index); + AssertValid(result.edge->btree()); + return result; +} + +CordRep* CordRepBtree::ExtractFront(CordRepBtree* tree) { + CordRep* front = tree->Edge(tree->begin()); + if (tree->refcount.IsMutable()) { + Unref(tree->Edges(tree->begin() + 1, tree->end())); + CordRepBtree::Delete(tree); + } else { + CordRep::Ref(front); + CordRep::Unref(tree); + } + return front; +} + +CordRepBtree* CordRepBtree::ConsumeBeginTo(CordRepBtree* tree, size_t end, + size_t new_length) { + assert(end <= tree->end()); + if (tree->refcount.IsMutable()) { + Unref(tree->Edges(end, tree->end())); + tree->set_end(end); + tree->length = new_length; + } else { + CordRepBtree* old = tree; + tree = tree->CopyBeginTo(end, new_length); + CordRep::Unref(old); + } + return tree; +} + +CordRep* CordRepBtree::RemoveSuffix(CordRepBtree* tree, size_t n) { + // Check input and deal with trivial cases 'Remove all/none' + assert(tree != nullptr); + assert(n <= tree->length); + const size_t len = tree->length; + if (ABSL_PREDICT_FALSE(n == 0)) 
{ + return tree; + } + if (ABSL_PREDICT_FALSE(n >= len)) { + CordRepBtree::Unref(tree); + return nullptr; + } + + size_t length = len - n; + int height = tree->height(); + bool is_mutable = tree->refcount.IsMutable(); + + // Extract all top nodes which are reduced to size = 1 + Position pos = tree->IndexOfLength(length); + while (pos.index == tree->begin()) { + CordRep* edge = ExtractFront(tree); + is_mutable &= edge->refcount.IsMutable(); + if (height-- == 0) return ResizeEdge(edge, length, is_mutable); + tree = edge->btree(); + pos = tree->IndexOfLength(length); + } + + // Repeat the following sequence traversing down the tree: + // - Crop the top node to the 'last remaining edge' adjusting length. + // - Set the length for down edges to the partial length in that last edge. + // - Repeat this until the last edge is 'included in full' + // - If we hit the data edge level, resize and return the last data edge + CordRepBtree* top = tree = ConsumeBeginTo(tree, pos.index + 1, length); + CordRep* edge = tree->Edge(pos.index); + length = pos.n; + while (length != edge->length) { + // ConsumeBeginTo guarantees `tree` is a clean, privately owned copy. + assert(tree->refcount.IsMutable()); + const bool edge_is_mutable = edge->refcount.IsMutable(); + + if (height-- == 0) { + tree->edges_[pos.index] = ResizeEdge(edge, length, edge_is_mutable); + return AssertValid(top); + } + + if (!edge_is_mutable) { + // We can't 'in place' remove any suffixes down this edge. + // Replace this edge with a prefix copy instead. + tree->edges_[pos.index] = edge->btree()->CopyPrefix(length, false).edge; + CordRep::Unref(edge); + return AssertValid(top); + } + + // Move down one level, rinse repeat. 
+ tree = edge->btree(); + pos = tree->IndexOfLength(length); + tree = ConsumeBeginTo(edge->btree(), pos.index + 1, length); + edge = tree->Edge(pos.index); + length = pos.n; + } + + return AssertValid(top); +} + +CordRep* CordRepBtree::SubTree(size_t offset, size_t n) { + assert(n <= this->length); + assert(offset <= this->length - n); + if (ABSL_PREDICT_FALSE(n == 0)) return nullptr; + + CordRepBtree* node = this; + int height = node->height(); + Position front = node->IndexOf(offset); + CordRep* left = node->edges_[front.index]; + while (front.n + n <= left->length) { + if (--height < 0) return MakeSubstring(CordRep::Ref(left), front.n, n); + node = left->btree(); + front = node->IndexOf(front.n); + left = node->edges_[front.index]; + } + + const Position back = node->IndexBefore(front, n); + CordRep* const right = node->edges_[back.index]; + assert(back.index > front.index); + + // Get partial suffix and prefix entries. + CopyResult prefix; + CopyResult suffix; + if (height > 0) { + // Copy prefix and suffix of the boundary nodes. + prefix = left->btree()->CopySuffix(front.n); + suffix = right->btree()->CopyPrefix(back.n); + + // If there is an edge between the prefix and suffix edges, then the tree + // must remain at its previous (full) height. If we have no edges between + // prefix and suffix edges, then the tree must be as high as either the + // suffix or prefix edges (which are collapsed to their minimum heights). + if (front.index + 1 == back.index) { + height = (std::max)(prefix.height, suffix.height) + 1; + } + + // Raise prefix and suffixes to the new tree height. + for (int h = prefix.height + 1; h < height; ++h) { + prefix.edge = CordRepBtree::New(prefix.edge); + } + for (int h = suffix.height + 1; h < height; ++h) { + suffix.edge = CordRepBtree::New(suffix.edge); + } + } else { + // Leaf node, simply take substrings for prefix and suffix. 
+ prefix = CopyResult{MakeSubstring(CordRep::Ref(left), front.n), -1}; + suffix = CopyResult{MakeSubstring(CordRep::Ref(right), 0, back.n), -1}; + } + + // Compose resulting tree. + CordRepBtree* sub = CordRepBtree::New(height); + size_t end = 0; + sub->edges_[end++] = prefix.edge; + for (CordRep* r : node->Edges(front.index + 1, back.index)) { + sub->edges_[end++] = CordRep::Ref(r); + } + sub->edges_[end++] = suffix.edge; + sub->set_end(end); + sub->length = n; + return AssertValid(sub); +} + +CordRepBtree* CordRepBtree::MergeTrees(CordRepBtree* left, + CordRepBtree* right) { + return left->height() >= right->height() ? Merge<kBack>(left, right) + : Merge<kFront>(right, left); +} + +bool CordRepBtree::IsFlat(absl::string_view* fragment) const { + if (height() == 0 && size() == 1) { + if (fragment) *fragment = Data(begin()); + return true; + } + return false; +} + +bool CordRepBtree::IsFlat(size_t offset, const size_t n, + absl::string_view* fragment) const { + assert(n <= this->length); + assert(offset <= this->length - n); + if (ABSL_PREDICT_FALSE(n == 0)) return false; + int height = this->height(); + const CordRepBtree* node = this; + for (;;) { + const Position front = node->IndexOf(offset); + const CordRep* edge = node->Edge(front.index); + if (edge->length < front.n + n) return false; + if (--height < 0) { + if (fragment) *fragment = EdgeData(edge).substr(front.n, n); + return true; + } + offset = front.n; + node = node->Edge(front.index)->btree(); + } +} + +char CordRepBtree::GetCharacter(size_t offset) const { + assert(offset < length); + const CordRepBtree* node = this; + int height = node->height(); + for (;;) { + Position front = node->IndexOf(offset); + if (--height < 0) return node->Data(front.index)[front.n]; + offset = front.n; + node = node->Edge(front.index)->btree(); + } +} + +Span<char> CordRepBtree::GetAppendBufferSlow(size_t size) { + // The inlined version in `GetAppendBuffer()` deals with all heights <= 3. 
+ assert(height() >= 4); + assert(refcount.IsMutable()); + + // Build a stack of nodes we may potentially need to update if we find a + // non-shared FLAT with capacity at the leaf level. + const int depth = height(); + CordRepBtree* node = this; + CordRepBtree* stack[kMaxDepth]; + for (int i = 0; i < depth; ++i) { + node = node->Edge(kBack)->btree(); + if (!node->refcount.IsMutable()) return {}; + stack[i] = node; + } + + // Must be a privately owned, mutable flat. + CordRep* const edge = node->Edge(kBack); + if (!edge->refcount.IsMutable() || edge->tag < FLAT) return {}; + + // Must have capacity. + const size_t avail = edge->flat()->Capacity() - edge->length; + if (avail == 0) return {}; + + // Build span on remaining capacity. + size_t delta = (std::min)(size, avail); + Span<char> span = {edge->flat()->Data() + edge->length, delta}; + edge->length += delta; + this->length += delta; + for (int i = 0; i < depth; ++i) { + stack[i]->length += delta; + } + return span; +} + +CordRepBtree* CordRepBtree::CreateSlow(CordRep* rep) { + if (rep->IsBtree()) return rep->btree(); + + CordRepBtree* node = nullptr; + auto consume = [&node](CordRep* r, size_t offset, size_t length) { + r = MakeSubstring(r, offset, length); + if (node == nullptr) { + node = New(r); + } else { + node = CordRepBtree::AddCordRep<kBack>(node, r); + } + }; + Consume(rep, consume); + return node; +} + +CordRepBtree* CordRepBtree::AppendSlow(CordRepBtree* tree, CordRep* rep) { + if (ABSL_PREDICT_TRUE(rep->IsBtree())) { + return MergeTrees(tree, rep->btree()); + } + auto consume = [&tree](CordRep* r, size_t offset, size_t length) { + r = MakeSubstring(r, offset, length); + tree = CordRepBtree::AddCordRep<kBack>(tree, r); + }; + Consume(rep, consume); + return tree; +} + +CordRepBtree* CordRepBtree::PrependSlow(CordRepBtree* tree, CordRep* rep) { + if (ABSL_PREDICT_TRUE(rep->IsBtree())) { + return MergeTrees(rep->btree(), tree); + } + auto consume = [&tree](CordRep* r, size_t offset, size_t length) { + r 
= MakeSubstring(r, offset, length); + tree = CordRepBtree::AddCordRep<kFront>(tree, r); + }; + ReverseConsume(rep, consume); + return tree; +} + +CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, absl::string_view data, + size_t extra) { + return CordRepBtree::AddData<kBack>(tree, data, extra); +} + +CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, absl::string_view data, + size_t extra) { + return CordRepBtree::AddData<kFront>(tree, data, extra); +} + +template CordRepBtree* CordRepBtree::AddCordRep<kFront>(CordRepBtree* tree, + CordRep* rep); +template CordRepBtree* CordRepBtree::AddCordRep<kBack>(CordRepBtree* tree, + CordRep* rep); +template CordRepBtree* CordRepBtree::AddData<kFront>(CordRepBtree* tree, + absl::string_view data, + size_t extra); +template CordRepBtree* CordRepBtree::AddData<kBack>(CordRepBtree* tree, + absl::string_view data, + size_t extra); + +void CordRepBtree::Rebuild(CordRepBtree** stack, CordRepBtree* tree, + bool consume) { + bool owned = consume && tree->refcount.IsOne(); + if (tree->height() == 0) { + for (CordRep* edge : tree->Edges()) { + if (!owned) edge = CordRep::Ref(edge); + size_t height = 0; + size_t length = edge->length; + CordRepBtree* node = stack[0]; + OpResult result = node->AddEdge<kBack>(true, edge, length); + while (result.action == CordRepBtree::kPopped) { + stack[height] = result.tree; + if (stack[++height] == nullptr) { + result.action = CordRepBtree::kSelf; + stack[height] = CordRepBtree::New(node, result.tree); + } else { + node = stack[height]; + result = node->AddEdge<kBack>(true, result.tree, length); + } + } + while (stack[++height] != nullptr) { + stack[height]->length += length; + } + } + } else { + for (CordRep* rep : tree->Edges()) { + Rebuild(stack, rep->btree(), owned); + } + } + if (consume) { + if (owned) { + CordRepBtree::Delete(tree); + } else { + CordRepBtree::Unref(tree); + } + } +} + +CordRepBtree* CordRepBtree::Rebuild(CordRepBtree* tree) { + // Set up initial stack with empty leaf 
node. + CordRepBtree* node = CordRepBtree::New(); + CordRepBtree* stack[CordRepBtree::kMaxDepth + 1] = {node}; + + // Recursively build the tree, consuming the input tree. + Rebuild(stack, tree, /* consume reference */ true); + + // Return top most node + for (CordRepBtree* parent : stack) { + if (parent == nullptr) return node; + node = parent; + } + + // Unreachable + assert(false); + return nullptr; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree.h b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree.h new file mode 100644 index 0000000000..bb38f0c3fe --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree.h @@ -0,0 +1,939 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_ + +#include <cassert> +#include <cstdint> +#include <iosfwd> + +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/base/optimization.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/string_view.h" +#include "absl/types/span.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +class CordRepBtreeNavigator; + +// CordRepBtree is as the name implies a btree implementation of a CordRep tree. +// Data is stored at the leaf level only, non leaf nodes contain down pointers +// only. Allowed types of data edges are FLAT, EXTERNAL and SUBSTRINGs of FLAT +// or EXTERNAL nodes. The implementation allows for data to be added to either +// end of the tree only, it does not provide any 'insert' logic. This has the +// benefit that we can expect good fill ratios: all nodes except the outer +// 'legs' will have 100% fill ratios for trees built using Append/Prepend +// methods. Merged trees will typically have a fill ratio well above 50% as in a +// similar fashion, one side of the merged tree will typically have a 100% fill +// ratio, and the 'open' end will average 50%. All operations are O(log(n)) or +// better, and the tree never needs balancing. +// +// All methods accepting a CordRep* or CordRepBtree* adopt a reference on that +// input unless explicitly stated otherwise. All functions returning a CordRep* +// or CordRepBtree* instance transfer a reference back to the caller. +// Simplified, callers both 'donate' and 'consume' a reference count on each +// call, simplifying the API. 
An example of building a tree: +// +// CordRepBtree* tree = CordRepBtree::Create(MakeFlat("Hello")); +// tree = CordRepBtree::Append(tree, MakeFlat("world")); +// +// In the above example, all inputs are consumed, making each call affecting +// `tree` reference count neutral. The returned `tree` value can be different +// from the input if the input is shared with other threads, or if the tree +// grows in height, but callers typically never have to concern themselves with +// that and trust that all methods DTRT at all times. +class CordRepBtree : public CordRep { + public: + // EdgeType identifies `front` and `back` enum values. + // Various implementations in CordRepBtree such as `Add` and `Edge` are + // generic and templated on operating on either of the boundary edges. + // For more information on the possible edges contained in a CordRepBtree + // instance see the documentation for `edges_`. + enum class EdgeType { kFront, kBack }; + + // Convenience constants into `EdgeType` + static constexpr EdgeType kFront = EdgeType::kFront; + static constexpr EdgeType kBack = EdgeType::kBack; + + // Maximum number of edges: based on experiments and performance data, we can + // pick suitable values resulting in optimum cacheline aligned values. The + // preferred values are based on 64-bit systems where we aim to align this + // class onto 64 bytes, i.e.: 6 = 64 bytes, 14 = 128 bytes, etc. + // TODO(b/192061034): experiment with alternative sizes. + static constexpr size_t kMaxCapacity = 6; + + // Reasonable maximum height of the btree. We can expect a fill ratio of at + // least 50%: trees are always expanded at the front or back. Concatenating + // trees will then typically fold at the top most node, where the lower nodes + // are at least at capacity on one side of joined inputs. At a lower fill + // rate of 4 edges per node, we have capacity for ~16 million leaf nodes. 
+ // We will fail / abort if an application ever exceeds this height, which + // should be extremely rare (near impossible) and be an indication of an + // application error: we do not assume it reasonable for any application to + // operate correctly with such monster trees. + // Another compelling reason for the number `12` is that any contextual stack + // required for navigation or insertion requires 12 words and 12 bytes, which + // fits inside 2 cache lines with some room to spare, and is reasonable as a + // local stack variable compared to Cord's current near 400 bytes stack use. + // The maximum `height` value of a node is then `kMaxDepth - 1` as node height + // values start with a value of 0 for leaf nodes. + static constexpr int kMaxDepth = 12; + static constexpr int kMaxHeight = kMaxDepth - 1; + + // `Action` defines the action for unwinding changes done at the btree's leaf + // level that need to be propagated up to the parent node(s). Each operation + // on a node has an effect / action defined as follows: + // - kSelf + // The operation (add / update, etc) was performed directly on the node as + // the node is private to the current thread (i.e.: not shared directly or + // indirectly through a refcount > 1). Changes can be propagated directly to + // all parent nodes as all parent nodes are also then private to the current + // thread. + // - kCopied + // The operation (add / update, etc) was performed on a copy of the original + // node, as the node is (potentially) directly or indirectly shared with + // other threads. Changes need to be propagated into the parent nodes where + // the old down pointer must be unreffed and replaced with this new copy. + // Such changes to parent nodes may themselves require a copy if the parent + // node is also shared. A kCopied action can propagate all the way to the + // top node where we then must unref the `tree` input provided by the + // caller, and return the new copy. 
+ // - kPopped + // The operation (typically add) could not be satisfied due to insufficient + // capacity in the targeted node, and a new 'leg' was created that needs to + // be added into the parent node. For example, adding a FLAT inside a leaf + // node that is at capacity will create a new leaf node containing that + // FLAT, that needs to be 'popped' up the btree. Such 'pop' actions can + // cascade up the tree if parent nodes are also at capacity. A 'Popped' + // action propagating all the way to the top of the tree will result in + // the tree becoming one level higher than the current tree through a final + // `CordRepBtree::New(tree, popped)` call, resulting in a new top node + // referencing the old tree and the new (fully popped upwards) 'leg'. + enum Action { kSelf, kCopied, kPopped }; + + // Result of an operation on a node. See the `Action` enum for details. + struct OpResult { + CordRepBtree* tree; + Action action; + }; + + // Return value of the CopyPrefix and CopySuffix methods which can + // return a node or data edge at any height inside the tree. + // A height of 0 defines the lowest (leaf) node, a height of -1 identifies + // `edge` as being a plain data node: EXTERNAL / FLAT or SUBSTRING thereof. + struct CopyResult { + CordRep* edge; + int height; + }; + + // Logical position inside a node: + // - index: index of the edge. + // - n: size or offset value depending on context. + struct Position { + size_t index; + size_t n; + }; + + // Creates a btree from the given input. Adopts a ref of `rep`. + // If the input `rep` is itself a btree, i.e., `IsBtree()`, then this + // function immediately returns `rep->btree()`. If the input is a valid data + // edge (see IsDataEdge()), then a new leaf node is returned containing `rep` + // as the sole data edge. Else, the input is assumed to be a (legacy) concat + // tree, and the input is consumed and transformed into a btree(). 
+ static CordRepBtree* Create(CordRep* rep); + + // Destroys the provided tree. Should only be called by cord internal APIs, + // typically after a ref_count.Decrement() on the last reference count. + static void Destroy(CordRepBtree* tree); + + // Use CordRep::Unref() as we overload for absl::Span<CordRep* const>. + using CordRep::Unref; + + // Unrefs all edges in `edges` which are assumed to be 'likely one'. + static void Unref(absl::Span<CordRep* const> edges); + + // Appends / Prepends an existing CordRep instance to this tree. + // The below methods accept three types of input: + // 1) `rep` is a data edge (see `IsDataEdge` for valid data edges). + // `rep` is appended or prepended to this tree 'as is'. + // 2) `rep` is a BTREE. + // `rep` is merged into `tree` respecting the Append/Prepend order. + // 3) `rep` is some other (legacy) type. + // `rep` is converted in place and added to `tree`. + // Requires `tree` and `rep` to be not null. + static CordRepBtree* Append(CordRepBtree* tree, CordRep* rep); + static CordRepBtree* Prepend(CordRepBtree* tree, CordRep* rep); + + // Append/Prepend the data in `data` to this tree. + // The `extra` parameter defines how much extra capacity should be allocated + // for any additional FLAT being allocated. This is an optimization hint from + // the caller. For example, a caller may need to add 2 string_views of data + // "abc" and "defghi" which are not consecutive. The caller can in this case + // invoke `Append(tree, "abc", 6)`, and any newly added flat is allocated + // where possible with at least 6 bytes of extra capacity beyond `length`. + // This helps avoid data getting fragmented over multiple flats. + // There is no limit on the size of `data`. If `data` can not be stored inside + // a single flat, then the function will iteratively add flats until all data + // has been consumed and appended or prepended to the tree. 
+ static CordRepBtree* Append(CordRepBtree* tree, string_view data, + size_t extra = 0); + static CordRepBtree* Prepend(CordRepBtree* tree, string_view data, + size_t extra = 0); + + // Returns a new tree, containing `n` bytes of data from this instance + // starting at offset `offset`. Where possible, the returned tree shares + // (re-uses) data edges and nodes with this instance to minimize the + // combined memory footprint of both trees. + // Requires `offset + n <= length`. Returns `nullptr` if `n` is zero. + CordRep* SubTree(size_t offset, size_t n); + + // Removes `n` trailing bytes from `tree`, and returns the resulting tree + // or data edge. Returns `tree` if n is zero, and nullptr if n == length. + // This function is logically identical to: + // result = tree->SubTree(0, tree->length - n); + // Unref(tree); + // return result; + // However, the actual implementation will as much as possible perform 'in + // place' modifications on the tree on all nodes and edges that are mutable. + // For example, in a fully privately owned tree with the last edge being a + // flat of length 12, RemoveSuffix(1) will simply set the length of that data + // edge to 11, and reduce the length of all nodes on the edge path by 1. + static CordRep* RemoveSuffix(CordRepBtree* tree, size_t n); + + // Returns the character at the given offset. + char GetCharacter(size_t offset) const; + + // Returns true if this node holds a single data edge, and if so, sets + // `fragment` to reference the contained data. `fragment` is an optional + // output parameter and allowed to be null. + bool IsFlat(absl::string_view* fragment) const; + + // Returns true if the data of `n` bytes starting at offset `offset` + // is contained in a single data edge, and if so, sets fragment to reference + // the contained data. `fragment` is an optional output parameter and allowed + // to be null. 
+ bool IsFlat(size_t offset, size_t n, absl::string_view* fragment) const; + + // Returns a span (mutable range of bytes) of up to `size` bytes into the + // last FLAT data edge inside this tree under the following conditions: + // - none of the nodes down into the FLAT node are shared. + // - the last data edge in this tree is a non-shared FLAT. + // - the referenced FLAT has additional capacity available. + // If all these conditions are met, a non-empty span is returned, and the + // length of the flat node and involved tree nodes have been increased by + // `span.length()`. The caller is responsible for immediately assigning values + // to all uninitialized data referenced by the returned span. + // Requires `this->refcount.IsMutable()`: this function forces the + // caller to do this fast path check on the top level node, as this is the + // most commonly shared node of a cord tree. + Span<char> GetAppendBuffer(size_t size); + + // Returns the `height` of the tree. The height of a tree is limited to + // kMaxHeight. `height` is implemented as an `int` as in some places we + // use negative (-1) values for 'data edges'. + int height() const { return static_cast<int>(storage[0]); } + + // Properties: begin, back, end, front/back boundary indexes. + size_t begin() const { return static_cast<size_t>(storage[1]); } + size_t back() const { return static_cast<size_t>(storage[2]) - 1; } + size_t end() const { return static_cast<size_t>(storage[2]); } + size_t index(EdgeType edge) const { + return edge == kFront ? begin() : back(); + } + + // Properties: size and capacity. + // `capacity` contains the current capacity of this instance, where + // `kMaxCapacity` contains the maximum capacity of a btree node. + // For now, `capacity` and `kMaxCapacity` return the same value, but this may + // change in the future if we see benefit in dynamically sizing 'small' nodes + // to 'large' nodes for large data trees. 
+ size_t size() const { return end() - begin(); } + size_t capacity() const { return kMaxCapacity; } + + // Edge access + inline CordRep* Edge(size_t index) const; + inline CordRep* Edge(EdgeType edge_type) const; + inline absl::Span<CordRep* const> Edges() const; + inline absl::Span<CordRep* const> Edges(size_t begin, size_t end) const; + + // Returns reference to the data edge at `index`. + // Requires this instance to be a leaf node, and `index` to be a valid index. + inline absl::string_view Data(size_t index) const; + + static const char* EdgeDataPtr(const CordRep* r); + static absl::string_view EdgeData(const CordRep* r); + + // Returns true if the provided rep is a FLAT, EXTERNAL or a SUBSTRING node + // holding a FLAT or EXTERNAL child rep. + static bool IsDataEdge(const CordRep* rep); + + // Diagnostics: returns true if `tree` is valid and internally consistent. + // If `shallow` is false, then the provided top level node and all child nodes + // below it are recursively checked. If `shallow` is true, only the provided + // node in `tree` and the cumulative length, type and height of the direct + // child nodes of `tree` are checked. The value of `shallow` is ignored if the + // internal `cord_btree_exhaustive_validation` diagnostics variable is true, + // in which case the performed validation works as if `shallow` were false. + // This function is intended for debugging and testing purposes only. + static bool IsValid(const CordRepBtree* tree, bool shallow = false); + + // Diagnostics: asserts that the provided tree is valid. + // `AssertValid()` performs a shallow validation by default. `shallow` can be + // set to false in which case an exhaustive validation is performed. This + // function is implemented in terms of calling `IsValid()` and asserting the + // return value to be true. See `IsValid()` for more information. + // This function is intended for debugging and testing purposes only. 
+ static CordRepBtree* AssertValid(CordRepBtree* tree, bool shallow = true); + static const CordRepBtree* AssertValid(const CordRepBtree* tree, + bool shallow = true); + + // Diagnostics: dump the contents of this tree to `stream`. + // This function is intended for debugging and testing purposes only. + static void Dump(const CordRep* rep, std::ostream& stream); + static void Dump(const CordRep* rep, absl::string_view label, + std::ostream& stream); + static void Dump(const CordRep* rep, absl::string_view label, + bool include_contents, std::ostream& stream); + + // Adds the edge `edge` to this node if possible. `owned` indicates if the + // current node is potentially shared or not with other threads. Returns: + // - {<this>, kSelf} + // The edge was directly added to this node. + // - {<node>, kCopied} + // The edge was added to a copy of this node. + // - {New(edge, height()), kPopped} + // A new leg with the edge was created as this node has no extra capacity. + template <EdgeType edge_type> + inline OpResult AddEdge(bool owned, CordRep* edge, size_t delta); + + // Replaces the front or back edge with the provided new edge. Returns: + // - {<this>, kSelf} + // The edge was directly set in this node. The old edge is unreffed. + // - {<node>, kCopied} + // A copy of this node was created with the new edge value. + // In both cases, the function adopts a reference on `edge`. + template <EdgeType edge_type> + OpResult SetEdge(bool owned, CordRep* edge, size_t delta); + + // Creates a new empty node at the specified height. + static CordRepBtree* New(int height = 0); + + // Creates a new node containing `rep`, with the height being computed + // automatically based on the type of `rep`. + static CordRepBtree* New(CordRep* rep); + + // Creates a new node containing both `front` and `back` at height + // `front.height() + 1`. Requires `back.height() == front.height()`. 
+ static CordRepBtree* New(CordRepBtree* front, CordRepBtree* back); + + // Creates a fully balanced tree from the provided tree by rebuilding a new + // tree from all data edges in the input. This function is automatically + // invoked internally when the tree exceeds the maximum height. + static CordRepBtree* Rebuild(CordRepBtree* tree); + + private: + CordRepBtree() = default; + ~CordRepBtree() = default; + + // Initializes the main properties `tag`, `begin`, `end`, `height`. + inline void InitInstance(int height, size_t begin = 0, size_t end = 0); + + // Direct property access begin / end + void set_begin(size_t begin) { storage[1] = static_cast<uint8_t>(begin); } + void set_end(size_t end) { storage[2] = static_cast<uint8_t>(end); } + + // Decreases the value of `begin` by `n`, and returns the new value. Notice + // how this returns the new value unlike atomic::fetch_add which returns the + // old value. This is because this is used to prepend edges at 'begin - 1'. + size_t sub_fetch_begin(size_t n) { + storage[1] -= static_cast<uint8_t>(n); + return storage[1]; + } + + // Increases the value of `end` by `n`, and returns the previous value. This + // function is typically used to append edges at 'end'. + size_t fetch_add_end(size_t n) { + const uint8_t current = storage[2]; + storage[2] = static_cast<uint8_t>(current + n); + return current; + } + + // Returns the index of the last edge starting on, or before `offset`, with + // `n` containing the relative offset of `offset` inside that edge. + // Requires `offset` < length. + Position IndexOf(size_t offset) const; + + // Returns the index of the last edge starting before `offset`, with `n` + // containing the relative offset of `offset` inside that edge. + // This function is useful to find the edges for some span of bytes ending at + // `offset` (i.e., `n` bytes). 
For example: + // + // Position pos = IndexBefore(n) + // edges = Edges(begin(), pos.index) // All full edges (may be empty) + // last = Sub(Edge(pos.index), 0, pos.n) // Last partial edge (may be empty) + // + // Requires 0 < `offset` <= length. + Position IndexBefore(size_t offset) const; + + // Returns the index of the edge ending at (or on) length `length`, and the + // number of bytes inside that edge up to `length`. For example, if we have a + // Node with 2 edges, one of 10 and one of 20 long, then IndexOfLength(27) + // will return {1, 17}, and IndexOfLength(10) will return {0, 10}. + Position IndexOfLength(size_t n) const; + + // Identical to the above function except starting from the position `front`. + // This function is equivalent to `IndexBefore(front.n + offset)`, with + // the difference that this function is optimized to start at `front.index`. + Position IndexBefore(Position front, size_t offset) const; + + // Returns the index of the edge directly beyond the edge containing offset + // `offset`, with `n` containing the distance of that edge from `offset`. + // This function is useful for iteratively finding suffix nodes and remaining + // partial bytes in left-most suffix nodes as for example in CopySuffix. + // Requires `offset` < length. + Position IndexBeyond(size_t offset) const; + + // Destruction + static void DestroyLeaf(CordRepBtree* tree, size_t begin, size_t end); + static void DestroyNonLeaf(CordRepBtree* tree, size_t begin, size_t end); + static void DestroyTree(CordRepBtree* tree, size_t begin, size_t end); + static void Delete(CordRepBtree* tree) { delete tree; } + + // Creates a new leaf node containing as much data as possible from `data`. + // The data is added either forwards or reversed depending on `edge_type`. + // Callers must check the length of the returned node to determine if all data + // was copied or not. + // See the `Append/Prepend` function for the meaning and purpose of `extra`. 
+ template <EdgeType edge_type> + static CordRepBtree* NewLeaf(absl::string_view data, size_t extra); + + // Creates a raw copy of this Btree node, copying all properties, but + // without adding any references to existing edges. + CordRepBtree* CopyRaw() const; + + // Creates a full copy of this Btree node, adding a reference on all edges. + CordRepBtree* Copy() const; + + // Creates a partial copy of this Btree node, copying all edges up to `end`, + // adding a reference on each copied edge, and sets the length of the newly + // created copy to `new_length`. + CordRepBtree* CopyBeginTo(size_t end, size_t new_length) const; + + // Returns a tree containing the edges [tree->begin(), end) and length + // of `new_length`. This method consumes a reference on the provided + // tree, and logically performs the following operation: + // result = tree->CopyBeginTo(end, new_length); + // CordRep::Unref(tree); + // return result; + static CordRepBtree* ConsumeBeginTo(CordRepBtree* tree, size_t end, + size_t new_length); + + // Creates a partial copy of this Btree node, copying all edges starting at + // `begin`, adding a reference on each copied edge, and sets the length of + // the newly created copy to `new_length`. + CordRepBtree* CopyToEndFrom(size_t begin, size_t new_length) const; + + // Extracts and returns the front edge from the provided tree. + // This method consumes a reference on the provided tree, and logically + // performs the following operation: + // edge = CordRep::Ref(tree->Edge(kFront)); + // CordRep::Unref(tree); + // return edge; + static CordRep* ExtractFront(CordRepBtree* tree); + + // Returns a tree containing the result of appending `right` to `left`. + static CordRepBtree* MergeTrees(CordRepBtree* left, CordRepBtree* right); + + // Fallback functions for `Create()`, `Append()` and `Prepend()` which + // deal with legacy / non conforming input, i.e.: CONCAT trees. 
+ static CordRepBtree* CreateSlow(CordRep* rep); + static CordRepBtree* AppendSlow(CordRepBtree*, CordRep* rep); + static CordRepBtree* PrependSlow(CordRepBtree*, CordRep* rep); + + // Recursively rebuilds `tree` into `stack`. If `consume` is set to true, the + // function will consume a reference on `tree`. `stack` is a null terminated + // array containing the new tree's state, with the current leaf node at + // stack[0], and parent nodes above that, or null for 'top of tree'. + static void Rebuild(CordRepBtree** stack, CordRepBtree* tree, bool consume); + + // Aligns existing edges to start at index 0, to allow for a new edge to be + // added to the back of the current edges. + inline void AlignBegin(); + + // Aligns existing edges to end at `capacity`, to allow for a new edge to be + // added in front of the current edges. + inline void AlignEnd(); + + // Adds the provided edge to this node. + // Requires this node to have capacity for the edge. Realigns / moves + // existing edges as needed to prepend or append the new edge. + template <EdgeType edge_type> + inline void Add(CordRep* rep); + + // Adds the provided edges to this node. + // Requires this node to have capacity for the edges. Realigns / moves + // existing edges as needed to prepend or append the new edges. + template <EdgeType edge_type> + inline void Add(absl::Span<CordRep* const>); + + // Adds data from `data` to this node until either all data has been consumed, + // or there is no more capacity for additional flat nodes inside this node. + // Requires the current node to be a leaf node, data to be non empty, and the + // current node to have capacity for at least one more data edge. + // Returns any remaining data from `data` that was not added, which is + // depending on the edge type (front / back) either the remaining prefix or + // suffix of the input. + // See the `Append/Prepend` function for the meaning and purpose of `extra`.
+ template <EdgeType edge_type> + absl::string_view AddData(absl::string_view data, size_t extra); + + // Replace the front or back edge with the provided value. + // Adopts a reference on `edge` and unrefs the old edge. + template <EdgeType edge_type> + inline void SetEdge(CordRep* edge); + + // Returns a partial copy of the current tree containing the first `n` bytes + // of data. `CopyResult` contains both the resulting edge and its height. The + // resulting tree may be less high than the current tree, or even be a single + // matching data edge if `allow_folding` is set to true. + // For example, if `n == 1`, then the result will be the single data edge, and + // height will be set to -1 (one below the owning leaf node). If n == 0, this + // function returns null. Requires `n <= length` + CopyResult CopyPrefix(size_t n, bool allow_folding = true); + + // Returns a partial copy of the current tree containing all data starting + // after `offset`. `CopyResult` contains both the resulting edge and its + // height. The resulting tree may be less high than the current tree, or even + // be a single matching data edge. For example, if `offset == length - 1`, + // then the result will be a single data edge, and height will be set to -1 + // (one below the owning leaf node). + // Requires `offset < length` + CopyResult CopySuffix(size_t offset); + + // Returns an OpResult value of {this, kSelf} or {Copy(), kCopied} + // depending on the value of `owned`. + inline OpResult ToOpResult(bool owned); + + // Adds `rep` to the specified tree, returning the modified tree. + template <EdgeType edge_type> + static CordRepBtree* AddCordRep(CordRepBtree* tree, CordRep* rep); + + // Adds `data` to the specified tree, returning the modified tree. + // See the `Append/Prepend` function for the meaning and purpose of `extra`.
+ template <EdgeType edge_type> + static CordRepBtree* AddData(CordRepBtree* tree, absl::string_view data, + size_t extra = 0); + + // Merges `src` into `dst` with `src` being added either before (kFront) or + // after (kBack) `dst`. Requires the height of `dst` to be greater than or + // equal to the height of `src`. + template <EdgeType edge_type> + static CordRepBtree* Merge(CordRepBtree* dst, CordRepBtree* src); + + // Fallback version of GetAppendBuffer for large trees: GetAppendBuffer() + // implements an inlined version for trees of limited height (3 levels), + // GetAppendBufferSlow implements the logic for large trees. + Span<char> GetAppendBufferSlow(size_t size); + + // `edges_` contains all edges starting from this instance. + // These are explicitly `child` edges only, a cord btree (or any cord tree in + // that respect) does not store `parent` pointers anywhere: multiple trees / + // parents can reference the same shared child edge. The type of these edges + // depends on the height of the node. `Leaf nodes` (height == 0) contain `data + // edges` (external or flat nodes, or sub-strings thereof). All other nodes + // (height > 0) contain pointers to BTREE nodes with a height of `height - 1`. 
+ CordRep* edges_[kMaxCapacity]; + + friend class CordRepBtreeTestPeer; + friend class CordRepBtreeNavigator; +}; + +inline CordRepBtree* CordRep::btree() { + assert(IsBtree()); + return static_cast<CordRepBtree*>(this); +} + +inline const CordRepBtree* CordRep::btree() const { + assert(IsBtree()); + return static_cast<const CordRepBtree*>(this); +} + +inline void CordRepBtree::InitInstance(int height, size_t begin, size_t end) { + tag = BTREE; + storage[0] = static_cast<uint8_t>(height); + storage[1] = static_cast<uint8_t>(begin); + storage[2] = static_cast<uint8_t>(end); +} + +inline CordRep* CordRepBtree::Edge(size_t index) const { + assert(index >= begin()); + assert(index < end()); + return edges_[index]; +} + +inline CordRep* CordRepBtree::Edge(EdgeType edge_type) const { + return edges_[edge_type == kFront ? begin() : back()]; +} + +inline absl::Span<CordRep* const> CordRepBtree::Edges() const { + return {edges_ + begin(), size()}; +} + +inline absl::Span<CordRep* const> CordRepBtree::Edges(size_t begin, + size_t end) const { + assert(begin <= end); + assert(begin >= this->begin()); + assert(end <= this->end()); + return {edges_ + begin, static_cast<size_t>(end - begin)}; +} + +inline const char* CordRepBtree::EdgeDataPtr(const CordRep* r) { + assert(IsDataEdge(r)); + size_t offset = 0; + if (r->tag == SUBSTRING) { + offset = r->substring()->start; + r = r->substring()->child; + } + return (r->tag >= FLAT ? r->flat()->Data() : r->external()->base) + offset; +} + +inline absl::string_view CordRepBtree::EdgeData(const CordRep* r) { + return absl::string_view(EdgeDataPtr(r), r->length); +} + +inline absl::string_view CordRepBtree::Data(size_t index) const { + assert(height() == 0); + return EdgeData(Edge(index)); +} + +inline bool CordRepBtree::IsDataEdge(const CordRep* rep) { + // The fast path is that `rep` is an EXTERNAL or FLAT node, making the below + // if a single, well predicted branch. 
We then repeat the FLAT or EXTERNAL + // check in the slow path the SUBSTRING check to optimize for the hot path. + if (rep->tag == EXTERNAL || rep->tag >= FLAT) return true; + if (rep->tag == SUBSTRING) rep = rep->substring()->child; + return rep->tag == EXTERNAL || rep->tag >= FLAT; +} + +inline CordRepBtree* CordRepBtree::New(int height) { + CordRepBtree* tree = new CordRepBtree; + tree->length = 0; + tree->InitInstance(height); + return tree; +} + +inline CordRepBtree* CordRepBtree::New(CordRep* rep) { + CordRepBtree* tree = new CordRepBtree; + int height = rep->IsBtree() ? rep->btree()->height() + 1 : 0; + tree->length = rep->length; + tree->InitInstance(height, /*begin=*/0, /*end=*/1); + tree->edges_[0] = rep; + return tree; +} + +inline CordRepBtree* CordRepBtree::New(CordRepBtree* front, + CordRepBtree* back) { + assert(front->height() == back->height()); + CordRepBtree* tree = new CordRepBtree; + tree->length = front->length + back->length; + tree->InitInstance(front->height() + 1, /*begin=*/0, /*end=*/2); + tree->edges_[0] = front; + tree->edges_[1] = back; + return tree; +} + +inline void CordRepBtree::DestroyTree(CordRepBtree* tree, size_t begin, + size_t end) { + if (tree->height() == 0) { + DestroyLeaf(tree, begin, end); + } else { + DestroyNonLeaf(tree, begin, end); + } +} + +inline void CordRepBtree::Destroy(CordRepBtree* tree) { + DestroyTree(tree, tree->begin(), tree->end()); +} + +inline void CordRepBtree::Unref(absl::Span<CordRep* const> edges) { + for (CordRep* edge : edges) { + if (ABSL_PREDICT_FALSE(!edge->refcount.Decrement())) { + CordRep::Destroy(edge); + } + } +} + +inline CordRepBtree* CordRepBtree::CopyRaw() const { + auto* tree = static_cast<CordRepBtree*>(::operator new(sizeof(CordRepBtree))); + memcpy(static_cast<void*>(tree), this, sizeof(CordRepBtree)); + new (&tree->refcount) RefcountAndFlags; + return tree; +} + +inline CordRepBtree* CordRepBtree::Copy() const { + CordRepBtree* tree = CopyRaw(); + for (CordRep* rep : Edges()) 
CordRep::Ref(rep); + return tree; +} + +inline CordRepBtree* CordRepBtree::CopyToEndFrom(size_t begin, + size_t new_length) const { + assert(begin >= this->begin()); + assert(begin <= this->end()); + CordRepBtree* tree = CopyRaw(); + tree->length = new_length; + tree->set_begin(begin); + for (CordRep* edge : tree->Edges()) CordRep::Ref(edge); + return tree; +} + +inline CordRepBtree* CordRepBtree::CopyBeginTo(size_t end, + size_t new_length) const { + assert(end <= capacity()); + assert(end >= this->begin()); + CordRepBtree* tree = CopyRaw(); + tree->length = new_length; + tree->set_end(end); + for (CordRep* edge : tree->Edges()) CordRep::Ref(edge); + return tree; +} + +inline void CordRepBtree::AlignBegin() { + // The below code itself does not need to be fast as typically we have + // mono-directional append/prepend calls, and `begin` / `end` are typically + // adjusted no more than once. But we want to avoid potential register clobber + // effects, making the compiler emit register save/store/spills, and minimize + // the size of code. + const size_t delta = begin(); + if (ABSL_PREDICT_FALSE(delta != 0)) { + const size_t new_end = end() - delta; + set_begin(0); + set_end(new_end); + // TODO(mvels): we can write this using 2 loads / 2 stores depending on + // total size for the kMaxCapacity = 6 case. I.e., we can branch (switch) on + // size, and then do overlapping load/store of up to 4 pointers (inlined as + // XMM, YMM or ZMM load/store) and up to 2 pointers (XMM / YMM), which is a) + // compact and b) not clobbering any registers. + ABSL_INTERNAL_ASSUME(new_end <= kMaxCapacity); +#ifdef __clang__ +#pragma unroll 1 +#endif + for (size_t i = 0; i < new_end; ++i) { + edges_[i] = edges_[i + delta]; + } + } +} + +inline void CordRepBtree::AlignEnd() { + // See comments in `AlignBegin` for motivation on the hand-rolled for loops. 
+ const size_t delta = capacity() - end(); + if (delta != 0) { + const size_t new_begin = begin() + delta; + const size_t new_end = end() + delta; + set_begin(new_begin); + set_end(new_end); + ABSL_INTERNAL_ASSUME(new_end <= kMaxCapacity); +#ifdef __clang__ +#pragma unroll 1 +#endif + for (size_t i = new_end - 1; i >= new_begin; --i) { + edges_[i] = edges_[i - delta]; + } + } +} + +template <> +inline void CordRepBtree::Add<CordRepBtree::kBack>(CordRep* rep) { + AlignBegin(); + edges_[fetch_add_end(1)] = rep; +} + +template <> +inline void CordRepBtree::Add<CordRepBtree::kBack>( + absl::Span<CordRep* const> edges) { + AlignBegin(); + size_t new_end = end(); + for (CordRep* edge : edges) edges_[new_end++] = edge; + set_end(new_end); +} + +template <> +inline void CordRepBtree::Add<CordRepBtree::kFront>(CordRep* rep) { + AlignEnd(); + edges_[sub_fetch_begin(1)] = rep; +} + +template <> +inline void CordRepBtree::Add<CordRepBtree::kFront>( + absl::Span<CordRep* const> edges) { + AlignEnd(); + size_t new_begin = begin() - edges.size(); + set_begin(new_begin); + for (CordRep* edge : edges) edges_[new_begin++] = edge; +} + +template <CordRepBtree::EdgeType edge_type> +inline void CordRepBtree::SetEdge(CordRep* edge) { + const int idx = edge_type == kFront ? begin() : back(); + CordRep::Unref(edges_[idx]); + edges_[idx] = edge; +} + +inline CordRepBtree::OpResult CordRepBtree::ToOpResult(bool owned) { + return owned ? 
OpResult{this, kSelf} : OpResult{Copy(), kCopied}; +} + +inline CordRepBtree::Position CordRepBtree::IndexOf(size_t offset) const { + assert(offset < length); + size_t index = begin(); + while (offset >= edges_[index]->length) offset -= edges_[index++]->length; + return {index, offset}; +} + +inline CordRepBtree::Position CordRepBtree::IndexBefore(size_t offset) const { + assert(offset > 0); + assert(offset <= length); + size_t index = begin(); + while (offset > edges_[index]->length) offset -= edges_[index++]->length; + return {index, offset}; +} + +inline CordRepBtree::Position CordRepBtree::IndexBefore(Position front, + size_t offset) const { + size_t index = front.index; + offset = offset + front.n; + while (offset > edges_[index]->length) offset -= edges_[index++]->length; + return {index, offset}; +} + +inline CordRepBtree::Position CordRepBtree::IndexOfLength(size_t n) const { + assert(n <= length); + size_t index = back(); + size_t strip = length - n; + while (strip >= edges_[index]->length) strip -= edges_[index--]->length; + return {index, edges_[index]->length - strip}; +} + +inline CordRepBtree::Position CordRepBtree::IndexBeyond( + const size_t offset) const { + // We need to find the edge which `starting offset` is beyond (>=)`offset`. + // For this we can't use the `offset -= length` logic of IndexOf. Instead, we + // track the offset of the `current edge` in `off`, which we increase as we + // iterate over the edges until we find the matching edge. 
+ size_t off = 0; + size_t index = begin(); + while (offset > off) off += edges_[index++]->length; + return {index, off - offset}; +} + +inline CordRepBtree* CordRepBtree::Create(CordRep* rep) { + if (IsDataEdge(rep)) return New(rep); + return CreateSlow(rep); +} + +inline Span<char> CordRepBtree::GetAppendBuffer(size_t size) { + assert(refcount.IsMutable()); + CordRepBtree* tree = this; + const int height = this->height(); + CordRepBtree* n1 = tree; + CordRepBtree* n2 = tree; + CordRepBtree* n3 = tree; + switch (height) { + case 3: + tree = tree->Edge(kBack)->btree(); + if (!tree->refcount.IsMutable()) return {}; + n2 = tree; + ABSL_FALLTHROUGH_INTENDED; + case 2: + tree = tree->Edge(kBack)->btree(); + if (!tree->refcount.IsMutable()) return {}; + n1 = tree; + ABSL_FALLTHROUGH_INTENDED; + case 1: + tree = tree->Edge(kBack)->btree(); + if (!tree->refcount.IsMutable()) return {}; + ABSL_FALLTHROUGH_INTENDED; + case 0: + CordRep* edge = tree->Edge(kBack); + if (!edge->refcount.IsMutable()) return {}; + if (edge->tag < FLAT) return {}; + size_t avail = edge->flat()->Capacity() - edge->length; + if (avail == 0) return {}; + size_t delta = (std::min)(size, avail); + Span<char> span = {edge->flat()->Data() + edge->length, delta}; + edge->length += delta; + switch (height) { + case 3: + n3->length += delta; + ABSL_FALLTHROUGH_INTENDED; + case 2: + n2->length += delta; + ABSL_FALLTHROUGH_INTENDED; + case 1: + n1->length += delta; + ABSL_FALLTHROUGH_INTENDED; + case 0: + tree->length += delta; + return span; + } + break; + } + return GetAppendBufferSlow(size); +} + +extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kBack>( + CordRepBtree* tree, CordRep* rep); + +extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kFront>( + CordRepBtree* tree, CordRep* rep); + +inline CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, CordRep* rep) { + if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) { + return CordRepBtree::AddCordRep<kBack>(tree, rep); + } + 
return AppendSlow(tree, rep); +} + +inline CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, CordRep* rep) { + if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) { + return CordRepBtree::AddCordRep<kFront>(tree, rep); + } + return PrependSlow(tree, rep); +} + +#ifdef NDEBUG + +inline CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree, + bool /* shallow */) { + return tree; +} + +inline const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree, + bool /* shallow */) { + return tree; +} + +#endif + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_navigator.cc b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_navigator.cc new file mode 100644 index 0000000000..d1f9995d00 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_navigator.cc @@ -0,0 +1,185 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_rep_btree_navigator.h" + +#include <cassert> + +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +using ReadResult = CordRepBtreeNavigator::ReadResult; + +namespace { + +// Returns a `CordRepSubstring` from `rep` starting at `offset` of size `n`. 
+// If `rep` is already a `CordRepSubstring` instance, an adjusted instance is +// created based on the old offset and new offset. +// Adopts a reference on `rep`. Rep must be a valid data edge. Returns +// nullptr if `n == 0`, `rep` if `n == rep->length`. +// Requires `offset < rep->length` and `offset + n <= rep->length`. +// TODO(192061034): move to utility library in internal and optimize for small +// substrings of larger reps. +inline CordRep* Substring(CordRep* rep, size_t offset, size_t n) { + assert(n <= rep->length); + assert(offset < rep->length); + assert(offset <= rep->length - n); + assert(CordRepBtree::IsDataEdge(rep)); + + if (n == 0) return nullptr; + if (n == rep->length) return CordRep::Ref(rep); + + if (rep->tag == SUBSTRING) { + offset += rep->substring()->start; + rep = rep->substring()->child; + } + + CordRepSubstring* substring = new CordRepSubstring(); + substring->length = n; + substring->tag = SUBSTRING; + substring->start = offset; + substring->child = CordRep::Ref(rep); + return substring; +} + +inline CordRep* Substring(CordRep* rep, size_t offset) { + return Substring(rep, offset, rep->length - offset); +} + +} // namespace + +CordRepBtreeNavigator::Position CordRepBtreeNavigator::Skip(size_t n) { + int height = 0; + size_t index = index_[0]; + CordRepBtree* node = node_[0]; + CordRep* edge = node->Edge(index); + + // Overall logic: Find an edge of at least the length we need to skip. + // We consume all edges which are smaller (i.e., must be 100% skipped). + // If we exhausted all edges on the current level, we move one level + // up the tree, and repeat until we either find the edge, or until we hit + // the top of the tree meaning the skip exceeds tree->length. 
+ while (n >= edge->length) { + n -= edge->length; + while (++index == node->end()) { + if (++height > height_) return {nullptr, n}; + node = node_[height]; + index = index_[height]; + } + edge = node->Edge(index); + } + + // If we moved up the tree, descend down to the leaf level, consuming all + // edges that must be skipped. + while (height > 0) { + node = edge->btree(); + index_[height] = index; + node_[--height] = node; + index = node->begin(); + edge = node->Edge(index); + while (n >= edge->length) { + n -= edge->length; + ++index; + assert(index != node->end()); + edge = node->Edge(index); + } + } + index_[0] = index; + return {edge, n}; +} + +ReadResult CordRepBtreeNavigator::Read(size_t edge_offset, size_t n) { + int height = 0; + size_t length = edge_offset + n; + size_t index = index_[0]; + CordRepBtree* node = node_[0]; + CordRep* edge = node->Edge(index); + assert(edge_offset < edge->length); + + if (length < edge->length) { + return {Substring(edge, edge_offset, n), length}; + } + + // Similar to 'Skip', we consume all edges that are inside the 'length' of + // data that needs to be read. If we exhaust the current level, we move one + // level up the tree and repeat until we hit the final edge that must be + // (partially) read. We consume all edges into `subtree`. 
+ CordRepBtree* subtree = CordRepBtree::New(Substring(edge, edge_offset)); + size_t subtree_end = 1; + do { + length -= edge->length; + while (++index == node->end()) { + index_[height] = index; + if (++height > height_) { + subtree->set_end(subtree_end); + if (length == 0) return {subtree, 0}; + CordRep::Unref(subtree); + return {nullptr, length}; + } + if (length != 0) { + subtree->set_end(subtree_end); + subtree = CordRepBtree::New(subtree); + subtree_end = 1; + } + node = node_[height]; + index = index_[height]; + } + edge = node->Edge(index); + if (length >= edge->length) { + subtree->length += edge->length; + subtree->edges_[subtree_end++] = CordRep::Ref(edge); + } + } while (length >= edge->length); + CordRepBtree* tree = subtree; + subtree->length += length; + + // If we moved up the tree, descend down to the leaf level, consuming all + // edges that must be read, adding 'down' nodes to `subtree`. + while (height > 0) { + node = edge->btree(); + index_[height] = index; + node_[--height] = node; + index = node->begin(); + edge = node->Edge(index); + + if (length != 0) { + CordRepBtree* right = CordRepBtree::New(height); + right->length = length; + subtree->edges_[subtree_end++] = right; + subtree->set_end(subtree_end); + subtree = right; + subtree_end = 0; + while (length >= edge->length) { + subtree->edges_[subtree_end++] = CordRep::Ref(edge); + length -= edge->length; + edge = node->Edge(++index); + } + } + } + // Add any (partial) edge still remaining at the leaf level. 
+ if (length != 0) { + subtree->edges_[subtree_end++] = Substring(edge, 0, length); + } + subtree->set_end(subtree_end); + index_[0] = index; + return {tree, length}; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_navigator.h b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_navigator.h new file mode 100644 index 0000000000..971b92eda6 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_navigator.h @@ -0,0 +1,265 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_NAVIGATOR_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_NAVIGATOR_H_ + +#include <cassert> +#include <iostream> + +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordRepBtreeNavigator is a bi-directional navigator allowing callers to +// navigate all the (leaf) data edges in a CordRepBtree instance. +// +// A CordRepBtreeNavigator instance is by default empty. Callers initialize a +// navigator instance by calling one of `InitFirst()`, `InitLast()` or +// `InitOffset()`, which establishes a current position. Callers can then +// navigate using the `Next`, `Previous`, `Skip` and `Seek` methods. 
+// +// The navigator instance does not take or adopt a reference on the provided +// `tree` on any of the initialization calls. Callers are responsible for +// guaranteeing the lifecycle of the provided tree. A navigator instance can +// be reset to the empty state by calling `Reset`. +// +// A navigator only keeps positional state on the 'current data edge', it does +// explicitly not keep any 'offset' state. The class does accept and return +// offsets in the `Read()`, `Skip()` and `Seek()` methods as these would +// otherwise put a big burden on callers. Callers are expected to maintain +// (returned) offset info if they require such granular state. +class CordRepBtreeNavigator { + public: + // The logical position as returned by the Seek() and Skip() functions. + // Returns the current leaf edge for the desired seek or skip position and + // the offset of that position inside that edge. + struct Position { + CordRep* edge; + size_t offset; + }; + + // The read result as returned by the Read() function. + // `tree` contains the resulting tree which is identical to the result + // of calling CordRepBtree::SubTree(...) on the tree being navigated. + // `n` contains the number of bytes used from the last navigated to + // edge of the tree. + struct ReadResult { + CordRep* tree; + size_t n; + }; + + // Returns true if this instance is not empty. + explicit operator bool() const; + + // Returns the tree for this instance or nullptr if empty. + CordRepBtree* btree() const; + + // Returns the data edge of the current position. + // Requires this instance to not be empty. + CordRep* Current() const; + + // Resets this navigator to `tree`, returning the first data edge in the tree. + CordRep* InitFirst(CordRepBtree* tree); + + // Resets this navigator to `tree`, returning the last data edge in the tree.
+ CordRep* InitLast(CordRepBtree* tree); + + // Resets this navigator to `tree` returning the data edge at position + // `offset` and the relative offset of `offset` into that data edge. + // Returns `Position.edge = nullptr` if the provided offset is greater + // than or equal to the length of the tree, in which case the state of + // the navigator instance remains unchanged. + Position InitOffset(CordRepBtree* tree, size_t offset); + + // Navigates to the next data edge. + // Returns the next data edge or nullptr if there is no next data edge, in + // which case the current position remains unchanged. + CordRep* Next(); + + // Navigates to the previous data edge. + // Returns the previous data edge or nullptr if there is no previous data + // edge, in which case the current position remains unchanged. + CordRep* Previous(); + + // Navigates to the data edge at position `offset`. Returns the navigated to + // data edge in `Position.edge` and the relative offset of `offset` into that + // data edge in `Position.offset`. Returns `Position.edge = nullptr` if the + // provided offset is greater than or equal to the tree's length. + Position Seek(size_t offset); + + // Reads `n` bytes of data starting at offset `edge_offset` of the current + // data edge, and returns the result in `ReadResult.tree`. `ReadResult.n` + // contains the 'bytes used' from the last / current data edge in the tree. + // This allows users that mix regular navigation (using string views) and + // 'read into cord' navigation to keep track of the current state, and which + // bytes have been consumed from a navigator. + // This function returns `ReadResult.tree = nullptr` if the requested length + // exceeds the length of the tree starting at the current data edge.
+ ReadResult Read(size_t edge_offset, size_t n); + + // Skips `n` bytes forward from the current data edge, returning the navigated + // to data edge in `Position.edge` and `Position.offset` containing the offset + // inside that data edge. Note that the state of the navigator is left + // unchanged if `n` is smaller than the length of the current data edge. + Position Skip(size_t n); + + // Resets this instance to the default / empty state. + void Reset(); + + private: + // Slow path for Next() if Next() reached the end of a leaf node. Backtracks + // up the stack until it finds a node that has a 'next' position available, + // and then does a 'front dive' towards the next leaf node. + CordRep* NextUp(); + + // Slow path for Previous() if Previous() reached the beginning of a leaf + // node. Backtracks up the stack until it finds a node that has a 'previous' + // position available, and then does a 'back dive' towards the previous leaf + // node. + CordRep* PreviousUp(); + + // Generic implementation of InitFirst() and InitLast(). + template <CordRepBtree::EdgeType edge_type> + CordRep* Init(CordRepBtree* tree); + + // `height_` contains the height of the current tree, or -1 if empty. + int height_ = -1; + + // `index_` and `node_` contain the navigation state as the 'path' to the + // current data edge which is at `node_[0]->Edge(index_[0])`. The contents + // of these are undefined until the instance is initialized (`height_ >= 0`). + uint8_t index_[CordRepBtree::kMaxHeight]; + CordRepBtree* node_[CordRepBtree::kMaxHeight]; +}; + +// Returns true if this instance is not empty. +inline CordRepBtreeNavigator::operator bool() const { return height_ >= 0; } + +inline CordRepBtree* CordRepBtreeNavigator::btree() const { + return height_ >= 0 ? 
node_[height_] : nullptr; +} + +inline CordRep* CordRepBtreeNavigator::Current() const { + assert(height_ >= 0); + return node_[0]->Edge(index_[0]); +} + +inline void CordRepBtreeNavigator::Reset() { height_ = -1; } + +inline CordRep* CordRepBtreeNavigator::InitFirst(CordRepBtree* tree) { + return Init<CordRepBtree::kFront>(tree); +} + +inline CordRep* CordRepBtreeNavigator::InitLast(CordRepBtree* tree) { + return Init<CordRepBtree::kBack>(tree); +} + +template <CordRepBtree::EdgeType edge_type> +inline CordRep* CordRepBtreeNavigator::Init(CordRepBtree* tree) { + assert(tree != nullptr); + assert(tree->size() > 0); + int height = height_ = tree->height(); + size_t index = tree->index(edge_type); + node_[height] = tree; + index_[height] = static_cast<uint8_t>(index); + while (--height >= 0) { + tree = tree->Edge(index)->btree(); + node_[height] = tree; + index = tree->index(edge_type); + index_[height] = static_cast<uint8_t>(index); + } + return node_[0]->Edge(index); +} + +inline CordRepBtreeNavigator::Position CordRepBtreeNavigator::Seek( + size_t offset) { + assert(btree() != nullptr); + int height = height_; + CordRepBtree* edge = node_[height]; + if (ABSL_PREDICT_FALSE(offset >= edge->length)) return {nullptr, 0}; + CordRepBtree::Position index = edge->IndexOf(offset); + index_[height] = static_cast<uint8_t>(index.index); + while (--height >= 0) { + edge = edge->Edge(index.index)->btree(); + node_[height] = edge; + index = edge->IndexOf(index.n); + index_[height] = static_cast<uint8_t>(index.index); + } + return {edge->Edge(index.index), index.n}; +} + +inline CordRepBtreeNavigator::Position CordRepBtreeNavigator::InitOffset( + CordRepBtree* tree, size_t offset) { + assert(tree != nullptr); + if (ABSL_PREDICT_FALSE(offset >= tree->length)) return {nullptr, 0}; + height_ = tree->height(); + node_[height_] = tree; + return Seek(offset); +} + +inline CordRep* CordRepBtreeNavigator::Next() { + CordRepBtree* edge = node_[0]; + return index_[0] == edge->back() ? 
NextUp() : edge->Edge(++index_[0]); +} + +inline CordRep* CordRepBtreeNavigator::Previous() { + CordRepBtree* edge = node_[0]; + return index_[0] == edge->begin() ? PreviousUp() : edge->Edge(--index_[0]); +} + +inline CordRep* CordRepBtreeNavigator::NextUp() { + assert(index_[0] == node_[0]->back()); + CordRepBtree* edge; + size_t index; + int height = 0; + do { + if (++height > height_) return nullptr; + edge = node_[height]; + index = index_[height] + 1; + } while (index == edge->end()); + index_[height] = static_cast<uint8_t>(index); + do { + node_[--height] = edge = edge->Edge(index)->btree(); + index_[height] = static_cast<uint8_t>(index = edge->begin()); + } while (height > 0); + return edge->Edge(index); +} + +inline CordRep* CordRepBtreeNavigator::PreviousUp() { + assert(index_[0] == node_[0]->begin()); + CordRepBtree* edge; + size_t index; + int height = 0; + do { + if (++height > height_) return nullptr; + edge = node_[height]; + index = index_[height]; + } while (index == edge->begin()); + index_[height] = static_cast<uint8_t>(--index); + do { + node_[--height] = edge = edge->Edge(index)->btree(); + index_[height] = static_cast<uint8_t>(index = edge->back()); + } while (height > 0); + return edge->Edge(index); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_NAVIGATOR_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_navigator_test.cc b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_navigator_test.cc new file mode 100644 index 0000000000..ce09b1992a --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_navigator_test.cc @@ -0,0 +1,325 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_rep_btree_navigator.h" + +#include <string> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_test_util.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::testing::Eq; +using ::testing::Ne; + +using ::absl::cordrep_testing::CordRepBtreeFromFlats; +using ::absl::cordrep_testing::CordToString; +using ::absl::cordrep_testing::CreateFlatsFromString; +using ::absl::cordrep_testing::CreateRandomString; +using ::absl::cordrep_testing::MakeFlat; +using ::absl::cordrep_testing::MakeSubstring; + +using ReadResult = CordRepBtreeNavigator::ReadResult; +using Position = CordRepBtreeNavigator::Position; + +// CordRepBtreeNavigatorTest is a test fixture which automatically creates a +// tree to test navigation logic on. The parameter `count' defines the number of +// data edges in the test tree. 
+class CordRepBtreeNavigatorTest : public testing::TestWithParam<int> { + public: + using Flats = std::vector<CordRep*>; + static constexpr size_t kCharsPerFlat = 3; + + CordRepBtreeNavigatorTest() { + data_ = CreateRandomString(count() * kCharsPerFlat); + flats_ = CreateFlatsFromString(data_, kCharsPerFlat); + + // Turn flat 0 or 1 into a substring to cover partial reads on substrings. + if (count() > 1) { + CordRep::Unref(flats_[1]); + flats_[1] = MakeSubstring(kCharsPerFlat, kCharsPerFlat, MakeFlat(data_)); + } else { + CordRep::Unref(flats_[0]); + flats_[0] = MakeSubstring(0, kCharsPerFlat, MakeFlat(data_)); + } + + tree_ = CordRepBtreeFromFlats(flats_); + } + + ~CordRepBtreeNavigatorTest() override { CordRep::Unref(tree_); } + + int count() const { return GetParam(); } + CordRepBtree* tree() { return tree_; } + const std::string& data() const { return data_; } + const std::vector<CordRep*>& flats() const { return flats_; } + + static std::string ToString(testing::TestParamInfo<int> param) { + return absl::StrCat(param.param, "_Flats"); + } + + private: + std::string data_; + Flats flats_; + CordRepBtree* tree_; +}; + +INSTANTIATE_TEST_SUITE_P( + WithParam, CordRepBtreeNavigatorTest, + testing::Values(1, CordRepBtree::kMaxCapacity - 1, + CordRepBtree::kMaxCapacity, + CordRepBtree::kMaxCapacity* CordRepBtree::kMaxCapacity - 1, + CordRepBtree::kMaxCapacity* CordRepBtree::kMaxCapacity, + CordRepBtree::kMaxCapacity* CordRepBtree::kMaxCapacity + 1, + CordRepBtree::kMaxCapacity* CordRepBtree::kMaxCapacity * 2 + + 17), + CordRepBtreeNavigatorTest::ToString); + +TEST(CordRepBtreeNavigatorTest, Uninitialized) { + CordRepBtreeNavigator nav; + EXPECT_FALSE(nav); + EXPECT_THAT(nav.btree(), Eq(nullptr)); +#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG) + EXPECT_DEATH(nav.Current(), ".*"); +#endif +} + +TEST_P(CordRepBtreeNavigatorTest, InitFirst) { + CordRepBtreeNavigator nav; + CordRep* edge = nav.InitFirst(tree()); + EXPECT_TRUE(nav); + EXPECT_THAT(nav.btree(), 
Eq(tree())); + EXPECT_THAT(nav.Current(), Eq(flats().front())); + EXPECT_THAT(edge, Eq(flats().front())); +} + +TEST_P(CordRepBtreeNavigatorTest, InitLast) { + CordRepBtreeNavigator nav; + CordRep* edge = nav.InitLast(tree()); + EXPECT_TRUE(nav); + EXPECT_THAT(nav.btree(), Eq(tree())); + EXPECT_THAT(nav.Current(), Eq(flats().back())); + EXPECT_THAT(edge, Eq(flats().back())); +} + +TEST_P(CordRepBtreeNavigatorTest, NextPrev) { + CordRepBtreeNavigator nav; + nav.InitFirst(tree()); + const Flats& flats = this->flats(); + + EXPECT_THAT(nav.Previous(), Eq(nullptr)); + EXPECT_THAT(nav.Current(), Eq(flats.front())); + for (int i = 1; i < flats.size(); ++i) { + ASSERT_THAT(nav.Next(), Eq(flats[i])); + EXPECT_THAT(nav.Current(), Eq(flats[i])); + } + EXPECT_THAT(nav.Next(), Eq(nullptr)); + EXPECT_THAT(nav.Current(), Eq(flats.back())); + for (int i = static_cast<int>(flats.size()) - 2; i >= 0; --i) { + ASSERT_THAT(nav.Previous(), Eq(flats[i])); + EXPECT_THAT(nav.Current(), Eq(flats[i])); + } + EXPECT_THAT(nav.Previous(), Eq(nullptr)); + EXPECT_THAT(nav.Current(), Eq(flats.front())); +} + +TEST_P(CordRepBtreeNavigatorTest, PrevNext) { + CordRepBtreeNavigator nav; + nav.InitLast(tree()); + const Flats& flats = this->flats(); + + EXPECT_THAT(nav.Next(), Eq(nullptr)); + EXPECT_THAT(nav.Current(), Eq(flats.back())); + for (int i = static_cast<int>(flats.size()) - 2; i >= 0; --i) { + ASSERT_THAT(nav.Previous(), Eq(flats[i])); + EXPECT_THAT(nav.Current(), Eq(flats[i])); + } + EXPECT_THAT(nav.Previous(), Eq(nullptr)); + EXPECT_THAT(nav.Current(), Eq(flats.front())); + for (int i = 1; i < flats.size(); ++i) { + ASSERT_THAT(nav.Next(), Eq(flats[i])); + EXPECT_THAT(nav.Current(), Eq(flats[i])); + } + EXPECT_THAT(nav.Next(), Eq(nullptr)); + EXPECT_THAT(nav.Current(), Eq(flats.back())); +} + +TEST(CordRepBtreeNavigatorTest, Reset) { + CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc")); + CordRepBtreeNavigator nav; + nav.InitFirst(tree); + nav.Reset(); + EXPECT_FALSE(nav); + 
EXPECT_THAT(nav.btree(), Eq(nullptr)); +#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG) + EXPECT_DEATH(nav.Current(), ".*"); +#endif + CordRep::Unref(tree); +} + +TEST_P(CordRepBtreeNavigatorTest, Skip) { + int count = this->count(); + const Flats& flats = this->flats(); + CordRepBtreeNavigator nav; + nav.InitFirst(tree()); + + for (int char_offset = 0; char_offset < kCharsPerFlat; ++char_offset) { + Position pos = nav.Skip(char_offset); + EXPECT_THAT(pos.edge, Eq(nav.Current())); + EXPECT_THAT(pos.edge, Eq(flats[0])); + EXPECT_THAT(pos.offset, Eq(char_offset)); + } + + for (int index1 = 0; index1 < count; ++index1) { + for (int index2 = index1; index2 < count; ++index2) { + for (int char_offset = 0; char_offset < kCharsPerFlat; ++char_offset) { + CordRepBtreeNavigator nav; + nav.InitFirst(tree()); + + size_t length1 = index1 * kCharsPerFlat; + Position pos1 = nav.Skip(length1 + char_offset); + ASSERT_THAT(pos1.edge, Eq(flats[index1])); + ASSERT_THAT(pos1.edge, Eq(nav.Current())); + ASSERT_THAT(pos1.offset, Eq(char_offset)); + + size_t length2 = index2 * kCharsPerFlat; + Position pos2 = nav.Skip(length2 - length1 + char_offset); + ASSERT_THAT(pos2.edge, Eq(flats[index2])); + ASSERT_THAT(pos2.edge, Eq(nav.Current())); + ASSERT_THAT(pos2.offset, Eq(char_offset)); + } + } + } +} + +TEST_P(CordRepBtreeNavigatorTest, Seek) { + int count = this->count(); + const Flats& flats = this->flats(); + CordRepBtreeNavigator nav; + nav.InitFirst(tree()); + + for (int char_offset = 0; char_offset < kCharsPerFlat; ++char_offset) { + Position pos = nav.Seek(char_offset); + EXPECT_THAT(pos.edge, Eq(nav.Current())); + EXPECT_THAT(pos.edge, Eq(flats[0])); + EXPECT_THAT(pos.offset, Eq(char_offset)); + } + + for (int index = 0; index < count; ++index) { + for (int char_offset = 0; char_offset < kCharsPerFlat; ++char_offset) { + size_t offset = index * kCharsPerFlat + char_offset; + Position pos1 = nav.Seek(offset); + ASSERT_THAT(pos1.edge, Eq(flats[index])); + ASSERT_THAT(pos1.edge, 
Eq(nav.Current())); + ASSERT_THAT(pos1.offset, Eq(char_offset)); + } + } +} + +TEST(CordRepBtreeNavigatorTest, InitOffset) { + // Whitebox: InitOffset() is implemented in terms of Seek() which is + // exhaustively tested. Only test it initializes / forwards properly.. + CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc")); + tree = CordRepBtree::Append(tree, MakeFlat("def")); + CordRepBtreeNavigator nav; + Position pos = nav.InitOffset(tree, 5); + EXPECT_TRUE(nav); + EXPECT_THAT(nav.btree(), Eq(tree)); + EXPECT_THAT(pos.edge, Eq(tree->Edges()[1])); + EXPECT_THAT(pos.edge, Eq(nav.Current())); + EXPECT_THAT(pos.offset, Eq(2)); + CordRep::Unref(tree); +} + +TEST(CordRepBtreeNavigatorTest, InitOffsetAndSeekBeyondLength) { + CordRepBtree* tree1 = CordRepBtree::Create(MakeFlat("abc")); + CordRepBtree* tree2 = CordRepBtree::Create(MakeFlat("def")); + + CordRepBtreeNavigator nav; + nav.InitFirst(tree1); + EXPECT_THAT(nav.Seek(3).edge, Eq(nullptr)); + EXPECT_THAT(nav.Seek(100).edge, Eq(nullptr)); + EXPECT_THAT(nav.btree(), Eq(tree1)); + EXPECT_THAT(nav.Current(), Eq(tree1->Edges().front())); + + EXPECT_THAT(nav.InitOffset(tree2, 3).edge, Eq(nullptr)); + EXPECT_THAT(nav.InitOffset(tree2, 100).edge, Eq(nullptr)); + EXPECT_THAT(nav.btree(), Eq(tree1)); + EXPECT_THAT(nav.Current(), Eq(tree1->Edges().front())); + + CordRep::Unref(tree1); + CordRep::Unref(tree2); +} + +TEST_P(CordRepBtreeNavigatorTest, Read) { + const Flats& flats = this->flats(); + const std::string& data = this->data(); + + for (size_t offset = 0; offset < data.size(); ++offset) { + for (size_t length = 1; length <= data.size() - offset; ++length) { + CordRepBtreeNavigator nav; + nav.InitFirst(tree()); + + // Skip towards edge holding offset + size_t edge_offset = nav.Skip(offset).offset; + + // Read node + ReadResult result = nav.Read(edge_offset, length); + ASSERT_THAT(result.tree, Ne(nullptr)); + EXPECT_THAT(result.tree->length, Eq(length)); + if (result.tree->tag == BTREE) { + 
ASSERT_TRUE(CordRepBtree::IsValid(result.tree->btree())); + } + + // Verify contents + std::string value = CordToString(result.tree); + EXPECT_THAT(value, Eq(data.substr(offset, length))); + + // Verify 'partial last edge' reads. + size_t partial = (offset + length) % kCharsPerFlat; + ASSERT_THAT(result.n, Eq(partial)); + + // Verify ending position if not EOF + if (offset + length < data.size()) { + size_t index = (offset + length) / kCharsPerFlat; + EXPECT_THAT(nav.Current(), Eq(flats[index])); + } + + CordRep::Unref(result.tree); + } + } +} + +TEST_P(CordRepBtreeNavigatorTest, ReadBeyondLengthOfTree) { + CordRepBtreeNavigator nav; + nav.InitFirst(tree()); + ReadResult result = nav.Read(2, tree()->length); + ASSERT_THAT(result.tree, Eq(nullptr)); +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_reader.cc b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_reader.cc new file mode 100644 index 0000000000..5dc76966d2 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_reader.cc @@ -0,0 +1,68 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/internal/cord_rep_btree_reader.h" + +#include <cassert> + +#include "absl/base/config.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_btree_navigator.h" +#include "absl/strings/internal/cord_rep_flat.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +absl::string_view CordRepBtreeReader::Read(size_t n, size_t chunk_size, + CordRep*& tree) { + assert(chunk_size <= navigator_.Current()->length); + + // If chunk_size is non-zero, we need to start inside last returned edge. + // Else we start reading at the next data edge of the tree. + CordRep* edge = chunk_size ? navigator_.Current() : navigator_.Next(); + const size_t offset = chunk_size ? edge->length - chunk_size : 0; + + // Read the sub tree and verify we got what we wanted. + ReadResult result = navigator_.Read(offset, n); + tree = result.tree; + + // If the data returned in `tree` was covered entirely by `chunk_size`, i.e., + // read from the 'previous' edge, we did not consume any additional data, and + // can directly return the substring into the current data edge as the next + // chunk. We can easily establish from the above code that `navigator_.Next()` + // has not been called as that requires `chunk_size` to be zero. + if (n < chunk_size) return CordRepBtree::EdgeData(edge).substr(result.n); + + // The amount of data taken from the last edge is `chunk_size` and `result.n` + // contains the offset into the current edge trailing the read data (which can + // be 0). As the call to `navigator_.Read()` could have consumed all remaining + // data, calling `navigator_.Current()` is not safe before checking if we + // already consumed all remaining data. + const size_t consumed_by_read = n - chunk_size - result.n; + if (consumed_by_read >= remaining_) { + remaining_ = 0; + return {}; + } + + // We did not read all data, return remaining data from current edge. 
+ edge = navigator_.Current(); + remaining_ -= consumed_by_read + edge->length; + return CordRepBtree::EdgeData(edge).substr(result.n); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_reader.h b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_reader.h new file mode 100644 index 0000000000..7aa79dbf10 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_reader.h @@ -0,0 +1,211 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_ + +#include <cassert> + +#include "absl/base/config.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_btree_navigator.h" +#include "absl/strings/internal/cord_rep_flat.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordRepBtreeReader implements logic to iterate over cord btrees. +// References to the underlying data are returned as absl::string_view values. +// The most typical use case is a forward only iteration over tree data. +// The class also provides `Skip()`, `Seek()` and `Read()` methods similar to +// CordRepBtreeNavigator that allow more advanced navigation. 
+// +// Example: iterate over all data inside a cord btree: +// +// CordRepBtreeReader reader; +// for (string_view sv = reader.Init(tree); !sv.empty(); sv = reader.Next()) { +// DoSomethingWithDataIn(sv); +// } +// +// All navigation methods always return the next 'chunk' of data. The class +// assumes that all data is directly 'consumed' by the caller. For example: +// invoking `Skip()` will skip the desired number of bytes, and directly +// read and return the next chunk of data directly after the skipped bytes. +// +// Example: iterate over all data inside a btree skipping the first 100 bytes: +// +// CordRepBtreeReader reader; +// absl::string_view sv = reader.Init(tree); +// if (sv.length() > 100) { +// sv.remove_prefix(100); +// } else { +// sv = reader.Skip(100 - sv.length()); +// } +// while (!sv.empty()) { +// DoSomethingWithDataIn(sv); +// sv = reader.Next(); +// } +// +// It is important to notice that `remaining` is based on the end position of +// the last data edge returned to the caller, not the cumulative data returned +// to the caller which can be less in cases of skipping or seeking over data. +// +// For example, consider a cord btree with five data edges: "abc", "def", "ghi", +// "jkl" and "mno": +// +// absl::string_view sv; +// CordRepBtreeReader reader; +// +// sv = reader.Init(tree); // sv = "abc", remaining = 12 +// sv = reader.Skip(4); // sv = "hi", remaining = 6 +// sv = reader.Skip(2); // sv = "l", remaining = 3 +// sv = reader.Next(); // sv = "mno", remaining = 0 +// sv = reader.Seek(1); // sv = "bc", remaining = 12 +// +class CordRepBtreeReader { + public: + using ReadResult = CordRepBtreeNavigator::ReadResult; + using Position = CordRepBtreeNavigator::Position; + + // Returns true if this instance is not empty. + explicit operator bool() const { return navigator_.btree() != nullptr; } + + // Returns the tree referenced by this instance or nullptr if empty. 
+ CordRepBtree* btree() const { return navigator_.btree(); } + + // Returns the current data edge inside the referenced btree. + // Requires that the current instance is not empty. + CordRep* node() const { return navigator_.Current(); } + + // Returns the length of the referenced tree. + // Requires that the current instance is not empty. + size_t length() const; + + // Returns the number of remaining bytes available for iteration, which is the + // number of bytes directly following the end of the last chunk returned. + // This value will be zero if we iterated over the last edge in the bound + // tree, in which case any call to Next() or Skip() will return an empty + // string_view reflecting the EOF state. + // Note that a call to `Seek()` resets `remaining` to a value based on the + // end position of the chunk returned by that call. + size_t remaining() const { return remaining_; } + + // Resets this instance to an empty value. + void Reset() { navigator_.Reset(); } + + // Initializes this instance with `tree`. `tree` must not be null. + // Returns a reference to the first data edge of the provided tree. + absl::string_view Init(CordRepBtree* tree); + + // Navigates to and returns the next data edge of the referenced tree. + // Returns an empty string_view if an attempt is made to read beyond the end + // of the tree, i.e.: if `remaining()` is zero indicating an EOF condition. + // Requires that the current instance is not empty. + absl::string_view Next(); + + // Skips the provided amount of bytes and returns a reference to the data + // directly following the skipped bytes. + absl::string_view Skip(size_t skip); + + // Reads `n` bytes into `tree`. + // If `chunk_size` is zero, starts reading at the next data edge. If + // `chunk_size` is non zero, the read starts at the last `chunk_size` bytes of + // the last returned data edge. Effectively, this means that the read starts + // at offset `consumed() - chunk_size`. 
+ // Requires that `chunk_size` is less than or equal to the length of the + // last returned data edge. The purpose of `chunk_size` is to simplify code + // partially consuming a returned chunk and wanting to include the remaining + // bytes in the Read call. For example, the below code will read 1000 bytes of + // data into a cord tree if the first chunk starts with "big:": + // + // CordRepBtreeReader reader; + // absl::string_view sv = reader.Init(tree); + // if (absl::StartsWith(sv, "big:")) { + // CordRep* tree; + // sv = reader.Read(1000, sv.size() - 4 /* "big:" */, tree); + // } + // + // This method will return an empty string view if all remaining data was + // read. If `n` exceeded the amount of remaining data this function will + // return an empty string view and `tree` will be set to nullptr. + // In both cases, `consumed` will be set to `length`. + absl::string_view Read(size_t n, size_t chunk_size, CordRep*& tree); + + // Navigates to the chunk at offset `offset`. + // Returns a reference into the navigated to chunk, adjusted for the relative + // position of `offset` into that chunk. For example, calling `Seek(13)` on a + // cord tree containing 2 chunks of 10 and 20 bytes respectively will return + // a string view into the second chunk starting at offset 3 with a size of 17. + // Returns an empty string view if `offset` is equal to or greater than the + // length of the referenced tree. 
+ absl::string_view Seek(size_t offset); + + private: + size_t remaining_ = 0; + CordRepBtreeNavigator navigator_; +}; + +inline size_t CordRepBtreeReader::length() const { + assert(btree() != nullptr); + return btree()->length; +} + +inline absl::string_view CordRepBtreeReader::Init(CordRepBtree* tree) { + assert(tree != nullptr); + const CordRep* edge = navigator_.InitFirst(tree); + remaining_ = tree->length - edge->length; + return CordRepBtree::EdgeData(edge); +} + +inline absl::string_view CordRepBtreeReader::Next() { + if (remaining_ == 0) return {}; + const CordRep* edge = navigator_.Next(); + assert(edge != nullptr); + remaining_ -= edge->length; + return CordRepBtree::EdgeData(edge); +} + +inline absl::string_view CordRepBtreeReader::Skip(size_t skip) { + // As we are always positioned on the last 'consumed' edge, we + // need to skip the current edge as well as `skip`. + const size_t edge_length = navigator_.Current()->length; + CordRepBtreeNavigator::Position pos = navigator_.Skip(skip + edge_length); + if (ABSL_PREDICT_FALSE(pos.edge == nullptr)) { + remaining_ = 0; + return {}; + } + // The combined length of all edges skipped before `pos.edge` is `skip - + // pos.offset`, all of which are 'consumed', as well as the current edge. 
+ remaining_ -= skip - pos.offset + pos.edge->length; + return CordRepBtree::EdgeData(pos.edge).substr(pos.offset); +} + +inline absl::string_view CordRepBtreeReader::Seek(size_t offset) { + const CordRepBtreeNavigator::Position pos = navigator_.Seek(offset); + if (ABSL_PREDICT_FALSE(pos.edge == nullptr)) { + remaining_ = 0; + return {}; + } + absl::string_view chunk = CordRepBtree::EdgeData(pos.edge).substr(pos.offset); + remaining_ = length() - offset - chunk.length(); + return chunk; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_reader_test.cc b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_reader_test.cc new file mode 100644 index 0000000000..9b27a81fdb --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_reader_test.cc @@ -0,0 +1,293 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/internal/cord_rep_btree_reader.h" + +#include <iostream> +#include <random> +#include <string> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/cord.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_test_util.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::testing::Eq; +using ::testing::IsEmpty; +using ::testing::Ne; +using ::testing::Not; + +using ::absl::cordrep_testing::CordRepBtreeFromFlats; +using ::absl::cordrep_testing::MakeFlat; +using ::absl::cordrep_testing::CordToString; +using ::absl::cordrep_testing::CreateFlatsFromString; +using ::absl::cordrep_testing::CreateRandomString; + +using ReadResult = CordRepBtreeReader::ReadResult; + +TEST(CordRepBtreeReaderTest, Next) { + constexpr size_t kChars = 3; + const size_t cap = CordRepBtree::kMaxCapacity; + int counts[] = {1, 2, cap, cap * cap, cap * cap + 1, cap * cap * 2 + 17}; + + for (int count : counts) { + std::string data = CreateRandomString(count * kChars); + std::vector<CordRep*> flats = CreateFlatsFromString(data, kChars); + CordRepBtree* node = CordRepBtreeFromFlats(flats); + + CordRepBtreeReader reader; + size_t remaining = data.length(); + absl::string_view chunk = reader.Init(node); + EXPECT_THAT(chunk, Eq(data.substr(0, chunk.length()))); + + remaining -= chunk.length(); + EXPECT_THAT(reader.remaining(), Eq(remaining)); + + while (remaining > 0) { + const size_t offset = data.length() - remaining; + chunk = reader.Next(); + EXPECT_THAT(chunk, Eq(data.substr(offset, chunk.length()))); + + remaining -= chunk.length(); + EXPECT_THAT(reader.remaining(), Eq(remaining)); + } + + EXPECT_THAT(reader.remaining(), Eq(0)); + + // Verify trying to read beyond EOF returns empty 
string_view + EXPECT_THAT(reader.Next(), testing::IsEmpty()); + + CordRep::Unref(node); + } +} + +TEST(CordRepBtreeReaderTest, Skip) { + constexpr size_t kChars = 3; + const size_t cap = CordRepBtree::kMaxCapacity; + int counts[] = {1, 2, cap, cap * cap, cap * cap + 1, cap * cap * 2 + 17}; + + for (int count : counts) { + std::string data = CreateRandomString(count * kChars); + std::vector<CordRep*> flats = CreateFlatsFromString(data, kChars); + CordRepBtree* node = CordRepBtreeFromFlats(flats); + + for (size_t skip1 = 0; skip1 < data.length() - kChars; ++skip1) { + for (size_t skip2 = 0; skip2 < data.length() - kChars; ++skip2) { + CordRepBtreeReader reader; + size_t remaining = data.length(); + absl::string_view chunk = reader.Init(node); + remaining -= chunk.length(); + + chunk = reader.Skip(skip1); + size_t offset = data.length() - remaining; + ASSERT_THAT(chunk, Eq(data.substr(offset + skip1, chunk.length()))); + remaining -= chunk.length() + skip1; + ASSERT_THAT(reader.remaining(), Eq(remaining)); + + if (remaining == 0) continue; + + size_t skip = std::min(remaining - 1, skip2); + chunk = reader.Skip(skip); + offset = data.length() - remaining; + ASSERT_THAT(chunk, Eq(data.substr(offset + skip, chunk.length()))); + } + } + + CordRep::Unref(node); + } +} + +TEST(CordRepBtreeReaderTest, SkipBeyondLength) { + CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc")); + tree = CordRepBtree::Append(tree, MakeFlat("def")); + CordRepBtreeReader reader; + reader.Init(tree); + EXPECT_THAT(reader.Skip(100), IsEmpty()); + EXPECT_THAT(reader.remaining(), Eq(0)); + CordRep::Unref(tree); +} + +TEST(CordRepBtreeReaderTest, Seek) { + constexpr size_t kChars = 3; + const size_t cap = CordRepBtree::kMaxCapacity; + int counts[] = {1, 2, cap, cap * cap, cap * cap + 1, cap * cap * 2 + 17}; + + for (int count : counts) { + std::string data = CreateRandomString(count * kChars); + std::vector<CordRep*> flats = CreateFlatsFromString(data, kChars); + CordRepBtree* node = 
CordRepBtreeFromFlats(flats); + + for (size_t seek = 0; seek < data.length() - 1; ++seek) { + CordRepBtreeReader reader; + reader.Init(node); + absl::string_view chunk = reader.Seek(seek); + ASSERT_THAT(chunk, Not(IsEmpty())); + ASSERT_THAT(chunk, Eq(data.substr(seek, chunk.length()))); + ASSERT_THAT(reader.remaining(), + Eq(data.length() - seek - chunk.length())); + } + + CordRep::Unref(node); + } +} + +TEST(CordRepBtreeReaderTest, SeekBeyondLength) { + CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc")); + tree = CordRepBtree::Append(tree, MakeFlat("def")); + CordRepBtreeReader reader; + reader.Init(tree); + EXPECT_THAT(reader.Seek(6), IsEmpty()); + EXPECT_THAT(reader.remaining(), Eq(0)); + EXPECT_THAT(reader.Seek(100), IsEmpty()); + EXPECT_THAT(reader.remaining(), Eq(0)); + CordRep::Unref(tree); +} + +TEST(CordRepBtreeReaderTest, Read) { + std::string data = "abcdefghijklmno"; + std::vector<CordRep*> flats = CreateFlatsFromString(data, 5); + CordRepBtree* node = CordRepBtreeFromFlats(flats); + + CordRep* tree; + CordRepBtreeReader reader; + absl::string_view chunk; + + // Read zero bytes + chunk = reader.Init(node); + chunk = reader.Read(0, chunk.length(), tree); + EXPECT_THAT(tree, Eq(nullptr)); + EXPECT_THAT(chunk, Eq("abcde")); + EXPECT_THAT(reader.remaining(), Eq(10)); + EXPECT_THAT(reader.Next(), Eq("fghij")); + + // Read in full + chunk = reader.Init(node); + chunk = reader.Read(15, chunk.length(), tree); + EXPECT_THAT(tree, Ne(nullptr)); + EXPECT_THAT(CordToString(tree), Eq("abcdefghijklmno")); + EXPECT_THAT(chunk, Eq("")); + EXPECT_THAT(reader.remaining(), Eq(0)); + CordRep::Unref(tree); + + // Read < chunk bytes + chunk = reader.Init(node); + chunk = reader.Read(3, chunk.length(), tree); + ASSERT_THAT(tree, Ne(nullptr)); + EXPECT_THAT(CordToString(tree), Eq("abc")); + EXPECT_THAT(chunk, Eq("de")); + EXPECT_THAT(reader.remaining(), Eq(10)); + EXPECT_THAT(reader.Next(), Eq("fghij")); + CordRep::Unref(tree); + + // Read < chunk bytes at offset + 
chunk = reader.Init(node); + chunk = reader.Read(2, chunk.length() - 2, tree); + ASSERT_THAT(tree, Ne(nullptr)); + EXPECT_THAT(CordToString(tree), Eq("cd")); + EXPECT_THAT(chunk, Eq("e")); + EXPECT_THAT(reader.remaining(), Eq(10)); + EXPECT_THAT(reader.Next(), Eq("fghij")); + CordRep::Unref(tree); + + // Read from consumed chunk + chunk = reader.Init(node); + chunk = reader.Read(3, 0, tree); + ASSERT_THAT(tree, Ne(nullptr)); + EXPECT_THAT(CordToString(tree), Eq("fgh")); + EXPECT_THAT(chunk, Eq("ij")); + EXPECT_THAT(reader.remaining(), Eq(5)); + EXPECT_THAT(reader.Next(), Eq("klmno")); + CordRep::Unref(tree); + + // Read across chunks + chunk = reader.Init(node); + chunk = reader.Read(12, chunk.length() - 2, tree); + ASSERT_THAT(tree, Ne(nullptr)); + EXPECT_THAT(CordToString(tree), Eq("cdefghijklmn")); + EXPECT_THAT(chunk, Eq("o")); + EXPECT_THAT(reader.remaining(), Eq(0)); + CordRep::Unref(tree); + + // Read across chunks landing on exact edge boundary + chunk = reader.Init(node); + chunk = reader.Read(10 - 2, chunk.length() - 2, tree); + ASSERT_THAT(tree, Ne(nullptr)); + EXPECT_THAT(CordToString(tree), Eq("cdefghij")); + EXPECT_THAT(chunk, Eq("klmno")); + EXPECT_THAT(reader.remaining(), Eq(0)); + CordRep::Unref(tree); + + CordRep::Unref(node); +} + +TEST(CordRepBtreeReaderTest, ReadExhaustive) { + constexpr size_t kChars = 3; + const size_t cap = CordRepBtree::kMaxCapacity; + int counts[] = {1, 2, cap, cap * cap + 1, cap * cap * cap * 2 + 17}; + + for (int count : counts) { + std::string data = CreateRandomString(count * kChars); + std::vector<CordRep*> flats = CreateFlatsFromString(data, kChars); + CordRepBtree* node = CordRepBtreeFromFlats(flats); + + for (size_t read_size : {kChars - 1, kChars, kChars + 7, cap * cap}) { + CordRepBtreeReader reader; + absl::string_view chunk = reader.Init(node); + + // `consumed` tracks the end of last consumed chunk which is the start of + // the next chunk: we always read with `chunk_size = chunk.length()`. 
+ size_t consumed = 0; + size_t remaining = data.length(); + while (remaining > 0) { + CordRep* tree; + size_t n = (std::min)(remaining, read_size); + chunk = reader.Read(n, chunk.length(), tree); + EXPECT_THAT(tree, Ne(nullptr)); + if (tree) { + EXPECT_THAT(CordToString(tree), Eq(data.substr(consumed, n))); + CordRep::Unref(tree); + } + + consumed += n; + remaining -= n; + EXPECT_THAT(reader.remaining(), Eq(remaining - chunk.length())); + + if (remaining > 0) { + ASSERT_FALSE(chunk.empty()); + ASSERT_THAT(chunk, Eq(data.substr(consumed, chunk.length()))); + } else { + ASSERT_TRUE(chunk.empty()) << chunk; + } + } + } + + CordRep::Unref(node); + } +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_test.cc b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_test.cc new file mode 100644 index 0000000000..be9473d41d --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_test.cc @@ -0,0 +1,1489 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/internal/cord_rep_btree.h" + +#include <cmath> +#include <deque> +#include <iostream> +#include <string> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/cleanup/cleanup.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_test_util.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +class CordRepBtreeTestPeer { + public: + static void SetEdge(CordRepBtree* node, size_t idx, CordRep* edge) { + node->edges_[idx] = edge; + } + static void AddEdge(CordRepBtree* node, CordRep* edge) { + node->edges_[node->fetch_add_end(1)] = edge; + } +}; + +namespace { + +using ::absl::cordrep_testing::AutoUnref; +using ::absl::cordrep_testing::CordCollectRepsIf; +using ::absl::cordrep_testing::CordToString; +using ::absl::cordrep_testing::CordVisitReps; +using ::absl::cordrep_testing::CreateFlatsFromString; +using ::absl::cordrep_testing::CreateRandomString; +using ::absl::cordrep_testing::MakeConcat; +using ::absl::cordrep_testing::MakeExternal; +using ::absl::cordrep_testing::MakeFlat; +using ::absl::cordrep_testing::MakeSubstring; +using ::testing::_; +using ::testing::AllOf; +using ::testing::AnyOf; +using ::testing::Conditional; +using ::testing::ElementsAre; +using ::testing::ElementsAreArray; +using ::testing::Eq; +using ::testing::HasSubstr; +using ::testing::Le; +using ::testing::Ne; +using ::testing::Not; +using ::testing::SizeIs; +using ::testing::TypedEq; + +MATCHER_P(EqFlatHolding, data, "Equals flat holding data") { + if (arg->tag < FLAT) { + *result_listener << "Expected FLAT, got tag " << static_cast<int>(arg->tag); + return false; + } + std::string actual = CordToString(arg); + if (actual != data) { + *result_listener << "Expected flat holding \"" << data + << "\", got flat holding \"" << 
actual << "\""; + return false; + } + return true; +} + +MATCHER_P(IsNode, height, absl::StrCat("Is a valid node of height ", height)) { + if (arg == nullptr) { + *result_listener << "Expected NODE, got nullptr"; + return false; + } + if (arg->tag != BTREE) { + *result_listener << "Expected NODE, got " << static_cast<int>(arg->tag); + return false; + } + if (!CordRepBtree::IsValid(arg->btree())) { + CordRepBtree::Dump(arg->btree(), "Expected valid NODE, got:", false, + *result_listener->stream()); + return false; + } + if (arg->btree()->height() != height) { + *result_listener << "Expected NODE of height " << height << ", got " + << arg->btree()->height(); + return false; + } + return true; +} + +MATCHER_P2(IsSubstring, start, length, + absl::StrCat("Is a substring(start = ", start, ", length = ", length, + ")")) { + if (arg == nullptr) { + *result_listener << "Expected substring, got nullptr"; + return false; + } + if (arg->tag != SUBSTRING) { + *result_listener << "Expected SUBSTRING, got " + << static_cast<int>(arg->tag); + return false; + } + const CordRepSubstring* const substr = arg->substring(); + if (substr->start != start || substr->length != length) { + *result_listener << "Expected substring(" << start << ", " << length + << "), got substring(" << substr->start << ", " + << substr->length << ")"; + return false; + } + return true; +} + +// DataConsumer is a simple helper class used by tests to 'consume' string +// fragments from the provided input in forward or backward direction. +class DataConsumer { + public: + // Starts consumption of `data`. Caller must make sure `data` outlives this + // instance. Consumes data starting at the front if `forward` is true, else + // consumes data from the back. + DataConsumer(absl::string_view data, bool forward) + : data_(data), forward_(forward) {} + + // Return the next `n` bytes from referenced data. 
+ absl::string_view Next(size_t n) { + assert(n <= data_.size() - consumed_); + consumed_ += n; + return data_.substr(forward_ ? consumed_ - n : data_.size() - consumed_, n); + } + + // Returns all data consumed so far. + absl::string_view Consumed() const { + return forward_ ? data_.substr(0, consumed_) + : data_.substr(data_.size() - consumed_); + } + + private: + absl::string_view data_; + size_t consumed_ = 0; + bool forward_; +}; + +// BtreeAdd returns either CordRepBtree::Append or CordRepBtree::Prepend. +CordRepBtree* BtreeAdd(CordRepBtree* node, bool append, + absl::string_view data) { + return append ? CordRepBtree::Append(node, data) + : CordRepBtree::Prepend(node, data); +} + +// Recursively collects all leaf edges from `tree` and appends them to `edges`. +void GetLeafEdges(const CordRepBtree* tree, std::vector<CordRep*>& edges) { + if (tree->height() == 0) { + for (CordRep* edge : tree->Edges()) { + edges.push_back(edge); + } + } else { + for (CordRep* edge : tree->Edges()) { + GetLeafEdges(edge->btree(), edges); + } + } +} + +// Recursively collects and returns all leaf edges from `tree`. +std::vector<CordRep*> GetLeafEdges(const CordRepBtree* tree) { + std::vector<CordRep*> edges; + GetLeafEdges(tree, edges); + return edges; +} + +// Creates a flat containing the hexadecimal value of `i` zero padded +// to at least 4 digits prefixed with "0x", e.g.: "0x04AC". +CordRepFlat* MakeHexFlat(size_t i) { + return MakeFlat(absl::StrCat("0x", absl::Hex(i, absl::kZeroPad4))); +} + +CordRepBtree* MakeLeaf(size_t size = CordRepBtree::kMaxCapacity) { + assert(size <= CordRepBtree::kMaxCapacity); + CordRepBtree* leaf = CordRepBtree::Create(MakeHexFlat(0)); + for (size_t i = 1; i < size; ++i) { + leaf = CordRepBtree::Append(leaf, MakeHexFlat(i)); + } + return leaf; +} + +CordRepBtree* MakeTree(size_t size, bool append = true) { + CordRepBtree* tree = CordRepBtree::Create(MakeHexFlat(0)); + for (size_t i = 1; i < size; ++i) { + tree = append ? 
CordRepBtree::Append(tree, MakeHexFlat(i)) + : CordRepBtree::Prepend(tree, MakeHexFlat(i)); + } + return tree; +} + +CordRepBtree* CreateTree(absl::Span<CordRep* const> reps) { + auto it = reps.begin(); + CordRepBtree* tree = CordRepBtree::Create(*it); + while (++it != reps.end()) tree = CordRepBtree::Append(tree, *it); + return tree; +} + +CordRepBtree* CreateTree(absl::string_view data, size_t chunk_size) { + return CreateTree(CreateFlatsFromString(data, chunk_size)); +} + +CordRepBtree* CreateTreeReverse(absl::string_view data, size_t chunk_size) { + std::vector<CordRep*> flats = CreateFlatsFromString(data, chunk_size); + auto rit = flats.rbegin(); + CordRepBtree* tree = CordRepBtree::Create(*rit); + while (++rit != flats.rend()) tree = CordRepBtree::Prepend(tree, *rit); + return tree; +} + +class CordRepBtreeTest : public testing::TestWithParam<bool> { + public: + bool shared() const { return GetParam(); } + + static std::string ToString(testing::TestParamInfo<bool> param) { + return param.param ? "Shared" : "Private"; + } +}; + +INSTANTIATE_TEST_SUITE_P(WithParam, CordRepBtreeTest, testing::Bool(), + CordRepBtreeTest::ToString); + +class CordRepBtreeHeightTest : public testing::TestWithParam<int> { + public: + int height() const { return GetParam(); } + + static std::string ToString(testing::TestParamInfo<int> param) { + return absl::StrCat(param.param); + } +}; + +INSTANTIATE_TEST_SUITE_P(WithHeights, CordRepBtreeHeightTest, + testing::Range(0, CordRepBtree::kMaxHeight), + CordRepBtreeHeightTest::ToString); + +using TwoBools = testing::tuple<bool, bool>; + +class CordRepBtreeDualTest : public testing::TestWithParam<TwoBools> { + public: + bool first_shared() const { return std::get<0>(GetParam()); } + bool second_shared() const { return std::get<1>(GetParam()); } + + static std::string ToString(testing::TestParamInfo<TwoBools> param) { + if (std::get<0>(param.param)) { + return std::get<1>(param.param) ? 
"BothShared" : "FirstShared"; + } + return std::get<1>(param.param) ? "SecondShared" : "Private"; + } +}; + +INSTANTIATE_TEST_SUITE_P(WithParam, CordRepBtreeDualTest, + testing::Combine(testing::Bool(), testing::Bool()), + CordRepBtreeDualTest::ToString); + +TEST(CordRepBtreeTest, SizeIsMultipleOf64) { + // Only enforce for fully 64-bit platforms. + if (sizeof(size_t) == 8 && sizeof(void*) == 8) { + EXPECT_THAT(sizeof(CordRepBtree) % 64, Eq(0)) << "Should be multiple of 64"; + } +} + +TEST(CordRepBtreeTest, NewDestroyEmptyTree) { + auto* tree = CordRepBtree::New(); + EXPECT_THAT(tree->size(), Eq(0)); + EXPECT_THAT(tree->height(), Eq(0)); + EXPECT_THAT(tree->Edges(), ElementsAre()); + CordRepBtree::Destroy(tree); +} + +TEST(CordRepBtreeTest, NewDestroyEmptyTreeAtHeight) { + auto* tree = CordRepBtree::New(3); + EXPECT_THAT(tree->size(), Eq(0)); + EXPECT_THAT(tree->height(), Eq(3)); + EXPECT_THAT(tree->Edges(), ElementsAre()); + CordRepBtree::Destroy(tree); +} + +TEST(CordRepBtreeTest, Btree) { + CordRep* rep = CordRepBtree::New(); + EXPECT_THAT(rep->btree(), Eq(rep)); + EXPECT_THAT(static_cast<const CordRep*>(rep)->btree(), Eq(rep)); + CordRep::Unref(rep); +#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG) + rep = MakeFlat("Hello world"); + EXPECT_DEATH(rep->btree(), ".*"); + EXPECT_DEATH(static_cast<const CordRep*>(rep)->btree(), ".*"); + CordRep::Unref(rep); +#endif +} + +TEST(CordRepBtreeTest, EdgeData) { + CordRepFlat* flat = MakeFlat("Hello world"); + CordRepExternal* external = MakeExternal("Hello external"); + CordRep* substr1 = MakeSubstring(1, 6, CordRep::Ref(flat)); + CordRep* substr2 = MakeSubstring(1, 6, CordRep::Ref(external)); + CordRep* concat = MakeConcat(CordRep::Ref(flat), CordRep::Ref(external)); + CordRep* bad_substr = MakeSubstring(1, 2, CordRep::Ref(substr1)); + + EXPECT_TRUE(CordRepBtree::IsDataEdge(flat)); + EXPECT_THAT(CordRepBtree::EdgeDataPtr(flat), + TypedEq<const void*>(flat->Data())); + EXPECT_THAT(CordRepBtree::EdgeData(flat), 
Eq("Hello world")); + + EXPECT_TRUE(CordRepBtree::IsDataEdge(external)); + EXPECT_THAT(CordRepBtree::EdgeDataPtr(external), + TypedEq<const void*>(external->base)); + EXPECT_THAT(CordRepBtree::EdgeData(external), Eq("Hello external")); + + EXPECT_TRUE(CordRepBtree::IsDataEdge(substr1)); + EXPECT_THAT(CordRepBtree::EdgeDataPtr(substr1), + TypedEq<const void*>(flat->Data() + 1)); + EXPECT_THAT(CordRepBtree::EdgeData(substr1), Eq("ello w")); + + EXPECT_TRUE(CordRepBtree::IsDataEdge(substr2)); + EXPECT_THAT(CordRepBtree::EdgeDataPtr(substr2), + TypedEq<const void*>(external->base + 1)); + EXPECT_THAT(CordRepBtree::EdgeData(substr2), Eq("ello e")); + + EXPECT_FALSE(CordRepBtree::IsDataEdge(concat)); + EXPECT_FALSE(CordRepBtree::IsDataEdge(bad_substr)); +#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG) + EXPECT_DEATH(CordRepBtree::EdgeData(concat), ".*"); + EXPECT_DEATH(CordRepBtree::EdgeDataPtr(concat), ".*"); + EXPECT_DEATH(CordRepBtree::EdgeData(bad_substr), ".*"); + EXPECT_DEATH(CordRepBtree::EdgeDataPtr(bad_substr), ".*"); +#endif + + CordRep::Unref(bad_substr); + CordRep::Unref(concat); + CordRep::Unref(substr2); + CordRep::Unref(substr1); + CordRep::Unref(external); + CordRep::Unref(flat); +} + +TEST(CordRepBtreeTest, CreateUnrefLeaf) { + auto* flat = MakeFlat("a"); + auto* leaf = CordRepBtree::Create(flat); + EXPECT_THAT(leaf->size(), Eq(1)); + EXPECT_THAT(leaf->height(), Eq(0)); + EXPECT_THAT(leaf->Edges(), ElementsAre(flat)); + CordRepBtree::Unref(leaf); +} + +TEST(CordRepBtreeTest, NewUnrefNode) { + auto* leaf = CordRepBtree::Create(MakeFlat("a")); + CordRepBtree* tree = CordRepBtree::New(leaf); + EXPECT_THAT(tree->size(), Eq(1)); + EXPECT_THAT(tree->height(), Eq(1)); + EXPECT_THAT(tree->Edges(), ElementsAre(leaf)); + CordRepBtree::Unref(tree); +} + +TEST_P(CordRepBtreeTest, AppendToLeafToCapacity) { + AutoUnref refs; + std::vector<CordRep*> flats; + flats.push_back(MakeHexFlat(0)); + auto* leaf = CordRepBtree::Create(flats.back()); + + for (size_t i = 1; 
i < CordRepBtree::kMaxCapacity; ++i) { + refs.RefIf(shared(), leaf); + flats.push_back(MakeHexFlat(i)); + auto* result = CordRepBtree::Append(leaf, flats.back()); + EXPECT_THAT(result->height(), Eq(0)); + EXPECT_THAT(result, Conditional(shared(), Ne(leaf), Eq(leaf))); + EXPECT_THAT(result->Edges(), ElementsAreArray(flats)); + leaf = result; + } + CordRep::Unref(leaf); +} + +TEST_P(CordRepBtreeTest, PrependToLeafToCapacity) { + AutoUnref refs; + std::deque<CordRep*> flats; + flats.push_front(MakeHexFlat(0)); + auto* leaf = CordRepBtree::Create(flats.front()); + + for (size_t i = 1; i < CordRepBtree::kMaxCapacity; ++i) { + refs.RefIf(shared(), leaf); + flats.push_front(MakeHexFlat(i)); + auto* result = CordRepBtree::Prepend(leaf, flats.front()); + EXPECT_THAT(result->height(), Eq(0)); + EXPECT_THAT(result, Conditional(shared(), Ne(leaf), Eq(leaf))); + EXPECT_THAT(result->Edges(), ElementsAreArray(flats)); + leaf = result; + } + CordRep::Unref(leaf); +} + +// This test specifically aims at code aligning data at either the front or the +// back of the contained `edges[]` array, alternating Append and Prepend will +// move `begin()` and `end()` values as needed for each added value. 
+TEST_P(CordRepBtreeTest, AppendPrependToLeafToCapacity) { + AutoUnref refs; + std::deque<CordRep*> flats; + flats.push_front(MakeHexFlat(0)); + auto* leaf = CordRepBtree::Create(flats.front()); + + for (size_t i = 1; i < CordRepBtree::kMaxCapacity; ++i) { + refs.RefIf(shared(), leaf); + CordRepBtree* result; + if (i % 2 != 0) { + flats.push_front(MakeHexFlat(i)); + result = CordRepBtree::Prepend(leaf, flats.front()); + } else { + flats.push_back(MakeHexFlat(i)); + result = CordRepBtree::Append(leaf, flats.back()); + } + EXPECT_THAT(result->height(), Eq(0)); + EXPECT_THAT(result, Conditional(shared(), Ne(leaf), Eq(leaf))); + EXPECT_THAT(result->Edges(), ElementsAreArray(flats)); + leaf = result; + } + CordRep::Unref(leaf); +} + +TEST_P(CordRepBtreeTest, AppendToLeafBeyondCapacity) { + AutoUnref refs; + auto* leaf = MakeLeaf(); + refs.RefIf(shared(), leaf); + CordRep* flat = MakeFlat("abc"); + auto* result = CordRepBtree::Append(leaf, flat); + ASSERT_THAT(result, IsNode(1)); + EXPECT_THAT(result, Ne(leaf)); + absl::Span<CordRep* const> edges = result->Edges(); + ASSERT_THAT(edges, ElementsAre(leaf, IsNode(0))); + EXPECT_THAT(edges[1]->btree()->Edges(), ElementsAre(flat)); + CordRep::Unref(result); +} + +TEST_P(CordRepBtreeTest, PrependToLeafBeyondCapacity) { + AutoUnref refs; + auto* leaf = MakeLeaf(); + refs.RefIf(shared(), leaf); + CordRep* flat = MakeFlat("abc"); + auto* result = CordRepBtree::Prepend(leaf, flat); + ASSERT_THAT(result, IsNode(1)); + EXPECT_THAT(result, Ne(leaf)); + absl::Span<CordRep* const> edges = result->Edges(); + ASSERT_THAT(edges, ElementsAre(IsNode(0), leaf)); + EXPECT_THAT(edges[0]->btree()->Edges(), ElementsAre(flat)); + CordRep::Unref(result); +} + +TEST_P(CordRepBtreeTest, AppendToTreeOneDeep) { + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + AutoUnref refs; + std::vector<CordRep*> flats; + flats.push_back(MakeHexFlat(0)); + CordRepBtree* tree = CordRepBtree::Create(flats.back()); + for (size_t i = 1; i <= max_cap; ++i) { + 
flats.push_back(MakeHexFlat(i)); + tree = CordRepBtree::Append(tree, flats.back()); + } + ASSERT_THAT(tree, IsNode(1)); + + for (size_t i = max_cap + 1; i < max_cap * max_cap; ++i) { + // Ref top level tree based on param. + // Ref leaf node once every 4 iterations, which should not have an + // observable effect other than that the leaf itself is copied. + refs.RefIf(shared(), tree); + refs.RefIf(i % 4 == 0, tree->Edges().back()); + + flats.push_back(MakeHexFlat(i)); + CordRepBtree* result = CordRepBtree::Append(tree, flats.back()); + ASSERT_THAT(result, IsNode(1)); + ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree))); + std::vector<CordRep*> edges = GetLeafEdges(result); + ASSERT_THAT(edges, ElementsAreArray(flats)); + tree = result; + } + CordRep::Unref(tree); +} + +TEST_P(CordRepBtreeTest, AppendToTreeTwoDeep) { + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + AutoUnref refs; + std::vector<CordRep*> flats; + flats.push_back(MakeHexFlat(0)); + CordRepBtree* tree = CordRepBtree::Create(flats.back()); + for (size_t i = 1; i <= max_cap * max_cap; ++i) { + flats.push_back(MakeHexFlat(i)); + tree = CordRepBtree::Append(tree, flats.back()); + } + ASSERT_THAT(tree, IsNode(2)); + for (size_t i = max_cap * max_cap + 1; i < max_cap * max_cap * max_cap; ++i) { + // Ref top level tree based on param. + // Ref child node once every 16 iterations, and leaf node every 4 + // iterrations which which should not have an observable effect other than + // the node and/or the leaf below it being copied. 
+ refs.RefIf(shared(), tree); + refs.RefIf(i % 16 == 0, tree->Edges().back()); + refs.RefIf(i % 4 == 0, tree->Edges().back()->btree()->Edges().back()); + + flats.push_back(MakeHexFlat(i)); + CordRepBtree* result = CordRepBtree::Append(tree, flats.back()); + ASSERT_THAT(result, IsNode(2)); + ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree))); + std::vector<CordRep*> edges = GetLeafEdges(result); + ASSERT_THAT(edges, ElementsAreArray(flats)); + tree = result; + } + CordRep::Unref(tree); +} + +TEST_P(CordRepBtreeTest, PrependToTreeOneDeep) { + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + AutoUnref refs; + std::deque<CordRep*> flats; + flats.push_back(MakeHexFlat(0)); + CordRepBtree* tree = CordRepBtree::Create(flats.back()); + for (size_t i = 1; i <= max_cap; ++i) { + flats.push_front(MakeHexFlat(i)); + tree = CordRepBtree::Prepend(tree, flats.front()); + } + ASSERT_THAT(tree, IsNode(1)); + + for (size_t i = max_cap + 1; i < max_cap * max_cap; ++i) { + // Ref top level tree based on param. + // Ref leaf node once every 4 iterations which should not have an observable + // effect other than than the leaf itself is copied. 
+ refs.RefIf(shared(), tree); + refs.RefIf(i % 4 == 0, tree->Edges().back()); + + flats.push_front(MakeHexFlat(i)); + CordRepBtree* result = CordRepBtree::Prepend(tree, flats.front()); + ASSERT_THAT(result, IsNode(1)); + ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree))); + std::vector<CordRep*> edges = GetLeafEdges(result); + ASSERT_THAT(edges, ElementsAreArray(flats)); + tree = result; + } + CordRep::Unref(tree); +} + +TEST_P(CordRepBtreeTest, PrependToTreeTwoDeep) { + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + AutoUnref refs; + std::deque<CordRep*> flats; + flats.push_back(MakeHexFlat(0)); + CordRepBtree* tree = CordRepBtree::Create(flats.back()); + for (size_t i = 1; i <= max_cap * max_cap; ++i) { + flats.push_front(MakeHexFlat(i)); + tree = CordRepBtree::Prepend(tree, flats.front()); + } + ASSERT_THAT(tree, IsNode(2)); + for (size_t i = max_cap * max_cap + 1; i < max_cap * max_cap * max_cap; ++i) { + // Ref top level tree based on param. + // Ref child node once every 16 iterations, and leaf node every 4 + // iterrations which which should not have an observable effect other than + // the node and/or the leaf below it being copied. + refs.RefIf(shared(), tree); + refs.RefIf(i % 16 == 0, tree->Edges().back()); + refs.RefIf(i % 4 == 0, tree->Edges().back()->btree()->Edges().back()); + + flats.push_front(MakeHexFlat(i)); + CordRepBtree* result = CordRepBtree::Prepend(tree, flats.front()); + ASSERT_THAT(result, IsNode(2)); + ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree))); + std::vector<CordRep*> edges = GetLeafEdges(result); + ASSERT_THAT(edges, ElementsAreArray(flats)); + tree = result; + } + CordRep::Unref(tree); +} + +TEST_P(CordRepBtreeDualTest, MergeLeafsNotExceedingCapacity) { + for (bool use_append : {false, true}) { + SCOPED_TRACE(use_append ? 
"Using Append" : "Using Prepend"); + + AutoUnref refs; + std::vector<CordRep*> flats; + + // Build `left` side leaf appending all contained flats to `flats` + CordRepBtree* left = MakeLeaf(3); + GetLeafEdges(left, flats); + refs.RefIf(first_shared(), left); + + // Build `right` side leaf appending all contained flats to `flats` + CordRepBtree* right = MakeLeaf(2); + GetLeafEdges(right, flats); + refs.RefIf(second_shared(), right); + + CordRepBtree* tree = use_append ? CordRepBtree::Append(left, right) + : CordRepBtree::Prepend(right, left); + EXPECT_THAT(tree, IsNode(0)); + + // `tree` contains all flats originally belonging to `left` and `right`. + EXPECT_THAT(tree->Edges(), ElementsAreArray(flats)); + CordRepBtree::Unref(tree); + } +} + +TEST_P(CordRepBtreeDualTest, MergeLeafsExceedingCapacity) { + for (bool use_append : {false, true}) { + SCOPED_TRACE(use_append ? "Using Append" : "Using Prepend"); + + AutoUnref refs; + + // Build `left` side tree appending all contained flats to `flats` + CordRepBtree* left = MakeLeaf(CordRepBtree::kMaxCapacity - 2); + refs.RefIf(first_shared(), left); + + // Build `right` side tree appending all contained flats to `flats` + CordRepBtree* right = MakeLeaf(CordRepBtree::kMaxCapacity - 1); + refs.RefIf(second_shared(), right); + + CordRepBtree* tree = use_append ? CordRepBtree::Append(left, right) + : CordRepBtree::Prepend(right, left); + EXPECT_THAT(tree, IsNode(1)); + EXPECT_THAT(tree->Edges(), ElementsAre(left, right)); + CordRepBtree::Unref(tree); + } +} + +TEST_P(CordRepBtreeDualTest, MergeEqualHeightTrees) { + for (bool use_append : {false, true}) { + SCOPED_TRACE(use_append ? 
"Using Append" : "Using Prepend"); + + AutoUnref refs; + std::vector<CordRep*> flats; + + // Build `left` side tree appending all contained flats to `flats` + CordRepBtree* left = MakeTree(CordRepBtree::kMaxCapacity * 3); + GetLeafEdges(left, flats); + refs.RefIf(first_shared(), left); + + // Build `right` side tree appending all contained flats to `flats` + CordRepBtree* right = MakeTree(CordRepBtree::kMaxCapacity * 2); + GetLeafEdges(right, flats); + refs.RefIf(second_shared(), right); + + CordRepBtree* tree = use_append ? CordRepBtree::Append(left, right) + : CordRepBtree::Prepend(right, left); + EXPECT_THAT(tree, IsNode(1)); + EXPECT_THAT(tree->Edges(), SizeIs(5)); + + // `tree` contains all flats originally belonging to `left` and `right`. + EXPECT_THAT(GetLeafEdges(tree), ElementsAreArray(flats)); + CordRepBtree::Unref(tree); + } +} + +TEST_P(CordRepBtreeDualTest, MergeLeafWithTreeNotExceedingLeafCapacity) { + for (bool use_append : {false, true}) { + SCOPED_TRACE(use_append ? "Using Append" : "Using Prepend"); + + AutoUnref refs; + std::vector<CordRep*> flats; + + // Build `left` side tree appending all added flats to `flats` + CordRepBtree* left = MakeTree(CordRepBtree::kMaxCapacity * 2 + 2); + GetLeafEdges(left, flats); + refs.RefIf(first_shared(), left); + + // Build `right` side tree appending all added flats to `flats` + CordRepBtree* right = MakeTree(3); + GetLeafEdges(right, flats); + refs.RefIf(second_shared(), right); + + CordRepBtree* tree = use_append ? CordRepBtree::Append(left, right) + : CordRepBtree::Prepend(right, left); + EXPECT_THAT(tree, IsNode(1)); + EXPECT_THAT(tree->Edges(), SizeIs(3)); + + // `tree` contains all flats originally belonging to `left` and `right`. + EXPECT_THAT(GetLeafEdges(tree), ElementsAreArray(flats)); + CordRepBtree::Unref(tree); + } +} + +TEST_P(CordRepBtreeDualTest, MergeLeafWithTreeExceedingLeafCapacity) { + for (bool use_append : {false, true}) { + SCOPED_TRACE(use_append ? 
"Using Append" : "Using Prepend"); + + AutoUnref refs; + std::vector<CordRep*> flats; + + // Build `left` side tree appending all added flats to `flats` + CordRepBtree* left = MakeTree(CordRepBtree::kMaxCapacity * 3 - 2); + GetLeafEdges(left, flats); + refs.RefIf(first_shared(), left); + + // Build `right` side tree appending all added flats to `flats` + CordRepBtree* right = MakeTree(3); + GetLeafEdges(right, flats); + refs.RefIf(second_shared(), right); + + CordRepBtree* tree = use_append ? CordRepBtree::Append(left, right) + : CordRepBtree::Prepend(right, left); + EXPECT_THAT(tree, IsNode(1)); + EXPECT_THAT(tree->Edges(), SizeIs(4)); + + // `tree` contains all flats originally belonging to `left` and `right`. + EXPECT_THAT(GetLeafEdges(tree), ElementsAreArray(flats)); + CordRepBtree::Unref(tree); + } +} + +void RefEdgesAt(size_t depth, AutoUnref& refs, CordRepBtree* tree) { + absl::Span<CordRep* const> edges = tree->Edges(); + if (depth == 0) { + refs.Ref(edges.front()); + refs.Ref(edges.back()); + } else { + assert(tree->height() > 0); + RefEdgesAt(depth - 1, refs, edges.front()->btree()); + RefEdgesAt(depth - 1, refs, edges.back()->btree()); + } +} + +TEST(CordRepBtreeTest, MergeFuzzTest) { + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + std::minstd_rand rnd; + std::uniform_int_distribution<int> coin_flip(0, 1); + std::uniform_int_distribution<int> dice_throw(1, 6); + + auto random_leaf_count = [&]() { + std::uniform_int_distribution<int> dist_height(0, 3); + std::uniform_int_distribution<int> dist_leaf(0, max_cap - 1); + const size_t height = dist_height(rnd); + return (height ? 
pow(max_cap, height) : 0) + dist_leaf(rnd); + }; + + for (int i = 0; i < 10000; ++i) { + AutoUnref refs; + std::vector<CordRep*> flats; + + CordRepBtree* left = MakeTree(random_leaf_count(), coin_flip(rnd)); + GetLeafEdges(left, flats); + if (dice_throw(rnd) == 1) { + std::uniform_int_distribution<int> dist(0, left->height()); + RefEdgesAt(dist(rnd), refs, left); + } + + CordRepBtree* right = MakeTree(random_leaf_count(), coin_flip(rnd)); + GetLeafEdges(right, flats); + if (dice_throw(rnd) == 1) { + std::uniform_int_distribution<int> dist(0, right->height()); + RefEdgesAt(dist(rnd), refs, right); + } + + CordRepBtree* tree = CordRepBtree::Append(left, right); + EXPECT_THAT(GetLeafEdges(tree), ElementsAreArray(flats)); + CordRepBtree::Unref(tree); + } +} + +TEST_P(CordRepBtreeTest, RemoveSuffix) { + // Create tree of 1, 2 and 3 levels high + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + for (size_t cap : {max_cap - 1, max_cap * 2, max_cap * max_cap * 2}) { + const std::string data = CreateRandomString(cap * 512); + + { + // Verify RemoveSuffix(<all>) + AutoUnref refs; + CordRepBtree* node = refs.RefIf(shared(), CreateTree(data, 512)); + EXPECT_THAT(CordRepBtree::RemoveSuffix(node, data.length()), Eq(nullptr)); + + // Verify RemoveSuffix(<none>) + node = refs.RefIf(shared(), CreateTree(data, 512)); + EXPECT_THAT(CordRepBtree::RemoveSuffix(node, 0), Eq(node)); + CordRep::Unref(node); + } + + for (int n = 1; n < data.length(); ++n) { + AutoUnref refs; + auto flats = CreateFlatsFromString(data, 512); + CordRepBtree* node = refs.RefIf(shared(), CreateTree(flats)); + CordRep* rep = refs.Add(CordRepBtree::RemoveSuffix(node, n)); + EXPECT_THAT(CordToString(rep), Eq(data.substr(0, data.length() - n))); + + // Collect all flats + auto is_flat = [](CordRep* rep) { return rep->tag >= FLAT; }; + std::vector<CordRep*> edges = CordCollectRepsIf(is_flat, rep); + ASSERT_THAT(edges.size(), Le(flats.size())); + + // Isolate last edge + CordRep* last_edge = edges.back(); + 
edges.pop_back(); + const size_t last_length = rep->length - edges.size() * 512; + + // All flats except the last edge must be kept or copied 'as is' + int index = 0; + for (CordRep* edge : edges) { + ASSERT_THAT(edge, Eq(flats[index++])); + ASSERT_THAT(edge->length, Eq(512)); + } + + // CordRepBtree may optimize small substrings to avoid waste, so only + // check for flat sharing / updates where the code should always do this. + if (last_length >= 500) { + EXPECT_THAT(last_edge, Eq(flats[index++])); + if (shared()) { + EXPECT_THAT(last_edge->length, Eq(512)); + } else { + EXPECT_TRUE(last_edge->refcount.IsOne()); + EXPECT_THAT(last_edge->length, Eq(last_length)); + } + } + } + } +} + +TEST(CordRepBtreeTest, SubTree) { + // Create tree of at least 2 levels high + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + const size_t n = max_cap * max_cap * 2; + const std::string data = CreateRandomString(n * 3); + std::vector<CordRep*> flats; + for (absl::string_view s = data; !s.empty(); s.remove_prefix(3)) { + flats.push_back(MakeFlat(s.substr(0, 3))); + } + CordRepBtree* node = CordRepBtree::Create(CordRep::Ref(flats[0])); + for (size_t i = 1; i < flats.size(); ++i) { + node = CordRepBtree::Append(node, CordRep::Ref(flats[i])); + } + + for (int offset = 0; offset < data.length(); ++offset) { + for (int length = 1; length <= data.length() - offset; ++length) { + CordRep* rep = node->SubTree(offset, length); + EXPECT_THAT(CordToString(rep), Eq(data.substr(offset, length))); + CordRep::Unref(rep); + } + } + CordRepBtree::Unref(node); + for (CordRep* rep : flats) { + CordRep::Unref(rep); + } +} + +TEST(CordRepBtreeTest, SubTreeOnExistingSubstring) { + // This test verifies that a SubTree call on a pre-existing (large) substring + // adjusts the existing substring if not shared, and else rewrites the + // existing substring. 
+  AutoUnref refs;
+  std::string data = CreateRandomString(1000);
+  CordRepBtree* leaf = CordRepBtree::Create(MakeFlat("abc"));
+  CordRep* flat = MakeFlat(data);
+  leaf = CordRepBtree::Append(leaf, flat);
+
+  // Setup tree containing substring.
+  // The range covers all 3 bytes of "abc" plus the first 990 bytes of `flat`.
+  CordRep* result = leaf->SubTree(0, 3 + 990);
+  ASSERT_THAT(result->tag, Eq(BTREE));
+  CordRep::Unref(leaf);
+  leaf = result->btree();
+  ASSERT_THAT(leaf->Edges(), ElementsAre(_, IsSubstring(0, 990)));
+  EXPECT_THAT(leaf->Edges()[1]->substring()->child, Eq(flat));
+
+  // Verify substring of substring.
+  // Offset 3 + 5 skips "abc" and the first 5 bytes of the existing substring;
+  // the result must reference `flat` directly.
+  result = leaf->SubTree(3 + 5, 970);
+  ASSERT_THAT(result, IsSubstring(5, 970));
+  EXPECT_THAT(result->substring()->child, Eq(flat));
+  CordRep::Unref(result);
+
+  CordRep::Unref(leaf);
+}
+
+// Adds 3 byte chunks to a leaf until it holds kMaxCapacity edges, in both
+// append and prepend mode. Each add must return the same node if the leaf is
+// not shared, a different node if it is shared, and the resulting contents
+// must match all data consumed so far.
+TEST_P(CordRepBtreeTest, AddDataToLeaf) {
+  const size_t n = CordRepBtree::kMaxCapacity;
+  const std::string data = CreateRandomString(n * 3);
+
+  for (bool append : {true, false}) {
+    AutoUnref refs;
+    DataConsumer consumer(data, append);
+    SCOPED_TRACE(append ? "Append" : "Prepend");
+
+    CordRepBtree* leaf = CordRepBtree::Create(MakeFlat(consumer.Next(3)));
+    for (size_t i = 1; i < n; ++i) {
+      refs.RefIf(shared(), leaf);
+      CordRepBtree* result = BtreeAdd(leaf, append, consumer.Next(3));
+      EXPECT_THAT(result, Conditional(shared(), Ne(leaf), Eq(leaf)));
+      EXPECT_THAT(CordToString(result), Eq(consumer.Consumed()));
+      leaf = result;
+    }
+    CordRep::Unref(leaf);
+  }
+}
+
+// Appends string data to a tree with two leaf edges. The first leaf must be
+// retained, and the root and last leaf must only be replaced if the input
+// tree is shared.
+TEST_P(CordRepBtreeTest, AppendDataToTree) {
+  AutoUnref refs;
+  size_t n = CordRepBtree::kMaxCapacity + CordRepBtree::kMaxCapacity / 2;
+  std::string data = CreateRandomString(n * 3);
+  CordRepBtree* tree = refs.RefIf(shared(), CreateTree(data, 3));
+  CordRepBtree* leaf0 = tree->Edges()[0]->btree();
+  CordRepBtree* leaf1 = tree->Edges()[1]->btree();
+  CordRepBtree* result = CordRepBtree::Append(tree, "123456789");
+  EXPECT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree)));
+  EXPECT_THAT(result->Edges(),
+              ElementsAre(leaf0, Conditional(shared(), Ne(leaf1), Eq(leaf1))));
+  EXPECT_THAT(CordToString(result), Eq(data + "123456789"));
+  CordRep::Unref(result);
+}
+
+// Mirror image of AppendDataToTree: prepends string data to a tree with two
+// leaf edges. The last leaf must be retained, and the root and first leaf
+// must only be replaced if the input tree is shared.
+TEST_P(CordRepBtreeTest, PrependDataToTree) {
+  AutoUnref refs;
+  size_t n = CordRepBtree::kMaxCapacity + CordRepBtree::kMaxCapacity / 2;
+  std::string data = CreateRandomString(n * 3);
+  CordRepBtree* tree = refs.RefIf(shared(), CreateTreeReverse(data, 3));
+  CordRepBtree* leaf0 = tree->Edges()[0]->btree();
+  CordRepBtree* leaf1 = tree->Edges()[1]->btree();
+  CordRepBtree* result = CordRepBtree::Prepend(tree, "123456789");
+  EXPECT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree)));
+  EXPECT_THAT(result->Edges(),
+              ElementsAre(Conditional(shared(), Ne(leaf0), Eq(leaf0)), leaf1));
+  EXPECT_THAT(CordToString(result), Eq("123456789" + data));
+  CordRep::Unref(result);
+}
+
+// Grows a tree 3 bytes at a time, in both append and prepend mode, until it
+// holds max_cap^3 data edges. The adds that overflow a full tree must return
+// a new, higher node (IsNode(1), then IsNode(2)); all other adds must return
+// the same node unless the tree is shared.
+TEST_P(CordRepBtreeTest, AddDataToTreeThreeLevelsDeep) {
+  constexpr size_t max_cap = CordRepBtree::kMaxCapacity;
+  const size_t n = max_cap * max_cap * max_cap;
+  const std::string data = CreateRandomString(n * 3);
+
+  for (bool append : {true, false}) {
+    AutoUnref refs;
+    DataConsumer consumer(data, append);
+    SCOPED_TRACE(append ? "Append" : "Prepend");
+
+    // Fill leaf
+    CordRepBtree* tree = CordRepBtree::Create(MakeFlat(consumer.Next(3)));
+    for (size_t i = 1; i < max_cap; ++i) {
+      tree = BtreeAdd(tree, append, consumer.Next(3));
+    }
+    ASSERT_THAT(CordToString(tree), Eq(consumer.Consumed()));
+
+    // Fill to maximum at one deep
+    // The leaf is full: this add must create a new root, shared or not.
+    refs.RefIf(shared(), tree);
+    CordRepBtree* result = BtreeAdd(tree, append, consumer.Next(3));
+    ASSERT_THAT(result, IsNode(1));
+    ASSERT_THAT(result, Ne(tree));
+    ASSERT_THAT(CordToString(result), Eq(consumer.Consumed()));
+    tree = result;
+    for (size_t i = max_cap + 1; i < max_cap * max_cap; ++i) {
+      refs.RefIf(shared(), tree);
+      result = BtreeAdd(tree, append, consumer.Next(3));
+      ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree)));
+      ASSERT_THAT(CordToString(result), Eq(consumer.Consumed()));
+      tree = result;
+    }
+
+    // Fill to maximum at two deep
+    // The height 1 tree is full: this add must again create a new root.
+    refs.RefIf(shared(), tree);
+    result = BtreeAdd(tree, append, consumer.Next(3));
+    ASSERT_THAT(result, IsNode(2));
+    ASSERT_THAT(result, Ne(tree));
+    ASSERT_THAT(CordToString(result), Eq(consumer.Consumed()));
+    tree = result;
+    for (size_t i = max_cap * max_cap + 1; i < max_cap * max_cap * max_cap;
+         ++i) {
+      refs.RefIf(shared(), tree);
+      result = BtreeAdd(tree, append, consumer.Next(3));
+      ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree)));
+      ASSERT_THAT(CordToString(result), Eq(consumer.Consumed()));
+      tree = result;
+    }
+
+    CordRep::Unref(tree);
+  }
+}
+
+// Adds (max_cap^3 * 3 + 2) * kMaxFlatLength bytes to a single "abc" leaf in
+// one BtreeAdd() call, and verifies the resulting contents for both append
+// and prepend mode.
+TEST_P(CordRepBtreeTest, AddLargeDataToLeaf) {
+  const size_t max_cap = CordRepBtree::kMaxCapacity;
+  const size_t n = max_cap * max_cap * max_cap * 3 + 2;
+  const std::string data = CreateRandomString(n * kMaxFlatLength);
+
+  for (bool append : {true, false}) {
+    AutoUnref refs;
+    SCOPED_TRACE(append ? "Append" : "Prepend");
+
+    CordRepBtree* leaf = CordRepBtree::Create(MakeFlat("abc"));
+    refs.RefIf(shared(), leaf);
+    CordRepBtree* result = BtreeAdd(leaf, append, data);
+    EXPECT_THAT(CordToString(result), Eq(append ? "abc" + data : data + "abc"));
+    CordRep::Unref(result);
+  }
+}
+
+// Creates a btree from a concat tree of four flats, where the last flat
+// and / or the top level concat node are optionally shared, and verifies
+// the result is a valid tree holding all 26 bytes in order.
+TEST_P(CordRepBtreeDualTest, CreateFromConcat) {
+  AutoUnref refs;
+  CordRep* flats[] = {MakeFlat("abcdefgh"), MakeFlat("ijklm"),
+                      MakeFlat("nopqrstuv"), MakeFlat("wxyz")};
+  auto* left = MakeConcat(flats[0], flats[1]);
+  auto* right = MakeConcat(flats[2], refs.RefIf(first_shared(), flats[3]));
+  auto* concat = refs.RefIf(second_shared(), MakeConcat(left, right));
+  CordRepBtree* result = CordRepBtree::Create(concat);
+  ASSERT_TRUE(CordRepBtree::IsValid(result));
+  EXPECT_THAT(result->length, Eq(26));
+  EXPECT_THAT(CordToString(result), Eq("abcdefghijklmnopqrstuvwxyz"));
+  CordRep::Unref(result);
+}
+
+// Same as CreateFromConcat, but appends the concat tree ("defgh" .. "wxyz")
+// to an existing "abc" leaf.
+TEST_P(CordRepBtreeDualTest, AppendConcat) {
+  AutoUnref refs;
+  CordRep* flats[] = {MakeFlat("defgh"), MakeFlat("ijklm"),
+                      MakeFlat("nopqrstuv"), MakeFlat("wxyz")};
+  auto* left = MakeConcat(flats[0], flats[1]);
+  auto* right = MakeConcat(flats[2], refs.RefIf(first_shared(), flats[3]));
+  auto* concat = refs.RefIf(second_shared(), MakeConcat(left, right));
+  CordRepBtree* result = CordRepBtree::Create(MakeFlat("abc"));
+  result = CordRepBtree::Append(result, concat);
+  ASSERT_TRUE(CordRepBtree::IsValid(result));
+  EXPECT_THAT(result->length, Eq(26));
+  EXPECT_THAT(CordToString(result), Eq("abcdefghijklmnopqrstuvwxyz"));
+  CordRep::Unref(result);
+}
+
+// Same as CreateFromConcat, but prepends the concat tree ("abcdefgh" .. "wx")
+// to an existing "yz" leaf.
+TEST_P(CordRepBtreeDualTest, PrependConcat) {
+  AutoUnref refs;
+  CordRep* flats[] = {MakeFlat("abcdefgh"), MakeFlat("ijklm"),
+                      MakeFlat("nopqrstuv"), MakeFlat("wx")};
+  auto* left = MakeConcat(flats[0], flats[1]);
+  auto* right = MakeConcat(flats[2], refs.RefIf(first_shared(), flats[3]));
+  auto* concat = refs.RefIf(second_shared(), MakeConcat(left, right));
+  CordRepBtree* result = CordRepBtree::Create(MakeFlat("yz"));
+  result =
CordRepBtree::Prepend(result, concat);
+  ASSERT_TRUE(CordRepBtree::IsValid(result));
+  EXPECT_THAT(result->length, Eq(26));
+  EXPECT_THAT(CordToString(result), Eq("abcdefghijklmnopqrstuvwxyz"));
+  CordRep::Unref(result);
+}
+
+// Verifies that Create() on an input that already is a btree returns that
+// same node, shared or not.
+TEST_P(CordRepBtreeTest, CreateFromTreeReturnsTree) {
+  AutoUnref refs;
+  CordRepBtree* leaf = CordRepBtree::Create(MakeFlat("Hello world"));
+  refs.RefIf(shared(), leaf);
+  CordRepBtree* result = CordRepBtree::Create(leaf);
+  EXPECT_THAT(result, Eq(leaf));
+  CordRep::Unref(result);
+}
+
+// Verifies GetCharacter() for every index of a tree built from 3 byte flats
+// plus one trailing substring edge.
+TEST(CordRepBtreeTest, GetCharacter) {
+  size_t n = CordRepBtree::kMaxCapacity * CordRepBtree::kMaxCapacity + 2;
+  std::string data = CreateRandomString(n * 3);
+  CordRepBtree* tree = CreateTree(data, 3);
+  // Add a substring node for good measure.
+  tree = tree->Append(tree, MakeSubstring(4, 5, MakeFlat("abcdefghijklm")));
+  // Offset 4, length 5 of "abcdefghijklm".
+  data += "efghi";
+  for (size_t i = 0; i < data.length(); ++i) {
+    ASSERT_THAT(tree->GetCharacter(i), Eq(data[i]));
+  }
+  CordRep::Unref(tree);
+}
+
+// Verifies that a tree holding a single flat reports as flat for the whole
+// tree and for sub ranges, and that the optional `fragment` output is set to
+// the matching data.
+TEST_P(CordRepBtreeTest, IsFlatSingleFlat) {
+  CordRepBtree* leaf = CordRepBtree::Create(MakeFlat("Hello world"));
+
+  absl::string_view fragment;
+  EXPECT_TRUE(leaf->IsFlat(nullptr));
+  EXPECT_TRUE(leaf->IsFlat(&fragment));
+  EXPECT_THAT(fragment, Eq("Hello world"));
+  fragment = "";
+  EXPECT_TRUE(leaf->IsFlat(0, 11, nullptr));
+  EXPECT_TRUE(leaf->IsFlat(0, 11, &fragment));
+  EXPECT_THAT(fragment, Eq("Hello world"));
+
+  // Arbitrary ranges must check true as well.
+  EXPECT_TRUE(leaf->IsFlat(1, 4, &fragment));
+  EXPECT_THAT(fragment, Eq("ello"));
+  EXPECT_TRUE(leaf->IsFlat(6, 5, &fragment));
+  EXPECT_THAT(fragment, Eq("world"));
+
+  CordRep::Unref(leaf);
+}
+
+// Verifies IsFlat() on a tree with many 3 byte edges: the tree as a whole is
+// not flat, a range matching exactly one edge is flat, and a 4 byte range
+// that crosses an edge boundary is not. `fragment` must be left untouched
+// whenever IsFlat() returns false.
+TEST(CordRepBtreeTest, IsFlatMultiFlat) {
+  size_t n = CordRepBtree::kMaxCapacity * CordRepBtree::kMaxCapacity + 2;
+  std::string data = CreateRandomString(n * 3);
+  CordRepBtree* tree = CreateTree(data, 3);
+  // Add substring nodes for good measure.
+  tree = tree->Append(tree, MakeSubstring(4, 3, MakeFlat("abcdefghijklm")));
+  tree = tree->Append(tree, MakeSubstring(8, 3, MakeFlat("abcdefghijklm")));
+  // "efg" (offset 4) followed by "ijk" (offset 8).
+  data += "efgijk";
+
+  EXPECT_FALSE(tree->IsFlat(nullptr));
+  absl::string_view fragment = "Can't touch this";
+  EXPECT_FALSE(tree->IsFlat(&fragment));
+  EXPECT_THAT(fragment, Eq("Can't touch this"));
+
+  for (size_t offset = 0; offset < data.size(); offset += 3) {
+    EXPECT_TRUE(tree->IsFlat(offset, 3, nullptr));
+    EXPECT_TRUE(tree->IsFlat(offset, 3, &fragment));
+    EXPECT_THAT(fragment, Eq(data.substr(offset, 3)));
+
+    fragment = "Can't touch this";
+    if (offset > 0) {
+      // Range starts in the last byte of the preceding edge.
+      EXPECT_FALSE(tree->IsFlat(offset - 1, 4, nullptr));
+      EXPECT_FALSE(tree->IsFlat(offset - 1, 4, &fragment));
+      EXPECT_THAT(fragment, Eq("Can't touch this"));
+    }
+    if (offset < data.size() - 4) {
+      // Range ends in the first byte of the following edge.
+      EXPECT_FALSE(tree->IsFlat(offset, 4, nullptr));
+      EXPECT_FALSE(tree->IsFlat(offset, 4, &fragment));
+      EXPECT_THAT(fragment, Eq("Can't touch this"));
+    }
+  }
+
+  CordRep::Unref(tree);
+}
+
+#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
+
+// Debug builds only: calling GetAppendBuffer() on a tree that holds an extra
+// reference is expected to terminate the process.
+TEST_P(CordRepBtreeHeightTest, GetAppendBufferNotPrivate) {
+  CordRepBtree* tree = CordRepBtree::Create(MakeExternal("Foo"));
+  CordRepBtree::Ref(tree);
+  EXPECT_DEATH(tree->GetAppendBuffer(1), ".*");
+  // Release both the extra reference and the original one.
+  CordRepBtree::Unref(tree);
+  CordRepBtree::Unref(tree);
+}
+
+#endif  // defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
+
+// A tree whose data edge is an external node must return an empty append
+// buffer, at any tree height.
+TEST_P(CordRepBtreeHeightTest, GetAppendBufferNotFlat) {
+  CordRepBtree* tree = CordRepBtree::Create(MakeExternal("Foo"));
+  // Wrap the leaf in height() parent nodes.
+  for (int i = 1; i <= height(); ++i) {
+    tree = CordRepBtree::New(tree);
+  }
+  EXPECT_THAT(tree->GetAppendBuffer(1), SizeIs(0));
+  CordRepBtree::Unref(tree);
+}
+
+// A tree whose flat holds an extra reference must return an empty append
+// buffer, at any tree height.
+TEST_P(CordRepBtreeHeightTest, GetAppendBufferFlatNotPrivate) {
+  CordRepFlat* flat = MakeFlat("abc");
+  CordRepBtree* tree = CordRepBtree::Create(CordRep::Ref(flat));
+  for (int i = 1; i <= height(); ++i) {
+    tree = CordRepBtree::New(tree);
+  }
+  EXPECT_THAT(tree->GetAppendBuffer(1), SizeIs(0));
+
CordRepBtree::Unref(tree); + CordRep::Unref(flat); +} + +TEST_P(CordRepBtreeHeightTest, GetAppendBufferTreeNotPrivate) { + if (height() == 0) return; + AutoUnref refs; + CordRepFlat* flat = MakeFlat("abc"); + CordRepBtree* tree = CordRepBtree::Create(CordRep::Ref(flat)); + for (int i = 1; i <= height(); ++i) { + if (i == (height() + 1) / 2) refs.Ref(tree); + tree = CordRepBtree::New(tree); + } + EXPECT_THAT(tree->GetAppendBuffer(1), SizeIs(0)); + CordRepBtree::Unref(tree); + CordRep::Unref(flat); +} + +TEST_P(CordRepBtreeHeightTest, GetAppendBufferFlatNoCapacity) { + CordRepFlat* flat = MakeFlat("abc"); + flat->length = flat->Capacity(); + CordRepBtree* tree = CordRepBtree::Create(flat); + for (int i = 1; i <= height(); ++i) { + tree = CordRepBtree::New(tree); + } + EXPECT_THAT(tree->GetAppendBuffer(1), SizeIs(0)); + CordRepBtree::Unref(tree); +} + +TEST_P(CordRepBtreeHeightTest, GetAppendBufferFlatWithCapacity) { + CordRepFlat* flat = MakeFlat("abc"); + CordRepBtree* tree = CordRepBtree::Create(flat); + for (int i = 1; i <= height(); ++i) { + tree = CordRepBtree::New(tree); + } + absl::Span<char> span = tree->GetAppendBuffer(2); + EXPECT_THAT(span, SizeIs(2)); + EXPECT_THAT(span.data(), TypedEq<void*>(flat->Data() + 3)); + EXPECT_THAT(tree->length, Eq(5)); + + size_t avail = flat->Capacity() - 5; + span = tree->GetAppendBuffer(avail + 100); + EXPECT_THAT(span, SizeIs(avail)); + EXPECT_THAT(span.data(), TypedEq<void*>(flat->Data() + 5)); + EXPECT_THAT(tree->length, Eq(5 + avail)); + + CordRepBtree::Unref(tree); +} + +TEST(CordRepBtreeTest, Dump) { + // Handles nullptr + std::stringstream ss; + CordRepBtree::Dump(nullptr, ss); + CordRepBtree::Dump(nullptr, "Once upon a label", ss); + CordRepBtree::Dump(nullptr, "Once upon a label", false, ss); + CordRepBtree::Dump(nullptr, "Once upon a label", true, ss); + + // Cover legal edges + CordRepFlat* flat = MakeFlat("Hello world"); + CordRepExternal* external = MakeExternal("Hello external"); + CordRep* substr_flat = 
MakeSubstring(1, 6, CordRep::Ref(flat)); + CordRep* substr_external = MakeSubstring(2, 7, CordRep::Ref(external)); + + // Build tree + CordRepBtree* tree = CordRepBtree::Create(flat); + tree = CordRepBtree::Append(tree, external); + tree = CordRepBtree::Append(tree, substr_flat); + tree = CordRepBtree::Append(tree, substr_external); + + // Repeat until we have a tree + while (tree->height() == 0) { + tree = CordRepBtree::Append(tree, CordRep::Ref(flat)); + tree = CordRepBtree::Append(tree, CordRep::Ref(external)); + tree = CordRepBtree::Append(tree, CordRep::Ref(substr_flat)); + tree = CordRepBtree::Append(tree, CordRep::Ref(substr_external)); + } + + for (int api = 0; api <= 3; ++api) { + absl::string_view api_scope; + std::stringstream ss; + switch (api) { + case 0: + api_scope = "Bare"; + CordRepBtree::Dump(tree, ss); + break; + case 1: + api_scope = "Label only"; + CordRepBtree::Dump(tree, "Once upon a label", ss); + break; + case 2: + api_scope = "Label no content"; + CordRepBtree::Dump(tree, "Once upon a label", false, ss); + break; + default: + api_scope = "Label and content"; + CordRepBtree::Dump(tree, "Once upon a label", true, ss); + break; + } + SCOPED_TRACE(api_scope); + std::string str = ss.str(); + + // Contains Node(depth) / Leaf and private / shared indicators + EXPECT_THAT(str, AllOf(HasSubstr("Node(1)"), HasSubstr("Leaf"), + HasSubstr("Private"), HasSubstr("Shared"))); + + // Contains length and start offset of all data edges + EXPECT_THAT(str, AllOf(HasSubstr("len = 11"), HasSubstr("len = 14"), + HasSubstr("len = 6"), HasSubstr("len = 7"), + HasSubstr("start = 1"), HasSubstr("start = 2"))); + + // Contains address of all data edges + EXPECT_THAT( + str, AllOf(HasSubstr(absl::StrCat("0x", absl::Hex(flat))), + HasSubstr(absl::StrCat("0x", absl::Hex(external))), + HasSubstr(absl::StrCat("0x", absl::Hex(substr_flat))), + HasSubstr(absl::StrCat("0x", absl::Hex(substr_external))))); + + if (api != 0) { + // Contains label + EXPECT_THAT(str, 
HasSubstr("Once upon a label")); + } + + if (api != 3) { + // Does not contain contents + EXPECT_THAT(str, Not(AnyOf((HasSubstr("data = \"Hello world\""), + HasSubstr("data = \"Hello external\""), + HasSubstr("data = \"ello w\""), + HasSubstr("data = \"llo ext\""))))); + } else { + // Contains contents + EXPECT_THAT(str, AllOf((HasSubstr("data = \"Hello world\""), + HasSubstr("data = \"Hello external\""), + HasSubstr("data = \"ello w\""), + HasSubstr("data = \"llo ext\"")))); + } + } + + CordRep::Unref(tree); +} + +TEST(CordRepBtreeTest, IsValid) { + EXPECT_FALSE(CordRepBtree::IsValid(nullptr)); + + CordRepBtree* empty = CordRepBtree::New(0); + EXPECT_TRUE(CordRepBtree::IsValid(empty)); + CordRep::Unref(empty); + + for (bool as_tree : {false, true}) { + CordRepBtree* leaf = CordRepBtree::Create(MakeFlat("abc")); + CordRepBtree* tree = as_tree ? CordRepBtree::New(leaf) : nullptr; + CordRepBtree* check = as_tree ? tree : leaf; + + ASSERT_TRUE(CordRepBtree::IsValid(check)); + leaf->length--; + EXPECT_FALSE(CordRepBtree::IsValid(check)); + leaf->length++; + + ASSERT_TRUE(CordRepBtree::IsValid(check)); + leaf->tag--; + EXPECT_FALSE(CordRepBtree::IsValid(check)); + leaf->tag++; + + // Height + ASSERT_TRUE(CordRepBtree::IsValid(check)); + leaf->storage[0] = static_cast<uint8_t>(CordRepBtree::kMaxHeight + 1); + EXPECT_FALSE(CordRepBtree::IsValid(check)); + leaf->storage[0] = 1; + EXPECT_FALSE(CordRepBtree::IsValid(check)); + leaf->storage[0] = 0; + + // Begin + ASSERT_TRUE(CordRepBtree::IsValid(check)); + const uint8_t begin = leaf->storage[1]; + leaf->storage[1] = static_cast<uint8_t>(CordRepBtree::kMaxCapacity); + EXPECT_FALSE(CordRepBtree::IsValid(check)); + leaf->storage[1] = 2; + EXPECT_FALSE(CordRepBtree::IsValid(check)); + leaf->storage[1] = begin; + + // End + ASSERT_TRUE(CordRepBtree::IsValid(check)); + const uint8_t end = leaf->storage[2]; + leaf->storage[2] = static_cast<uint8_t>(CordRepBtree::kMaxCapacity + 1); + EXPECT_FALSE(CordRepBtree::IsValid(check)); + 
leaf->storage[2] = end; + + // DataEdge tag and value + ASSERT_TRUE(CordRepBtree::IsValid(check)); + CordRep* const edge = leaf->Edges()[0]; + const uint8_t tag = edge->tag; + CordRepBtreeTestPeer::SetEdge(leaf, begin, nullptr); + EXPECT_FALSE(CordRepBtree::IsValid(check)); + CordRepBtreeTestPeer::SetEdge(leaf, begin, edge); + edge->tag = BTREE; + EXPECT_FALSE(CordRepBtree::IsValid(check)); + edge->tag = tag; + + if (as_tree) { + ASSERT_TRUE(CordRepBtree::IsValid(check)); + leaf->length--; + EXPECT_FALSE(CordRepBtree::IsValid(check)); + leaf->length++; + + // Height + ASSERT_TRUE(CordRepBtree::IsValid(check)); + tree->storage[0] = static_cast<uint8_t>(2); + EXPECT_FALSE(CordRepBtree::IsValid(check)); + tree->storage[0] = 1; + + // Btree edge + ASSERT_TRUE(CordRepBtree::IsValid(check)); + CordRep* const edge = tree->Edges()[0]; + const uint8_t tag = edge->tag; + edge->tag = FLAT; + EXPECT_FALSE(CordRepBtree::IsValid(check)); + edge->tag = tag; + } + + ASSERT_TRUE(CordRepBtree::IsValid(check)); + CordRep::Unref(check); + } +} + +TEST(CordRepBtreeTest, AssertValid) { + CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc")); + const CordRepBtree* ctree = tree; + EXPECT_THAT(CordRepBtree::AssertValid(tree), Eq(tree)); + EXPECT_THAT(CordRepBtree::AssertValid(ctree), Eq(ctree)); + +#if defined(GTEST_HAS_DEATH_TEST) + CordRepBtree* nulltree = nullptr; + const CordRepBtree* cnulltree = nullptr; + EXPECT_DEBUG_DEATH( + EXPECT_THAT(CordRepBtree::AssertValid(nulltree), Eq(nulltree)), ".*"); + EXPECT_DEBUG_DEATH( + EXPECT_THAT(CordRepBtree::AssertValid(cnulltree), Eq(cnulltree)), ".*"); + + tree->length--; + EXPECT_DEBUG_DEATH(EXPECT_THAT(CordRepBtree::AssertValid(tree), Eq(tree)), + ".*"); + EXPECT_DEBUG_DEATH(EXPECT_THAT(CordRepBtree::AssertValid(ctree), Eq(ctree)), + ".*"); + tree->length++; +#endif + CordRep::Unref(tree); +} + +TEST(CordRepBtreeTest, CheckAssertValidShallowVsDeep) { + // Restore exhaustive validation on any exit. 
+ const bool exhaustive_validation = cord_btree_exhaustive_validation.load(); + auto cleanup = absl::MakeCleanup([exhaustive_validation] { + cord_btree_exhaustive_validation.store(exhaustive_validation); + }); + + // Create a tree of at least 2 levels, and mess with the original flat, which + // should go undetected in shallow mode as the flat is too far away, but + // should be detected in forced non-shallow mode. + CordRep* flat = MakeFlat("abc"); + CordRepBtree* tree = CordRepBtree::Create(flat); + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + const size_t n = max_cap * max_cap * 2; + for (size_t i = 0; i < n; ++i) { + tree = CordRepBtree::Append(tree, MakeFlat("Hello world")); + } + flat->length = 100; + + cord_btree_exhaustive_validation.store(false); + EXPECT_FALSE(CordRepBtree::IsValid(tree)); + EXPECT_TRUE(CordRepBtree::IsValid(tree, true)); + EXPECT_FALSE(CordRepBtree::IsValid(tree, false)); + CordRepBtree::AssertValid(tree); + CordRepBtree::AssertValid(tree, true); +#if defined(GTEST_HAS_DEATH_TEST) + EXPECT_DEBUG_DEATH(CordRepBtree::AssertValid(tree, false), ".*"); +#endif + + cord_btree_exhaustive_validation.store(true); + EXPECT_FALSE(CordRepBtree::IsValid(tree)); + EXPECT_FALSE(CordRepBtree::IsValid(tree, true)); + EXPECT_FALSE(CordRepBtree::IsValid(tree, false)); +#if defined(GTEST_HAS_DEATH_TEST) + EXPECT_DEBUG_DEATH(CordRepBtree::AssertValid(tree), ".*"); + EXPECT_DEBUG_DEATH(CordRepBtree::AssertValid(tree, true), ".*"); +#endif + + flat->length = 3; + CordRep::Unref(tree); +} + +TEST_P(CordRepBtreeTest, Rebuild) { + for (size_t size : {3, 8, 100, 10000, 1000000}) { + SCOPED_TRACE(absl::StrCat("Rebuild @", size)); + + std::vector<CordRepFlat*> flats; + for (int i = 0; i < size; ++i) { + flats.push_back(CordRepFlat::New(2)); + flats.back()->Data()[0] = 'x'; + flats.back()->length = 1; + } + + // Build the tree into 'right', and each so many 'split_limit' edges, + // combine 'left' + 'right' into a new 'left', and start a new 'right'. 
+ // This guarantees we get a reasonable amount of chaos in the tree. + size_t split_count = 0; + size_t split_limit = 3; + auto it = flats.begin(); + CordRepBtree* left = nullptr; + CordRepBtree* right = CordRepBtree::New(*it); + while (++it != flats.end()) { + if (++split_count >= split_limit) { + split_limit += split_limit / 16; + left = left ? CordRepBtree::Append(left, right) : right; + right = CordRepBtree::New(*it); + } else { + right = CordRepBtree::Append(right, *it); + } + } + + // Finalize tree + left = left ? CordRepBtree::Append(left, right) : right; + + // Rebuild + AutoUnref ref; + left = ref.Add(CordRepBtree::Rebuild(ref.RefIf(shared(), left))); + ASSERT_TRUE(CordRepBtree::IsValid(left)); + + // Verify we have the exact same edges in the exact same order. + bool ok = true; + it = flats.begin(); + CordVisitReps(left, [&](CordRep* edge) { + if (edge->tag < FLAT) return; + ok = ok && (it != flats.end() && *it++ == edge); + }); + EXPECT_TRUE(ok && it == flats.end()) << "Rebuild edges mismatch"; + } +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_consume.cc b/third_party/abseil-cpp/absl/strings/internal/cord_rep_consume.cc new file mode 100644 index 0000000000..81514543db --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_consume.cc @@ -0,0 +1,129 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/internal/cord_rep_consume.h"
+
+#include <array>
+#include <utility>
+
+#include "absl/container/inlined_vector.h"
+#include "absl/functional/function_ref.h"
+#include "absl/strings/internal/cord_internal.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace cord_internal {
+
+namespace {
+
+// Unrefs the provided `substring`, and returns `substring->child`
+// Adds or assumes a reference on `substring->child`
+// If `substring` is not shared, it is deleted and the reference it held on
+// the child is handed to the caller. If it is shared, the child receives an
+// extra reference before the reference on `substring` is released.
+CordRep* ClipSubstring(CordRepSubstring* substring) {
+  // Read the child first: `substring` may be deleted below.
+  CordRep* child = substring->child;
+  if (substring->refcount.IsOne()) {
+    delete substring;
+  } else {
+    CordRep::Ref(child);
+    CordRep::Unref(substring);
+  }
+  return child;
+}
+
+// Unrefs the provided `concat`, and returns `{concat->left, concat->right}`
+// Adds or assumes a reference on `concat->left` and `concat->right`.
+// Returns an array of 2 elements containing the left and right nodes.
+std::array<CordRep*, 2> ClipConcat(CordRepConcat* concat) {
+  // Read both children first: `concat` may be deleted below.
+  std::array<CordRep*, 2> result{concat->left, concat->right};
+  if (concat->refcount.IsOne()) {
+    delete concat;
+  } else {
+    CordRep::Ref(result[0]);
+    CordRep::Ref(result[1]);
+    CordRep::Unref(concat);
+  }
+  return result;
+}
+
+// Consumes `rep`, invoking `consume_fn(node, offset, length)` for each node
+// that is neither a CONCAT nor a SUBSTRING node, where `offset` and `length`
+// identify the range of `node` that is part of the consumed data.
+// Nodes are visited from left to right if `forward` is true, and from right
+// to left otherwise. All CONCAT and SUBSTRING nodes are clipped, and child
+// nodes that are completely outside of the requested range are unreffed
+// without being passed to `consume_fn`.
+void Consume(bool forward, CordRep* rep, ConsumeFn consume_fn) {
+  // Range of `rep` that remains to be consumed.
+  size_t offset = 0;
+  size_t length = rep->length;
+  // A pending node with the range of that node that must be consumed.
+  struct Entry {
+    CordRep* rep;
+    size_t offset;
+    size_t length;
+  };
+  // Explicit stack of nodes to visit after the current node, which avoids
+  // recursion. Up to 40 entries are stored inline.
+  absl::InlinedVector<Entry, 40> stack;
+
+  for (;;) {
+    if (rep->tag == CONCAT) {
+      std::array<CordRep*, 2> res = ClipConcat(rep->concat());
+      CordRep* left = res[0];
+      CordRep* right = res[1];
+
+      if (left->length <= offset) {
+        // Don't need left node
+        offset -= left->length;
+        CordRep::Unref(left);
+        rep = right;
+        continue;
+      }
+
+      size_t length_left = left->length - offset;
+      if (length_left >= length) {
+        // Don't need right node
+        CordRep::Unref(right);
+        rep = left;
+        continue;
+      }
+
+      // Need both nodes
+      // Continue with the child that comes first in the requested direction,
+      // and park the other child on the stack.
+      size_t length_right = length - length_left;
+      if (forward) {
+        stack.push_back({right, 0, length_right});
+        rep = left;
+        length = length_left;
+      } else {
+        stack.push_back({left, offset, length_left});
+        rep = right;
+        offset = 0;
+        length = length_right;
+      }
+    } else if (rep->tag == SUBSTRING) {
+      // Fold the start of the substring into the offset into its child.
+      offset += rep->substring()->start;
+      rep = ClipSubstring(rep->substring());
+    } else {
+      consume_fn(rep, offset, length);
+      if (stack.empty()) return;
+
+      rep = stack.back().rep;
+      offset = stack.back().offset;
+      length = stack.back().length;
+      stack.pop_back();
+    }
+  }
+}
+
+}  // namespace
+
+// Consumes `rep` in forward (left to right) order.
+void Consume(CordRep* rep, ConsumeFn consume_fn) {
+  return Consume(true, rep, std::move(consume_fn));
+}
+
+// Consumes `rep` in reverse (right to left) order.
+void ReverseConsume(CordRep* rep, ConsumeFn consume_fn) {
+  return Consume(false, rep, std::move(consume_fn));
+}
+
+}  // namespace cord_internal
+ABSL_NAMESPACE_END
+}  // namespace absl
diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_consume.h
b/third_party/abseil-cpp/absl/strings/internal/cord_rep_consume.h new file mode 100644 index 0000000000..d46fca2b21 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_consume.h @@ -0,0 +1,50 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_CONSUME_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_CONSUME_H_ + +#include <functional> + +#include "absl/functional/function_ref.h" +#include "absl/strings/internal/cord_internal.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// Functor for the Consume() and ReverseConsume() functions: +// void ConsumeFunc(CordRep* rep, size_t offset, size_t length); +// See the Consume() and ReverseConsume() function comments for documentation. +using ConsumeFn = FunctionRef<void(CordRep*, size_t, size_t)>; + +// Consume() and ReverseConsume() consume CONCAT based trees and invoke the +// provided functor with the contained nodes in the proper forward or reverse +// order, which is used to convert CONCAT trees into other tree or cord data. +// All CONCAT and SUBSTRING nodes are processed internally. The 'offset` +// parameter of the functor is non-zero for any nodes below SUBSTRING nodes. +// It's up to the caller to form these back into SUBSTRING nodes or otherwise +// store offset / prefix information. 
These functions are intended to be used +// only for migration / transitional code where due to factors such as ODR +// violations, we can not 100% guarantee that all code respects 'new format' +// settings and flags, so we need to be able to parse old data on the fly until +// all old code is deprecated / no longer the default format. +void Consume(CordRep* rep, ConsumeFn consume_fn); +void ReverseConsume(CordRep* rep, ConsumeFn consume_fn); + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_CONSUME_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_consume_test.cc b/third_party/abseil-cpp/absl/strings/internal/cord_rep_consume_test.cc new file mode 100644 index 0000000000..e507824b4f --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_consume_test.cc @@ -0,0 +1,173 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_rep_consume.h" + +#include <functional> +#include <utility> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_flat.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using testing::InSequence; +using testing::MockFunction; + +// Returns the depth of a node +int Depth(const CordRep* rep) { + return (rep->tag == CONCAT) ? 
rep->concat()->depth() : 0; +} + +// Creates a concatenation of the specified nodes. +CordRepConcat* CreateConcat(CordRep* left, CordRep* right) { + auto* concat = new CordRepConcat(); + concat->tag = CONCAT; + concat->left = left; + concat->right = right; + concat->length = left->length + right->length; + concat->set_depth(1 + (std::max)(Depth(left), Depth(right))); + return concat; +} + +// Creates a flat with the length set to `length` +CordRepFlat* CreateFlatWithLength(size_t length) { + auto* flat = CordRepFlat::New(length); + flat->length = length; + return flat; +} + +// Creates a substring node on the specified child. +CordRepSubstring* CreateSubstring(CordRep* child, size_t start, size_t length) { + auto* rep = new CordRepSubstring(); + rep->length = length; + rep->tag = SUBSTRING; + rep->start = start; + rep->child = child; + return rep; +} + +// Flats we use in the tests +CordRep* flat[6]; + +// Creates a test tree +CordRep* CreateTestTree() { + flat[0] = CreateFlatWithLength(1); + flat[1] = CreateFlatWithLength(7); + CordRepConcat* left = CreateConcat(flat[0], CreateSubstring(flat[1], 2, 4)); + + flat[2] = CreateFlatWithLength(9); + flat[3] = CreateFlatWithLength(13); + CordRepConcat* right1 = CreateConcat(flat[2], flat[3]); + + flat[4] = CreateFlatWithLength(15); + flat[5] = CreateFlatWithLength(19); + CordRepConcat* right2 = CreateConcat(flat[4], flat[5]); + + CordRepConcat* right = CreateConcat(right1, CreateSubstring(right2, 5, 17)); + return CreateConcat(left, right); +} + +TEST(CordRepConsumeTest, Consume) { + InSequence in_sequence; + CordRep* tree = CreateTestTree(); + MockFunction<void(CordRep*, size_t, size_t)> consume; + EXPECT_CALL(consume, Call(flat[0], 0, 1)); + EXPECT_CALL(consume, Call(flat[1], 2, 4)); + EXPECT_CALL(consume, Call(flat[2], 0, 9)); + EXPECT_CALL(consume, Call(flat[3], 0, 13)); + EXPECT_CALL(consume, Call(flat[4], 5, 10)); + EXPECT_CALL(consume, Call(flat[5], 0, 7)); + Consume(tree, consume.AsStdFunction()); + for (CordRep* 
rep : flat) {
+    EXPECT_TRUE(rep->refcount.IsOne());
+    CordRep::Unref(rep);
+  }
+}
+
+// Consuming a shared tree must visit the same flats in the same order, but
+// must leave the tree intact: every flat is referenced by both the consumer
+// and the tree, so no flat may have a single reference afterwards.
+TEST(CordRepConsumeTest, ConsumeShared) {
+  InSequence in_sequence;
+  CordRep* tree = CreateTestTree();
+  MockFunction<void(CordRep*, size_t, size_t)> consume;
+  EXPECT_CALL(consume, Call(flat[0], 0, 1));
+  EXPECT_CALL(consume, Call(flat[1], 2, 4));
+  EXPECT_CALL(consume, Call(flat[2], 0, 9));
+  EXPECT_CALL(consume, Call(flat[3], 0, 13));
+  EXPECT_CALL(consume, Call(flat[4], 5, 10));
+  EXPECT_CALL(consume, Call(flat[5], 0, 7));
+  Consume(CordRep::Ref(tree), consume.AsStdFunction());
+  for (CordRep* rep : flat) {
+    EXPECT_FALSE(rep->refcount.IsOne());
+    CordRep::Unref(rep);
+  }
+  // Release the reference we kept on the tree itself.
+  CordRep::Unref(tree);
+}
+
+// ReverseConsume() on a tree that is not shared must visit all flats from
+// right to left, with the same offsets and lengths as Consume().
+TEST(CordRepConsumeTest, Reverse) {
+  InSequence in_sequence;
+  CordRep* tree = CreateTestTree();
+  MockFunction<void(CordRep*, size_t, size_t)> consume;
+  EXPECT_CALL(consume, Call(flat[5], 0, 7));
+  EXPECT_CALL(consume, Call(flat[4], 5, 10));
+  EXPECT_CALL(consume, Call(flat[3], 0, 13));
+  EXPECT_CALL(consume, Call(flat[2], 0, 9));
+  EXPECT_CALL(consume, Call(flat[1], 2, 4));
+  EXPECT_CALL(consume, Call(flat[0], 0, 1));
+  ReverseConsume(tree, consume.AsStdFunction());
+  for (CordRep* rep : flat) {
+    EXPECT_TRUE(rep->refcount.IsOne());
+    CordRep::Unref(rep);
+  }
+}
+
+// ReverseConsume() on a shared tree: same expectations as ConsumeShared, but
+// with all flats visited from right to left.
+TEST(CordRepConsumeTest, ReverseShared) {
+  InSequence in_sequence;
+  CordRep* tree = CreateTestTree();
+  MockFunction<void(CordRep*, size_t, size_t)> consume;
+  EXPECT_CALL(consume, Call(flat[5], 0, 7));
+  EXPECT_CALL(consume, Call(flat[4], 5, 10));
+  EXPECT_CALL(consume, Call(flat[3], 0, 13));
+  EXPECT_CALL(consume, Call(flat[2], 0, 9));
+  EXPECT_CALL(consume, Call(flat[1], 2, 4));
+  EXPECT_CALL(consume, Call(flat[0], 0, 1));
+  ReverseConsume(CordRep::Ref(tree), consume.AsStdFunction());
+  for (CordRep* rep : flat) {
+    EXPECT_FALSE(rep->refcount.IsOne());
+    CordRep::Unref(rep);
+  }
+  // Release the reference we kept on the tree itself.
+  CordRep::Unref(tree);
+}
+
+// A flat that is completely outside of the range of a substring must not be
+// passed to the functor: the substring starts at offset 15, which is past
+// the 10 bytes of `flat1`, so only `flat2` is visited, at offset 5.
+TEST(CordRepConsumeTest, UnreachableFlat) {
+  InSequence in_sequence;
+  CordRepFlat* flat1 = CreateFlatWithLength(10);
+  CordRepFlat* flat2 = CreateFlatWithLength(20);
+  CordRepConcat* concat = CreateConcat(flat1, flat2);
+  CordRepSubstring* tree = CreateSubstring(concat, 15, 10);
+  MockFunction<void(CordRep*, size_t, size_t)> consume;
+  EXPECT_CALL(consume, Call(flat2, 5, 10));
+  Consume(tree, consume.AsStdFunction());
+  EXPECT_TRUE(flat2->refcount.IsOne());
+  // Only `flat2` is released here; the test holds no reference on `flat1`
+  // after the Consume() call.
+  CordRep::Unref(flat2);
+}
+
+}  // namespace
+}  // namespace cord_internal
+ABSL_NAMESPACE_END
+}  // namespace absl
diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_flat.h b/third_party/abseil-cpp/absl/strings/internal/cord_rep_flat.h
new file mode 100644
index 0000000000..4d0f988697
--- /dev/null
+++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_flat.h
@@ -0,0 +1,146 @@
+// Copyright 2020 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_FLAT_H_
+#define ABSL_STRINGS_INTERNAL_CORD_REP_FLAT_H_
+
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+
+#include "absl/strings/internal/cord_internal.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace cord_internal {
+
+// Note: all constants below are never ODR used and internal to cord, we define
+// these as static constexpr to avoid 'in struct' definition and usage clutter.
// Returns `n / m` rounded up to the next integer, i.e. the smallest `q` such
// that `q * m >= n`. Requires `m != 0`.
//
// Written as quotient plus a remainder test rather than `(n + m - 1) / m` so
// that the result is correct for every `n`, including values close to the
// maximum of size_t where `n + m - 1` would wrap around.
// Kept as a single return expression so it remains a valid C++11 constexpr.
constexpr size_t DivUp(size_t n, size_t m) {
  return n / m + ((n % m != 0) ? 1 : 0);
}
// Maps a flat tag back to the allocation size (in bytes) it encodes.
// Tags up to and including 129 encode sizes in 8 byte steps, with tag 129
// corresponding to 1024 bytes; larger tags continue from 1024 in 32 byte
// steps.
constexpr size_t TagToAllocatedSize(uint8_t tag) {
  return (tag > 129) ? (size_t{1024} + size_t{32} * (tag - 129))
                     : (size_t{8} * (tag - 1));
}
+ size_t Capacity() const { return TagToLength(tag); } + + // Returns the allocated size (payload + overhead) of this instance. + size_t AllocatedSize() const { return TagToAllocatedSize(tag); } +}; + +// Now that CordRepFlat is defined, we can define CordRep's helper casts: +inline CordRepFlat* CordRep::flat() { + assert(tag >= FLAT && tag <= MAX_FLAT_TAG); + return reinterpret_cast<CordRepFlat*>(this); +} + +inline const CordRepFlat* CordRep::flat() const { + assert(tag >= FLAT && tag <= MAX_FLAT_TAG); + return reinterpret_cast<const CordRepFlat*>(this); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_FLAT_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_ring.cc b/third_party/abseil-cpp/absl/strings/internal/cord_rep_ring.cc new file mode 100644 index 0000000000..07c77eb3e5 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_ring.cc @@ -0,0 +1,771 @@ +// Copyright 2020 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "absl/strings/internal/cord_rep_ring.h" + +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iostream> +#include <limits> +#include <memory> +#include <string> + +#include "absl/base/internal/raw_logging.h" +#include "absl/base/internal/throw_delegate.h" +#include "absl/base/macros.h" +#include "absl/container/inlined_vector.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_consume.h" +#include "absl/strings/internal/cord_rep_flat.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +namespace { + +using index_type = CordRepRing::index_type; + +enum class Direction { kForward, kReversed }; + +inline bool IsFlatOrExternal(CordRep* rep) { + return rep->IsFlat() || rep->IsExternal(); +} + +// Verifies that n + extra <= kMaxCapacity: throws std::length_error otherwise. +inline void CheckCapacity(size_t n, size_t extra) { + if (ABSL_PREDICT_FALSE(extra > CordRepRing::kMaxCapacity - n)) { + base_internal::ThrowStdLengthError("Maximum capacity exceeded"); + } +} + +// Creates a flat from the provided string data, allocating up to `extra` +// capacity in the returned flat depending on kMaxFlatLength limitations. +// Requires `len` to be less or equal to `kMaxFlatLength` +CordRepFlat* CreateFlat(const char* s, size_t n, size_t extra = 0) { // NOLINT + assert(n <= kMaxFlatLength); + auto* rep = CordRepFlat::New(n + extra); + rep->length = n; + memcpy(rep->Data(), s, n); + return rep; +} + +// Unrefs the entries in `[head, tail)`. +// Requires all entries to be a FLAT or EXTERNAL node. 
+void UnrefEntries(const CordRepRing* rep, index_type head, index_type tail) { + rep->ForEach(head, tail, [rep](index_type ix) { + CordRep* child = rep->entry_child(ix); + if (!child->refcount.Decrement()) { + if (child->tag >= FLAT) { + CordRepFlat::Delete(child->flat()); + } else { + CordRepExternal::Delete(child->external()); + } + } + }); +} + +} // namespace + +std::ostream& operator<<(std::ostream& s, const CordRepRing& rep) { + // Note: 'pos' values are defined as size_t (for overflow reasons), but that + // prints really awkward for small prepended values such as -5. ssize_t is not + // portable (POSIX), so we use ptrdiff_t instead to cast to signed values. + s << " CordRepRing(" << &rep << ", length = " << rep.length + << ", head = " << rep.head_ << ", tail = " << rep.tail_ + << ", cap = " << rep.capacity_ << ", rc = " << rep.refcount.Get() + << ", begin_pos_ = " << static_cast<ptrdiff_t>(rep.begin_pos_) << ") {\n"; + CordRepRing::index_type head = rep.head(); + do { + CordRep* child = rep.entry_child(head); + s << " entry[" << head << "] length = " << rep.entry_length(head) + << ", child " << child << ", clen = " << child->length + << ", tag = " << static_cast<int>(child->tag) + << ", rc = " << child->refcount.Get() + << ", offset = " << rep.entry_data_offset(head) + << ", end_pos = " << static_cast<ptrdiff_t>(rep.entry_end_pos(head)) + << "\n"; + head = rep.advance(head); + } while (head != rep.tail()); + return s << "}\n"; +} + +void CordRepRing::AddDataOffset(index_type index, size_t n) { + entry_data_offset()[index] += static_cast<offset_type>(n); +} + +void CordRepRing::SubLength(index_type index, size_t n) { + entry_end_pos()[index] -= n; +} + +class CordRepRing::Filler { + public: + Filler(CordRepRing* rep, index_type pos) : rep_(rep), head_(pos), pos_(pos) {} + + index_type head() const { return head_; } + index_type pos() const { return pos_; } + + void Add(CordRep* child, size_t offset, pos_type end_pos) { + rep_->entry_end_pos()[pos_] = 
end_pos; + rep_->entry_child()[pos_] = child; + rep_->entry_data_offset()[pos_] = static_cast<offset_type>(offset); + pos_ = rep_->advance(pos_); + } + + private: + CordRepRing* rep_; + index_type head_; + index_type pos_; +}; + +constexpr size_t CordRepRing::kMaxCapacity; // NOLINT: needed for c++11 + +bool CordRepRing::IsValid(std::ostream& output) const { + if (capacity_ == 0) { + output << "capacity == 0"; + return false; + } + + if (head_ >= capacity_ || tail_ >= capacity_) { + output << "head " << head_ << " and/or tail " << tail_ << "exceed capacity " + << capacity_; + return false; + } + + const index_type back = retreat(tail_); + size_t pos_length = Distance(begin_pos_, entry_end_pos(back)); + if (pos_length != length) { + output << "length " << length << " does not match positional length " + << pos_length << " from begin_pos " << begin_pos_ << " and entry[" + << back << "].end_pos " << entry_end_pos(back); + return false; + } + + index_type head = head_; + pos_type begin_pos = begin_pos_; + do { + pos_type end_pos = entry_end_pos(head); + size_t entry_length = Distance(begin_pos, end_pos); + if (entry_length == 0) { + output << "entry[" << head << "] has an invalid length " << entry_length + << " from begin_pos " << begin_pos << " and end_pos " << end_pos; + return false; + } + + CordRep* child = entry_child(head); + if (child == nullptr) { + output << "entry[" << head << "].child == nullptr"; + return false; + } + if (child->tag < FLAT && child->tag != EXTERNAL) { + output << "entry[" << head << "].child has an invalid tag " + << static_cast<int>(child->tag); + return false; + } + + size_t offset = entry_data_offset(head); + if (offset >= child->length || entry_length > child->length - offset) { + output << "entry[" << head << "] has offset " << offset + << " and entry length " << entry_length + << " which are outside of the child's length of " << child->length; + return false; + } + + begin_pos = end_pos; + head = advance(head); + } while (head != 
tail_); + + return true; +} + +#ifdef EXTRA_CORD_RING_VALIDATION +CordRepRing* CordRepRing::Validate(CordRepRing* rep, const char* file, + int line) { + if (!rep->IsValid(std::cerr)) { + std::cerr << "\nERROR: CordRepRing corrupted"; + if (line) std::cerr << " at line " << line; + if (file) std::cerr << " in file " << file; + std::cerr << "\nContent = " << *rep; + abort(); + } + return rep; +} +#endif // EXTRA_CORD_RING_VALIDATION + +CordRepRing* CordRepRing::New(size_t capacity, size_t extra) { + CheckCapacity(capacity, extra); + + size_t size = AllocSize(capacity += extra); + void* mem = ::operator new(size); + auto* rep = new (mem) CordRepRing(static_cast<index_type>(capacity)); + rep->tag = RING; + rep->capacity_ = static_cast<index_type>(capacity); + rep->begin_pos_ = 0; + return rep; +} + +void CordRepRing::SetCapacityForTesting(size_t capacity) { + // Adjust for the changed layout + assert(capacity <= capacity_); + assert(head() == 0 || head() < tail()); + memmove(Layout::Partial(capacity).Pointer<1>(data_) + head(), + Layout::Partial(capacity_).Pointer<1>(data_) + head(), + entries() * sizeof(Layout::ElementType<1>)); + memmove(Layout::Partial(capacity, capacity).Pointer<2>(data_) + head(), + Layout::Partial(capacity_, capacity_).Pointer<2>(data_) + head(), + entries() * sizeof(Layout::ElementType<2>)); + capacity_ = static_cast<index_type>(capacity); +} + +void CordRepRing::Delete(CordRepRing* rep) { + assert(rep != nullptr && rep->IsRing()); +#if defined(__cpp_sized_deallocation) + size_t size = AllocSize(rep->capacity_); + rep->~CordRepRing(); + ::operator delete(rep, size); +#else + rep->~CordRepRing(); + ::operator delete(rep); +#endif +} + +void CordRepRing::Destroy(CordRepRing* rep) { + UnrefEntries(rep, rep->head(), rep->tail()); + Delete(rep); +} + +template <bool ref> +void CordRepRing::Fill(const CordRepRing* src, index_type head, + index_type tail) { + this->length = src->length; + head_ = 0; + tail_ = advance(0, src->entries(head, tail)); + 
begin_pos_ = src->begin_pos_; + + // TODO(mvels): there may be opportunities here for large buffers. + auto* dst_pos = entry_end_pos(); + auto* dst_child = entry_child(); + auto* dst_offset = entry_data_offset(); + src->ForEach(head, tail, [&](index_type index) { + *dst_pos++ = src->entry_end_pos(index); + CordRep* child = src->entry_child(index); + *dst_child++ = ref ? CordRep::Ref(child) : child; + *dst_offset++ = src->entry_data_offset(index); + }); +} + +CordRepRing* CordRepRing::Copy(CordRepRing* rep, index_type head, + index_type tail, size_t extra) { + CordRepRing* newrep = CordRepRing::New(rep->entries(head, tail), extra); + newrep->Fill<true>(rep, head, tail); + CordRep::Unref(rep); + return newrep; +} + +CordRepRing* CordRepRing::Mutable(CordRepRing* rep, size_t extra) { + // Get current number of entries, and check for max capacity. + size_t entries = rep->entries(); + + if (!rep->refcount.IsMutable()) { + return Copy(rep, rep->head(), rep->tail(), extra); + } else if (entries + extra > rep->capacity()) { + const size_t min_grow = rep->capacity() + rep->capacity() / 2; + const size_t min_extra = (std::max)(extra, min_grow - entries); + CordRepRing* newrep = CordRepRing::New(entries, min_extra); + newrep->Fill<false>(rep, rep->head(), rep->tail()); + CordRepRing::Delete(rep); + return newrep; + } else { + return rep; + } +} + +Span<char> CordRepRing::GetAppendBuffer(size_t size) { + assert(refcount.IsMutable()); + index_type back = retreat(tail_); + CordRep* child = entry_child(back); + if (child->tag >= FLAT && child->refcount.IsMutable()) { + size_t capacity = child->flat()->Capacity(); + pos_type end_pos = entry_end_pos(back); + size_t data_offset = entry_data_offset(back); + size_t entry_length = Distance(entry_begin_pos(back), end_pos); + size_t used = data_offset + entry_length; + if (size_t n = (std::min)(capacity - used, size)) { + child->length = data_offset + entry_length + n; + entry_end_pos()[back] = end_pos + n; + this->length += n; + return 
{child->flat()->Data() + used, n}; + } + } + return {nullptr, 0}; +} + +Span<char> CordRepRing::GetPrependBuffer(size_t size) { + assert(refcount.IsMutable()); + CordRep* child = entry_child(head_); + size_t data_offset = entry_data_offset(head_); + if (data_offset && child->refcount.IsMutable() && child->tag >= FLAT) { + size_t n = (std::min)(data_offset, size); + this->length += n; + begin_pos_ -= n; + data_offset -= n; + entry_data_offset()[head_] = static_cast<offset_type>(data_offset); + return {child->flat()->Data() + data_offset, n}; + } + return {nullptr, 0}; +} + +CordRepRing* CordRepRing::CreateFromLeaf(CordRep* child, size_t offset, + size_t len, size_t extra) { + CordRepRing* rep = CordRepRing::New(1, extra); + rep->head_ = 0; + rep->tail_ = rep->advance(0); + rep->length = len; + rep->entry_end_pos()[0] = len; + rep->entry_child()[0] = child; + rep->entry_data_offset()[0] = static_cast<offset_type>(offset); + return Validate(rep); +} + +CordRepRing* CordRepRing::CreateSlow(CordRep* child, size_t extra) { + CordRepRing* rep = nullptr; + Consume(child, [&](CordRep* child_arg, size_t offset, size_t len) { + if (IsFlatOrExternal(child_arg)) { + rep = rep ? 
AppendLeaf(rep, child_arg, offset, len) + : CreateFromLeaf(child_arg, offset, len, extra); + } else if (rep) { + rep = AddRing<AddMode::kAppend>(rep, child_arg->ring(), offset, len); + } else if (offset == 0 && child_arg->length == len) { + rep = Mutable(child_arg->ring(), extra); + } else { + rep = SubRing(child_arg->ring(), offset, len, extra); + } + }); + return Validate(rep, nullptr, __LINE__); +} + +CordRepRing* CordRepRing::Create(CordRep* child, size_t extra) { + size_t length = child->length; + if (IsFlatOrExternal(child)) { + return CreateFromLeaf(child, 0, length, extra); + } + if (child->IsRing()) { + return Mutable(child->ring(), extra); + } + return CreateSlow(child, extra); +} + +template <CordRepRing::AddMode mode> +CordRepRing* CordRepRing::AddRing(CordRepRing* rep, CordRepRing* ring, + size_t offset, size_t len) { + assert(offset < ring->length); + constexpr bool append = mode == AddMode::kAppend; + Position head = ring->Find(offset); + Position tail = ring->FindTail(head.index, offset + len); + const index_type entries = ring->entries(head.index, tail.index); + + rep = Mutable(rep, entries); + + // The delta for making ring[head].end_pos into 'len - offset' + const pos_type delta_length = + (append ? rep->begin_pos_ + rep->length : rep->begin_pos_ - len) - + ring->entry_begin_pos(head.index) - head.offset; + + // Start filling at `tail`, or `entries` before `head` + Filler filler(rep, append ? rep->tail_ : rep->retreat(rep->head_, entries)); + + if (ring->refcount.IsOne()) { + // Copy entries from source stealing the ref and adjusting the end position. + // Commit the filler as this is no-op. + ring->ForEach(head.index, tail.index, [&](index_type ix) { + filler.Add(ring->entry_child(ix), ring->entry_data_offset(ix), + ring->entry_end_pos(ix) + delta_length); + }); + + // Unref entries we did not copy over, and delete source. 
+ if (head.index != ring->head_) UnrefEntries(ring, ring->head_, head.index); + if (tail.index != ring->tail_) UnrefEntries(ring, tail.index, ring->tail_); + CordRepRing::Delete(ring); + } else { + ring->ForEach(head.index, tail.index, [&](index_type ix) { + CordRep* child = ring->entry_child(ix); + filler.Add(child, ring->entry_data_offset(ix), + ring->entry_end_pos(ix) + delta_length); + CordRep::Ref(child); + }); + CordRepRing::Unref(ring); + } + + if (head.offset) { + // Increase offset of first 'source' entry appended or prepended. + // This is always the entry in `filler.head()` + rep->AddDataOffset(filler.head(), head.offset); + } + + if (tail.offset) { + // Reduce length of last 'source' entry appended or prepended. + // This is always the entry tailed by `filler.pos()` + rep->SubLength(rep->retreat(filler.pos()), tail.offset); + } + + // Commit changes + rep->length += len; + if (append) { + rep->tail_ = filler.pos(); + } else { + rep->head_ = filler.head(); + rep->begin_pos_ -= len; + } + + return Validate(rep); +} + +CordRepRing* CordRepRing::AppendSlow(CordRepRing* rep, CordRep* child) { + Consume(child, [&rep](CordRep* child_arg, size_t offset, size_t len) { + if (child_arg->IsRing()) { + rep = AddRing<AddMode::kAppend>(rep, child_arg->ring(), offset, len); + } else { + rep = AppendLeaf(rep, child_arg, offset, len); + } + }); + return rep; +} + +CordRepRing* CordRepRing::AppendLeaf(CordRepRing* rep, CordRep* child, + size_t offset, size_t len) { + rep = Mutable(rep, 1); + index_type back = rep->tail_; + const pos_type begin_pos = rep->begin_pos_ + rep->length; + rep->tail_ = rep->advance(rep->tail_); + rep->length += len; + rep->entry_end_pos()[back] = begin_pos + len; + rep->entry_child()[back] = child; + rep->entry_data_offset()[back] = static_cast<offset_type>(offset); + return Validate(rep, nullptr, __LINE__); +} + +CordRepRing* CordRepRing::Append(CordRepRing* rep, CordRep* child) { + size_t length = child->length; + if (IsFlatOrExternal(child)) { 
+ return AppendLeaf(rep, child, 0, length); + } + if (child->IsRing()) { + return AddRing<AddMode::kAppend>(rep, child->ring(), 0, length); + } + return AppendSlow(rep, child); +} + +CordRepRing* CordRepRing::PrependSlow(CordRepRing* rep, CordRep* child) { + ReverseConsume(child, [&](CordRep* child_arg, size_t offset, size_t len) { + if (IsFlatOrExternal(child_arg)) { + rep = PrependLeaf(rep, child_arg, offset, len); + } else { + rep = AddRing<AddMode::kPrepend>(rep, child_arg->ring(), offset, len); + } + }); + return Validate(rep); +} + +CordRepRing* CordRepRing::PrependLeaf(CordRepRing* rep, CordRep* child, + size_t offset, size_t len) { + rep = Mutable(rep, 1); + index_type head = rep->retreat(rep->head_); + pos_type end_pos = rep->begin_pos_; + rep->head_ = head; + rep->length += len; + rep->begin_pos_ -= len; + rep->entry_end_pos()[head] = end_pos; + rep->entry_child()[head] = child; + rep->entry_data_offset()[head] = static_cast<offset_type>(offset); + return Validate(rep); +} + +CordRepRing* CordRepRing::Prepend(CordRepRing* rep, CordRep* child) { + size_t length = child->length; + if (IsFlatOrExternal(child)) { + return PrependLeaf(rep, child, 0, length); + } + if (child->IsRing()) { + return AddRing<AddMode::kPrepend>(rep, child->ring(), 0, length); + } + return PrependSlow(rep, child); +} + +CordRepRing* CordRepRing::Append(CordRepRing* rep, absl::string_view data, + size_t extra) { + if (rep->refcount.IsMutable()) { + Span<char> avail = rep->GetAppendBuffer(data.length()); + if (!avail.empty()) { + memcpy(avail.data(), data.data(), avail.length()); + data.remove_prefix(avail.length()); + } + } + if (data.empty()) return Validate(rep); + + const size_t flats = (data.length() - 1) / kMaxFlatLength + 1; + rep = Mutable(rep, flats); + + Filler filler(rep, rep->tail_); + pos_type pos = rep->begin_pos_ + rep->length; + + while (data.length() >= kMaxFlatLength) { + auto* flat = CreateFlat(data.data(), kMaxFlatLength); + filler.Add(flat, 0, pos += 
kMaxFlatLength); + data.remove_prefix(kMaxFlatLength); + } + + if (data.length()) { + auto* flat = CreateFlat(data.data(), data.length(), extra); + filler.Add(flat, 0, pos += data.length()); + } + + rep->length = pos - rep->begin_pos_; + rep->tail_ = filler.pos(); + + return Validate(rep); +} + +CordRepRing* CordRepRing::Prepend(CordRepRing* rep, absl::string_view data, + size_t extra) { + if (rep->refcount.IsMutable()) { + Span<char> avail = rep->GetPrependBuffer(data.length()); + if (!avail.empty()) { + const char* tail = data.data() + data.length() - avail.length(); + memcpy(avail.data(), tail, avail.length()); + data.remove_suffix(avail.length()); + } + } + if (data.empty()) return rep; + + const size_t flats = (data.length() - 1) / kMaxFlatLength + 1; + rep = Mutable(rep, flats); + pos_type pos = rep->begin_pos_; + Filler filler(rep, rep->retreat(rep->head_, static_cast<index_type>(flats))); + + size_t first_size = data.size() - (flats - 1) * kMaxFlatLength; + CordRepFlat* flat = CordRepFlat::New(first_size + extra); + flat->length = first_size + extra; + memcpy(flat->Data() + extra, data.data(), first_size); + data.remove_prefix(first_size); + filler.Add(flat, extra, pos); + pos -= first_size; + + while (!data.empty()) { + assert(data.size() >= kMaxFlatLength); + flat = CreateFlat(data.data(), kMaxFlatLength); + filler.Add(flat, 0, pos); + pos -= kMaxFlatLength; + data.remove_prefix(kMaxFlatLength); + } + + rep->head_ = filler.head(); + rep->length += rep->begin_pos_ - pos; + rep->begin_pos_ = pos; + + return Validate(rep); +} + +// 32 entries is 32 * sizeof(pos_type) = 4 cache lines on x86 +static constexpr index_type kBinarySearchThreshold = 32; +static constexpr index_type kBinarySearchEndCount = 8; + +template <bool wrap> +CordRepRing::index_type CordRepRing::FindBinary(index_type head, + index_type tail, + size_t offset) const { + index_type count = tail + (wrap ? 
capacity_ : 0) - head; + do { + count = (count - 1) / 2; + assert(count < entries(head, tail_)); + index_type mid = wrap ? advance(head, count) : head + count; + index_type after_mid = wrap ? advance(mid) : mid + 1; + bool larger = (offset >= entry_end_offset(mid)); + head = larger ? after_mid : head; + tail = larger ? tail : mid; + assert(head != tail); + } while (ABSL_PREDICT_TRUE(count > kBinarySearchEndCount)); + return head; +} + +CordRepRing::Position CordRepRing::FindSlow(index_type head, + size_t offset) const { + index_type tail = tail_; + + // Binary search until we are good for linear search + // Optimize for branchless / non wrapping ops + if (tail > head) { + index_type count = tail - head; + if (count > kBinarySearchThreshold) { + head = FindBinary<false>(head, tail, offset); + } + } else { + index_type count = capacity_ + tail - head; + if (count > kBinarySearchThreshold) { + head = FindBinary<true>(head, tail, offset); + } + } + + pos_type pos = entry_begin_pos(head); + pos_type end_pos = entry_end_pos(head); + while (offset >= Distance(begin_pos_, end_pos)) { + head = advance(head); + pos = end_pos; + end_pos = entry_end_pos(head); + } + + return {head, offset - Distance(begin_pos_, pos)}; +} + +CordRepRing::Position CordRepRing::FindTailSlow(index_type head, + size_t offset) const { + index_type tail = tail_; + const size_t tail_offset = offset - 1; + + // Binary search until we are good for linear search + // Optimize for branchless / non wrapping ops + if (tail > head) { + index_type count = tail - head; + if (count > kBinarySearchThreshold) { + head = FindBinary<false>(head, tail, tail_offset); + } + } else { + index_type count = capacity_ + tail - head; + if (count > kBinarySearchThreshold) { + head = FindBinary<true>(head, tail, tail_offset); + } + } + + size_t end_offset = entry_end_offset(head); + while (tail_offset >= end_offset) { + head = advance(head); + end_offset = entry_end_offset(head); + } + + return {advance(head), end_offset - 
offset}; +} + +char CordRepRing::GetCharacter(size_t offset) const { + assert(offset < length); + + Position pos = Find(offset); + size_t data_offset = entry_data_offset(pos.index) + pos.offset; + return GetRepData(entry_child(pos.index))[data_offset]; +} + +CordRepRing* CordRepRing::SubRing(CordRepRing* rep, size_t offset, + size_t len, size_t extra) { + assert(offset <= rep->length); + assert(offset <= rep->length - len); + + if (len == 0) { + CordRep::Unref(rep); + return nullptr; + } + + // Find position of first byte + Position head = rep->Find(offset); + Position tail = rep->FindTail(head.index, offset + len); + const size_t new_entries = rep->entries(head.index, tail.index); + + if (rep->refcount.IsMutable() && extra <= (rep->capacity() - new_entries)) { + // We adopt a privately owned rep and no extra entries needed. + if (head.index != rep->head_) UnrefEntries(rep, rep->head_, head.index); + if (tail.index != rep->tail_) UnrefEntries(rep, tail.index, rep->tail_); + rep->head_ = head.index; + rep->tail_ = tail.index; + } else { + // Copy subset to new rep + rep = Copy(rep, head.index, tail.index, extra); + head.index = rep->head_; + tail.index = rep->tail_; + } + + // Adjust begin_pos and length + rep->length = len; + rep->begin_pos_ += offset; + + // Adjust head and tail blocks + if (head.offset) { + rep->AddDataOffset(head.index, head.offset); + } + if (tail.offset) { + rep->SubLength(rep->retreat(tail.index), tail.offset); + } + + return Validate(rep); +} + +CordRepRing* CordRepRing::RemovePrefix(CordRepRing* rep, size_t len, + size_t extra) { + assert(len <= rep->length); + if (len == rep->length) { + CordRep::Unref(rep); + return nullptr; + } + + Position head = rep->Find(len); + if (rep->refcount.IsMutable()) { + if (head.index != rep->head_) UnrefEntries(rep, rep->head_, head.index); + rep->head_ = head.index; + } else { + rep = Copy(rep, head.index, rep->tail_, extra); + head.index = rep->head_; + } + + // Adjust begin_pos and length + rep->length 
-= len; + rep->begin_pos_ += len; + + // Adjust head block + if (head.offset) { + rep->AddDataOffset(head.index, head.offset); + } + + return Validate(rep); +} + +CordRepRing* CordRepRing::RemoveSuffix(CordRepRing* rep, size_t len, + size_t extra) { + assert(len <= rep->length); + + if (len == rep->length) { + CordRep::Unref(rep); + return nullptr; + } + + Position tail = rep->FindTail(rep->length - len); + if (rep->refcount.IsMutable()) { + // We adopt a privately owned rep, scrub. + if (tail.index != rep->tail_) UnrefEntries(rep, tail.index, rep->tail_); + rep->tail_ = tail.index; + } else { + // Copy subset to new rep + rep = Copy(rep, rep->head_, tail.index, extra); + tail.index = rep->tail_; + } + + // Adjust length + rep->length -= len; + + // Adjust tail block + if (tail.offset) { + rep->SubLength(rep->retreat(tail.index), tail.offset); + } + + return Validate(rep); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_ring.h b/third_party/abseil-cpp/absl/strings/internal/cord_rep_ring.h new file mode 100644 index 0000000000..2000e21ea0 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_ring.h @@ -0,0 +1,607 @@ +// Copyright 2020 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {

// All operations modifying a ring buffer are implemented as static methods
// requiring a CordRepRing instance with a reference adopted by the method.
//
// The methods return the modified ring buffer, which may be equal to the input
// if the input was not shared, and having large enough capacity to accommodate
// any newly added node(s). Otherwise, a copy of the input rep with the new
// node(s) added is returned.
//
// Any modification on non shared ring buffers with enough capacity will then
// require minimum atomic operations. Caller should where possible provide
// reasonable `extra` hints for both anticipated extra `flat` byte space, as
// well as anticipated extra nodes required for complex operations.
//
// Example of code creating a ring buffer, adding some data to it,
// and discarding the buffer when done:
//
//   void FunWithRings() {
//     // Create ring with 3 flats
//     CordRep* flat = CreateFlat("Hello");
//     CordRepRing* ring = CordRepRing::Create(flat, 2);
//     ring = CordRepRing::Append(ring, CreateFlat(" "));
//     ring = CordRepRing::Append(ring, CreateFlat("world"));
//     DoSomethingWithRing(ring);
//     CordRep::Unref(ring);
//   }
//
// Example of code copying an existing ring buffer and modifying it:
//
//   void MoreFunWithRings(CordRepRing* src) {
//     CordRepRing* ring = CordRep::Ref(src)->ring();
//     ring = CordRepRing::Append(ring, CreateFlat("Hello"));
//     ring = CordRepRing::Append(ring, CreateFlat(" "));
//     ring = CordRepRing::Append(ring, CreateFlat("world"));
//     DoSomethingWithRing(ring);
//     CordRep::Unref(ring);
//   }
//
class CordRepRing : public CordRep {
 public:
  // `pos_type` represents a 'logical position'. A CordRepRing instance has a
  // `begin_pos` (default 0), and each node inside the buffer will have an
  // `end_pos` which is the `end_pos` of the previous node (or `begin_pos`) plus
  // this node's length. The purpose is to allow for a binary search on this
  // position, while allowing O(1) prepend and append operations.
  using pos_type = size_t;

  // `index_type` is the type for the `head`, `tail` and `capacity` indexes.
  // Ring buffers are limited to having no more than four billion entries.
  using index_type = uint32_t;

  // `offset_type` is the type for the data offset inside a child rep's data.
  using offset_type = uint32_t;

  // Position holds the node index and relative offset into the node for
  // some physical offset in the contained data as returned by the Find()
  // and FindTail() methods.
  struct Position {
    index_type index;
    size_t offset;
  };

  // The maximum # of child nodes that can be hosted inside a CordRepRing.
  static constexpr size_t kMaxCapacity = (std::numeric_limits<uint32_t>::max)();

  // CordRepRing can not be default constructed, moved, copied or assigned.
  CordRepRing() = delete;
  CordRepRing(const CordRepRing&) = delete;
  CordRepRing& operator=(const CordRepRing&) = delete;

  // Returns true if this instance is valid, false if some or all of the
  // invariants are broken. Intended for debug purposes only.
  // `output` receives an explanation of the broken invariants.
  bool IsValid(std::ostream& output) const;

  // Returns the size in bytes for a CordRepRing with `capacity` entries.
  static constexpr size_t AllocSize(size_t capacity);

  // Returns the distance in bytes from `pos` to `end_pos`.
  static constexpr size_t Distance(pos_type pos, pos_type end_pos);

  // Creates a new ring buffer from the provided `child`. Adopts a reference
  // on `child`. The returned ring buffer has a capacity of at least
  // `extra + 1`.
  static CordRepRing* Create(CordRep* child, size_t extra = 0);

  // `head`, `tail` and `capacity` indexes defining the ring buffer boundaries.
  index_type head() const { return head_; }
  index_type tail() const { return tail_; }
  index_type capacity() const { return capacity_; }

  // Returns the number of entries in this instance.
  index_type entries() const { return entries(head_, tail_); }

  // Returns the logical begin position of this instance.
  pos_type begin_pos() const { return begin_pos_; }

  // Returns the number of entries for a given head-tail range.
  // Requires `head` and `tail` values to be less than `capacity()`.
  // A range with `tail <= head` is treated as wrapping around the end of the
  // ring, so `tail == head` counts as `capacity_` entries.
  index_type entries(index_type head, index_type tail) const {
    assert(head < capacity_ && tail < capacity_);
    return tail - head + ((tail > head) ? 0 : capacity_);
  }

  // Returns the logical end position of entry `index`.
  pos_type const& entry_end_pos(index_type index) const {
    assert(IsValidIndex(index));
    return Layout::Partial().Pointer<0>(data_)[index];
  }

  // Returns the child pointer of entry `index`.
  CordRep* const& entry_child(index_type index) const {
    assert(IsValidIndex(index));
    return Layout::Partial(capacity()).Pointer<1>(data_)[index];
  }

  // Returns the data offset of entry `index`.
  offset_type const& entry_data_offset(index_type index) const {
    assert(IsValidIndex(index));
    return Layout::Partial(capacity(), capacity()).Pointer<2>(data_)[index];
  }

  // Appends the provided child node to the `rep` instance.
  // Adopts a reference from `rep` and `child` which may not be null.
  // If the provided child is a FLAT or EXTERNAL node, or a SUBSTRING node
  // containing a FLAT or EXTERNAL node, then the flat or external node is
  // added 'as is', with an offset added for the SUBSTRING case.
  // If the provided child is a RING or CONCAT tree, or a SUBSTRING of a RING or
  // CONCAT tree, then all child nodes not excluded by any start offset or
  // length values are added recursively.
  static CordRepRing* Append(CordRepRing* rep, CordRep* child);

  // Appends the provided string data to the `rep` instance.
  // This function will attempt to utilize any remaining capacity in the last
  // node of the input if that node is not shared (directly or indirectly), and
  // of type FLAT. Remaining data will be added as one or more FLAT nodes.
  // Any last node added to the ring buffer will be allocated with up to
  // `extra` bytes of capacity for (anticipated) subsequent append actions.
  static CordRepRing* Append(CordRepRing* rep, string_view data,
                             size_t extra = 0);

  // Prepends the provided child node to the `rep` instance.
  // Adopts a reference from `rep` and `child` which may not be null.
  // If the provided child is a FLAT or EXTERNAL node, or a SUBSTRING node
  // containing a FLAT or EXTERNAL node, then the flat or external node is
  // prepended 'as is', with an optional offset added for the SUBSTRING case.
  // If the provided child is a RING or CONCAT tree, or a SUBSTRING of a RING
  // or CONCAT tree, then all child nodes not excluded by any start offset or
  // length values are added recursively.
  static CordRepRing* Prepend(CordRepRing* rep, CordRep* child);

  // Prepends the provided string data to the `rep` instance.
  // This function will attempt to utilize any remaining capacity in the first
  // node of the input if that node is not shared (directly or indirectly), and
  // of type FLAT. Remaining data will be added as one or more FLAT nodes.
  // Any first node prepended to the ring buffer will be allocated with up to
  // `extra` bytes of capacity for (anticipated) subsequent prepend actions.
  static CordRepRing* Prepend(CordRepRing* rep, string_view data,
                              size_t extra = 0);

  // Returns a span referencing potentially unused capacity in the last node.
  // The returned span may be empty if no such capacity is available, or if the
  // current instance is shared. Else, a span of size `n <= size` is returned.
  // If non empty, the ring buffer is adjusted to the new length, with the newly
  // added capacity left uninitialized. Callers should assign a value to the
  // entire span before any other operations on this instance.
  Span<char> GetAppendBuffer(size_t size);

  // Returns a span referencing potentially unused capacity in the first node.
  // This function is identical to GetAppendBuffer except that it returns a span
  // referencing up to `size` capacity directly before the existing data.
  Span<char> GetPrependBuffer(size_t size);

  // Returns a cord ring buffer containing `len` bytes of data starting at
  // `offset`. If the input is not shared, this function will remove all head
  // and tail child nodes outside of the requested range, and adjust the new
  // head and tail nodes as required. If the input is shared, this function
  // returns a new instance sharing some or all of the nodes from the input.
  static CordRepRing* SubRing(CordRepRing* r, size_t offset, size_t len,
                              size_t extra = 0);

  // Returns a cord ring buffer with the last `len` bytes removed.
  // If the input is not shared, this function will remove all tail child nodes
  // fully inside the last `len` bytes, and adjust the new tail as required.
  // If the input is shared, this function returns a new instance sharing some
  // or all of the nodes from the input.
  static CordRepRing* RemoveSuffix(CordRepRing* r, size_t len,
                                   size_t extra = 0);

  // Returns a cord ring buffer with the first `len` bytes removed.
  // If the input is not shared, this function will remove all head child nodes
  // fully inside the first `len` bytes, and adjust the new head as required.
  // If the input is shared, this function returns a new instance sharing some
  // or all of the nodes from the input.
  static CordRepRing* RemovePrefix(CordRepRing* r, size_t len,
                                   size_t extra = 0);

  // Returns the character at `offset`. Requires that `offset < length`.
  char GetCharacter(size_t offset) const;

  // Returns true if this instance manages a single contiguous buffer, in which
  // case the (optional) output parameter `fragment` is set. Otherwise, the
  // function returns false, and `fragment` is left unchanged.
  bool IsFlat(absl::string_view* fragment) const;

  // Returns true if the data starting at `offset` with length `len` is
  // managed by this instance inside a single contiguous buffer, in which case
  // the (optional) output parameter `fragment` is set to the contiguous memory
  // starting at offset `offset` with length `len`. Otherwise, the function
  // returns false, and `fragment` is left unchanged.
  bool IsFlat(size_t offset, size_t len, absl::string_view* fragment) const;

  // Testing only: set capacity to requested capacity.
  void SetCapacityForTesting(size_t capacity);

  // Returns the CordRep data pointer for the provided CordRep.
  // Requires that the provided `rep` is either a FLAT or EXTERNAL CordRep.
  static const char* GetLeafData(const CordRep* rep);

  // Returns the CordRep data pointer for the provided CordRep.
  // Requires that `rep` is either a FLAT, EXTERNAL, or SUBSTRING CordRep.
  static const char* GetRepData(const CordRep* rep);

  // Advances the provided position, wrapping around capacity as needed.
  // Requires `index` < capacity()
  inline index_type advance(index_type index) const;

  // Advances the provided position by `n`, wrapping around capacity as needed.
  // Requires `index` < capacity() and `n` <= capacity.
  inline index_type advance(index_type index, index_type n) const;

  // Retreats the provided position, wrapping around 0 as needed.
  // Requires `index` < capacity()
  inline index_type retreat(index_type index) const;

  // Retreats the provided position by `n`, wrapping around 0 as needed.
  // Requires `index` < capacity() and `n` <= capacity.
  inline index_type retreat(index_type index, index_type n) const;

  // Returns the logical begin position of entry `index`: `begin_pos_` for the
  // head entry, else the end position of the entry directly before it.
  pos_type const& entry_begin_pos(index_type index) const {
    return (index == head_) ? begin_pos_ : entry_end_pos(retreat(index));
  }

  // Returns the physical start offset of entry `index`
  size_t entry_start_offset(index_type index) const {
    return Distance(begin_pos_, entry_begin_pos(index));
  }

  // Returns the physical end offset of entry `index`
  size_t entry_end_offset(index_type index) const {
    return Distance(begin_pos_, entry_end_pos(index));
  }

  // Returns the data length for entry `index`
  size_t entry_length(index_type index) const {
    return Distance(entry_begin_pos(index), entry_end_pos(index));
  }

  // Returns the data for entry `index`
  absl::string_view entry_data(index_type index) const;

  // Returns the position for `offset` as {index, prefix}. `index` holds the
  // index of the entry at the specified offset and `prefix` holds the relative
  // offset inside that entry.
  // Requires `offset` < length.
  //
  // For example we can implement GetCharacter(offset) as:
  //   char GetCharacter(size_t offset) {
  //     Position pos = this->Find(offset);
  //     return this->entry_data(pos.index)[pos.offset];
  //   }
  inline Position Find(size_t offset) const;

  // Find starting at `head`
  inline Position Find(index_type head, size_t offset) const;

  // Returns the tail position for `offset` as {tail index, suffix}.
  // `tail index` holds the index of the entry holding the offset directly
  // before `offset` advanced by one. `suffix` holds the relative offset from
  // that relative offset in the entry to the end of the entry.
  // For example, FindTail(length) will return {tail(), 0}, FindTail(length - 5)
  // will return {retreat(tail), 5} provided the preceding entry contains at
  // least 5 bytes of data.
  // NOTE(review): the `{retreat(tail), 5}` example looks inconsistent with the
  // "advanced by one" definition above and with how RemoveSuffix() consumes
  // the result (`tail_ = pos.index`) -- confirm against FindTailSlow().
  // Requires offset >= 1 && offset <= length.
  //
  // This function is very useful in functions that need to clip the end of some
  // ring buffer such as 'RemoveSuffix'.
  // For example, we could implement RemoveSuffix for non shared instances as:
  //   void RemoveSuffix(size_t n) {
  //     Position pos = FindTail(length - n);
  //     UnrefEntries(pos.index, this->tail_);
  //     this->tail_ = pos.index;
  //     entry(retreat(pos.index)).end_pos -= pos.offset;
  //   }
  inline Position FindTail(size_t offset) const;

  // Find tail starting at `head`
  inline Position FindTail(index_type head, size_t offset) const;

  // Invokes f(index_type index) for each entry inside the range [head, tail).
  // A range with `tail <= head` wraps: indexes [head, capacity_) are visited
  // first, followed by [0, tail).
  template <typename F>
  void ForEach(index_type head, index_type tail, F&& f) const {
    index_type n1 = (tail > head) ? tail : capacity_;
    for (index_type i = head; i < n1; ++i) f(i);
    if (tail <= head) {
      for (index_type i = 0; i < tail; ++i) f(i);
    }
  }

  // Invokes f(index_type index) for each entry inside this instance.
  template <typename F>
  void ForEach(F&& f) const {
    ForEach(head_, tail_, std::forward<F>(f));
  }

  // Dump this instance's data to stream `s` in human readable format, excluding
  // the actual data content itself. Intended for debug purposes only.
  friend std::ostream& operator<<(std::ostream& s, const CordRepRing& rep);

 private:
  enum class AddMode { kAppend, kPrepend };

  // Layout of the three parallel per-entry arrays stored in `data_`:
  // end positions, child pointers and data offsets.
  using Layout = container_internal::Layout<pos_type, CordRep*, offset_type>;

  class Filler;
  class Transaction;
  class CreateTransaction;

  static constexpr size_t kLayoutAlignment = Layout::Partial().Alignment();

  // Creates a new CordRepRing.
  explicit CordRepRing(index_type capacity) : capacity_(capacity) {}

  // Returns true if `index` is a valid index into this instance.
  bool IsValidIndex(index_type index) const;

  // Debug use only: validates the provided CordRepRing invariants.
  // Verification of all CordRepRing methods can be enabled by defining
  // EXTRA_CORD_RING_VALIDATION, i.e.: `--copts=-DEXTRA_CORD_RING_VALIDATION`
  // Verification is VERY expensive, so only do it for debugging purposes.
  static CordRepRing* Validate(CordRepRing* rep, const char* file = nullptr,
                               int line = 0);

  // Allocates a CordRepRing large enough to hold `capacity + extra` entries.
  // The returned capacity may be larger if the allocated memory allows for it.
  // The maximum capacity of a CordRepRing is capped at kMaxCapacity.
  // Throws `std::length_error` if `capacity + extra` exceeds kMaxCapacity.
  static CordRepRing* New(size_t capacity, size_t extra);

  // Deallocates (but does not destroy) the provided ring buffer.
  static void Delete(CordRepRing* rep);

  // Destroys the provided ring buffer, decrementing the reference count of all
  // contained child CordReps. The provided `rep` should have a ref count of
  // one (pre decrement destroy call observing `refcount.IsOne()`) or zero
  // (post decrement destroy call observing `!refcount.Decrement()`).
  static void Destroy(CordRepRing* rep);

  // Returns a mutable pointer to the logical end position array.
  pos_type* entry_end_pos() {
    return Layout::Partial().Pointer<0>(data_);
  }

  // Returns a mutable pointer to the child pointer array.
  CordRep** entry_child() {
    return Layout::Partial(capacity()).Pointer<1>(data_);
  }

  // Returns a mutable pointer to the data offset array.
  offset_type* entry_data_offset() {
    return Layout::Partial(capacity(), capacity()).Pointer<2>(data_);
  }

  // Find implementations for the non fast path 0 / length cases.
  Position FindSlow(index_type head, size_t offset) const;
  Position FindTailSlow(index_type head, size_t offset) const;

  // Finds the index of the first node that is inside a reasonable distance
  // of the node at `offset` from which we can continue with a linear search.
  template <bool wrap>
  index_type FindBinary(index_type head, index_type tail, size_t offset) const;

  // Fills the current (initialized) instance from the provided source, copying
  // entries [head, tail). Adds a reference to copied entries if `ref` is true.
  template <bool ref>
  void Fill(const CordRepRing* src, index_type head, index_type tail);

  // Create a copy of `rep`, copying all entries [head, tail), allocating room
  // for `extra` entries. Adds a reference on all copied entries.
  static CordRepRing* Copy(CordRepRing* rep, index_type head, index_type tail,
                           size_t extra = 0);

  // Returns a Mutable CordRepRing reference from `rep` with room for at least
  // `extra` additional nodes. Adopts a reference count from `rep`.
  // This function will return `rep` if, and only if:
  // - rep.entries + extra <= rep.capacity
  // - rep.refcount == 1
  // Otherwise, this function will create a new copy of `rep` with additional
  // capacity to satisfy `extra` extra nodes, and unref the old `rep` instance.
  //
  // If a new CordRepRing can not be allocated, or the new capacity would exceed
  // the maximum capacity, then the input is consumed only, and an exception is
  // thrown.
  static CordRepRing* Mutable(CordRepRing* rep, size_t extra);

  // Slow path for Append(CordRepRing* rep, CordRep* child). This function is
  // exercised if the provided `child` in Append() is not a leaf node, i.e., a
  // ring buffer or old (concat) cord tree.
  static CordRepRing* AppendSlow(CordRepRing* rep, CordRep* child);

  // Appends the provided leaf node. Requires `child` to be FLAT or EXTERNAL.
  static CordRepRing* AppendLeaf(CordRepRing* rep, CordRep* child,
                                 size_t offset, size_t length);

  // Prepends the provided leaf node. Requires `child` to be FLAT or EXTERNAL.
  static CordRepRing* PrependLeaf(CordRepRing* rep, CordRep* child,
                                  size_t offset, size_t length);

  // Slow path for Prepend(CordRepRing* rep, CordRep* child). This function is
  // exercised if the provided `child` in Prepend() is not a leaf node, i.e., a
  // ring buffer or old (concat) cord tree.
  static CordRepRing* PrependSlow(CordRepRing* rep, CordRep* child);

  // Slow path for Create(CordRep* child, size_t extra). This function is
  // exercised if the provided `child` in Create() is not a leaf node, i.e., a
  // ring buffer or old (concat) cord tree.
  static CordRepRing* CreateSlow(CordRep* child, size_t extra);

  // Creates a new ring buffer from the provided `child` leaf node. Requires
  // `child` to be FLAT or EXTERNAL.
  // The returned ring buffer has a capacity of at least `1 + extra`
  static CordRepRing* CreateFromLeaf(CordRep* child, size_t offset,
                                     size_t length, size_t extra);

  // Appends or prepends (depending on AddMode) the ring buffer in `ring` to
  // `rep` starting at `offset` with length `len`.
  template <AddMode mode>
  static CordRepRing* AddRing(CordRepRing* rep, CordRepRing* ring,
                              size_t offset, size_t len);

  // Increases the data offset for entry `index` by `n`.
  void AddDataOffset(index_type index, size_t n);

  // Decreases the length for entry `index` by `n`.
  void SubLength(index_type index, size_t n);

  // Ring boundaries: index of the first entry, index one past the last entry,
  // and the number of entry slots.
  index_type head_;
  index_type tail_;
  index_type capacity_;
  // Logical position of the first byte of data; see `pos_type`.
  pos_type begin_pos_;

  // Start of the storage holding the per-entry arrays described by `Layout`.
  // AllocSize() replaces `sizeof(data_)` by the size of the full layout.
  alignas(kLayoutAlignment) char data_[kLayoutAlignment];

  friend struct CordRep;
};

// Size of the fixed part of the object (everything but the `data_`
// placeholder) plus the storage for three arrays of `capacity` entries each.
constexpr size_t CordRepRing::AllocSize(size_t capacity) {
  return sizeof(CordRepRing) - sizeof(data_) +
         Layout(capacity, capacity, capacity).AllocSize();
}

inline constexpr size_t CordRepRing::Distance(pos_type pos, pos_type end_pos) {
  return (end_pos - pos);
}

// Any tag other than EXTERNAL is read as a flat here; see the FLAT or EXTERNAL
// requirement on the declaration.
inline const char* CordRepRing::GetLeafData(const CordRep* rep) {
  return rep->tag != EXTERNAL ? rep->flat()->Data() : rep->external()->base;
}

// Flats and externals yield their own data pointer; any other tag is read as
// a substring, yielding the child's leaf data advanced by the substring start.
inline const char* CordRepRing::GetRepData(const CordRep* rep) {
  if (rep->tag >= FLAT) return rep->flat()->Data();
  if (rep->tag == EXTERNAL) return rep->external()->base;
  return GetLeafData(rep->substring()->child) + rep->substring()->start;
}

inline CordRepRing::index_type CordRepRing::advance(index_type index) const {
  assert(index < capacity_);
  return ++index == capacity_ ? 0 : index;
}

inline CordRepRing::index_type CordRepRing::advance(index_type index,
                                                    index_type n) const {
  assert(index < capacity_ && n <= capacity_);
  return (index += n) >= capacity_ ? index - capacity_ : index;
}

inline CordRepRing::index_type CordRepRing::retreat(index_type index) const {
  assert(index < capacity_);
  return (index > 0 ? index : capacity_) - 1;
}

inline CordRepRing::index_type CordRepRing::retreat(index_type index,
                                                    index_type n) const {
  assert(index < capacity_ && n <= capacity_);
  return index >= n ? index - n : capacity_ - n + index;
}

inline absl::string_view CordRepRing::entry_data(index_type index) const {
  size_t data_offset = entry_data_offset(index);
  return {GetRepData(entry_child(index)) + data_offset, entry_length(index)};
}

// An index is valid when it is below `capacity_` and inside [head_, tail_),
// where a range with `tail_ <= head_` wraps around the end of the ring.
inline bool CordRepRing::IsValidIndex(index_type index) const {
  if (index >= capacity_) return false;
  return (tail_ > head_) ? (index >= head_ && index < tail_)
                         : (index >= head_ || index < tail_);
}

// Without EXTRA_CORD_RING_VALIDATION, Validate() is a no-op returning `rep`.
#ifndef EXTRA_CORD_RING_VALIDATION
inline CordRepRing* CordRepRing::Validate(CordRepRing* rep,
                                          const char* /*file*/, int /*line*/) {
  return rep;
}
#endif

// Fast path: offset 0 is always the start of the head entry.
inline CordRepRing::Position CordRepRing::Find(size_t offset) const {
  assert(offset < length);
  return (offset == 0) ? Position{head_, 0} : FindSlow(head_, offset);
}

// Requires `head` to be a valid index whose entry starts at or before
// `offset`.
inline CordRepRing::Position CordRepRing::Find(index_type head,
                                               size_t offset) const {
  assert(offset < length);
  assert(IsValidIndex(head) && offset >= entry_start_offset(head));
  return (offset == 0) ? Position{head_, 0} : FindSlow(head, offset);
}

// Fast path: an offset equal to `length` is the tail with nothing to trim.
inline CordRepRing::Position CordRepRing::FindTail(size_t offset) const {
  assert(offset > 0 && offset <= length);
  return (offset == length) ? Position{tail_, 0} : FindTailSlow(head_, offset);
}

// Requires `head` to be a valid index whose entry starts before `offset`.
inline CordRepRing::Position CordRepRing::FindTail(index_type head,
                                                   size_t offset) const {
  assert(offset > 0 && offset <= length);
  assert(IsValidIndex(head) && offset >= entry_start_offset(head) + 1);
  return (offset == length) ? Position{tail_, 0} : FindTailSlow(head, offset);
}

// Now that CordRepRing is defined, we can define CordRep's helper casts:
inline CordRepRing* CordRep::ring() {
  assert(IsRing());
  return static_cast<CordRepRing*>(this);
}

inline const CordRepRing* CordRep::ring() const {
  assert(IsRing());
  return static_cast<const CordRepRing*>(this);
}

// A ring with exactly one entry is flat; its data is that entry's data.
inline bool CordRepRing::IsFlat(absl::string_view* fragment) const {
  if (entries() == 1) {
    if (fragment) *fragment = entry_data(head());
    return true;
  }
  return false;
}

// The range is flat when `len` bytes starting at the found position fit
// inside the single entry containing `offset`. The comparison is written as
// `length - len >= pos.offset` after checking `length >= len`, which keeps the
// unsigned arithmetic from wrapping.
inline bool CordRepRing::IsFlat(size_t offset, size_t len,
                                absl::string_view* fragment) const {
  const Position pos = Find(offset);
  const absl::string_view data = entry_data(pos.index);
  if (data.length() >= len && data.length() - len >= pos.offset) {
    if (fragment) *fragment = data.substr(pos.offset, len);
    return true;
  }
  return false;
}

std::ostream& operator<<(std::ostream& s, const CordRepRing& rep);

}  // namespace cord_internal
ABSL_NAMESPACE_END
}  // namespace absl
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_RING_READER_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_RING_READER_H_ + +#include <cassert> +#include <cstddef> +#include <cstdint> + +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_ring.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordRepRingReader provides basic navigation over CordRepRing data. +class CordRepRingReader { + public: + // Returns true if this instance is not empty. + explicit operator bool() const { return ring_ != nullptr; } + + // Returns the ring buffer reference for this instance, or nullptr if empty. + CordRepRing* ring() const { return ring_; } + + // Returns the current node index inside the ring buffer for this instance. + // The returned value is undefined if this instance is empty. + CordRepRing::index_type index() const { return index_; } + + // Returns the current node inside the ring buffer for this instance. + // The returned value is undefined if this instance is empty. + CordRep* node() const { return ring_->entry_child(index_); } + + // Returns the length of the referenced ring buffer. + // Requires the current instance to be non empty. + size_t length() const { + assert(ring_); + return ring_->length; + } + + // Returns the end offset of the last navigated-to chunk, which represents the + // total bytes 'consumed' relative to the start of the ring. The returned + // value is never zero. 
For example, initializing a reader with a ring buffer + // with a first chunk of 19 bytes will return consumed() = 19. + // Requires the current instance to be non empty. + size_t consumed() const { + assert(ring_); + return ring_->entry_end_offset(index_); + } + + // Returns the number of bytes remaining beyond the last navigated-to chunk. + // Requires the current instance to be non empty. + size_t remaining() const { + assert(ring_); + return length() - consumed(); + } + + // Resets this instance to an empty value + void Reset() { ring_ = nullptr; } + + // Resets this instance to the start of `ring`. `ring` must not be null. + // Returns a reference into the first chunk of the provided ring. + absl::string_view Reset(CordRepRing* ring) { + assert(ring); + ring_ = ring; + index_ = ring_->head(); + return ring_->entry_data(index_); + } + + // Navigates to the next chunk inside the reference ring buffer. + // Returns a reference into the navigated-to chunk. + // Requires remaining() to be non zero. + absl::string_view Next() { + assert(remaining()); + index_ = ring_->advance(index_); + return ring_->entry_data(index_); + } + + // Navigates to the chunk at offset `offset`. + // Returns a reference into the navigated-to chunk, adjusted for the relative + // position of `offset` into that chunk. For example, calling Seek(13) on a + // ring buffer containing 2 chunks of 10 and 20 bytes respectively will return + // a string view into the second chunk starting at offset 3 with a size of 17. + // Requires `offset` to be less than `length()` + absl::string_view Seek(size_t offset) { + assert(offset < length()); + size_t current = ring_->entry_end_offset(index_); + CordRepRing::index_type hint = (offset >= current) ? 
index_ : ring_->head(); + const CordRepRing::Position head = ring_->Find(hint, offset); + index_ = head.index; + auto data = ring_->entry_data(head.index); + data.remove_prefix(head.offset); + return data; + } + + private: + CordRepRing* ring_ = nullptr; + CordRepRing::index_type index_; +}; + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_RING_READER_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_test_util.h b/third_party/abseil-cpp/absl/strings/internal/cord_rep_test_util.h new file mode 100644 index 0000000000..ad828af2a5 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_test_util.h @@ -0,0 +1,220 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_ + +#include <cassert> +#include <memory> +#include <random> +#include <string> +#include <vector> + +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cordrep_testing { + +inline cord_internal::CordRepSubstring* MakeSubstring( + size_t start, size_t len, cord_internal::CordRep* rep) { + auto* sub = new cord_internal::CordRepSubstring; + sub->tag = cord_internal::SUBSTRING; + sub->start = start; + sub->length = len <= 0 ? rep->length - start + len : len; + sub->child = rep; + return sub; +} + +inline cord_internal::CordRepConcat* MakeConcat(cord_internal::CordRep* left, + cord_internal::CordRep* right, + int depth = 0) { + auto* concat = new cord_internal::CordRepConcat; + concat->tag = cord_internal::CONCAT; + concat->length = left->length + right->length; + concat->left = left; + concat->right = right; + concat->set_depth(depth); + return concat; +} + +inline cord_internal::CordRepFlat* MakeFlat(absl::string_view value) { + assert(value.length() <= cord_internal::kMaxFlatLength); + auto* flat = cord_internal::CordRepFlat::New(value.length()); + flat->length = value.length(); + memcpy(flat->Data(), value.data(), value.length()); + return flat; +} + +// Creates an external node for testing +inline cord_internal::CordRepExternal* MakeExternal(absl::string_view s) { + struct Rep : public cord_internal::CordRepExternal { + std::string s; + explicit Rep(absl::string_view sv) : s(sv) { + this->tag = cord_internal::EXTERNAL; + this->base = s.data(); + this->length = s.length(); + this->releaser_invoker = [](cord_internal::CordRepExternal* self) { + delete static_cast<Rep*>(self); + }; + } + }; + return 
new Rep(s); +} + +inline std::string CreateRandomString(size_t n) { + absl::string_view data = + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "0123456789~!@#$%^&*()_+=-<>?:\"{}[]|"; + std::minstd_rand rnd; + std::uniform_int_distribution<size_t> dist(0, data.size() - 1); + std::string s(n, ' '); + for (size_t i = 0; i < n; ++i) { + s[i] = data[dist(rnd)]; + } + return s; +} + +// Creates an array of flats from the provided string, chopping +// the provided string up into flats of size `chunk_size` characters +// resulting in roughly `data.size() / chunk_size` total flats. +inline std::vector<cord_internal::CordRep*> CreateFlatsFromString( + absl::string_view data, size_t chunk_size) { + assert(chunk_size > 0); + std::vector<cord_internal::CordRep*> flats; + for (absl::string_view s = data; !s.empty(); s.remove_prefix(chunk_size)) { + flats.push_back(MakeFlat(s.substr(0, chunk_size))); + } + return flats; +} + +inline cord_internal::CordRepBtree* CordRepBtreeFromFlats( + absl::Span<cord_internal::CordRep* const> flats) { + assert(!flats.empty()); + auto* node = cord_internal::CordRepBtree::Create(flats[0]); + for (size_t i = 1; i < flats.size(); ++i) { + node = cord_internal::CordRepBtree::Append(node, flats[i]); + } + return node; +} + +template <typename Fn> +inline void CordVisitReps(cord_internal::CordRep* rep, Fn&& fn) { + fn(rep); + while (rep->tag == cord_internal::SUBSTRING) { + rep = rep->substring()->child; + fn(rep); + } + if (rep->tag == cord_internal::BTREE) { + for (cord_internal::CordRep* edge : rep->btree()->Edges()) { + CordVisitReps(edge, fn); + } + } else if (rep->tag == cord_internal::CONCAT) { + CordVisitReps(rep->concat()->left, fn); + CordVisitReps(rep->concat()->right, fn); + } +} + +template <typename Predicate> +inline std::vector<cord_internal::CordRep*> CordCollectRepsIf( + Predicate&& predicate, cord_internal::CordRep* rep) { + std::vector<cord_internal::CordRep*> reps; + CordVisitReps(rep, [&reps, 
&predicate](cord_internal::CordRep* rep) { + if (predicate(rep)) reps.push_back(rep); + }); + return reps; +} + +inline std::vector<cord_internal::CordRep*> CordCollectReps( + cord_internal::CordRep* rep) { + std::vector<cord_internal::CordRep*> reps; + auto fn = [&reps](cord_internal::CordRep* rep) { reps.push_back(rep); }; + CordVisitReps(rep, fn); + return reps; +} + +inline void CordToString(cord_internal::CordRep* rep, std::string& s) { + size_t offset = 0; + size_t length = rep->length; + while (rep->tag == cord_internal::SUBSTRING) { + offset += rep->substring()->start; + rep = rep->substring()->child; + } + if (rep->tag == cord_internal::BTREE) { + for (cord_internal::CordRep* edge : rep->btree()->Edges()) { + CordToString(edge, s); + } + } else if (rep->tag >= cord_internal::FLAT) { + s.append(rep->flat()->Data() + offset, length); + } else if (rep->tag == cord_internal::EXTERNAL) { + s.append(rep->external()->base + offset, length); + } else { + ABSL_RAW_LOG(FATAL, "Unsupported tag %d", rep->tag); + } +} + +inline std::string CordToString(cord_internal::CordRep* rep) { + std::string s; + s.reserve(rep->length); + CordToString(rep, s); + return s; +} + +// RAII Helper class to automatically unref reps on destruction. +class AutoUnref { + public: + ~AutoUnref() { + for (CordRep* rep : unrefs_) CordRep::Unref(rep); + } + + // Adds `rep` to the list of reps to be unreffed at destruction. + template <typename CordRepType> + CordRepType* Add(CordRepType* rep) { + unrefs_.push_back(rep); + return rep; + } + + // Increments the reference count of `rep` by one, and adds it to + // the list of reps to be unreffed at destruction. + template <typename CordRepType> + CordRepType* Ref(CordRepType* rep) { + unrefs_.push_back(CordRep::Ref(rep)); + return rep; + } + + // Increments the reference count of `rep` by one if `condition` is true, + // and adds it to the list of reps to be unreffed at destruction. 
+ template <typename CordRepType> + CordRepType* RefIf(bool condition, CordRepType* rep) { + if (condition) unrefs_.push_back(CordRep::Ref(rep)); + return rep; + } + + private: + using CordRep = absl::cord_internal::CordRep; + + std::vector<CordRep*> unrefs_; +}; + +} // namespace cordrep_testing +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_functions.cc b/third_party/abseil-cpp/absl/strings/internal/cordz_functions.cc new file mode 100644 index 0000000000..20d314f03c --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_functions.cc @@ -0,0 +1,96 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cordz_functions.h" + +#include <atomic> +#include <cmath> +#include <limits> +#include <random> + +#include "absl/base/attributes.h" +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/profiling/internal/exponential_biased.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +// The average interval until the next sample. A value of 0 disables profiling +// while a value of 1 will profile all Cords. +std::atomic<int> g_cordz_mean_interval(50000); + +} // namespace + +#ifdef ABSL_INTERNAL_CORDZ_ENABLED + +// Special negative 'not initialized' per thread value for cordz_next_sample. 
+static constexpr int64_t kInitCordzNextSample = -1; + +ABSL_CONST_INIT thread_local int64_t cordz_next_sample = kInitCordzNextSample; + +// kIntervalIfDisabled is the number of profile-eligible events that need to +// occur before the code will confirm that cordz is still disabled. +constexpr int64_t kIntervalIfDisabled = 1 << 16; + +ABSL_ATTRIBUTE_NOINLINE bool cordz_should_profile_slow() { + + thread_local absl::profiling_internal::ExponentialBiased + exponential_biased_generator; + int32_t mean_interval = get_cordz_mean_interval(); + + // Check if we disabled profiling. If so, set the next sample to a "large" + // number to minimize the overhead of the should_profile codepath. + if (mean_interval <= 0) { + cordz_next_sample = kIntervalIfDisabled; + return false; + } + + // Check if we're always sampling. + if (mean_interval == 1) { + cordz_next_sample = 1; + return true; + } + + if (cordz_next_sample <= 0) { + // If first check on current thread, check cordz_should_profile() + // again using the created (initial) stride in cordz_next_sample. 
+ const bool initialized = cordz_next_sample != kInitCordzNextSample; + cordz_next_sample = exponential_biased_generator.GetStride(mean_interval); + return initialized || cordz_should_profile(); + } + + --cordz_next_sample; + return false; +} + +void cordz_set_next_sample_for_testing(int64_t next_sample) { + cordz_next_sample = next_sample; +} + +#endif // ABSL_INTERNAL_CORDZ_ENABLED + +int32_t get_cordz_mean_interval() { + return g_cordz_mean_interval.load(std::memory_order_acquire); +} + +void set_cordz_mean_interval(int32_t mean_interval) { + g_cordz_mean_interval.store(mean_interval, std::memory_order_release); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_functions.h b/third_party/abseil-cpp/absl/strings/internal/cordz_functions.h new file mode 100644 index 0000000000..c9ba14508a --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_functions.h @@ -0,0 +1,85 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_CORDZ_FUNCTIONS_H_ +#define ABSL_STRINGS_CORDZ_FUNCTIONS_H_ + +#include <stdint.h> + +#include "absl/base/attributes.h" +#include "absl/base/config.h" +#include "absl/base/optimization.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// Returns the current sample rate. This represents the average interval +// between samples. 
+int32_t get_cordz_mean_interval(); + +// Sets the sample rate with the average interval between samples. +void set_cordz_mean_interval(int32_t mean_interval); + +// Enable cordz unless any of the following applies: +// - no thread local support +// - MSVC build +// - Android build +// - Apple build +// - DLL build +// Hashtablez is turned off completely in opensource builds. +// MSVC's static atomics are dynamically initialized in debug mode, which breaks +// sampling. +#if defined(ABSL_HAVE_THREAD_LOCAL) && !defined(_MSC_VER) && \ + !defined(ABSL_BUILD_DLL) && !defined(ABSL_CONSUME_DLL) && \ + !defined(__ANDROID__) && !defined(__APPLE__) +#define ABSL_INTERNAL_CORDZ_ENABLED 1 +#endif + +#ifdef ABSL_INTERNAL_CORDZ_ENABLED + +// cordz_next_sample is the number of events until the next sample event. If +// the value is 1 or less, the code will check on the next event if cordz is +// enabled, and if so, will sample the Cord. cordz is only enabled when we can +// use thread locals. +ABSL_CONST_INIT extern thread_local int64_t cordz_next_sample; + +// Determines if the next sample should be profiled. If it is, the thread-local +// cordz_next_sample will be set with the interval until the next sample. +bool cordz_should_profile_slow(); + +// Returns true if the next cord should be sampled. 
+inline bool cordz_should_profile() { + if (ABSL_PREDICT_TRUE(cordz_next_sample > 1)) { + cordz_next_sample--; + return false; + } + return cordz_should_profile_slow(); +} + +// Sets the interval until the next sample (for testing only) +void cordz_set_next_sample_for_testing(int64_t next_sample); + +#else // ABSL_INTERNAL_CORDZ_ENABLED + +inline bool cordz_should_profile() { return false; } +inline void cordz_set_next_sample_for_testing(int64_t) {} + +#endif // ABSL_INTERNAL_CORDZ_ENABLED + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_CORDZ_FUNCTIONS_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_functions_test.cc b/third_party/abseil-cpp/absl/strings/internal/cordz_functions_test.cc new file mode 100644 index 0000000000..350623c1f3 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_functions_test.cc @@ -0,0 +1,149 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/internal/cordz_functions.h" + +#include <thread> // NOLINT we need real clean new threads + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::testing::Eq; +using ::testing::Ge; +using ::testing::Le; + +TEST(CordzFunctionsTest, SampleRate) { + int32_t orig_sample_rate = get_cordz_mean_interval(); + int32_t expected_sample_rate = 123; + set_cordz_mean_interval(expected_sample_rate); + EXPECT_THAT(get_cordz_mean_interval(), Eq(expected_sample_rate)); + set_cordz_mean_interval(orig_sample_rate); +} + +// Cordz is disabled when we don't have thread_local. All calls to +// should_profile will return false when cordz is disabled, so we might want to +// avoid those tests. +#ifdef ABSL_INTERNAL_CORDZ_ENABLED + +TEST(CordzFunctionsTest, ShouldProfileDisable) { + int32_t orig_sample_rate = get_cordz_mean_interval(); + + set_cordz_mean_interval(0); + cordz_set_next_sample_for_testing(0); + EXPECT_FALSE(cordz_should_profile()); + // 1 << 16 is from kIntervalIfDisabled in cordz_functions.cc. + EXPECT_THAT(cordz_next_sample, Eq(1 << 16)); + + set_cordz_mean_interval(orig_sample_rate); +} + +TEST(CordzFunctionsTest, ShouldProfileAlways) { + int32_t orig_sample_rate = get_cordz_mean_interval(); + + set_cordz_mean_interval(1); + cordz_set_next_sample_for_testing(1); + EXPECT_TRUE(cordz_should_profile()); + EXPECT_THAT(cordz_next_sample, Le(1)); + + set_cordz_mean_interval(orig_sample_rate); +} + +TEST(CordzFunctionsTest, DoesNotAlwaysSampleFirstCord) { + // Set large enough interval such that the chance of 'tons' of threads + // randomly sampling the first call is infinitely small. 
+ set_cordz_mean_interval(10000); + int tries = 0; + bool sampled = false; + do { + ++tries; + ASSERT_THAT(tries, Le(1000)); + std::thread thread([&sampled] { + sampled = cordz_should_profile(); + }); + thread.join(); + } while (sampled); +} + +TEST(CordzFunctionsTest, ShouldProfileRate) { + static constexpr int kDesiredMeanInterval = 1000; + static constexpr int kSamples = 10000; + int32_t orig_sample_rate = get_cordz_mean_interval(); + + set_cordz_mean_interval(kDesiredMeanInterval); + + int64_t sum_of_intervals = 0; + for (int i = 0; i < kSamples; i++) { + // Setting next_sample to 0 will force cordz_should_profile to generate a + // new value for next_sample each iteration. + cordz_set_next_sample_for_testing(0); + cordz_should_profile(); + sum_of_intervals += cordz_next_sample; + } + + // The sum of independent exponential variables is an Erlang distribution, + // which is a gamma distribution where the shape parameter is equal to the + // number of summands. The distribution used for cordz_should_profile is + // actually floor(Exponential(1/mean)) which introduces bias. However, we can + // apply the squint-really-hard correction factor. That is, when mean is + // large, then if we squint really hard the shape of the distribution between + // N and N+1 looks like a uniform distribution. On average, each value for + // next_sample will be about 0.5 lower than we would expect from an + // exponential distribution. This squint-really-hard correction approach won't + // work when mean is smaller than about 10 but works fine when mean is 1000. + // + // We can use R to calculate a confidence interval. This + // shows how to generate a confidence interval with a false positive rate of + // one in a billion. 
+ // + // $ R -q + // > mean = 1000 + // > kSamples = 10000 + // > errorRate = 1e-9 + // > correction = -kSamples / 2 + // > low = qgamma(errorRate/2, kSamples, 1/mean) + correction + // > high = qgamma(1 - errorRate/2, kSamples, 1/mean) + correction + // > low + // [1] 9396115 + // > high + // [1] 10618100 + EXPECT_THAT(sum_of_intervals, Ge(9396115)); + EXPECT_THAT(sum_of_intervals, Le(10618100)); + + set_cordz_mean_interval(orig_sample_rate); +} + +#else // ABSL_INTERNAL_CORDZ_ENABLED + +TEST(CordzFunctionsTest, ShouldProfileDisabled) { + int32_t orig_sample_rate = get_cordz_mean_interval(); + + set_cordz_mean_interval(1); + cordz_set_next_sample_for_testing(0); + EXPECT_FALSE(cordz_should_profile()); + + set_cordz_mean_interval(orig_sample_rate); +} + +#endif // ABSL_INTERNAL_CORDZ_ENABLED + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_handle.cc b/third_party/abseil-cpp/absl/strings/internal/cordz_handle.cc new file mode 100644 index 0000000000..a73fefed59 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_handle.cc @@ -0,0 +1,139 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "absl/strings/internal/cordz_handle.h" + +#include <atomic> + +#include "absl/base/internal/raw_logging.h" // For ABSL_RAW_CHECK +#include "absl/base/internal/spinlock.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +using ::absl::base_internal::SpinLockHolder; + +ABSL_CONST_INIT CordzHandle::Queue CordzHandle::global_queue_(absl::kConstInit); + +CordzHandle::CordzHandle(bool is_snapshot) : is_snapshot_(is_snapshot) { + if (is_snapshot) { + SpinLockHolder lock(&queue_->mutex); + CordzHandle* dq_tail = queue_->dq_tail.load(std::memory_order_acquire); + if (dq_tail != nullptr) { + dq_prev_ = dq_tail; + dq_tail->dq_next_ = this; + } + queue_->dq_tail.store(this, std::memory_order_release); + } +} + +CordzHandle::~CordzHandle() { + ODRCheck(); + if (is_snapshot_) { + std::vector<CordzHandle*> to_delete; + { + SpinLockHolder lock(&queue_->mutex); + CordzHandle* next = dq_next_; + if (dq_prev_ == nullptr) { + // We were head of the queue, delete every CordzHandle until we reach + // either the end of the list, or a snapshot handle. + while (next && !next->is_snapshot_) { + to_delete.push_back(next); + next = next->dq_next_; + } + } else { + // Another CordzHandle existed before this one, don't delete anything. 
+ dq_prev_->dq_next_ = next; + } + if (next) { + next->dq_prev_ = dq_prev_; + } else { + queue_->dq_tail.store(dq_prev_, std::memory_order_release); + } + } + for (CordzHandle* handle : to_delete) { + delete handle; + } + } +} + +bool CordzHandle::SafeToDelete() const { + return is_snapshot_ || queue_->IsEmpty(); +} + +void CordzHandle::Delete(CordzHandle* handle) { + assert(handle); + if (handle) { + handle->ODRCheck(); + Queue* const queue = handle->queue_; + if (!handle->SafeToDelete()) { + SpinLockHolder lock(&queue->mutex); + CordzHandle* dq_tail = queue->dq_tail.load(std::memory_order_acquire); + if (dq_tail != nullptr) { + handle->dq_prev_ = dq_tail; + dq_tail->dq_next_ = handle; + queue->dq_tail.store(handle, std::memory_order_release); + return; + } + } + delete handle; + } +} + +std::vector<const CordzHandle*> CordzHandle::DiagnosticsGetDeleteQueue() { + std::vector<const CordzHandle*> handles; + SpinLockHolder lock(&global_queue_.mutex); + CordzHandle* dq_tail = global_queue_.dq_tail.load(std::memory_order_acquire); + for (const CordzHandle* p = dq_tail; p; p = p->dq_prev_) { + handles.push_back(p); + } + return handles; +} + +bool CordzHandle::DiagnosticsHandleIsSafeToInspect( + const CordzHandle* handle) const { + ODRCheck(); + if (!is_snapshot_) return false; + if (handle == nullptr) return true; + if (handle->is_snapshot_) return false; + bool snapshot_found = false; + SpinLockHolder lock(&queue_->mutex); + for (const CordzHandle* p = queue_->dq_tail; p; p = p->dq_prev_) { + if (p == handle) return !snapshot_found; + if (p == this) snapshot_found = true; + } + ABSL_ASSERT(snapshot_found); // Assert that 'this' is in delete queue. 
+ return true; +} + +std::vector<const CordzHandle*> +CordzHandle::DiagnosticsGetSafeToInspectDeletedHandles() { + ODRCheck(); + std::vector<const CordzHandle*> handles; + if (!is_snapshot()) { + return handles; + } + + SpinLockHolder lock(&queue_->mutex); + for (const CordzHandle* p = dq_next_; p != nullptr; p = p->dq_next_) { + if (!p->is_snapshot()) { + handles.push_back(p); + } + } + return handles; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_handle.h b/third_party/abseil-cpp/absl/strings/internal/cordz_handle.h new file mode 100644 index 0000000000..5df53c782a --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_handle.h @@ -0,0 +1,131 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_CORDZ_HANDLE_H_ +#define ABSL_STRINGS_CORDZ_HANDLE_H_ + +#include <atomic> +#include <vector> + +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/base/internal/spinlock.h" +#include "absl/synchronization/mutex.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// This base class allows multiple types of object (CordzInfo and +// CordzSampleToken) to exist simultaneously on the delete queue (pointed to by +// global_queue_.dq_tail and traversed using dq_prev_ and dq_next_). 
The +// delete queue guarantees that once a profiler creates a CordzSampleToken and +// has gained visibility into a CordzInfo object, that CordzInfo object will not +// be deleted prematurely. This allows the profiler to inspect all CordzInfo +// objects that are alive without needing to hold a global lock. +class CordzHandle { + public: + CordzHandle() : CordzHandle(false) {} + + bool is_snapshot() const { return is_snapshot_; } + + // Returns true if this instance is safe to be deleted because it is either a + // snapshot, which is always safe to delete, or not included in the global + // delete queue and thus not included in any snapshot. + // Callers are responsible for making sure this instance can not be newly + // discovered by other threads. For example, CordzInfo instances first de-list + // themselves from the global CordzInfo list before determining if they are + // safe to be deleted directly. + // If SafeToDelete returns false, callers MUST use the Delete() method to + // safely queue CordzHandle instances for deletion. + bool SafeToDelete() const; + + // Deletes the provided instance, or puts it on the delete queue to be deleted + // once there are no more sample tokens (snapshot) instances potentially + // referencing the instance. `handle` should not be null. + static void Delete(CordzHandle* handle); + + // Returns the current entries in the delete queue in LIFO order. + static std::vector<const CordzHandle*> DiagnosticsGetDeleteQueue(); + + // Returns true if the provided handle is nullptr or guarded by this handle. + // Since the CordzSnapshot token is itself a CordzHandle, this method will + // allow tests to check if that token is keeping an arbitrary CordzHandle + // alive. + bool DiagnosticsHandleIsSafeToInspect(const CordzHandle* handle) const; + + // Returns the current entries in the delete queue, in LIFO order, that are + // protected by this. 
CordzHandle objects are only placed on the delete queue + // after CordzHandle::Delete is called with them as an argument. Only + // CordzHandle objects that are not also CordzSnapshot objects will be + // included in the return vector. For each of the handles in the return + // vector, the earliest that their memory can be freed is when this + // CordzSnapshot object is deleted. + std::vector<const CordzHandle*> DiagnosticsGetSafeToInspectDeletedHandles(); + + protected: + explicit CordzHandle(bool is_snapshot); + virtual ~CordzHandle(); + + private: + // Global queue data. CordzHandle stores a pointer to the global queue + // instance to harden against ODR violations. + struct Queue { + constexpr explicit Queue(absl::ConstInitType) + : mutex(absl::kConstInit, + absl::base_internal::SCHEDULE_COOPERATIVE_AND_KERNEL) {} + + absl::base_internal::SpinLock mutex; + std::atomic<CordzHandle*> dq_tail ABSL_GUARDED_BY(mutex){nullptr}; + + // Returns true if this delete queue is empty. This method does not acquire + // the lock, but does a 'load acquire' observation on the delete queue tail. + // It is used inside Delete() to check for the presence of a delete queue + // without holding the lock. The assumption is that the caller is in the + // state of 'being deleted', and can not be newly discovered by a concurrent + // 'being constructed' snapshot instance. Practically, this means that any + // such discovery (`find`, 'first' or 'next', etc) must have proper 'happens + // before / after' semantics and atomic fences. + bool IsEmpty() const ABSL_NO_THREAD_SAFETY_ANALYSIS { + return dq_tail.load(std::memory_order_acquire) == nullptr; + } + }; + + void ODRCheck() const { +#ifndef NDEBUG + ABSL_RAW_CHECK(queue_ == &global_queue_, "ODR violation in Cord"); +#endif + } + + ABSL_CONST_INIT static Queue global_queue_; + Queue* const queue_ = &global_queue_; + const bool is_snapshot_; + + // dq_prev_ and dq_next_ require the global queue mutex to be held. 
+ // Unfortunately we can't use thread annotations such that the thread safety + // analysis understands that queue_ and global_queue_ are one and the same. + CordzHandle* dq_prev_ = nullptr; + CordzHandle* dq_next_ = nullptr; +}; + +class CordzSnapshot : public CordzHandle { + public: + CordzSnapshot() : CordzHandle(true) {} +}; + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_CORDZ_HANDLE_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_handle_test.cc b/third_party/abseil-cpp/absl/strings/internal/cordz_handle_test.cc new file mode 100644 index 0000000000..fd68e06b3e --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_handle_test.cc @@ -0,0 +1,265 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "absl/strings/internal/cordz_handle.h" + +#include <random> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/memory/memory.h" +#include "absl/synchronization/internal/thread_pool.h" +#include "absl/synchronization/notification.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::testing::ElementsAre; +using ::testing::Gt; +using ::testing::IsEmpty; +using ::testing::SizeIs; + +// Local less verbose helper +std::vector<const CordzHandle*> DeleteQueue() { + return CordzHandle::DiagnosticsGetDeleteQueue(); +} + +struct CordzHandleDeleteTracker : public CordzHandle { + bool* deleted; + explicit CordzHandleDeleteTracker(bool* deleted) : deleted(deleted) {} + ~CordzHandleDeleteTracker() override { *deleted = true; } +}; + +TEST(CordzHandleTest, DeleteQueueIsEmpty) { + EXPECT_THAT(DeleteQueue(), SizeIs(0)); +} + +TEST(CordzHandleTest, CordzHandleCreateDelete) { + bool deleted = false; + auto* handle = new CordzHandleDeleteTracker(&deleted); + EXPECT_FALSE(handle->is_snapshot()); + EXPECT_TRUE(handle->SafeToDelete()); + EXPECT_THAT(DeleteQueue(), SizeIs(0)); + + CordzHandle::Delete(handle); + EXPECT_THAT(DeleteQueue(), SizeIs(0)); + EXPECT_TRUE(deleted); +} + +TEST(CordzHandleTest, CordzSnapshotCreateDelete) { + auto* snapshot = new CordzSnapshot(); + EXPECT_TRUE(snapshot->is_snapshot()); + EXPECT_TRUE(snapshot->SafeToDelete()); + EXPECT_THAT(DeleteQueue(), ElementsAre(snapshot)); + delete snapshot; + EXPECT_THAT(DeleteQueue(), SizeIs(0)); +} + +TEST(CordzHandleTest, CordzHandleCreateDeleteWithSnapshot) { + bool deleted = false; + auto* snapshot = new CordzSnapshot(); + auto* handle = new CordzHandleDeleteTracker(&deleted); + EXPECT_FALSE(handle->SafeToDelete()); + + CordzHandle::Delete(handle); + EXPECT_THAT(DeleteQueue(), ElementsAre(handle, snapshot)); + EXPECT_FALSE(deleted); + EXPECT_FALSE(handle->SafeToDelete()); + + delete snapshot; + 
EXPECT_THAT(DeleteQueue(), SizeIs(0)); + EXPECT_TRUE(deleted); +} + +TEST(CordzHandleTest, MultiSnapshot) { + bool deleted[3] = {false, false, false}; + + CordzSnapshot* snapshot[3]; + CordzHandleDeleteTracker* handle[3]; + for (int i = 0; i < 3; ++i) { + snapshot[i] = new CordzSnapshot(); + handle[i] = new CordzHandleDeleteTracker(&deleted[i]); + CordzHandle::Delete(handle[i]); + } + + EXPECT_THAT(DeleteQueue(), ElementsAre(handle[2], snapshot[2], handle[1], + snapshot[1], handle[0], snapshot[0])); + EXPECT_THAT(deleted, ElementsAre(false, false, false)); + + delete snapshot[1]; + EXPECT_THAT(DeleteQueue(), ElementsAre(handle[2], snapshot[2], handle[1], + handle[0], snapshot[0])); + EXPECT_THAT(deleted, ElementsAre(false, false, false)); + + delete snapshot[0]; + EXPECT_THAT(DeleteQueue(), ElementsAre(handle[2], snapshot[2])); + EXPECT_THAT(deleted, ElementsAre(true, true, false)); + + delete snapshot[2]; + EXPECT_THAT(DeleteQueue(), SizeIs(0)); + EXPECT_THAT(deleted, ElementsAre(true, true, deleted)); +} + +TEST(CordzHandleTest, DiagnosticsHandleIsSafeToInspect) { + CordzSnapshot snapshot1; + EXPECT_TRUE(snapshot1.DiagnosticsHandleIsSafeToInspect(nullptr)); + + auto* handle1 = new CordzHandle(); + EXPECT_TRUE(snapshot1.DiagnosticsHandleIsSafeToInspect(handle1)); + + CordzHandle::Delete(handle1); + EXPECT_TRUE(snapshot1.DiagnosticsHandleIsSafeToInspect(handle1)); + + CordzSnapshot snapshot2; + auto* handle2 = new CordzHandle(); + EXPECT_TRUE(snapshot1.DiagnosticsHandleIsSafeToInspect(handle1)); + EXPECT_TRUE(snapshot1.DiagnosticsHandleIsSafeToInspect(handle2)); + EXPECT_FALSE(snapshot2.DiagnosticsHandleIsSafeToInspect(handle1)); + EXPECT_TRUE(snapshot2.DiagnosticsHandleIsSafeToInspect(handle2)); + + CordzHandle::Delete(handle2); + EXPECT_TRUE(snapshot1.DiagnosticsHandleIsSafeToInspect(handle1)); +} + +TEST(CordzHandleTest, DiagnosticsGetSafeToInspectDeletedHandles) { + EXPECT_THAT(DeleteQueue(), IsEmpty()); + + auto* handle = new CordzHandle(); + auto* snapshot1 = 
new CordzSnapshot(); + + // snapshot1 should be able to see handle. + EXPECT_THAT(DeleteQueue(), ElementsAre(snapshot1)); + EXPECT_TRUE(snapshot1->DiagnosticsHandleIsSafeToInspect(handle)); + EXPECT_THAT(snapshot1->DiagnosticsGetSafeToInspectDeletedHandles(), + IsEmpty()); + + // This handle will be safe to inspect as long as snapshot1 is alive. However, + // since only snapshot1 can prove that it's alive, it will be hidden from + // snapshot2. + CordzHandle::Delete(handle); + + // This snapshot shouldn't be able to see handle because handle was already + // sent to Delete. + auto* snapshot2 = new CordzSnapshot(); + + // DeleteQueue elements are LIFO order. + EXPECT_THAT(DeleteQueue(), ElementsAre(snapshot2, handle, snapshot1)); + + EXPECT_TRUE(snapshot1->DiagnosticsHandleIsSafeToInspect(handle)); + EXPECT_FALSE(snapshot2->DiagnosticsHandleIsSafeToInspect(handle)); + + EXPECT_THAT(snapshot1->DiagnosticsGetSafeToInspectDeletedHandles(), + ElementsAre(handle)); + EXPECT_THAT(snapshot2->DiagnosticsGetSafeToInspectDeletedHandles(), + IsEmpty()); + + CordzHandle::Delete(snapshot1); + EXPECT_THAT(DeleteQueue(), ElementsAre(snapshot2)); + + CordzHandle::Delete(snapshot2); + EXPECT_THAT(DeleteQueue(), IsEmpty()); +} + +// Create and delete CordzHandle and CordzSnapshot objects in multiple threads +// so that tsan has some time to chew on it and look for memory problems. +TEST(CordzHandleTest, MultiThreaded) { + Notification stop; + static constexpr int kNumThreads = 4; + // Keep the number of handles relatively small so that the test will naturally + // transition to an empty delete queue during the test. If there are, say, 100 + // handles, that will virtually never happen. With 10 handles and around 50k + // iterations in each of 4 threads, the delete queue appears to become empty + // around 200 times. + static constexpr int kNumHandles = 10; + + // Each thread is going to pick a random index and atomically swap its + // CordzHandle with one in handles. 
This way, each thread can avoid + // manipulating a CordzHandle that might be operated upon in another thread. + std::vector<std::atomic<CordzHandle*>> handles(kNumHandles); + + // global bool which is set when any thread did get some 'safe to inspect' + // handles. On some platforms and OSS tests, we might risk that some pool + // threads are starved, stalled, or just got a few unlikely random 'handle' + // coin tosses, so we satisfy this test with simply observing 'some' thread + // did something meaningful, which should minimize the potential for flakes. + std::atomic<bool> found_safe_to_inspect(false); + + { + absl::synchronization_internal::ThreadPool pool(kNumThreads); + for (int i = 0; i < kNumThreads; ++i) { + pool.Schedule([&stop, &handles, &found_safe_to_inspect]() { + std::minstd_rand gen; + std::uniform_int_distribution<int> dist_type(0, 2); + std::uniform_int_distribution<int> dist_handle(0, kNumHandles - 1); + + while (!stop.HasBeenNotified()) { + CordzHandle* handle; + switch (dist_type(gen)) { + case 0: + handle = new CordzHandle(); + break; + case 1: + handle = new CordzSnapshot(); + break; + default: + handle = nullptr; + break; + } + CordzHandle* old_handle = handles[dist_handle(gen)].exchange(handle); + if (old_handle != nullptr) { + std::vector<const CordzHandle*> safe_to_inspect = + old_handle->DiagnosticsGetSafeToInspectDeletedHandles(); + for (const CordzHandle* handle : safe_to_inspect) { + // We're in a tight loop, so don't generate too many error + // messages. + ASSERT_FALSE(handle->is_snapshot()); + } + if (!safe_to_inspect.empty()) { + found_safe_to_inspect.store(true); + } + CordzHandle::Delete(old_handle); + } + } + + // Have each thread attempt to clean up everything. Some thread will be + // the last to reach this cleanup code, and it will be guaranteed to + // clean up everything because nothing remains to create new handles. 
+ for (auto& h : handles) { + if (CordzHandle* handle = h.exchange(nullptr)) { + CordzHandle::Delete(handle); + } + } + }); + } + + // The threads will hammer away. Give it a little bit of time for tsan to + // spot errors. + absl::SleepFor(absl::Seconds(3)); + stop.Notify(); + } + + // Confirm that the test did *something*. This check will be satisfied as + // long as any thread has deleted a CordzSnapshot object and a non-snapshot + // CordzHandle was deleted after the CordzSnapshot was created. + // See also comments on `found_safe_to_inspect` + EXPECT_TRUE(found_safe_to_inspect.load()); +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_info.cc b/third_party/abseil-cpp/absl/strings/internal/cordz_info.cc new file mode 100644 index 0000000000..5c18bbc566 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_info.cc @@ -0,0 +1,445 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/internal/cordz_info.h" + +#include "absl/base/config.h" +#include "absl/base/internal/spinlock.h" +#include "absl/container/inlined_vector.h" +#include "absl/debugging/stacktrace.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_ring.h" +#include "absl/strings/internal/cordz_handle.h" +#include "absl/strings/internal/cordz_statistics.h" +#include "absl/strings/internal/cordz_update_tracker.h" +#include "absl/synchronization/mutex.h" +#include "absl/types/span.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +using ::absl::base_internal::SpinLockHolder; + +constexpr int CordzInfo::kMaxStackDepth; + +ABSL_CONST_INIT CordzInfo::List CordzInfo::global_list_{absl::kConstInit}; + +namespace { + +// CordRepAnalyzer performs the analysis of a cord. +// +// It computes absolute node counts and total memory usage, and an 'estimated +// fair share memory usage` statistic. +// Conceptually, it divides the 'memory usage' at each location in the 'cord +// graph' by the cumulative reference count of that location. The cumulative +// reference count is the factored total of all edges leading into that node. +// +// The top level node is treated specially: we assume the current thread +// (typically called from the CordzHandler) to hold a reference purely to +// perform a safe analysis, and not being part of the application. So we +// substract 1 from the reference count of the top node to compute the +// 'application fair share' excluding the reference of the current thread. 
+// +// An example of fair sharing, and why we multiply reference counts: +// Assume we have 2 CordReps, both being a Substring referencing a Flat: +// CordSubstring A (refcount = 5) --> child Flat C (refcount = 2) +// CordSubstring B (refcount = 9) --> child Flat C (refcount = 2) +// +// Flat C has 2 incoming edges from the 2 substrings (refcount = 2) and is not +// referenced directly anywhere else. Translated into a 'fair share', we then +// attribute 50% of the memory (memory / refcount = 2) to each incoming edge. +// Rep A has a refcount of 5, so we attribute each incoming edge 1 / 5th of the +// memory cost below it, i.e.: the fair share of Rep A of the memory used by C +// is then 'memory C / (refcount C * refcount A) + (memory A / refcount A)'. +// It is also easy to see how all incoming edges add up to 100%. +class CordRepAnalyzer { + public: + // Creates an analyzer instance binding to `statistics`. + explicit CordRepAnalyzer(CordzStatistics& statistics) + : statistics_(statistics) {} + + // Analyzes the memory statistics and node counts for the provided `rep`, and + // adds the results to `statistics`. Note that node counts and memory sizes + // are not initialized, computed values are added to any existing values. + void AnalyzeCordRep(const CordRep* rep) { + // Process all linear nodes. + // As per the class comments, use refcout - 1 on the top level node, as the + // top level node is assumed to be referenced only for analysis purposes. + size_t refcount = rep->refcount.Get(); + RepRef repref{rep, (refcount > 1) ? refcount - 1 : 1}; + + // Process all top level linear nodes (substrings and flats). + repref = CountLinearReps(repref, memory_usage_); + + if (repref.rep != nullptr) { + if (repref.rep->tag == RING) { + AnalyzeRing(repref); + } else if (repref.rep->tag == BTREE) { + AnalyzeBtree(repref); + } else if (repref.rep->tag == CONCAT) { + AnalyzeConcat(repref); + } else { + // We should have either a concat, btree, or ring node if not null. 
+ assert(false); + } + } + + // Adds values to output + statistics_.estimated_memory_usage += memory_usage_.total; + statistics_.estimated_fair_share_memory_usage += + static_cast<size_t>(memory_usage_.fair_share); + } + + private: + // RepRef identifies a CordRep* inside the Cord tree with its cumulative + // refcount including itself. For example, a tree consisting of a substring + // with a refcount of 3 and a child flat with a refcount of 4 will have RepRef + // refcounts of 3 and 12 respectively. + struct RepRef { + const CordRep* rep; + size_t refcount; + + // Returns a 'child' RepRef which contains the cumulative reference count of + // this instance multiplied by the child's reference count. + RepRef Child(const CordRep* child) const { + return RepRef{child, refcount * child->refcount.Get()}; + } + }; + + // Memory usage values + struct MemoryUsage { + size_t total = 0; + double fair_share = 0.0; + + // Adds 'size` memory usage to this class, with a cumulative (recursive) + // reference count of `refcount` + void Add(size_t size, size_t refcount) { + total += size; + fair_share += static_cast<double>(size) / refcount; + } + }; + + // Returns `rr` if `rr.rep` is not null and a CONCAT type. + // Asserts that `rr.rep` is a concat node or null. + static RepRef AssertConcat(RepRef repref) { + const CordRep* rep = repref.rep; + assert(rep == nullptr || rep->tag == CONCAT); + return (rep != nullptr && rep->tag == CONCAT) ? 
repref : RepRef{nullptr, 0}; + } + + // Counts a flat of the provide allocated size + void CountFlat(size_t size) { + statistics_.node_count++; + statistics_.node_counts.flat++; + if (size <= 64) { + statistics_.node_counts.flat_64++; + } else if (size <= 128) { + statistics_.node_counts.flat_128++; + } else if (size <= 256) { + statistics_.node_counts.flat_256++; + } else if (size <= 512) { + statistics_.node_counts.flat_512++; + } else if (size <= 1024) { + statistics_.node_counts.flat_1k++; + } + } + + // Processes 'linear' reps (substring, flat, external) not requiring iteration + // or recursion. Returns RefRep{null} if all reps were processed, else returns + // the top-most non-linear concat or ring cordrep. + // Node counts are updated into `statistics_`, memory usage is update into + // `memory_usage`, which typically references `memory_usage_` except for ring + // buffers where we count children unrounded. + RepRef CountLinearReps(RepRef rep, MemoryUsage& memory_usage) { + // Consume all substrings + while (rep.rep->tag == SUBSTRING) { + statistics_.node_count++; + statistics_.node_counts.substring++; + memory_usage.Add(sizeof(CordRepSubstring), rep.refcount); + rep = rep.Child(rep.rep->substring()->child); + } + + // Consume possible FLAT + if (rep.rep->tag >= FLAT) { + size_t size = rep.rep->flat()->AllocatedSize(); + CountFlat(size); + memory_usage.Add(size, rep.refcount); + return RepRef{nullptr, 0}; + } + + // Consume possible external + if (rep.rep->tag == EXTERNAL) { + statistics_.node_count++; + statistics_.node_counts.external++; + size_t size = rep.rep->length + sizeof(CordRepExternalImpl<intptr_t>); + memory_usage.Add(size, rep.refcount); + return RepRef{nullptr, 0}; + } + + return rep; + } + + // Analyzes the provided concat node in a flattened recursive way. 
+ void AnalyzeConcat(RepRef rep) { + absl::InlinedVector<RepRef, 47> pending; + + while (rep.rep != nullptr) { + const CordRepConcat* concat = rep.rep->concat(); + RepRef left = rep.Child(concat->left); + RepRef right = rep.Child(concat->right); + + statistics_.node_count++; + statistics_.node_counts.concat++; + memory_usage_.Add(sizeof(CordRepConcat), rep.refcount); + + right = AssertConcat(CountLinearReps(right, memory_usage_)); + rep = AssertConcat(CountLinearReps(left, memory_usage_)); + if (rep.rep != nullptr) { + if (right.rep != nullptr) { + pending.push_back(right); + } + } else if (right.rep != nullptr) { + rep = right; + } else if (!pending.empty()) { + rep = pending.back(); + pending.pop_back(); + } + } + } + + // Analyzes the provided ring. + void AnalyzeRing(RepRef rep) { + statistics_.node_count++; + statistics_.node_counts.ring++; + const CordRepRing* ring = rep.rep->ring(); + memory_usage_.Add(CordRepRing::AllocSize(ring->capacity()), rep.refcount); + ring->ForEach([&](CordRepRing::index_type pos) { + CountLinearReps(rep.Child(ring->entry_child(pos)), memory_usage_); + }); + } + + // Analyzes the provided btree. + void AnalyzeBtree(RepRef rep) { + statistics_.node_count++; + statistics_.node_counts.btree++; + memory_usage_.Add(sizeof(CordRepBtree), rep.refcount); + const CordRepBtree* tree = rep.rep->btree(); + if (tree->height() > 0) { + for (CordRep* edge : tree->Edges()) { + AnalyzeBtree(rep.Child(edge)); + } + } else { + for (CordRep* edge : tree->Edges()) { + CountLinearReps(rep.Child(edge), memory_usage_); + } + } + } + + CordzStatistics& statistics_; + MemoryUsage memory_usage_; +}; + +} // namespace + +CordzInfo* CordzInfo::Head(const CordzSnapshot& snapshot) { + ABSL_ASSERT(snapshot.is_snapshot()); + + // We can do an 'unsafe' load of 'head', as we are guaranteed that the + // instance it points to is kept alive by the provided CordzSnapshot, so we + // can simply return the current value using an acquire load. 
+ // We do enforce in DEBUG builds that the 'head' value is present in the + // delete queue: ODR violations may lead to 'snapshot' and 'global_list_' + // being in different libraries / modules. + CordzInfo* head = global_list_.head.load(std::memory_order_acquire); + ABSL_ASSERT(snapshot.DiagnosticsHandleIsSafeToInspect(head)); + return head; +} + +CordzInfo* CordzInfo::Next(const CordzSnapshot& snapshot) const { + ABSL_ASSERT(snapshot.is_snapshot()); + + // Similar to the 'Head()' function, we do not need a mutex here. + CordzInfo* next = ci_next_.load(std::memory_order_acquire); + ABSL_ASSERT(snapshot.DiagnosticsHandleIsSafeToInspect(this)); + ABSL_ASSERT(snapshot.DiagnosticsHandleIsSafeToInspect(next)); + return next; +} + +void CordzInfo::TrackCord(InlineData& cord, MethodIdentifier method) { + assert(cord.is_tree()); + assert(!cord.is_profiled()); + CordzInfo* cordz_info = new CordzInfo(cord.as_tree(), nullptr, method); + cord.set_cordz_info(cordz_info); + cordz_info->Track(); +} + +void CordzInfo::TrackCord(InlineData& cord, const InlineData& src, + MethodIdentifier method) { + assert(cord.is_tree()); + assert(src.is_tree()); + + // Unsample current as we the current cord is being replaced with 'src', + // so any method history is no longer relevant. 
+ CordzInfo* cordz_info = cord.cordz_info(); + if (cordz_info != nullptr) cordz_info->Untrack(); + + // Start new cord sample + cordz_info = new CordzInfo(cord.as_tree(), src.cordz_info(), method); + cord.set_cordz_info(cordz_info); + cordz_info->Track(); +} + +void CordzInfo::MaybeTrackCordImpl(InlineData& cord, const InlineData& src, + MethodIdentifier method) { + if (src.is_profiled()) { + TrackCord(cord, src, method); + } else if (cord.is_profiled()) { + cord.cordz_info()->Untrack(); + cord.clear_cordz_info(); + } +} + +CordzInfo::MethodIdentifier CordzInfo::GetParentMethod(const CordzInfo* src) { + if (src == nullptr) return MethodIdentifier::kUnknown; + return src->parent_method_ != MethodIdentifier::kUnknown ? src->parent_method_ + : src->method_; +} + +int CordzInfo::FillParentStack(const CordzInfo* src, void** stack) { + assert(stack); + if (src == nullptr) return 0; + if (src->parent_stack_depth_) { + memcpy(stack, src->parent_stack_, src->parent_stack_depth_ * sizeof(void*)); + return src->parent_stack_depth_; + } + memcpy(stack, src->stack_, src->stack_depth_ * sizeof(void*)); + return src->stack_depth_; +} + +CordzInfo::CordzInfo(CordRep* rep, const CordzInfo* src, + MethodIdentifier method) + : rep_(rep), + stack_depth_(absl::GetStackTrace(stack_, /*max_depth=*/kMaxStackDepth, + /*skip_count=*/1)), + parent_stack_depth_(FillParentStack(src, parent_stack_)), + method_(method), + parent_method_(GetParentMethod(src)), + create_time_(absl::Now()) { + update_tracker_.LossyAdd(method); + if (src) { + // Copy parent counters. + update_tracker_.LossyAdd(src->update_tracker_); + } +} + +CordzInfo::~CordzInfo() { + // `rep_` is potentially kept alive if CordzInfo is included + // in a collection snapshot (which should be rare). 
// Releases the reference on `rep_`, if one is still held.
CordzInfo::~CordzInfo() {
  // `rep_` is potentially kept alive if CordzInfo is included
  // in a collection snapshot (which should be rare). Untrack() clears `rep_`
  // before deleting directly, so a non-null value here is the reference that
  // Untrack() added before handing this instance to CordzHandle::Delete().
  if (ABSL_PREDICT_FALSE(rep_)) {
    CordRep::Unref(rep_);
  }
}

// Inserts this instance at the head of the list, under the list's spinlock.
void CordzInfo::Track() {
  SpinLockHolder l(&list_->mutex);

  CordzInfo* const head = list_->head.load(std::memory_order_acquire);
  if (head != nullptr) {
    head->ci_prev_.store(this, std::memory_order_release);
  }
  // Link to the old head first, then publish this instance as the new head.
  ci_next_.store(head, std::memory_order_release);
  list_->head.store(this, std::memory_order_release);
}

// Removes this instance from the list and then either deletes it right away
// or hands it to CordzHandle::Delete(). `this` must not be used after this
// call returns.
void CordzInfo::Untrack() {
  ODRCheck();
  {
    // Unlink from the doubly linked list under the list's spinlock.
    SpinLockHolder l(&list_->mutex);

    CordzInfo* const head = list_->head.load(std::memory_order_acquire);
    CordzInfo* const next = ci_next_.load(std::memory_order_acquire);
    CordzInfo* const prev = ci_prev_.load(std::memory_order_acquire);

    if (next) {
      ABSL_ASSERT(next->ci_prev_.load(std::memory_order_acquire) == this);
      next->ci_prev_.store(prev, std::memory_order_release);
    }
    if (prev) {
      ABSL_ASSERT(head != this);
      ABSL_ASSERT(prev->ci_next_.load(std::memory_order_acquire) == this);
      prev->ci_next_.store(next, std::memory_order_release);
    } else {
      // No predecessor: this instance was the head of the list.
      ABSL_ASSERT(head == this);
      list_->head.store(next, std::memory_order_release);
    }
  }

  // We can no longer be discovered: perform a fast path check if we are not
  // listed on any delete queue, so we can directly delete this instance.
  if (SafeToDelete()) {
    // Clear `rep_` so the destructor does not unref a rep we hold no
    // reference on.
    UnsafeSetCordRep(nullptr);
    delete this;
    return;
  }

  // We are likely part of a snapshot, extend the life of the CordRep
  {
    absl::MutexLock lock(&mutex_);
    // This reference is released again by the destructor.
    if (rep_) CordRep::Ref(rep_);
  }
  CordzHandle::Delete(this);
}

// Acquires `mutex_` and records `method` in the update tracker. Debug builds
// assert that `rep_` is set.
void CordzInfo::Lock(MethodIdentifier method)
    ABSL_EXCLUSIVE_LOCK_FUNCTION(mutex_) {
  mutex_.Lock();
  update_tracker_.LossyAdd(method);
  assert(rep_);
}

// Releases `mutex_`. If `rep_` was null at that point, untracks this
// instance, after which `this` must not be used any more.
void CordzInfo::Unlock() ABSL_UNLOCK_FUNCTION(mutex_) {
  // Read `rep_` while the mutex is still held. Untrack() runs only after the
  // mutex is released: it may take `mutex_` itself and may delete `this`.
  bool tracked = rep_ != nullptr;
  mutex_.Unlock();
  if (!tracked) {
    Untrack();
  }
}

// Returns a view of the `stack_depth_` frames stored in `stack_`.
absl::Span<void* const> CordzInfo::GetStack() const {
  return absl::MakeConstSpan(stack_, stack_depth_);
}

// Returns a view of the `parent_stack_depth_` frames stored in
// `parent_stack_`.
absl::Span<void* const> CordzInfo::GetParentStack() const {
  return absl::MakeConstSpan(parent_stack_, parent_stack_depth_);
}

// Returns statistics for this sample: the method, parent method and update
// tracker are always filled in; size, node counts and memory usage are only
// filled in if a rep is currently set.
CordzStatistics CordzInfo::GetCordzStatistics() const {
  CordzStatistics stats;
  stats.method = method_;
  stats.parent_method = parent_method_;
  stats.update_tracker = update_tracker_;
  // RefCordRep() returns the rep with a reference added (or null); that
  // reference is held for the duration of the analysis and dropped below.
  if (CordRep* rep = RefCordRep()) {
    stats.size = rep->length;
    CordRepAnalyzer analyzer(stats);
    analyzer.AnalyzeCordRep(rep);
    CordRep::Unref(rep);
  }
  return stats;
}
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_CORDZ_INFO_H_ +#define ABSL_STRINGS_CORDZ_INFO_H_ + +#include <atomic> +#include <cstdint> +#include <functional> + +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/thread_annotations.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cordz_functions.h" +#include "absl/strings/internal/cordz_handle.h" +#include "absl/strings/internal/cordz_statistics.h" +#include "absl/strings/internal/cordz_update_tracker.h" +#include "absl/synchronization/mutex.h" +#include "absl/types/span.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordzInfo tracks a profiled Cord. Each of these objects can be in two places. +// If a Cord is alive, the CordzInfo will be in the global_cordz_infos map, and +// can also be retrieved via the linked list starting with +// global_cordz_infos_head and continued via the cordz_info_next() method. When +// a Cord has reached the end of its lifespan, the CordzInfo object will be +// migrated out of the global_cordz_infos list and the global_cordz_infos_map, +// and will either be deleted or appended to the global_delete_queue. If it is +// placed on the global_delete_queue, the CordzInfo object will be cleaned in +// the destructor of a CordzSampleToken object. 
class ABSL_LOCKABLE CordzInfo : public CordzHandle {
 public:
  using MethodIdentifier = CordzUpdateTracker::MethodIdentifier;

  // TrackCord creates a CordzInfo instance which tracks important metrics of
  // a sampled cord, and stores the created CordzInfo instance into `cord`. All
  // CordzInfo instances are placed in a global list which is used to discover
  // and snapshot all actively tracked cords. Callers are responsible for
  // calling Untrack() before the tracked Cord instance is deleted, or to
  // stop tracking the sampled Cord. Callers are also responsible for guarding
  // changes to the 'tree' value of a Cord (InlineData.tree) through the Lock()
  // and Unlock() calls. Any change resulting in a new tree value for the cord
  // requires a call to SetCordRep() before the old tree has been unreffed
  // and/or deleted. `method` identifies the Cord public API method initiating
  // the cord to be sampled.
  // Requires `cord` to hold a tree, and `cord.cordz_info()` to be null.
  static void TrackCord(InlineData& cord, MethodIdentifier method);

  // Identical to TrackCord(), except that this function fills the
  // `parent_stack` and `parent_method` properties of the returned CordzInfo
  // instance from the provided `src` instance if `src` is sampled.
  // This function should be used for sampling 'copy constructed' and 'copy
  // assigned' cords. This function allows `cord` to be already sampled, in
  // which case the CordzInfo will be newly created from `src`.
  static void TrackCord(InlineData& cord, const InlineData& src,
                        MethodIdentifier method);

  // Maybe sample the cord identified by `cord` for method `method`.
  // Uses `cordz_should_profile` to randomly pick cords to be sampled, and if
  // so, invokes `TrackCord` to start sampling `cord`.
  static void MaybeTrackCord(InlineData& cord, MethodIdentifier method);

  // Maybe sample the cord identified by `cord` for method `method`.
  // `src` identifies a 'parent' cord which is assigned to `cord`, typically the
  // input cord for a copy constructor, or an assign method such as `operator=`.
  // `cord` will be sampled if (and only if) `src` is sampled.
  // If `cord` is currently being sampled and `src` is not being sampled, then
  // this function will stop sampling the cord and reset the cord's cordz_info.
  //
  // Previously this function defined that `cord` will be sampled if either
  // `src` is sampled, or if `cord` is randomly picked for sampling. However,
  // this can cause issues, as there may be paths where some cord is assigned an
  // indirect copy of its own value. As such a 'string of copies' would then
  // remain sampled (`src.is_profiled`), then assigning such a cord back to
  // 'itself' creates a cycle where the cord will converge to 'always sampled'.
  //
  // For example:
  //
  //   Cord x;
  //   for (...) {
  //     // Copy ctor --> y.is_profiled := x.is_profiled | random(...)
  //     Cord y = x;
  //     ...
  //     // Assign x = y --> x.is_profiled = y.is_profiled | random(...)
  //     //              ==> x.is_profiled |= random(...)
  //     //              ==> x converges to 'always profiled'
  //     x = y;
  //   }
  static void MaybeTrackCord(InlineData& cord, const InlineData& src,
                             MethodIdentifier method);

  // Stops tracking changes for a sampled cord, and deletes the provided info.
  // This function must be called before the sampled cord instance is deleted,
  // and before the root cordrep of the sampled cord is unreffed.
  // This function may extend the lifetime of the cordrep in cases where the
  // CordzInfo instance is being held by a concurrent collection thread.
  void Untrack();

  // Invokes Untrack() on `info` if `info` is not null.
  static void MaybeUntrackCord(CordzInfo* info);

  CordzInfo() = delete;
  CordzInfo(const CordzInfo&) = delete;
  CordzInfo& operator=(const CordzInfo&) = delete;

  // Retrieves the current head of the global list of tracked CordzInfo
  // instances, or null if the list is empty. Track() inserts at the head, so
  // this is the most recently tracked instance that is still tracked.
  static CordzInfo* Head(const CordzSnapshot& snapshot)
      ABSL_NO_THREAD_SAFETY_ANALYSIS;

  // Retrieves the instance following 'this' instance in the global list, i.e.
  // the instance that was at the head when this one was tracked, or null at
  // the end of the list.
  CordzInfo* Next(const CordzSnapshot& snapshot) const
      ABSL_NO_THREAD_SAFETY_ANALYSIS;

  // Locks this instance for the update identified by `method`.
  // Increases the count for `method` in `update_tracker`.
  void Lock(MethodIdentifier method) ABSL_EXCLUSIVE_LOCK_FUNCTION(mutex_);

  // Unlocks this instance. If the contained `rep` has been set to null
  // indicating the Cord has been cleared or is otherwise no longer sampled,
  // then this method will untrack this CordzInfo instance, which deletes it
  // or queues it for deletion. The instance must not be used afterwards.
  void Unlock() ABSL_UNLOCK_FUNCTION(mutex_);

  // Asserts that this CordzInfo instance is locked.
  void AssertHeld() ABSL_ASSERT_EXCLUSIVE_LOCK(mutex_);

  // Updates the `rep` property of this instance. This method is invoked by
  // Cord logic each time the root node of a sampled Cord changes, and before
  // the old root reference count is deleted. This guarantees that collection
  // code can always safely take a reference on the tracked cord.
  // Requires a lock to be held through the `Lock()` method.
  // TODO(b/117940323): annotate with ABSL_EXCLUSIVE_LOCKS_REQUIRED once all
  // Cord code is in a state where this can be proven true by the compiler.
  void SetCordRep(CordRep* rep);

  // Returns the current `rep` property of this instance with a reference
  // added, or null if this instance represents a cord that has since been
  // deleted or untracked.
  CordRep* RefCordRep() const ABSL_LOCKS_EXCLUDED(mutex_);

  // Returns the current value of `rep_` for testing purposes only.
  // Reads `rep_` without taking `mutex_`.
  CordRep* GetCordRepForTesting() const ABSL_NO_THREAD_SAFETY_ANALYSIS {
    return rep_;
  }

  // Sets the current value of `rep_` for testing purposes only.
  // Writes `rep_` without taking `mutex_`.
  void SetCordRepForTesting(CordRep* rep) ABSL_NO_THREAD_SAFETY_ANALYSIS {
    rep_ = rep;
  }

  // Returns the stack trace for where the cord was first sampled. Cords are
  // potentially sampled when they promote from an inlined cord to a tree or
  // ring representation, which is not necessarily the location where the cord
  // was first created. Some cords are created as inlined cords, and only as
  // data is added do they become a non-inlined cord. However, typically the
  // location represents reasonably well where the cord is 'created'.
  absl::Span<void* const> GetStack() const;

  // Returns the stack trace for a sampled cord's 'parent stack trace'. This
  // value may be set if the cord is sampled (promoted) after being created
  // from, or being assigned the value of an existing (sampled) cord.
  absl::Span<void* const> GetParentStack() const;

  // Retrieves the CordzStatistics associated with this Cord. The statistics
  // are only updated when a Cord goes through a mutation, such as an Append
  // or RemovePrefix.
  CordzStatistics GetCordzStatistics() const;

 private:
  using SpinLock = absl::base_internal::SpinLock;
  using SpinLockHolder = ::absl::base_internal::SpinLockHolder;

  // Global cordz info list. CordzInfo stores a pointer to the global list
  // instance to harden against ODR violations.
  struct List {
    constexpr explicit List(absl::ConstInitType)
        : mutex(absl::kConstInit,
                absl::base_internal::SCHEDULE_COOPERATIVE_AND_KERNEL) {}

    SpinLock mutex;
    std::atomic<CordzInfo*> head ABSL_GUARDED_BY(mutex){nullptr};
  };

  // Capacity, in frames, of `stack_` and `parent_stack_`.
  static constexpr int kMaxStackDepth = 64;

  // Instances are only created through TrackCord() and destroyed through
  // Untrack() / the CordzHandle delete queue.
  explicit CordzInfo(CordRep* rep, const CordzInfo* src,
                     MethodIdentifier method);
  ~CordzInfo() override;

  // Sets `rep_` without holding a lock.
  void UnsafeSetCordRep(CordRep* rep) ABSL_NO_THREAD_SAFETY_ANALYSIS;

  // Adds this instance to the global list.
  void Track();

  // Returns the parent method from `src`, which is either `parent_method_` or
  // `method_` depending on `parent_method_` being kUnknown.
  // Returns kUnknown if `src` is null.
  static MethodIdentifier GetParentMethod(const CordzInfo* src);

  // Fills the provided stack from `src`, copying either `parent_stack_` or
  // `stack_` depending on `parent_stack_` being empty, returning the size of
  // the parent stack.
  // Returns 0 if `src` is null.
  static int FillParentStack(const CordzInfo* src, void** stack);

  // In debug builds (NDEBUG not defined), checks that `list_` refers to
  // `global_list_`; a mismatch is reported as an ODR violation. No-op
  // otherwise.
  void ODRCheck() const {
#ifndef NDEBUG
    ABSL_RAW_CHECK(list_ == &global_list_, "ODR violation in Cord");
#endif
  }

  // Non-inlined implementation of `MaybeTrackCord`, which is executed if
  // either `src` is sampled or `cord` is sampled, and either untracks or
  // tracks `cord` as documented per `MaybeTrackCord`.
  static void MaybeTrackCordImpl(InlineData& cord, const InlineData& src,
                                 MethodIdentifier method);

  ABSL_CONST_INIT static List global_list_;
  List* const list_ = &global_list_;

  // ci_prev_ and ci_next_ require the global list mutex to be held.
  // Unfortunately we can't use thread annotations such that the thread safety
  // analysis understands that list_ and global_list_ are one and the same.
  std::atomic<CordzInfo*> ci_prev_{nullptr};
  std::atomic<CordzInfo*> ci_next_{nullptr};

  // Guards `rep_`; acquired / released through Lock() and Unlock().
  mutable absl::Mutex mutex_;
  CordRep* rep_ ABSL_GUARDED_BY(mutex_);

  // Stack frames backing GetStack() / GetParentStack(); only the first
  // `stack_depth_` / `parent_stack_depth_` entries are exposed.
  void* stack_[kMaxStackDepth];
  void* parent_stack_[kMaxStackDepth];
  const int stack_depth_;
  const int parent_stack_depth_;
  const MethodIdentifier method_;
  const MethodIdentifier parent_method_;
  CordzUpdateTracker update_tracker_;
  const absl::Time create_time_;
};

// Starts tracking `cord` if `cordz_should_profile()` selects it for sampling.
inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CordzInfo::MaybeTrackCord(
    InlineData& cord, MethodIdentifier method) {
  if (ABSL_PREDICT_FALSE(cordz_should_profile())) {
    TrackCord(cord, method);
  }
}

// Fast path: does nothing unless `cord` or `src` is profiled, in which case
// the out-of-line MaybeTrackCordImpl() decides whether to track or untrack.
inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CordzInfo::MaybeTrackCord(
    InlineData& cord, const InlineData& src, MethodIdentifier method) {
  if (ABSL_PREDICT_FALSE(InlineData::is_either_profiled(cord, src))) {
    MaybeTrackCordImpl(cord, src, method);
  }
}

// Untracks `info` if it is not null.
inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CordzInfo::MaybeUntrackCord(
    CordzInfo* info) {
  if (ABSL_PREDICT_FALSE(info)) {
    info->Untrack();
  }
}

// In debug builds (NDEBUG not defined), asserts that `mutex_` is held.
inline void CordzInfo::AssertHeld() ABSL_ASSERT_EXCLUSIVE_LOCK(mutex_) {
#ifndef NDEBUG
  mutex_.AssertHeld();
#endif
}

// Stores `rep` into `rep_`; the caller must hold the lock (see AssertHeld()).
inline void CordzInfo::SetCordRep(CordRep* rep) {
  AssertHeld();
  rep_ = rep;
}

// Stores `rep` into `rep_` without any locking or lock assertion.
inline void CordzInfo::UnsafeSetCordRep(CordRep* rep) { rep_ = rep; }

// Takes `mutex_` and returns `rep_` with a reference added, or null if `rep_`
// is null.
inline CordRep* CordzInfo::RefCordRep() const ABSL_LOCKS_EXCLUDED(mutex_) {
  MutexLock lock(&mutex_);
  return rep_ ? CordRep::Ref(rep_) : nullptr;
}
CordRep::Ref(rep_) : nullptr; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_CORDZ_INFO_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_info_statistics_test.cc b/third_party/abseil-cpp/absl/strings/internal/cordz_info_statistics_test.cc new file mode 100644 index 0000000000..7430d281ca --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_info_statistics_test.cc @@ -0,0 +1,625 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <iostream> +#include <random> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/strings/cord.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/internal/cord_rep_ring.h" +#include "absl/strings/internal/cordz_info.h" +#include "absl/strings/internal/cordz_sample_token.h" +#include "absl/strings/internal/cordz_statistics.h" +#include "absl/strings/internal/cordz_update_scope.h" +#include "absl/strings/internal/cordz_update_tracker.h" +#include "absl/synchronization/internal/thread_pool.h" +#include "absl/synchronization/notification.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// Do not print statistics contents, the matcher prints them as needed. 
+inline void PrintTo(const CordzStatistics& stats, std::ostream* s) { + if (s) *s << "CordzStatistics{...}"; +} + +namespace { + +using ::testing::Ge; + +// Creates a flat of the specified allocated size +CordRepFlat* Flat(size_t size) { + // Round up to a tag size, as we are going to poke an exact tag size back into + // the allocated flat. 'size returning allocators' could grant us more than we + // wanted, but we are ok to poke the 'requested' size in the tag, even in the + // presence of sized deletes, so we need to make sure the size rounds + // perfectly to a tag value. + assert(size >= kMinFlatSize); + size = RoundUpForTag(size); + CordRepFlat* flat = CordRepFlat::New(size - kFlatOverhead); + flat->tag = AllocatedSizeToTag(size); + flat->length = size - kFlatOverhead; + return flat; +} + +// Creates an external of the specified length +CordRepExternal* External(int length = 512) { + return static_cast<CordRepExternal*>( + NewExternalRep(absl::string_view("", length), [](absl::string_view) {})); +} + +// Creates a substring on the provided rep of length - 1 +CordRepSubstring* Substring(CordRep* rep) { + auto* substring = new CordRepSubstring; + substring->length = rep->length - 1; + substring->tag = SUBSTRING; + substring->child = rep; + return substring; +} + +// Creates a concat on the provided reps +CordRepConcat* Concat(CordRep* left, CordRep* right) { + auto* concat = new CordRepConcat; + concat->length = left->length + right->length; + concat->tag = CONCAT; + concat->left = left; + concat->right = right; + return concat; +} + +// Reference count helper +struct RefHelper { + std::vector<CordRep*> refs; + + ~RefHelper() { + for (CordRep* rep : refs) { + CordRep::Unref(rep); + } + } + + // Invokes CordRep::Unref() on `rep` when this instance is destroyed. + template <typename T> + T* NeedsUnref(T* rep) { + refs.push_back(rep); + return rep; + } + + // Adds `n` reference counts to `rep` which will be unreffed when this + // instance is destroyed. 
+ template <typename T> + T* Ref(T* rep, size_t n = 1) { + while (n--) { + NeedsUnref(CordRep::Ref(rep)); + } + return rep; + } +}; + +// Sizeof helper. Returns the allocated size of `rep`, excluding any child +// elements for substring, concat and ring cord reps. +template <typename T> +size_t SizeOf(const T* rep) { + return sizeof(T); +} + +template <> +size_t SizeOf(const CordRepFlat* rep) { + return rep->AllocatedSize(); +} + +template <> +size_t SizeOf(const CordRepExternal* rep) { + // See cord.cc + return sizeof(CordRepExternalImpl<intptr_t>) + rep->length; +} + +template <> +size_t SizeOf(const CordRepRing* rep) { + return CordRepRing::AllocSize(rep->capacity()); +} + +// Computes fair share memory used in a naive 'we dare to recurse' way. +double FairShareImpl(CordRep* rep, size_t ref) { + double self = 0.0, children = 0.0; + ref *= rep->refcount.Get(); + if (rep->tag >= FLAT) { + self = SizeOf(rep->flat()); + } else if (rep->tag == EXTERNAL) { + self = SizeOf(rep->external()); + } else if (rep->tag == SUBSTRING) { + self = SizeOf(rep->substring()); + children = FairShareImpl(rep->substring()->child, ref); + } else if (rep->tag == BTREE) { + self = SizeOf(rep->btree()); + for (CordRep*edge : rep->btree()->Edges()) { + children += FairShareImpl(edge, ref); + } + } else if (rep->tag == RING) { + self = SizeOf(rep->ring()); + rep->ring()->ForEach([&](CordRepRing::index_type i) { + self += FairShareImpl(rep->ring()->entry_child(i), 1); + }); + } else if (rep->tag == CONCAT) { + self = SizeOf(rep->concat()); + children = FairShareImpl(rep->concat()->left, ref) + + FairShareImpl(rep->concat()->right, ref); + } else { + assert(false); + } + return self / ref + children; +} + +// Returns the fair share memory size from `FairShareImpl()` as a size_t. 
+size_t FairShare(CordRep* rep, size_t ref = 1) { + return static_cast<size_t>(FairShareImpl(rep, ref)); +} + +// Samples the cord and returns CordzInfo::GetStatistics() +CordzStatistics SampleCord(CordRep* rep) { + InlineData cord(rep); + CordzInfo::TrackCord(cord, CordzUpdateTracker::kUnknown); + CordzStatistics stats = cord.cordz_info()->GetCordzStatistics(); + cord.cordz_info()->Untrack(); + return stats; +} + +MATCHER_P(EqStatistics, stats, "Statistics equal expected values") { + bool ok = true; + +#define STATS_MATCHER_EXPECT_EQ(member) \ + if (stats.member != arg.member) { \ + *result_listener << "\n stats." << #member \ + << ": actual = " << arg.member << ", expected " \ + << stats.member; \ + ok = false; \ + } + + STATS_MATCHER_EXPECT_EQ(size); + STATS_MATCHER_EXPECT_EQ(node_count); + STATS_MATCHER_EXPECT_EQ(node_counts.flat); + STATS_MATCHER_EXPECT_EQ(node_counts.flat_64); + STATS_MATCHER_EXPECT_EQ(node_counts.flat_128); + STATS_MATCHER_EXPECT_EQ(node_counts.flat_256); + STATS_MATCHER_EXPECT_EQ(node_counts.flat_512); + STATS_MATCHER_EXPECT_EQ(node_counts.flat_1k); + STATS_MATCHER_EXPECT_EQ(node_counts.external); + STATS_MATCHER_EXPECT_EQ(node_counts.concat); + STATS_MATCHER_EXPECT_EQ(node_counts.substring); + STATS_MATCHER_EXPECT_EQ(node_counts.ring); + STATS_MATCHER_EXPECT_EQ(node_counts.btree); + STATS_MATCHER_EXPECT_EQ(estimated_memory_usage); + STATS_MATCHER_EXPECT_EQ(estimated_fair_share_memory_usage); + +#undef STATS_MATCHER_EXPECT_EQ + + return ok; +} + +TEST(CordzInfoStatisticsTest, Flat) { + RefHelper ref; + auto* flat = ref.NeedsUnref(Flat(512)); + + CordzStatistics expected; + expected.size = flat->length; + expected.estimated_memory_usage = SizeOf(flat); + expected.estimated_fair_share_memory_usage = expected.estimated_memory_usage; + expected.node_count = 1; + expected.node_counts.flat = 1; + expected.node_counts.flat_512 = 1; + + EXPECT_THAT(SampleCord(flat), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, SharedFlat) { + 
RefHelper ref; + auto* flat = ref.Ref(ref.NeedsUnref(Flat(64))); + + CordzStatistics expected; + expected.size = flat->length; + expected.estimated_memory_usage = SizeOf(flat); + expected.estimated_fair_share_memory_usage = SizeOf(flat) / 2; + expected.node_count = 1; + expected.node_counts.flat = 1; + expected.node_counts.flat_64 = 1; + + EXPECT_THAT(SampleCord(flat), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, External) { + RefHelper ref; + auto* external = ref.NeedsUnref(External()); + + CordzStatistics expected; + expected.size = external->length; + expected.estimated_memory_usage = SizeOf(external); + expected.estimated_fair_share_memory_usage = SizeOf(external); + expected.node_count = 1; + expected.node_counts.external = 1; + + EXPECT_THAT(SampleCord(external), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, SharedExternal) { + RefHelper ref; + auto* external = ref.Ref(ref.NeedsUnref(External())); + + CordzStatistics expected; + expected.size = external->length; + expected.estimated_memory_usage = SizeOf(external); + expected.estimated_fair_share_memory_usage = SizeOf(external) / 2; + expected.node_count = 1; + expected.node_counts.external = 1; + + EXPECT_THAT(SampleCord(external), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, Substring) { + RefHelper ref; + auto* flat = Flat(1024); + auto* substring = ref.NeedsUnref(Substring(flat)); + + CordzStatistics expected; + expected.size = substring->length; + expected.estimated_memory_usage = SizeOf(substring) + SizeOf(flat); + expected.estimated_fair_share_memory_usage = expected.estimated_memory_usage; + expected.node_count = 2; + expected.node_counts.flat = 1; + expected.node_counts.flat_1k = 1; + expected.node_counts.substring = 1; + + EXPECT_THAT(SampleCord(substring), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, SharedSubstring) { + RefHelper ref; + auto* flat = ref.Ref(Flat(511), 2); + auto* substring = ref.Ref(ref.NeedsUnref(Substring(flat))); + 
+ CordzStatistics expected; + expected.size = substring->length; + expected.estimated_memory_usage = SizeOf(flat) + SizeOf(substring); + expected.estimated_fair_share_memory_usage = + SizeOf(substring) / 2 + SizeOf(flat) / 6; + expected.node_count = 2; + expected.node_counts.flat = 1; + expected.node_counts.flat_512 = 1; + expected.node_counts.substring = 1; + + EXPECT_THAT(SampleCord(substring), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, Concat) { + RefHelper ref; + auto* flat1 = Flat(300); + auto* flat2 = Flat(2000); + auto* concat = ref.NeedsUnref(Concat(flat1, flat2)); + + CordzStatistics expected; + expected.size = concat->length; + expected.estimated_memory_usage = + SizeOf(concat) + SizeOf(flat1) + SizeOf(flat2); + expected.estimated_fair_share_memory_usage = expected.estimated_memory_usage; + expected.node_count = 3; + expected.node_counts.flat = 2; + expected.node_counts.flat_512 = 1; + expected.node_counts.concat = 1; + + EXPECT_THAT(SampleCord(concat), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, DeepConcat) { + RefHelper ref; + auto* flat1 = Flat(300); + auto* flat2 = Flat(2000); + auto* flat3 = Flat(400); + auto* external = External(3000); + auto* substring = Substring(external); + auto* concat1 = Concat(flat1, flat2); + auto* concat2 = Concat(flat3, substring); + auto* concat = ref.NeedsUnref(Concat(concat1, concat2)); + + CordzStatistics expected; + expected.size = concat->length; + expected.estimated_memory_usage = SizeOf(concat) * 3 + SizeOf(flat1) + + SizeOf(flat2) + SizeOf(flat3) + + SizeOf(external) + SizeOf(substring); + expected.estimated_fair_share_memory_usage = expected.estimated_memory_usage; + + expected.node_count = 8; + expected.node_counts.flat = 3; + expected.node_counts.flat_512 = 2; + expected.node_counts.external = 1; + expected.node_counts.concat = 3; + expected.node_counts.substring = 1; + + EXPECT_THAT(SampleCord(concat), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, 
DeepSharedConcat) { + RefHelper ref; + auto* flat1 = Flat(40); + auto* flat2 = ref.Ref(Flat(2000), 4); + auto* flat3 = Flat(70); + auto* external = ref.Ref(External(3000)); + auto* substring = ref.Ref(Substring(external), 3); + auto* concat1 = Concat(flat1, flat2); + auto* concat2 = Concat(flat3, substring); + auto* concat = ref.Ref(ref.NeedsUnref(Concat(concat1, concat2))); + + CordzStatistics expected; + expected.size = concat->length; + expected.estimated_memory_usage = SizeOf(concat) * 3 + SizeOf(flat1) + + SizeOf(flat2) + SizeOf(flat3) + + SizeOf(external) + SizeOf(substring); + expected.estimated_fair_share_memory_usage = FairShare(concat); + expected.node_count = 8; + expected.node_counts.flat = 3; + expected.node_counts.flat_64 = 1; + expected.node_counts.flat_128 = 1; + expected.node_counts.external = 1; + expected.node_counts.concat = 3; + expected.node_counts.substring = 1; + + EXPECT_THAT(SampleCord(concat), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, Ring) { + RefHelper ref; + auto* flat1 = Flat(240); + auto* flat2 = Flat(2000); + auto* flat3 = Flat(70); + auto* external = External(3000); + CordRepRing* ring = CordRepRing::Create(flat1); + ring = CordRepRing::Append(ring, flat2); + ring = CordRepRing::Append(ring, flat3); + ring = ref.NeedsUnref(CordRepRing::Append(ring, external)); + + CordzStatistics expected; + expected.size = ring->length; + expected.estimated_memory_usage = SizeOf(ring) + SizeOf(flat1) + + SizeOf(flat2) + SizeOf(flat3) + + SizeOf(external); + expected.estimated_fair_share_memory_usage = expected.estimated_memory_usage; + expected.node_count = 5; + expected.node_counts.flat = 3; + expected.node_counts.flat_128 = 1; + expected.node_counts.flat_256 = 1; + expected.node_counts.external = 1; + expected.node_counts.ring = 1; + + EXPECT_THAT(SampleCord(ring), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, SharedSubstringRing) { + RefHelper ref; + auto* flat1 = ref.Ref(Flat(240)); + auto* flat2 = Flat(200); 
+ auto* flat3 = Flat(70); + auto* external = ref.Ref(External(3000), 5); + CordRepRing* ring = CordRepRing::Create(flat1); + ring = CordRepRing::Append(ring, flat2); + ring = CordRepRing::Append(ring, flat3); + ring = ref.Ref(CordRepRing::Append(ring, external), 4); + auto* substring = ref.Ref(ref.NeedsUnref(Substring(ring))); + + + CordzStatistics expected; + expected.size = substring->length; + expected.estimated_memory_usage = SizeOf(ring) + SizeOf(flat1) + + SizeOf(flat2) + SizeOf(flat3) + + SizeOf(external) + SizeOf(substring); + expected.estimated_fair_share_memory_usage = FairShare(substring); + expected.node_count = 6; + expected.node_counts.flat = 3; + expected.node_counts.flat_128 = 1; + expected.node_counts.flat_256 = 2; + expected.node_counts.external = 1; + expected.node_counts.ring = 1; + expected.node_counts.substring = 1; + + EXPECT_THAT(SampleCord(substring), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, BtreeLeaf) { + ASSERT_THAT(CordRepBtree::kMaxCapacity, Ge(3)); + RefHelper ref; + auto* flat1 = Flat(2000); + auto* flat2 = Flat(200); + auto* substr = Substring(flat2); + auto* external = External(3000); + + CordRepBtree* tree = CordRepBtree::Create(flat1); + tree = CordRepBtree::Append(tree, substr); + tree = CordRepBtree::Append(tree, external); + size_t flat3_count = CordRepBtree::kMaxCapacity - 3; + size_t flat3_size = 0; + for (size_t i = 0; i < flat3_count; ++i) { + auto* flat3 = Flat(70); + flat3_size += SizeOf(flat3); + tree = CordRepBtree::Append(tree, flat3); + } + ref.NeedsUnref(tree); + + CordzStatistics expected; + expected.size = tree->length; + expected.estimated_memory_usage = SizeOf(tree) + SizeOf(flat1) + + SizeOf(flat2) + SizeOf(substr) + + flat3_size + SizeOf(external); + expected.estimated_fair_share_memory_usage = expected.estimated_memory_usage; + expected.node_count = 1 + 3 + 1 + flat3_count; + expected.node_counts.flat = 2 + flat3_count; + expected.node_counts.flat_128 = flat3_count; + 
expected.node_counts.flat_256 = 1; + expected.node_counts.external = 1; + expected.node_counts.substring = 1; + expected.node_counts.btree = 1; + + EXPECT_THAT(SampleCord(tree), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, BtreeNodeShared) { + RefHelper ref; + static constexpr int leaf_count = 3; + const size_t flat3_count = CordRepBtree::kMaxCapacity - 3; + ASSERT_THAT(flat3_count, Ge(0)); + + CordRepBtree* tree = nullptr; + size_t mem_size = 0; + for (int i = 0; i < leaf_count; ++i) { + auto* flat1 = ref.Ref(Flat(2000), 9); + mem_size += SizeOf(flat1); + if (i == 0) { + tree = CordRepBtree::Create(flat1); + } else { + tree = CordRepBtree::Append(tree, flat1); + } + + auto* flat2 = Flat(200); + auto* substr = Substring(flat2); + mem_size += SizeOf(flat2) + SizeOf(substr); + tree = CordRepBtree::Append(tree, substr); + + auto* external = External(30); + mem_size += SizeOf(external); + tree = CordRepBtree::Append(tree, external); + + for (size_t i = 0; i < flat3_count; ++i) { + auto* flat3 = Flat(70); + mem_size += SizeOf(flat3); + tree = CordRepBtree::Append(tree, flat3); + } + + if (i == 0) { + mem_size += SizeOf(tree); + } else { + mem_size += SizeOf(tree->Edges().back()->btree()); + } + } + ref.NeedsUnref(tree); + + // Ref count: 2 for top (add 1), 5 for leaf 0 (add 4). 
+ ref.Ref(tree, 1); + ref.Ref(tree->Edges().front(), 4); + + CordzStatistics expected; + expected.size = tree->length; + expected.estimated_memory_usage = SizeOf(tree) + mem_size; + expected.estimated_fair_share_memory_usage = FairShare(tree); + + expected.node_count = 1 + leaf_count * (1 + 3 + 1 + flat3_count); + expected.node_counts.flat = leaf_count * (2 + flat3_count); + expected.node_counts.flat_128 = leaf_count * flat3_count; + expected.node_counts.flat_256 = leaf_count; + expected.node_counts.external = leaf_count; + expected.node_counts.substring = leaf_count; + expected.node_counts.btree = 1 + leaf_count; + + EXPECT_THAT(SampleCord(tree), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, ThreadSafety) { + Notification stop; + static constexpr int kNumThreads = 8; + int64_t sampled_node_count = 0; + + { + absl::synchronization_internal::ThreadPool pool(kNumThreads); + + // Run analyzer thread emulating a CordzHandler collection. + pool.Schedule([&]() { + while (!stop.HasBeenNotified()) { + // Run every 10us (about 100K total collections). + absl::SleepFor(absl::Microseconds(10)); + CordzSampleToken token; + for (const CordzInfo& cord_info : token) { + CordzStatistics stats = cord_info.GetCordzStatistics(); + sampled_node_count += stats.node_count; + } + } + }); + + // Run 'application threads' + for (int i = 0; i < kNumThreads; ++i) { + pool.Schedule([&]() { + // Track 0 - 2 cordz infos at a time, providing permutations of 0, 1 + // and 2 CordzHandle and CordzInfo queues being active, with plenty of + // 'empty to non empty' transitions. 
+ InlineData cords[2]; + std::minstd_rand gen; + std::uniform_int_distribution<int> coin_toss(0, 1); + + while (!stop.HasBeenNotified()) { + for (InlineData& cord : cords) { + // 50/50 flip the state of the cord + if (coin_toss(gen) != 0) { + if (cord.is_tree()) { + // 50/50 simulate delete (untrack) or 'edit to empty' + if (coin_toss(gen) != 0) { + CordzInfo::MaybeUntrackCord(cord.cordz_info()); + } else { + CordzUpdateScope scope(cord.cordz_info(), + CordzUpdateTracker::kUnknown); + scope.SetCordRep(nullptr); + } + CordRep::Unref(cord.as_tree()); + cord.set_inline_size(0); + } else { + // Coin toss to 25% ring, 25% btree, and 50% flat. + CordRep* rep = Flat(256); + if (coin_toss(gen) != 0) { + if (coin_toss(gen) != 0) { + rep = CordRepRing::Create(rep); + } else { + rep = CordRepBtree::Create(rep); + } + } + cord.make_tree(rep); + + // 50/50 sample + if (coin_toss(gen) != 0) { + CordzInfo::TrackCord(cord, CordzUpdateTracker::kUnknown); + } + } + } + } + } + for (InlineData& cord : cords) { + if (cord.is_tree()) { + CordzInfo::MaybeUntrackCord(cord.cordz_info()); + CordRep::Unref(cord.as_tree()); + } + } + }); + } + + // Run for 1 second to give memory and thread safety analyzers plenty of + // time to detect any mishaps or undefined behaviors. + absl::SleepFor(absl::Seconds(1)); + stop.Notify(); + } + + std::cout << "Sampled " << sampled_node_count << " nodes\n"; +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_info_test.cc b/third_party/abseil-cpp/absl/strings/internal/cordz_info_test.cc new file mode 100644 index 0000000000..b98343ae79 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_info_test.cc @@ -0,0 +1,341 @@ +// Copyright 2019 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cordz_info.h" + +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/debugging/stacktrace.h" +#include "absl/debugging/symbolize.h" +#include "absl/strings/cordz_test_helpers.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/internal/cordz_handle.h" +#include "absl/strings/internal/cordz_statistics.h" +#include "absl/strings/internal/cordz_update_tracker.h" +#include "absl/strings/str_cat.h" +#include "absl/types/span.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::testing::ElementsAre; +using ::testing::Eq; +using ::testing::HasSubstr; +using ::testing::Ne; +using ::testing::SizeIs; + +// Used test values +auto constexpr kUnknownMethod = CordzUpdateTracker::kUnknown; +auto constexpr kTrackCordMethod = CordzUpdateTracker::kConstructorString; +auto constexpr kChildMethod = CordzUpdateTracker::kConstructorCord; +auto constexpr kUpdateMethod = CordzUpdateTracker::kAppendString; + +// Local less verbose helper +std::vector<const CordzHandle*> DeleteQueue() { + return CordzHandle::DiagnosticsGetDeleteQueue(); +} + +std::string FormatStack(absl::Span<void* const> raw_stack) { + static constexpr size_t buf_size = 1 << 14; + std::unique_ptr<char[]> buf(new char[buf_size]); + std::string output; + for (void* stackp : raw_stack) { + if (absl::Symbolize(stackp, buf.get(), buf_size)) { + absl::StrAppend(&output, " ", buf.get(), "\n"); + } + } + return output; +} + +TEST(CordzInfoTest, 
TrackCord) { + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info = data.data.cordz_info(); + ASSERT_THAT(info, Ne(nullptr)); + EXPECT_FALSE(info->is_snapshot()); + EXPECT_THAT(CordzInfo::Head(CordzSnapshot()), Eq(info)); + EXPECT_THAT(info->GetCordRepForTesting(), Eq(data.rep.rep)); + info->Untrack(); +} + +TEST(CordzInfoTest, MaybeTrackChildCordWithoutSampling) { + CordzSamplingIntervalHelper sample_none(99999); + TestCordData parent, child; + CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod); + EXPECT_THAT(child.data.cordz_info(), Eq(nullptr)); +} + +TEST(CordzInfoTest, MaybeTrackChildCordWithSampling) { + CordzSamplingIntervalHelper sample_all(1); + TestCordData parent, child; + CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod); + EXPECT_THAT(child.data.cordz_info(), Eq(nullptr)); +} + +TEST(CordzInfoTest, MaybeTrackChildCordWithoutSamplingParentSampled) { + CordzSamplingIntervalHelper sample_none(99999); + TestCordData parent, child; + CordzInfo::TrackCord(parent.data, kTrackCordMethod); + CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod); + CordzInfo* parent_info = parent.data.cordz_info(); + CordzInfo* child_info = child.data.cordz_info(); + ASSERT_THAT(child_info, Ne(nullptr)); + EXPECT_THAT(child_info->GetCordRepForTesting(), Eq(child.rep.rep)); + EXPECT_THAT(child_info->GetParentStack(), parent_info->GetStack()); + parent_info->Untrack(); + child_info->Untrack(); +} + +TEST(CordzInfoTest, MaybeTrackChildCordWithoutSamplingChildSampled) { + CordzSamplingIntervalHelper sample_none(99999); + TestCordData parent, child; + CordzInfo::TrackCord(child.data, kTrackCordMethod); + CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod); + EXPECT_THAT(child.data.cordz_info(), Eq(nullptr)); +} + +TEST(CordzInfoTest, MaybeTrackChildCordWithSamplingChildSampled) { + CordzSamplingIntervalHelper sample_all(1); + TestCordData parent, child; + 
CordzInfo::TrackCord(child.data, kTrackCordMethod); + CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod); + EXPECT_THAT(child.data.cordz_info(), Eq(nullptr)); +} + +TEST(CordzInfoTest, UntrackCord) { + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info = data.data.cordz_info(); + + info->Untrack(); + EXPECT_THAT(DeleteQueue(), SizeIs(0)); +} + +TEST(CordzInfoTest, UntrackCordWithSnapshot) { + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info = data.data.cordz_info(); + + CordzSnapshot snapshot; + info->Untrack(); + EXPECT_THAT(CordzInfo::Head(CordzSnapshot()), Eq(nullptr)); + EXPECT_THAT(info->GetCordRepForTesting(), Eq(data.rep.rep)); + EXPECT_THAT(DeleteQueue(), ElementsAre(info, &snapshot)); +} + +TEST(CordzInfoTest, SetCordRep) { + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info = data.data.cordz_info(); + + TestCordRep rep; + info->Lock(CordzUpdateTracker::kAppendCord); + info->SetCordRep(rep.rep); + info->Unlock(); + EXPECT_THAT(info->GetCordRepForTesting(), Eq(rep.rep)); + + info->Untrack(); +} + +TEST(CordzInfoTest, SetCordRepNullUntracksCordOnUnlock) { + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info = data.data.cordz_info(); + + info->Lock(CordzUpdateTracker::kAppendString); + info->SetCordRep(nullptr); + EXPECT_THAT(info->GetCordRepForTesting(), Eq(nullptr)); + EXPECT_THAT(CordzInfo::Head(CordzSnapshot()), Eq(info)); + + info->Unlock(); + EXPECT_THAT(CordzInfo::Head(CordzSnapshot()), Eq(nullptr)); +} + +TEST(CordzInfoTest, RefCordRep) { + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info = data.data.cordz_info(); + + size_t refcount = data.rep.rep->refcount.Get(); + EXPECT_THAT(info->RefCordRep(), Eq(data.rep.rep)); + EXPECT_THAT(data.rep.rep->refcount.Get(), Eq(refcount + 1)); + CordRep::Unref(data.rep.rep); + info->Untrack(); +} + 
+#if GTEST_HAS_DEATH_TEST + +TEST(CordzInfoTest, SetCordRepRequiresMutex) { + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info = data.data.cordz_info(); + TestCordRep rep; + EXPECT_DEBUG_DEATH(info->SetCordRep(rep.rep), ".*"); + info->Untrack(); +} + +#endif // GTEST_HAS_DEATH_TEST + +TEST(CordzInfoTest, TrackUntrackHeadFirstV2) { + CordzSnapshot snapshot; + EXPECT_THAT(CordzInfo::Head(snapshot), Eq(nullptr)); + + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info1 = data.data.cordz_info(); + ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info1)); + EXPECT_THAT(info1->Next(snapshot), Eq(nullptr)); + + TestCordData data2; + CordzInfo::TrackCord(data2.data, kTrackCordMethod); + CordzInfo* info2 = data2.data.cordz_info(); + ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info2)); + EXPECT_THAT(info2->Next(snapshot), Eq(info1)); + EXPECT_THAT(info1->Next(snapshot), Eq(nullptr)); + + info2->Untrack(); + ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info1)); + EXPECT_THAT(info1->Next(snapshot), Eq(nullptr)); + + info1->Untrack(); + ASSERT_THAT(CordzInfo::Head(snapshot), Eq(nullptr)); +} + +TEST(CordzInfoTest, TrackUntrackTailFirstV2) { + CordzSnapshot snapshot; + EXPECT_THAT(CordzInfo::Head(snapshot), Eq(nullptr)); + + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info1 = data.data.cordz_info(); + ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info1)); + EXPECT_THAT(info1->Next(snapshot), Eq(nullptr)); + + TestCordData data2; + CordzInfo::TrackCord(data2.data, kTrackCordMethod); + CordzInfo* info2 = data2.data.cordz_info(); + ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info2)); + EXPECT_THAT(info2->Next(snapshot), Eq(info1)); + EXPECT_THAT(info1->Next(snapshot), Eq(nullptr)); + + info1->Untrack(); + ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info2)); + EXPECT_THAT(info2->Next(snapshot), Eq(nullptr)); + + info2->Untrack(); + ASSERT_THAT(CordzInfo::Head(snapshot), Eq(nullptr)); 
+} + +TEST(CordzInfoTest, StackV2) { + TestCordData data; + // kMaxStackDepth is intentionally less than 64 (which is the max depth that + // Cordz will record) because if the actual stack depth is over 64 + // (which it is on Apple platforms) then the expected_stack will end up + // catching a few frames at the end that the actual_stack didn't get and + // it will no longer be subset. At the time of this writing 58 is the max + // that will allow this test to pass (with a minimum os version of iOS 9), so + // rounded down to 50 to hopefully not run into this in the future if Apple + // makes small modifications to its testing stack. 50 is sufficient to prove + // that we got a decent stack. + static constexpr int kMaxStackDepth = 50; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info = data.data.cordz_info(); + std::vector<void*> local_stack; + local_stack.resize(kMaxStackDepth); + // In some environments we don't get stack traces. For example in Android + // absl::GetStackTrace will return 0 indicating it didn't find any stack. The + // resultant formatted stack will be "", but that still equals the stack + // recorded in CordzInfo, which is also empty. The skip_count is 1 so that the + // line number of the current stack isn't included in the HasSubstr check. + local_stack.resize(absl::GetStackTrace(local_stack.data(), kMaxStackDepth, + /*skip_count=*/1)); + + std::string got_stack = FormatStack(info->GetStack()); + std::string expected_stack = FormatStack(local_stack); + // If TrackCord is inlined, got_stack should match expected_stack. If it isn't + // inlined, got_stack should include an additional frame not present in + // expected_stack. Either way, expected_stack should be a substring of + // got_stack. + EXPECT_THAT(got_stack, HasSubstr(expected_stack)); + + info->Untrack(); +} + +// Local helper functions to get different stacks for child and parent. 
+CordzInfo* TrackChildCord(InlineData& data, const InlineData& parent) { + CordzInfo::TrackCord(data, parent, kChildMethod); + return data.cordz_info(); +} +CordzInfo* TrackParentCord(InlineData& data) { + CordzInfo::TrackCord(data, kTrackCordMethod); + return data.cordz_info(); +} + +TEST(CordzInfoTest, GetStatistics) { + TestCordData data; + CordzInfo* info = TrackParentCord(data.data); + + CordzStatistics statistics = info->GetCordzStatistics(); + EXPECT_THAT(statistics.size, Eq(data.rep.rep->length)); + EXPECT_THAT(statistics.method, Eq(kTrackCordMethod)); + EXPECT_THAT(statistics.parent_method, Eq(kUnknownMethod)); + EXPECT_THAT(statistics.update_tracker.Value(kTrackCordMethod), Eq(1)); + + info->Untrack(); +} + +TEST(CordzInfoTest, LockCountsMethod) { + TestCordData data; + CordzInfo* info = TrackParentCord(data.data); + + info->Lock(kUpdateMethod); + info->Unlock(); + info->Lock(kUpdateMethod); + info->Unlock(); + + CordzStatistics statistics = info->GetCordzStatistics(); + EXPECT_THAT(statistics.update_tracker.Value(kUpdateMethod), Eq(2)); + + info->Untrack(); +} + +TEST(CordzInfoTest, FromParent) { + TestCordData parent; + TestCordData child; + CordzInfo* info_parent = TrackParentCord(parent.data); + CordzInfo* info_child = TrackChildCord(child.data, parent.data); + + std::string stack = FormatStack(info_parent->GetStack()); + std::string parent_stack = FormatStack(info_child->GetParentStack()); + EXPECT_THAT(stack, Eq(parent_stack)); + + CordzStatistics statistics = info_child->GetCordzStatistics(); + EXPECT_THAT(statistics.size, Eq(child.rep.rep->length)); + EXPECT_THAT(statistics.method, Eq(kChildMethod)); + EXPECT_THAT(statistics.parent_method, Eq(kTrackCordMethod)); + EXPECT_THAT(statistics.update_tracker.Value(kChildMethod), Eq(1)); + + info_parent->Untrack(); + info_child->Untrack(); +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git 
a/third_party/abseil-cpp/absl/strings/internal/cordz_sample_token.cc b/third_party/abseil-cpp/absl/strings/internal/cordz_sample_token.cc new file mode 100644 index 0000000000..ba1270d8f0 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_sample_token.cc @@ -0,0 +1,64 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cordz_sample_token.h" + +#include "absl/base/config.h" +#include "absl/strings/internal/cordz_handle.h" +#include "absl/strings/internal/cordz_info.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +CordzSampleToken::Iterator& CordzSampleToken::Iterator::operator++() { + if (current_) { + current_ = current_->Next(*token_); + } + return *this; +} + +CordzSampleToken::Iterator CordzSampleToken::Iterator::operator++(int) { + Iterator it(*this); + operator++(); + return it; +} + +bool operator==(const CordzSampleToken::Iterator& lhs, + const CordzSampleToken::Iterator& rhs) { + return lhs.current_ == rhs.current_ && + (lhs.current_ == nullptr || lhs.token_ == rhs.token_); +} + +bool operator!=(const CordzSampleToken::Iterator& lhs, + const CordzSampleToken::Iterator& rhs) { + return !(lhs == rhs); +} + +CordzSampleToken::Iterator::reference CordzSampleToken::Iterator::operator*() + const { + return *current_; +} + +CordzSampleToken::Iterator::pointer CordzSampleToken::Iterator::operator->() + const { + return current_; +} + 
+CordzSampleToken::Iterator::Iterator(const CordzSampleToken* token) + : token_(token), current_(CordzInfo::Head(*token)) {} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_sample_token.h b/third_party/abseil-cpp/absl/strings/internal/cordz_sample_token.h new file mode 100644 index 0000000000..28a1d70ccc --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_sample_token.h @@ -0,0 +1,97 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/base/config.h" +#include "absl/strings/internal/cordz_handle.h" +#include "absl/strings/internal/cordz_info.h" + +#ifndef ABSL_STRINGS_CORDZ_SAMPLE_TOKEN_H_ +#define ABSL_STRINGS_CORDZ_SAMPLE_TOKEN_H_ + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// The existence of a CordzSampleToken guarantees that a reader can traverse the +// global_cordz_infos_head linked-list without needing to hold a mutex. When a +// CordzSampleToken exists, all CordzInfo objects that would be destroyed are +// instead appended to a deletion queue. When the CordzSampleToken is destroyed, +// it will also clean up any of these CordzInfo objects. +// +// E.g., ST are CordzSampleToken objects and CH are CordzHandle objects. 
+// ST1 <- CH1 <- CH2 <- ST2 <- CH3 <- global_delete_queue_tail +// +// This list tracks that CH1 and CH2 were created after ST1, so the thread +// holding ST1 might have a reference to CH1, CH2, ST2, and CH3. However, ST2 was +// created later, so the thread holding the ST2 token cannot have a reference to +// ST1, CH1, or CH2. If ST1 is cleaned up first, that thread will delete ST1, +// CH1, and CH2. If instead ST2 is cleaned up first, that thread will only +// delete ST2. +// +// If ST1 is cleaned up first, the new list will be: +// ST2 <- CH3 <- global_delete_queue_tail +// +// If ST2 is cleaned up first, the new list will be: +// ST1 <- CH1 <- CH2 <- CH3 <- global_delete_queue_tail +// +// All new CordzHandle objects are appended to the list, so if a new thread +// comes along before either ST1 or ST2 is cleaned up, the new list will be: +// ST1 <- CH1 <- CH2 <- ST2 <- CH3 <- ST3 <- global_delete_queue_tail +// +// A thread must hold the global_delete_queue_mu mutex whenever it's altering +// this list. +// +// It is safe for a thread that holds a CordzSampleToken to read +// global_cordz_infos at any time since the objects it is able to retrieve will +// not be deleted while the CordzSampleToken exists. 
+class CordzSampleToken : public CordzSnapshot { + public: + class Iterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = const CordzInfo&; + using difference_type = ptrdiff_t; + using pointer = const CordzInfo*; + using reference = value_type; + + Iterator() = default; + + Iterator& operator++(); + Iterator operator++(int); + friend bool operator==(const Iterator& lhs, const Iterator& rhs); + friend bool operator!=(const Iterator& lhs, const Iterator& rhs); + reference operator*() const; + pointer operator->() const; + + private: + friend class CordzSampleToken; + explicit Iterator(const CordzSampleToken* token); + + const CordzSampleToken* token_ = nullptr; + pointer current_ = nullptr; + }; + + CordzSampleToken() = default; + CordzSampleToken(const CordzSampleToken&) = delete; + CordzSampleToken& operator=(const CordzSampleToken&) = delete; + + Iterator begin() { return Iterator(this); } + Iterator end() { return Iterator(); } +}; + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_CORDZ_SAMPLE_TOKEN_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_sample_token_test.cc b/third_party/abseil-cpp/absl/strings/internal/cordz_sample_token_test.cc new file mode 100644 index 0000000000..9f54301d68 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_sample_token_test.cc @@ -0,0 +1,208 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cordz_sample_token.h" + +#include <memory> +#include <type_traits> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/memory/memory.h" +#include "absl/random/random.h" +#include "absl/strings/cordz_test_helpers.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/internal/cordz_handle.h" +#include "absl/strings/internal/cordz_info.h" +#include "absl/synchronization/internal/thread_pool.h" +#include "absl/synchronization/notification.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::testing::ElementsAre; +using ::testing::Eq; +using ::testing::Ne; + +// Used test values +auto constexpr kTrackCordMethod = CordzUpdateTracker::kConstructorString; + +TEST(CordzSampleTokenTest, IteratorTraits) { + static_assert(std::is_copy_constructible<CordzSampleToken::Iterator>::value, + ""); + static_assert(std::is_copy_assignable<CordzSampleToken::Iterator>::value, ""); + static_assert(std::is_move_constructible<CordzSampleToken::Iterator>::value, + ""); + static_assert(std::is_move_assignable<CordzSampleToken::Iterator>::value, ""); + static_assert( + std::is_same< + std::iterator_traits<CordzSampleToken::Iterator>::iterator_category, + std::input_iterator_tag>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<CordzSampleToken::Iterator>::value_type, + const CordzInfo&>::value, + ""); + static_assert( + std::is_same< + std::iterator_traits<CordzSampleToken::Iterator>::difference_type, + ptrdiff_t>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<CordzSampleToken::Iterator>::pointer, + const CordzInfo*>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<CordzSampleToken::Iterator>::reference, + const CordzInfo&>::value, + 
""); +} + +TEST(CordzSampleTokenTest, IteratorEmpty) { + CordzSampleToken token; + EXPECT_THAT(token.begin(), Eq(token.end())); +} + +TEST(CordzSampleTokenTest, Iterator) { + TestCordData cord1, cord2, cord3; + CordzInfo::TrackCord(cord1.data, kTrackCordMethod); + CordzInfo* info1 = cord1.data.cordz_info(); + CordzInfo::TrackCord(cord2.data, kTrackCordMethod); + CordzInfo* info2 = cord2.data.cordz_info(); + CordzInfo::TrackCord(cord3.data, kTrackCordMethod); + CordzInfo* info3 = cord3.data.cordz_info(); + + CordzSampleToken token; + std::vector<const CordzInfo*> found; + for (const CordzInfo& cord_info : token) { + found.push_back(&cord_info); + } + + EXPECT_THAT(found, ElementsAre(info3, info2, info1)); + + info1->Untrack(); + info2->Untrack(); + info3->Untrack(); +} + +TEST(CordzSampleTokenTest, IteratorEquality) { + TestCordData cord1; + TestCordData cord2; + TestCordData cord3; + CordzInfo::TrackCord(cord1.data, kTrackCordMethod); + CordzInfo* info1 = cord1.data.cordz_info(); + + CordzSampleToken token1; + // lhs starts with the CordzInfo corresponding to cord1 at the head. + CordzSampleToken::Iterator lhs = token1.begin(); + + CordzInfo::TrackCord(cord2.data, kTrackCordMethod); + CordzInfo* info2 = cord2.data.cordz_info(); + + CordzSampleToken token2; + // rhs starts with the CordzInfo corresponding to cord2 at the head. + CordzSampleToken::Iterator rhs = token2.begin(); + + CordzInfo::TrackCord(cord3.data, kTrackCordMethod); + CordzInfo* info3 = cord3.data.cordz_info(); + + // lhs is on cord1 while rhs is on cord2. + EXPECT_THAT(lhs, Ne(rhs)); + + rhs++; + // lhs and rhs are both on cord1, but they didn't come from the same + // CordzSampleToken. + EXPECT_THAT(lhs, Ne(rhs)); + + lhs++; + rhs++; + // Both lhs and rhs are done, so they are on nullptr. 
+ EXPECT_THAT(lhs, Eq(rhs)); + + info1->Untrack(); + info2->Untrack(); + info3->Untrack(); +} + +TEST(CordzSampleTokenTest, MultiThreaded) { + Notification stop; + static constexpr int kNumThreads = 4; + static constexpr int kNumCords = 3; + static constexpr int kNumTokens = 3; + absl::synchronization_internal::ThreadPool pool(kNumThreads); + + for (int i = 0; i < kNumThreads; ++i) { + pool.Schedule([&stop]() { + absl::BitGen gen; + TestCordData cords[kNumCords]; + std::unique_ptr<CordzSampleToken> tokens[kNumTokens]; + + while (!stop.HasBeenNotified()) { + // Randomly perform one of five actions: + // 1) Untrack + // 2) Track + // 3) Iterate over Cords visible to a token. + // 4) Unsample + // 5) Sample + int index = absl::Uniform(gen, 0, kNumCords); + if (absl::Bernoulli(gen, 0.5)) { + TestCordData& cord = cords[index]; + // Track/untrack. + if (cord.data.is_profiled()) { + // 1) Untrack + cord.data.cordz_info()->Untrack(); + cord.data.clear_cordz_info();; + } else { + // 2) Track + CordzInfo::TrackCord(cord.data, kTrackCordMethod); + } + } else { + std::unique_ptr<CordzSampleToken>& token = tokens[index]; + if (token) { + if (absl::Bernoulli(gen, 0.5)) { + // 3) Iterate over Cords visible to a token. + for (const CordzInfo& info : *token) { + // This is trivial work to allow us to compile the loop. + EXPECT_THAT(info.Next(*token), Ne(&info)); + } + } else { + // 4) Unsample + token = nullptr; + } + } else { + // 5) Sample + token = absl::make_unique<CordzSampleToken>(); + } + } + } + for (TestCordData& cord : cords) { + CordzInfo::MaybeUntrackCord(cord.data.cordz_info()); + } + }); + } + // The threads will hammer away. Give it a little bit of time for tsan to + // spot errors. 
+ absl::SleepFor(absl::Seconds(3)); + stop.Notify(); +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_statistics.h b/third_party/abseil-cpp/absl/strings/internal/cordz_statistics.h new file mode 100644 index 0000000000..da4c7dbb8c --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_statistics.h @@ -0,0 +1,87 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORDZ_STATISTICS_H_ +#define ABSL_STRINGS_INTERNAL_CORDZ_STATISTICS_H_ + +#include <cstdint> + +#include "absl/base/config.h" +#include "absl/strings/internal/cordz_update_tracker.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordzStatistics captures some meta information about a Cord's shape. 
+struct CordzStatistics { + using MethodIdentifier = CordzUpdateTracker::MethodIdentifier; + + // Node counts information + struct NodeCounts { + size_t flat = 0; // #flats + size_t flat_64 = 0; // #flats up to 64 bytes + size_t flat_128 = 0; // #flats up to 128 bytes + size_t flat_256 = 0; // #flats up to 256 bytes + size_t flat_512 = 0; // #flats up to 512 bytes + size_t flat_1k = 0; // #flats up to 1K bytes + size_t external = 0; // #external reps + size_t substring = 0; // #substring reps + size_t concat = 0; // #concat reps + size_t ring = 0; // #ring buffer reps + size_t btree = 0; // #btree reps + }; + + // The size of the cord in bytes. This matches the result of Cord::size(). + int64_t size = 0; + + // The estimated memory used by the sampled cord. This value matches the + // value as reported by Cord::EstimatedMemoryUsage(). + // A value of 0 implies the property has not been recorded. + int64_t estimated_memory_usage = 0; + + // The effective memory used by the sampled cord, inversely weighted by the + // effective indegree of each allocated node. This is a representation of the + // fair share of memory usage that should be attributed to the sampled cord. + // This value is more useful for cases where one or more nodes are referenced + // by multiple Cord instances, and for cases where a Cord includes the same + // node multiple times (either directly or indirectly). + // A value of 0 implies the property has not been recorded. + int64_t estimated_fair_share_memory_usage = 0; + + // The total number of nodes referenced by this cord. + // For ring buffer Cords, this includes the 'ring buffer' node. + // For btree Cords, this includes all 'CordRepBtree' tree nodes as well as all + // the substring, flat and external nodes referenced by the tree. + // A value of 0 implies the property has not been recorded. + int64_t node_count = 0; + + // Detailed node counts per type + NodeCounts node_counts; + + // The cord method responsible for sampling the cord. 
+ MethodIdentifier method = MethodIdentifier::kUnknown; + + // The cord method responsible for sampling the parent cord if applicable. + MethodIdentifier parent_method = MethodIdentifier::kUnknown; + + // Update tracker tracking invocation count per cord method. + CordzUpdateTracker update_tracker; +}; + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORDZ_STATISTICS_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_update_scope.h b/third_party/abseil-cpp/absl/strings/internal/cordz_update_scope.h new file mode 100644 index 0000000000..57ba75de93 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_update_scope.h @@ -0,0 +1,71 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_SCOPE_H_ +#define ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_SCOPE_H_ + +#include "absl/base/config.h" +#include "absl/base/optimization.h" +#include "absl/base/thread_annotations.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cordz_info.h" +#include "absl/strings/internal/cordz_update_tracker.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordzUpdateScope scopes an update to the provided CordzInfo. +// The class invokes `info->Lock(method)` and `info->Unlock()` to guard +// cordrep updates. This class does nothing if `info` is null. 
+// See also the 'Lock`, `Unlock` and `SetCordRep` methods in `CordzInfo`. +class ABSL_SCOPED_LOCKABLE CordzUpdateScope { + public: + CordzUpdateScope(CordzInfo* info, CordzUpdateTracker::MethodIdentifier method) + ABSL_EXCLUSIVE_LOCK_FUNCTION(info) + : info_(info) { + if (ABSL_PREDICT_FALSE(info_)) { + info->Lock(method); + } + } + + // CordzUpdateScope can not be copied or assigned to. + CordzUpdateScope(CordzUpdateScope&& rhs) = delete; + CordzUpdateScope(const CordzUpdateScope&) = delete; + CordzUpdateScope& operator=(CordzUpdateScope&& rhs) = delete; + CordzUpdateScope& operator=(const CordzUpdateScope&) = delete; + + ~CordzUpdateScope() ABSL_UNLOCK_FUNCTION() { + if (ABSL_PREDICT_FALSE(info_)) { + info_->Unlock(); + } + } + + void SetCordRep(CordRep* rep) const { + if (ABSL_PREDICT_FALSE(info_)) { + info_->SetCordRep(rep); + } + } + + CordzInfo* info() const { return info_; } + + private: + CordzInfo* info_; +}; + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_SCOPE_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_update_scope_test.cc b/third_party/abseil-cpp/absl/strings/internal/cordz_update_scope_test.cc new file mode 100644 index 0000000000..3d08c622d0 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_update_scope_test.cc @@ -0,0 +1,49 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/internal/cordz_update_scope.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/strings/cordz_test_helpers.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/internal/cordz_info.h" +#include "absl/strings/internal/cordz_update_tracker.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +namespace { + +// Used test values +auto constexpr kTrackCordMethod = CordzUpdateTracker::kConstructorString; + +TEST(CordzUpdateScopeTest, ScopeNullptr) { + CordzUpdateScope scope(nullptr, kTrackCordMethod); +} + +TEST(CordzUpdateScopeTest, ScopeSampledCord) { + TestCordData cord; + CordzInfo::TrackCord(cord.data, kTrackCordMethod); + CordzUpdateScope scope(cord.data.cordz_info(), kTrackCordMethod); + cord.data.cordz_info()->SetCordRep(nullptr); +} + +} // namespace +ABSL_NAMESPACE_END +} // namespace cord_internal + +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_update_tracker.h b/third_party/abseil-cpp/absl/strings/internal/cordz_update_tracker.h new file mode 100644 index 0000000000..1f764486eb --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_update_tracker.h @@ -0,0 +1,121 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_TRACKER_H_ +#define ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_TRACKER_H_ + +#include <atomic> +#include <cstdint> + +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordzUpdateTracker tracks counters for Cord update methods. +// +// The purpose of CordzUpdateTracker is to track the number of calls to methods +// updating Cord data for sampled cords. The class internally uses 'lossy' +// atomic operations: Cord is thread-compatible, so there is no need to +// synchronize updates. However, Cordz collection threads may call 'Value()' at +// any point, so the class needs to provide thread safe access. +// +// This class is thread-safe. But as per above comments, all non-const methods +// should be used single-threaded only: updates are thread-safe but lossy. +class CordzUpdateTracker { + public: + // Tracked update methods. + enum MethodIdentifier { + kUnknown, + kAppendBuffer, + kAppendCord, + kAppendExternalMemory, + kAppendString, + kAssignCord, + kAssignString, + kClear, + kConstructorCord, + kConstructorString, + kCordReader, + kFlatten, + kGetAppendRegion, + kMakeCordFromExternal, + kMoveAppendCord, + kMoveAssignCord, + kMovePrependCord, + kPrependBuffer, + kPrependCord, + kPrependString, + kRemovePrefix, + kRemoveSuffix, + kSubCord, + + // kNumMethods defines the number of entries: must be the last entry. + kNumMethods, + }; + + // Constructs a new instance. All counters are zero-initialized. + constexpr CordzUpdateTracker() noexcept : values_{} {} + + // Copy constructs a new instance. + CordzUpdateTracker(const CordzUpdateTracker& rhs) noexcept { *this = rhs; } + + // Assigns the provided value to this instance. 
+ CordzUpdateTracker& operator=(const CordzUpdateTracker& rhs) noexcept { + for (int i = 0; i < kNumMethods; ++i) { + values_[i].store(rhs.values_[i].load(std::memory_order_relaxed), + std::memory_order_relaxed); + } + return *this; + } + + // Returns the value for the specified method. + int64_t Value(MethodIdentifier method) const { + return values_[method].load(std::memory_order_relaxed); + } + + // Increases the value for the specified method by `n` + void LossyAdd(MethodIdentifier method, int64_t n = 1) { + auto& value = values_[method]; + value.store(value.load(std::memory_order_relaxed) + n, + std::memory_order_relaxed); + } + + // Adds all the values from `src` to this instance + void LossyAdd(const CordzUpdateTracker& src) { + for (int i = 0; i < kNumMethods; ++i) { + MethodIdentifier method = static_cast<MethodIdentifier>(i); + if (int64_t value = src.Value(method)) { + LossyAdd(method, value); + } + } + } + + private: + // Until C++20 std::atomic is not constexpr default-constructible, so we need + // a wrapper for this class to be constexpr constructible. + class Counter : public std::atomic<int64_t> { + public: + constexpr Counter() noexcept : std::atomic<int64_t>(0) {} + }; + + Counter values_[kNumMethods]; +}; + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_TRACKER_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_update_tracker_test.cc b/third_party/abseil-cpp/absl/strings/internal/cordz_update_tracker_test.cc new file mode 100644 index 0000000000..2348a17585 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_update_tracker_test.cc @@ -0,0 +1,145 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cordz_update_tracker.h" + +#include <array> +#include <thread> // NOLINT + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/attributes.h" +#include "absl/base/config.h" +#include "absl/synchronization/notification.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::testing::AnyOf; +using ::testing::Eq; + +using Method = CordzUpdateTracker::MethodIdentifier; +using Methods = std::array<Method, Method::kNumMethods>; + +// Returns an array of all methods defined in `MethodIdentifier` +Methods AllMethods() { + return Methods{Method::kUnknown, + Method::kAppendBuffer, + Method::kAppendCord, + Method::kAppendExternalMemory, + Method::kAppendString, + Method::kAssignCord, + Method::kAssignString, + Method::kClear, + Method::kConstructorCord, + Method::kConstructorString, + Method::kCordReader, + Method::kFlatten, + Method::kGetAppendRegion, + Method::kMakeCordFromExternal, + Method::kMoveAppendCord, + Method::kMoveAssignCord, + Method::kMovePrependCord, + Method::kPrependBuffer, + Method::kPrependCord, + Method::kPrependString, + Method::kRemovePrefix, + Method::kRemoveSuffix, + Method::kSubCord}; +} + +TEST(CordzUpdateTracker, IsConstExprAndInitializesToZero) { + constexpr CordzUpdateTracker tracker; + for (Method method : AllMethods()) { + ASSERT_THAT(tracker.Value(method), Eq(0)); + } +} + +TEST(CordzUpdateTracker, LossyAdd) { + int64_t n = 1; + CordzUpdateTracker tracker; + for (Method method : AllMethods()) { + tracker.LossyAdd(method, n); + 
EXPECT_THAT(tracker.Value(method), Eq(n)); + n += 2; + } +} + +TEST(CordzUpdateTracker, CopyConstructor) { + int64_t n = 1; + CordzUpdateTracker src; + for (Method method : AllMethods()) { + src.LossyAdd(method, n); + n += 2; + } + + n = 1; + CordzUpdateTracker tracker(src); + for (Method method : AllMethods()) { + EXPECT_THAT(tracker.Value(method), Eq(n)); + n += 2; + } +} + +TEST(CordzUpdateTracker, OperatorAssign) { + int64_t n = 1; + CordzUpdateTracker src; + CordzUpdateTracker tracker; + for (Method method : AllMethods()) { + src.LossyAdd(method, n); + n += 2; + } + + n = 1; + tracker = src; + for (Method method : AllMethods()) { + EXPECT_THAT(tracker.Value(method), Eq(n)); + n += 2; + } +} + +TEST(CordzUpdateTracker, ThreadSanitizedValueCheck) { + absl::Notification done; + CordzUpdateTracker tracker; + + std::thread reader([&done, &tracker] { + while (!done.HasBeenNotified()) { + int n = 1; + for (Method method : AllMethods()) { + EXPECT_THAT(tracker.Value(method), AnyOf(Eq(n), Eq(0))); + n += 2; + } + } + int n = 1; + for (Method method : AllMethods()) { + EXPECT_THAT(tracker.Value(method), Eq(n)); + n += 2; + } + }); + + int64_t n = 1; + for (Method method : AllMethods()) { + tracker.LossyAdd(method, n); + n += 2; + } + done.Notify(); + reader.join(); +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/numbers_test_common.h b/third_party/abseil-cpp/absl/strings/internal/numbers_test_common.h index 1a1e50c4d8..eaa88a8897 100644 --- a/third_party/abseil-cpp/absl/strings/internal/numbers_test_common.h +++ b/third_party/abseil-cpp/absl/strings/internal/numbers_test_common.h @@ -170,7 +170,7 @@ inline const std::array<uint64_test_case, 34>& strtouint64_test_cases() { {"0x1234", true, 16, 0x1234}, - // Base-10 std::string version. + // Base-10 string version. 
{"1234", true, 0, 1234}, {nullptr, false, 0, 0}, }}; diff --git a/third_party/abseil-cpp/absl/strings/internal/resize_uninitialized.h b/third_party/abseil-cpp/absl/strings/internal/resize_uninitialized.h index e42628e394..49859dcc7d 100644 --- a/third_party/abseil-cpp/absl/strings/internal/resize_uninitialized.h +++ b/third_party/abseil-cpp/absl/strings/internal/resize_uninitialized.h @@ -17,6 +17,7 @@ #ifndef ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_ #define ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_ +#include <algorithm> #include <string> #include <type_traits> #include <utility> @@ -28,8 +29,9 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace strings_internal { -// Is a subclass of true_type or false_type, depending on whether or not -// T has a __resize_default_init member. +// In this type trait, we look for a __resize_default_init member function, and +// we use it if available, otherwise, we use resize. We provide HasMember to +// indicate whether __resize_default_init is present. template <typename string_type, typename = void> struct ResizeUninitializedTraits { using HasMember = std::false_type; @@ -66,6 +68,50 @@ inline void STLStringResizeUninitialized(string_type* s, size_t new_size) { ResizeUninitializedTraits<string_type>::Resize(s, new_size); } +// Used to ensure exponential growth so that the amortized complexity of +// increasing the string size by a small amount is O(1), in contrast to +// O(str->size()) in the case of precise growth. +template <typename string_type> +void STLStringReserveAmortized(string_type* s, size_t new_size) { + const size_t cap = s->capacity(); + if (new_size > cap) { + // Make sure to always grow by at least a factor of 2x. + s->reserve((std::max)(new_size, 2 * cap)); + } +} + +// In this type trait, we look for an __append_default_init member function, and +// we use it if available, otherwise, we use append. 
+template <typename string_type, typename = void> +struct AppendUninitializedTraits { + static void Append(string_type* s, size_t n) { + s->append(n, typename string_type::value_type()); + } +}; + +template <typename string_type> +struct AppendUninitializedTraits< + string_type, absl::void_t<decltype(std::declval<string_type&>() + .__append_default_init(237))> > { + static void Append(string_type* s, size_t n) { + s->__append_default_init(n); + } +}; + +// Like STLStringResizeUninitialized(str, new_size), except guaranteed to use +// exponential growth so that the amortized complexity of increasing the string +// size by a small amount is O(1), in contrast to O(str->size()) in the case of +// precise growth. +template <typename string_type> +void STLStringResizeUninitializedAmortized(string_type* s, size_t new_size) { + const size_t size = s->size(); + if (new_size > size) { + AppendUninitializedTraits<string_type>::Append(s, new_size - size); + } else { + s->erase(new_size); + } +} + } // namespace strings_internal ABSL_NAMESPACE_END } // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/resize_uninitialized_test.cc b/third_party/abseil-cpp/absl/strings/internal/resize_uninitialized_test.cc index 0f8b3c2a95..ad1b9c58f3 100644 --- a/third_party/abseil-cpp/absl/strings/internal/resize_uninitialized_test.cc +++ b/third_party/abseil-cpp/absl/strings/internal/resize_uninitialized_test.cc @@ -19,64 +19,115 @@ namespace { int resize_call_count = 0; +int append_call_count = 0; // A mock string class whose only purpose is to track how many times its -// resize() method has been called. +// resize()/append() methods have been called. 
struct resizable_string { + using value_type = char; size_t size() const { return 0; } + size_t capacity() const { return 0; } char& operator[](size_t) { static char c = '\0'; return c; } void resize(size_t) { resize_call_count += 1; } + void append(size_t, value_type) { append_call_count += 1; } + void reserve(size_t) {} + resizable_string& erase(size_t = 0, size_t = 0) { return *this; } }; int resize_default_init_call_count = 0; +int append_default_init_call_count = 0; // A mock string class whose only purpose is to track how many times its -// resize() and __resize_default_init() methods have been called. -struct resize_default_init_string { +// resize()/__resize_default_init()/append()/__append_default_init() methods +// have been called. +struct default_init_string { size_t size() const { return 0; } + size_t capacity() const { return 0; } char& operator[](size_t) { static char c = '\0'; return c; } void resize(size_t) { resize_call_count += 1; } void __resize_default_init(size_t) { resize_default_init_call_count += 1; } + void __append_default_init(size_t) { append_default_init_call_count += 1; } + void reserve(size_t) {} + default_init_string& erase(size_t = 0, size_t = 0) { return *this; } }; TEST(ResizeUninit, WithAndWithout) { resize_call_count = 0; + append_call_count = 0; resize_default_init_call_count = 0; + append_default_init_call_count = 0; { resizable_string rs; EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(append_call_count, 0); EXPECT_EQ(resize_default_init_call_count, 0); + EXPECT_EQ(append_default_init_call_count, 0); EXPECT_FALSE( absl::strings_internal::STLStringSupportsNontrashingResize(&rs)); EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(append_call_count, 0); EXPECT_EQ(resize_default_init_call_count, 0); + EXPECT_EQ(append_default_init_call_count, 0); absl::strings_internal::STLStringResizeUninitialized(&rs, 237); EXPECT_EQ(resize_call_count, 1); + EXPECT_EQ(append_call_count, 0); EXPECT_EQ(resize_default_init_call_count, 0); + 
EXPECT_EQ(append_default_init_call_count, 0); + absl::strings_internal::STLStringResizeUninitializedAmortized(&rs, 1000); + EXPECT_EQ(resize_call_count, 1); + EXPECT_EQ(append_call_count, 1); + EXPECT_EQ(resize_default_init_call_count, 0); + EXPECT_EQ(append_default_init_call_count, 0); } resize_call_count = 0; + append_call_count = 0; resize_default_init_call_count = 0; + append_default_init_call_count = 0; { - resize_default_init_string rus; + default_init_string rus; EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(append_call_count, 0); EXPECT_EQ(resize_default_init_call_count, 0); + EXPECT_EQ(append_default_init_call_count, 0); EXPECT_TRUE( absl::strings_internal::STLStringSupportsNontrashingResize(&rus)); EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(append_call_count, 0); EXPECT_EQ(resize_default_init_call_count, 0); + EXPECT_EQ(append_default_init_call_count, 0); absl::strings_internal::STLStringResizeUninitialized(&rus, 237); EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(append_call_count, 0); + EXPECT_EQ(resize_default_init_call_count, 1); + EXPECT_EQ(append_default_init_call_count, 0); + absl::strings_internal::STLStringResizeUninitializedAmortized(&rus, 1000); + EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(append_call_count, 0); EXPECT_EQ(resize_default_init_call_count, 1); + EXPECT_EQ(append_default_init_call_count, 1); + } +} + +TEST(ResizeUninit, Amortized) { + std::string str; + size_t prev_cap = str.capacity(); + int cap_increase_count = 0; + for (int i = 0; i < 1000; ++i) { + absl::strings_internal::STLStringResizeUninitializedAmortized(&str, i); + size_t new_cap = str.capacity(); + if (new_cap > prev_cap) ++cap_increase_count; + prev_cap = new_cap; } + EXPECT_LT(cap_increase_count, 50); } } // namespace diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/arg.cc b/third_party/abseil-cpp/absl/strings/internal/str_format/arg.cc index 4d0604e00c..e28a29b171 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/arg.cc +++ 
b/third_party/abseil-cpp/absl/strings/internal/str_format/arg.cc @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + // // POSIX spec: // http://pubs.opengroup.org/onlinepubs/009695399/functions/fprintf.html @@ -12,14 +26,13 @@ #include "absl/base/port.h" #include "absl/strings/internal/str_format/float_conversion.h" +#include "absl/strings/numbers.h" namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { namespace { -const char kDigit[2][32] = { "0123456789abcdef", "0123456789ABCDEF" }; - // Reduce *capacity by s.size(), clipped to a 0 minimum. void ReducePadding(string_view s, size_t *capacity) { *capacity = Excess(s.size(), *capacity); @@ -48,125 +61,179 @@ struct IsSigned<absl::int128> : std::true_type {}; template <> struct IsSigned<absl::uint128> : std::false_type {}; -class ConvertedIntInfo { +// Integral digit printer. +// Call one of the PrintAs* routines after construction once. +// Use with_neg_and_zero/without_neg_or_zero/is_negative to access the results. +class IntDigits { public: + // Print the unsigned integer as octal. + // Supports unsigned integral types and uint128. 
template <typename T> - ConvertedIntInfo(T v, ConversionChar conv) { - using Unsigned = typename MakeUnsigned<T>::type; - auto u = static_cast<Unsigned>(v); - if (IsNeg(v)) { - is_neg_ = true; - u = Unsigned{} - u; - } else { - is_neg_ = false; + void PrintAsOct(T v) { + static_assert(!IsSigned<T>::value, ""); + char *p = storage_ + sizeof(storage_); + do { + *--p = static_cast<char>('0' + (static_cast<size_t>(v) & 7)); + v >>= 3; + } while (v); + start_ = p; + size_ = storage_ + sizeof(storage_) - p; + } + + // Print the signed or unsigned integer as decimal. + // Supports all integral types. + template <typename T> + void PrintAsDec(T v) { + static_assert(std::is_integral<T>::value, ""); + start_ = storage_; + size_ = numbers_internal::FastIntToBuffer(v, storage_) - storage_; + } + + void PrintAsDec(int128 v) { + auto u = static_cast<uint128>(v); + bool add_neg = false; + if (v < 0) { + add_neg = true; + u = uint128{} - u; } - UnsignedToStringRight(u, conv); + PrintAsDec(u, add_neg); } - string_view digits() const { - return {end() - size_, static_cast<size_t>(size_)}; + void PrintAsDec(uint128 v, bool add_neg = false) { + // This function can be sped up if needed. We can call FastIntToBuffer + // twice, or fix FastIntToBuffer to support uint128. + char *p = storage_ + sizeof(storage_); + do { + p -= 2; + numbers_internal::PutTwoDigits(static_cast<size_t>(v % 100), p); + v /= 100; + } while (v); + if (p[0] == '0') { + // We printed one too many hexits. + ++p; + } + if (add_neg) { + *--p = '-'; + } + size_ = storage_ + sizeof(storage_) - p; + start_ = p; } - bool is_neg() const { return is_neg_; } - private: - template <typename T, bool IsSigned> - struct IsNegImpl { - static bool Eval(T v) { return v < 0; } - }; + // Print the unsigned integer as hex using lowercase. + // Supports unsigned integral types and uint128. 
template <typename T> - struct IsNegImpl<T, false> { - static bool Eval(T) { - return false; + void PrintAsHexLower(T v) { + static_assert(!IsSigned<T>::value, ""); + char *p = storage_ + sizeof(storage_); + + do { + p -= 2; + constexpr const char* table = numbers_internal::kHexTable; + std::memcpy(p, table + 2 * (static_cast<size_t>(v) & 0xFF), 2); + if (sizeof(T) == 1) break; + v >>= 8; + } while (v); + if (p[0] == '0') { + // We printed one too many digits. + ++p; } - }; + start_ = p; + size_ = storage_ + sizeof(storage_) - p; + } + // Print the unsigned integer as hex using uppercase. + // Supports unsigned integral types and uint128. template <typename T> - bool IsNeg(T v) { - return IsNegImpl<T, IsSigned<T>::value>::Eval(v); + void PrintAsHexUpper(T v) { + static_assert(!IsSigned<T>::value, ""); + char *p = storage_ + sizeof(storage_); + + // kHexTable is only lowercase, so do it manually for uppercase. + do { + *--p = "0123456789ABCDEF"[static_cast<size_t>(v) & 15]; + v >>= 4; + } while (v); + start_ = p; + size_ = storage_ + sizeof(storage_) - p; } - template <typename T> - void UnsignedToStringRight(T u, ConversionChar conv) { - char *p = end(); - switch (FormatConversionCharRadix(conv)) { - default: - case 10: - for (; u; u /= 10) - *--p = static_cast<char>('0' + static_cast<size_t>(u % 10)); - break; - case 8: - for (; u; u /= 8) - *--p = static_cast<char>('0' + static_cast<size_t>(u % 8)); - break; - case 16: { - const char *digits = kDigit[FormatConversionCharIsUpper(conv) ? 1 : 0]; - for (; u; u /= 16) *--p = digits[static_cast<size_t>(u % 16)]; - break; - } - } - size_ = static_cast<int>(end() - p); + // The printed value including the '-' sign if available. + // For inputs of value `0`, this will return "0" + string_view with_neg_and_zero() const { return {start_, size_}; } + + // The printed value not including the '-' sign. + // For inputs of value `0`, this will return "". 
+ string_view without_neg_or_zero() const { + static_assert('-' < '0', "The check below verifies both."); + size_t advance = start_[0] <= '0' ? 1 : 0; + return {start_ + advance, size_ - advance}; } - const char *end() const { return storage_ + sizeof(storage_); } - char *end() { return storage_ + sizeof(storage_); } + bool is_negative() const { return start_[0] == '-'; } - bool is_neg_; - int size_; - // Max size: 128 bit value as octal -> 43 digits - char storage_[128 / 3 + 1]; + private: + const char *start_; + size_t size_; + // Max size: 128 bit value as octal -> 43 digits, plus sign char + char storage_[128 / 3 + 1 + 1]; }; // Note: 'o' conversions do not have a base indicator, it's just that // the '#' flag is specified to modify the precision for 'o' conversions. -string_view BaseIndicator(const ConvertedIntInfo &info, - const ConversionSpec conv) { - bool alt = conv.flags().alt; - int radix = FormatConversionCharRadix(conv.conv()); - if (conv.conv() == ConversionChar::p) alt = true; // always show 0x for %p. +string_view BaseIndicator(const IntDigits &as_digits, + const FormatConversionSpecImpl conv) { + // always show 0x for %p. + bool alt = conv.has_alt_flag() || + conv.conversion_char() == FormatConversionCharInternal::p; + bool hex = (conv.conversion_char() == FormatConversionCharInternal::x || + conv.conversion_char() == FormatConversionCharInternal::X || + conv.conversion_char() == FormatConversionCharInternal::p); // From the POSIX description of '#' flag: // "For x or X conversion specifiers, a non-zero result shall have // 0x (or 0X) prefixed to it." - if (alt && radix == 16 && !info.digits().empty()) { - if (FormatConversionCharIsUpper(conv.conv())) return "0X"; - return "0x"; + if (alt && hex && !as_digits.without_neg_or_zero().empty()) { + return conv.conversion_char() == FormatConversionCharInternal::X ? 
"0X" + : "0x"; } return {}; } -string_view SignColumn(bool neg, const ConversionSpec conv) { - if (FormatConversionCharIsSigned(conv.conv())) { +string_view SignColumn(bool neg, const FormatConversionSpecImpl conv) { + if (conv.conversion_char() == FormatConversionCharInternal::d || + conv.conversion_char() == FormatConversionCharInternal::i) { if (neg) return "-"; - if (conv.flags().show_pos) return "+"; - if (conv.flags().sign_col) return " "; + if (conv.has_show_pos_flag()) return "+"; + if (conv.has_sign_col_flag()) return " "; } return {}; } -bool ConvertCharImpl(unsigned char v, const ConversionSpec conv, +bool ConvertCharImpl(unsigned char v, const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { size_t fill = 0; if (conv.width() >= 0) fill = conv.width(); ReducePadding(1, &fill); - if (!conv.flags().left) sink->Append(fill, ' '); + if (!conv.has_left_flag()) sink->Append(fill, ' '); sink->Append(1, v); - if (conv.flags().left) sink->Append(fill, ' '); + if (conv.has_left_flag()) sink->Append(fill, ' '); return true; } -bool ConvertIntImplInner(const ConvertedIntInfo &info, - const ConversionSpec conv, FormatSinkImpl *sink) { +bool ConvertIntImplInnerSlow(const IntDigits &as_digits, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { // Print as a sequence of Substrings: // [left_spaces][sign][base_indicator][zeroes][formatted][right_spaces] size_t fill = 0; if (conv.width() >= 0) fill = conv.width(); - string_view formatted = info.digits(); + string_view formatted = as_digits.without_neg_or_zero(); ReducePadding(formatted, &fill); - string_view sign = SignColumn(info.is_neg(), conv); + string_view sign = SignColumn(as_digits.is_negative(), conv); ReducePadding(sign, &fill); - string_view base_indicator = BaseIndicator(info, conv); + string_view base_indicator = BaseIndicator(as_digits, conv); ReducePadding(base_indicator, &fill); int precision = conv.precision(); @@ -174,7 +241,8 @@ bool ConvertIntImplInner(const ConvertedIntInfo &info, 
if (!precision_specified) precision = 1; - if (conv.flags().alt && conv.conv() == ConversionChar::o) { + if (conv.has_alt_flag() && + conv.conversion_char() == FormatConversionCharInternal::o) { // From POSIX description of the '#' (alt) flag: // "For o conversion, it increases the precision (if necessary) to // force the first digit of the result to be zero." @@ -187,13 +255,13 @@ bool ConvertIntImplInner(const ConvertedIntInfo &info, size_t num_zeroes = Excess(formatted.size(), precision); ReducePadding(num_zeroes, &fill); - size_t num_left_spaces = !conv.flags().left ? fill : 0; - size_t num_right_spaces = conv.flags().left ? fill : 0; + size_t num_left_spaces = !conv.has_left_flag() ? fill : 0; + size_t num_right_spaces = conv.has_left_flag() ? fill : 0; // From POSIX description of the '0' (zero) flag: // "For d, i, o, u, x, and X conversion specifiers, if a precision // is specified, the '0' flag is ignored." - if (!precision_specified && conv.flags().zero) { + if (!precision_specified && conv.has_zero_flag()) { num_zeroes += num_left_spaces; num_left_spaces = 0; } @@ -208,71 +276,97 @@ bool ConvertIntImplInner(const ConvertedIntInfo &info, } template <typename T> -bool ConvertIntImplInner(T v, const ConversionSpec conv, FormatSinkImpl *sink) { - ConvertedIntInfo info(v, conv.conv()); - if (conv.flags().basic && (conv.conv() != ConversionChar::p)) { - if (info.is_neg()) sink->Append(1, '-'); - if (info.digits().empty()) { - sink->Append(1, '0'); - } else { - sink->Append(info.digits()); - } - return true; +bool ConvertIntArg(T v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + using U = typename MakeUnsigned<T>::type; + IntDigits as_digits; + + // This odd casting is due to a bug in -Wswitch behavior in gcc49 which causes + // it to complain about a switch/case type mismatch, even though both are + // FormatConverionChar. Likely this is because at this point + // FormatConversionChar is declared, but not defined. 
+ switch (static_cast<uint8_t>(conv.conversion_char())) { + case static_cast<uint8_t>(FormatConversionCharInternal::c): + return ConvertCharImpl(static_cast<unsigned char>(v), conv, sink); + + case static_cast<uint8_t>(FormatConversionCharInternal::o): + as_digits.PrintAsOct(static_cast<U>(v)); + break; + + case static_cast<uint8_t>(FormatConversionCharInternal::x): + as_digits.PrintAsHexLower(static_cast<U>(v)); + break; + case static_cast<uint8_t>(FormatConversionCharInternal::X): + as_digits.PrintAsHexUpper(static_cast<U>(v)); + break; + + case static_cast<uint8_t>(FormatConversionCharInternal::u): + as_digits.PrintAsDec(static_cast<U>(v)); + break; + + case static_cast<uint8_t>(FormatConversionCharInternal::d): + case static_cast<uint8_t>(FormatConversionCharInternal::i): + as_digits.PrintAsDec(v); + break; + + case static_cast<uint8_t>(FormatConversionCharInternal::a): + case static_cast<uint8_t>(FormatConversionCharInternal::e): + case static_cast<uint8_t>(FormatConversionCharInternal::f): + case static_cast<uint8_t>(FormatConversionCharInternal::g): + case static_cast<uint8_t>(FormatConversionCharInternal::A): + case static_cast<uint8_t>(FormatConversionCharInternal::E): + case static_cast<uint8_t>(FormatConversionCharInternal::F): + case static_cast<uint8_t>(FormatConversionCharInternal::G): + return ConvertFloatImpl(static_cast<double>(v), conv, sink); + + default: + ABSL_INTERNAL_ASSUME(false); } - return ConvertIntImplInner(info, conv, sink); -} -template <typename T> -bool ConvertIntArg(T v, const ConversionSpec conv, FormatSinkImpl *sink) { - if (FormatConversionCharIsFloat(conv.conv())) { - return FormatConvertImpl(static_cast<double>(v), conv, sink).value; - } - if (conv.conv() == ConversionChar::c) - return ConvertCharImpl(static_cast<unsigned char>(v), conv, sink); - if (!FormatConversionCharIsIntegral(conv.conv())) return false; - if (!FormatConversionCharIsSigned(conv.conv()) && IsSigned<T>::value) { - using U = typename MakeUnsigned<T>::type; - 
return FormatConvertImpl(static_cast<U>(v), conv, sink).value; + if (conv.is_basic()) { + sink->Append(as_digits.with_neg_and_zero()); + return true; } - return ConvertIntImplInner(v, conv, sink); + return ConvertIntImplInnerSlow(as_digits, conv, sink); } template <typename T> -bool ConvertFloatArg(T v, const ConversionSpec conv, FormatSinkImpl *sink) { - return FormatConversionCharIsFloat(conv.conv()) && +bool ConvertFloatArg(T v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return FormatConversionCharIsFloat(conv.conversion_char()) && ConvertFloatImpl(v, conv, sink); } -inline bool ConvertStringArg(string_view v, const ConversionSpec conv, +inline bool ConvertStringArg(string_view v, const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { - if (conv.conv() != ConversionChar::s) return false; - if (conv.flags().basic) { + if (conv.is_basic()) { sink->Append(v); return true; } return sink->PutPaddedString(v, conv.width(), conv.precision(), - conv.flags().left); + conv.has_left_flag()); } } // namespace // ==================== Strings ==================== -ConvertResult<Conv::s> FormatConvertImpl(const std::string &v, - const ConversionSpec conv, - FormatSinkImpl *sink) { +StringConvertResult FormatConvertImpl(const std::string &v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { return {ConvertStringArg(v, conv, sink)}; } -ConvertResult<Conv::s> FormatConvertImpl(string_view v, - const ConversionSpec conv, - FormatSinkImpl *sink) { +StringConvertResult FormatConvertImpl(string_view v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { return {ConvertStringArg(v, conv, sink)}; } -ConvertResult<Conv::s | Conv::p> FormatConvertImpl(const char *v, - const ConversionSpec conv, - FormatSinkImpl *sink) { - if (conv.conv() == ConversionChar::p) +ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::s, FormatConversionCharSetInternal::p)> +FormatConvertImpl(const char *v, const 
FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + if (conv.conversion_char() == FormatConversionCharInternal::p) return {FormatConvertImpl(VoidPtr(v), conv, sink).value}; size_t len; if (v == nullptr) { @@ -287,93 +381,99 @@ ConvertResult<Conv::s | Conv::p> FormatConvertImpl(const char *v, } // ==================== Raw pointers ==================== -ConvertResult<Conv::p> FormatConvertImpl(VoidPtr v, const ConversionSpec conv, - FormatSinkImpl *sink) { - if (conv.conv() != ConversionChar::p) return {false}; +ArgConvertResult<FormatConversionCharSetInternal::p> FormatConvertImpl( + VoidPtr v, const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { if (!v.value) { sink->Append("(nil)"); return {true}; } - return {ConvertIntImplInner(v.value, conv, sink)}; + IntDigits as_digits; + as_digits.PrintAsHexLower(v.value); + return {ConvertIntImplInnerSlow(as_digits, conv, sink)}; } // ==================== Floats ==================== -FloatingConvertResult FormatConvertImpl(float v, const ConversionSpec conv, +FloatingConvertResult FormatConvertImpl(float v, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertFloatArg(v, conv, sink)}; } -FloatingConvertResult FormatConvertImpl(double v, const ConversionSpec conv, +FloatingConvertResult FormatConvertImpl(double v, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertFloatArg(v, conv, sink)}; } FloatingConvertResult FormatConvertImpl(long double v, - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertFloatArg(v, conv, sink)}; } // ==================== Chars ==================== -IntegralConvertResult FormatConvertImpl(char v, const ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(char v, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } IntegralConvertResult FormatConvertImpl(signed char v, - const ConversionSpec conv, + const 
FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } IntegralConvertResult FormatConvertImpl(unsigned char v, - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } // ==================== Ints ==================== IntegralConvertResult FormatConvertImpl(short v, // NOLINT - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } IntegralConvertResult FormatConvertImpl(unsigned short v, // NOLINT - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } -IntegralConvertResult FormatConvertImpl(int v, const ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(int v, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } -IntegralConvertResult FormatConvertImpl(unsigned v, const ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(unsigned v, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } IntegralConvertResult FormatConvertImpl(long v, // NOLINT - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } IntegralConvertResult FormatConvertImpl(unsigned long v, // NOLINT - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } IntegralConvertResult FormatConvertImpl(long long v, // NOLINT - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } IntegralConvertResult FormatConvertImpl(unsigned long long v, // NOLINT - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } 
IntegralConvertResult FormatConvertImpl(absl::int128 v, - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } IntegralConvertResult FormatConvertImpl(absl::uint128 v, - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/arg.h b/third_party/abseil-cpp/absl/strings/internal/str_format/arg.h index 7a93756305..3c91be701f 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/arg.h +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/arg.h @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_ #define ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_ @@ -25,16 +39,37 @@ class Cord; class FormatCountCapture; class FormatSink; +template <absl::FormatConversionCharSet C> +struct FormatConvertResult; +class FormatConversionSpec; + namespace str_format_internal { template <typename T, typename = void> struct HasUserDefinedConvert : std::false_type {}; template <typename T> -struct HasUserDefinedConvert< - T, void_t<decltype(AbslFormatConvert( - std::declval<const T&>(), std::declval<ConversionSpec>(), - std::declval<FormatSink*>()))>> : std::true_type {}; +struct HasUserDefinedConvert<T, void_t<decltype(AbslFormatConvert( + std::declval<const T&>(), + std::declval<const FormatConversionSpec&>(), + std::declval<FormatSink*>()))>> + : std::true_type {}; + +void AbslFormatConvert(); // Stops the lexical name lookup +template <typename T> +auto FormatConvertImpl(const T& v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink) + -> decltype(AbslFormatConvert(v, + std::declval<const FormatConversionSpec&>(), + std::declval<FormatSink*>())) { + using FormatConversionSpecT = + absl::enable_if_t<sizeof(const T& (*)()) != 0, FormatConversionSpec>; + using FormatSinkT = + absl::enable_if_t<sizeof(const T& (*)()) != 0, FormatSink>; + auto fcs = conv.Wrap<FormatConversionSpecT>(); + auto fs = sink->Wrap<FormatSinkT>(); + return AbslFormatConvert(v, fcs, &fs); +} template <typename T> class StreamedWrapper; @@ -43,6 +78,13 @@ class StreamedWrapper; // then convert it, appending to `sink` and return `true`. // Otherwise fail and return `false`. +// AbslFormatConvert(v, conv, sink) is intended to be found by ADL on 'v' +// as an extension mechanism. These FormatConvertImpl functions are the default +// implementations. +// The ADL search is augmented via the 'Sink*' parameter, which also +// serves as a disambiguator to reject possible unintended 'AbslFormatConvert' +// functions in the namespaces associated with 'v'. 
+ // Raw pointers. struct VoidPtr { VoidPtr() = default; @@ -52,27 +94,53 @@ struct VoidPtr { : value(ptr ? reinterpret_cast<uintptr_t>(ptr) : 0) {} uintptr_t value; }; -ConvertResult<Conv::p> FormatConvertImpl(VoidPtr v, ConversionSpec conv, - FormatSinkImpl* sink); + +template <FormatConversionCharSet C> +struct ArgConvertResult { + bool value; +}; + +template <FormatConversionCharSet C> +constexpr FormatConversionCharSet ExtractCharSet(FormatConvertResult<C>) { + return C; +} + +template <FormatConversionCharSet C> +constexpr FormatConversionCharSet ExtractCharSet(ArgConvertResult<C>) { + return C; +} + +using StringConvertResult = + ArgConvertResult<FormatConversionCharSetInternal::s>; +ArgConvertResult<FormatConversionCharSetInternal::p> FormatConvertImpl( + VoidPtr v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); // Strings. -ConvertResult<Conv::s> FormatConvertImpl(const std::string& v, - ConversionSpec conv, - FormatSinkImpl* sink); -ConvertResult<Conv::s> FormatConvertImpl(string_view v, ConversionSpec conv, - FormatSinkImpl* sink); -ConvertResult<Conv::s | Conv::p> FormatConvertImpl(const char* v, - ConversionSpec conv, - FormatSinkImpl* sink); -template <class AbslCord, - typename std::enable_if< - std::is_same<AbslCord, absl::Cord>::value>::type* = nullptr> -ConvertResult<Conv::s> FormatConvertImpl(const AbslCord& value, - ConversionSpec conv, - FormatSinkImpl* sink) { - if (conv.conv() != ConversionChar::s) return {false}; - - bool is_left = conv.flags().left; +StringConvertResult FormatConvertImpl(const std::string& v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +StringConvertResult FormatConvertImpl(string_view v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +#if defined(ABSL_HAVE_STD_STRING_VIEW) && !defined(ABSL_USES_STD_STRING_VIEW) +inline StringConvertResult FormatConvertImpl(std::string_view v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { + return FormatConvertImpl(absl::string_view(v.data(), 
v.size()), conv, sink); +} +#endif // ABSL_HAVE_STD_STRING_VIEW && !ABSL_USES_STD_STRING_VIEW + +ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::s, FormatConversionCharSetInternal::p)> +FormatConvertImpl(const char* v, const FormatConversionSpecImpl conv, + FormatSinkImpl* sink); + +template <class AbslCord, typename std::enable_if<std::is_same< + AbslCord, absl::Cord>::value>::type* = nullptr> +StringConvertResult FormatConvertImpl(const AbslCord& value, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { + bool is_left = conv.has_left_flag(); size_t space_remaining = 0; int width = conv.width(); @@ -105,55 +173,63 @@ ConvertResult<Conv::s> FormatConvertImpl(const AbslCord& value, return {true}; } -using IntegralConvertResult = - ConvertResult<Conv::c | Conv::numeric | Conv::star>; -using FloatingConvertResult = ConvertResult<Conv::floating>; +using IntegralConvertResult = ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::c, + FormatConversionCharSetInternal::kNumeric, + FormatConversionCharSetInternal::kStar)>; +using FloatingConvertResult = + ArgConvertResult<FormatConversionCharSetInternal::kFloating>; // Floats. -FloatingConvertResult FormatConvertImpl(float v, ConversionSpec conv, +FloatingConvertResult FormatConvertImpl(float v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); -FloatingConvertResult FormatConvertImpl(double v, ConversionSpec conv, +FloatingConvertResult FormatConvertImpl(double v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); -FloatingConvertResult FormatConvertImpl(long double v, ConversionSpec conv, +FloatingConvertResult FormatConvertImpl(long double v, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); // Chars. 
-IntegralConvertResult FormatConvertImpl(char v, ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(char v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); -IntegralConvertResult FormatConvertImpl(signed char v, ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(signed char v, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); -IntegralConvertResult FormatConvertImpl(unsigned char v, ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(unsigned char v, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); // Ints. IntegralConvertResult FormatConvertImpl(short v, // NOLINT - ConversionSpec conv, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); IntegralConvertResult FormatConvertImpl(unsigned short v, // NOLINT - ConversionSpec conv, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); -IntegralConvertResult FormatConvertImpl(int v, ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(int v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); -IntegralConvertResult FormatConvertImpl(unsigned v, ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(unsigned v, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); IntegralConvertResult FormatConvertImpl(long v, // NOLINT - ConversionSpec conv, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); IntegralConvertResult FormatConvertImpl(unsigned long v, // NOLINT - ConversionSpec conv, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); IntegralConvertResult FormatConvertImpl(long long v, // NOLINT - ConversionSpec conv, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); IntegralConvertResult FormatConvertImpl(unsigned long long v, // NOLINT - ConversionSpec conv, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); -IntegralConvertResult FormatConvertImpl(int128 v, ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(int128 v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); -IntegralConvertResult 
FormatConvertImpl(uint128 v, ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(uint128 v, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); template <typename T, enable_if_t<std::is_same<T, bool>::value, int> = 0> -IntegralConvertResult FormatConvertImpl(T v, ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(T v, FormatConversionSpecImpl conv, FormatSinkImpl* sink) { return FormatConvertImpl(static_cast<int>(v), conv, sink); } @@ -164,12 +240,12 @@ template <typename T> typename std::enable_if<std::is_enum<T>::value && !HasUserDefinedConvert<T>::value, IntegralConvertResult>::type -FormatConvertImpl(T v, ConversionSpec conv, FormatSinkImpl* sink); +FormatConvertImpl(T v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); template <typename T> -ConvertResult<Conv::s> FormatConvertImpl(const StreamedWrapper<T>& v, - ConversionSpec conv, - FormatSinkImpl* out) { +StringConvertResult FormatConvertImpl(const StreamedWrapper<T>& v, + FormatConversionSpecImpl conv, + FormatSinkImpl* out) { std::ostringstream oss; oss << v.v_; if (!oss) return {false}; @@ -180,21 +256,24 @@ ConvertResult<Conv::s> FormatConvertImpl(const StreamedWrapper<T>& v, // until after FormatCountCapture is fully defined. 
struct FormatCountCaptureHelper { template <class T = int> - static ConvertResult<Conv::n> ConvertHelper(const FormatCountCapture& v, - ConversionSpec conv, - FormatSinkImpl* sink) { + static ArgConvertResult<FormatConversionCharSetInternal::n> ConvertHelper( + const FormatCountCapture& v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { const absl::enable_if_t<sizeof(T) != 0, FormatCountCapture>& v2 = v; - if (conv.conv() != str_format_internal::ConversionChar::n) return {false}; + if (conv.conversion_char() != + str_format_internal::FormatConversionCharInternal::n) { + return {false}; + } *v2.p_ = static_cast<int>(sink->size()); return {true}; } }; template <class T = int> -ConvertResult<Conv::n> FormatConvertImpl(const FormatCountCapture& v, - ConversionSpec conv, - FormatSinkImpl* sink) { +ArgConvertResult<FormatConversionCharSetInternal::n> FormatConvertImpl( + const FormatCountCapture& v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { return FormatCountCaptureHelper::ConvertHelper(v, conv, sink); } @@ -203,13 +282,13 @@ ConvertResult<Conv::n> FormatConvertImpl(const FormatCountCapture& v, struct FormatArgImplFriend { template <typename Arg> static bool ToInt(Arg arg, int* out) { - // A value initialized ConversionSpec has a `none` conv, which tells the - // dispatcher to run the `int` conversion. + // A value initialized FormatConversionSpecImpl has a `none` conv, which + // tells the dispatcher to run the `int` conversion. 
return arg.dispatcher_(arg.data_, {}, out); } template <typename Arg> - static bool Convert(Arg arg, str_format_internal::ConversionSpec conv, + static bool Convert(Arg arg, FormatConversionSpecImpl conv, FormatSinkImpl* out) { return arg.dispatcher_(arg.data_, conv, out); } @@ -220,6 +299,15 @@ struct FormatArgImplFriend { } }; +template <typename Arg> +constexpr FormatConversionCharSet ArgumentToConv() { + return absl::str_format_internal::ExtractCharSet( + decltype(str_format_internal::FormatConvertImpl( + std::declval<const Arg&>(), + std::declval<const FormatConversionSpecImpl&>(), + std::declval<FormatSinkImpl*>())){}); +} + // A type-erased handle to a format argument. class FormatArgImpl { private: @@ -233,7 +321,7 @@ class FormatArgImpl { char buf[kInlinedSpace]; }; - using Dispatcher = bool (*)(Data, ConversionSpec, void* out); + using Dispatcher = bool (*)(Data, FormatConversionSpecImpl, void* out); template <typename T> struct store_by_value @@ -375,15 +463,20 @@ class FormatArgImpl { } template <typename T> - static bool Dispatch(Data arg, ConversionSpec spec, void* out) { + static bool Dispatch(Data arg, FormatConversionSpecImpl spec, void* out) { // A `none` conv indicates that we want the `int` conversion. 
- if (ABSL_PREDICT_FALSE(spec.conv() == ConversionChar::none)) { + if (ABSL_PREDICT_FALSE(spec.conversion_char() == + FormatConversionCharInternal::kNone)) { return ToInt<T>(arg, static_cast<int*>(out), std::is_integral<T>(), std::is_enum<T>()); } - + if (ABSL_PREDICT_FALSE(!Contains(ArgumentToConv<T>(), + spec.conversion_char()))) { + return false; + } return str_format_internal::FormatConvertImpl( - Manager<T>::Value(arg), spec, static_cast<FormatSinkImpl*>(out)) + Manager<T>::Value(arg), spec, + static_cast<FormatSinkImpl*>(out)) .value; } @@ -391,8 +484,9 @@ class FormatArgImpl { Dispatcher dispatcher_; }; -#define ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(T, E) \ - E template bool FormatArgImpl::Dispatch<T>(Data, ConversionSpec, void*) +#define ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(T, E) \ + E template bool FormatArgImpl::Dispatch<T>(Data, FormatConversionSpecImpl, \ + void*) #define ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(...) \ ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(str_format_internal::VoidPtr, \ diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/arg_test.cc b/third_party/abseil-cpp/absl/strings/internal/str_format/arg_test.cc index 8d30d8b8ce..1261937c30 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/arg_test.cc +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/arg_test.cc @@ -6,6 +6,12 @@ // // https://www.apache.org/licenses/LICENSE-2.0 // +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ #include "absl/strings/internal/str_format/arg.h" #include <ostream> @@ -23,8 +29,17 @@ class FormatArgImplTest : public ::testing::Test { enum Color { kRed, kGreen, kBlue }; static const char *hi() { return "hi"; } + + struct X {}; + + X x_; }; +inline FormatConvertResult<FormatConversionCharSet{}> AbslFormatConvert( + const FormatArgImplTest::X &, const FormatConversionSpec &, FormatSink *) { + return {false}; +} + TEST_F(FormatArgImplTest, ToInt) { int out = 0; EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(1), &out)); @@ -59,6 +74,7 @@ TEST_F(FormatArgImplTest, ToInt) { FormatArgImpl(static_cast<int *>(nullptr)), &out)); EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(hi()), &out)); EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl("hi"), &out)); + EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(x_), &out)); EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(kBlue), &out)); EXPECT_EQ(2, out); } @@ -95,8 +111,9 @@ TEST_F(FormatArgImplTest, OtherPtrDecayToVoidPtr) { TEST_F(FormatArgImplTest, WorksWithCharArraysOfUnknownSize) { std::string s; FormatSinkImpl sink(&s); - ConversionSpec conv; - FormatConversionSpecImplFriend::SetConversionChar(ConversionChar::s, &conv); + FormatConversionSpecImpl conv; + FormatConversionSpecImplFriend::SetConversionChar( + FormatConversionCharInternal::s, &conv); FormatConversionSpecImplFriend::SetFlags(Flags(), &conv); FormatConversionSpecImplFriend::SetWidth(-1, &conv); FormatConversionSpecImplFriend::SetPrecision(-1, &conv); diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/bind.cc b/third_party/abseil-cpp/absl/strings/internal/str_format/bind.cc index 27522fdb4f..c988ba8fd2 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/bind.cc +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/bind.cc @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include "absl/strings/internal/str_format/bind.h" #include <cerrno> @@ -44,7 +58,7 @@ inline bool ArgContext::Bind(const UnboundConversion* unbound, if (static_cast<size_t>(arg_position - 1) >= pack_.size()) return false; arg = &pack_[arg_position - 1]; // 1-based - if (!unbound->flags.basic) { + if (unbound->flags != Flags::kBasic) { int width = unbound->width.value(); bool force_left = false; if (unbound->width.is_from_arg()) { @@ -70,9 +84,8 @@ inline bool ArgContext::Bind(const UnboundConversion* unbound, FormatConversionSpecImplFriend::SetPrecision(precision, bound); if (force_left) { - Flags flags = unbound->flags; - flags.left = true; - FormatConversionSpecImplFriend::SetFlags(flags, bound); + FormatConversionSpecImplFriend::SetFlags(unbound->flags | Flags::kLeft, + bound); } else { FormatConversionSpecImplFriend::SetFlags(unbound->flags, bound); } @@ -147,7 +160,7 @@ class SummarizingConverter { << FormatConversionSpecImplFriend::FlagsToString(bound); if (bound.width() >= 0) ss << bound.width(); if (bound.precision() >= 0) ss << "." 
<< bound.precision(); - ss << bound.conv() << "}"; + ss << bound.conversion_char() << "}"; Append(ss.str()); return true; } @@ -221,7 +234,7 @@ int FprintF(std::FILE* output, const UntypedFormatSpecImpl format, errno = sink.error(); return -1; } - if (sink.count() > std::numeric_limits<int>::max()) { + if (sink.count() > static_cast<size_t>(std::numeric_limits<int>::max())) { errno = EFBIG; return -1; } diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/bind.h b/third_party/abseil-cpp/absl/strings/internal/str_format/bind.h index cf41b19748..b26cff6648 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/bind.h +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/bind.h @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_ #define ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_ @@ -19,7 +33,7 @@ class UntypedFormatSpec; namespace str_format_internal { -class BoundConversion : public ConversionSpec { +class BoundConversion : public FormatConversionSpecImpl { public: const FormatArgImpl* arg() const { return arg_; } void set_arg(const FormatArgImpl* a) { arg_ = a; } @@ -60,7 +74,7 @@ class UntypedFormatSpecImpl { size_t size_; }; -template <typename T, typename...> +template <typename T, FormatConversionCharSet...> struct MakeDependent { using type = T; }; @@ -68,7 +82,7 @@ struct MakeDependent { // Implicitly convertible from `const char*`, `string_view`, and the // `ExtendedParsedFormat` type. This abstraction allows all format functions to // operate on any without providing too many overloads. -template <typename... Args> +template <FormatConversionCharSet... Args> class FormatSpecTemplate : public MakeDependent<UntypedFormatSpec, Args...>::type { using Base = typename MakeDependent<UntypedFormatSpec, Args...>::type; @@ -76,17 +90,17 @@ class FormatSpecTemplate public: #ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER - // Honeypot overload for when the std::string is not constexpr. + // Honeypot overload for when the string is not constexpr. // We use the 'unavailable' attribute to give a better compiler error than // just 'method is deleted'. FormatSpecTemplate(...) // NOLINT - __attribute__((unavailable("Format std::string is not constexpr."))); + __attribute__((unavailable("Format string is not constexpr."))); // Honeypot overload for when the format is constexpr and invalid. // We use the 'unavailable' attribute to give a better compiler error than // just 'method is deleted'. // To avoid checking the format twice, we just check that the format is - // constexpr. If is it valid, then the overload below will kick in. + // constexpr. If it is valid, then the overload below will kick in. 
// We add the template here to make this overload have lower priority. template <typename = void> FormatSpecTemplate(const char* s) // NOLINT @@ -105,13 +119,11 @@ class FormatSpecTemplate // Good format overload. FormatSpecTemplate(const char* s) // NOLINT - __attribute__((enable_if(ValidFormatImpl<ArgumentToConv<Args>()...>(s), - "bad format trap"))) + __attribute__((enable_if(ValidFormatImpl<Args...>(s), "bad format trap"))) : Base(s) {} FormatSpecTemplate(string_view s) // NOLINT - __attribute__((enable_if(ValidFormatImpl<ArgumentToConv<Args>()...>(s), - "bad format trap"))) + __attribute__((enable_if(ValidFormatImpl<Args...>(s), "bad format trap"))) : Base(s) {} #else // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER @@ -121,19 +133,15 @@ class FormatSpecTemplate #endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER - template <Conv... C, typename = typename std::enable_if< - AllOf(sizeof...(C) == sizeof...(Args), - Contains(ArgumentToConv<Args>(), - C)...)>::type> + template < + FormatConversionCharSet... C, + typename = typename std::enable_if<sizeof...(C) == sizeof...(Args)>::type, + typename = typename std::enable_if<AllOf(Contains(Args, + C)...)>::type> FormatSpecTemplate(const ExtendedParsedFormat<C...>& pc) // NOLINT : Base(&pc) {} }; -template <typename... 
Args> -struct FormatSpecDeductionBarrier { - using type = FormatSpecTemplate<Args...>; -}; - class Streamable { public: Streamable(const UntypedFormatSpecImpl& format, @@ -196,9 +204,9 @@ class StreamedWrapper { private: template <typename S> - friend ConvertResult<Conv::s> FormatConvertImpl(const StreamedWrapper<S>& v, - ConversionSpec conv, - FormatSinkImpl* out); + friend ArgConvertResult<FormatConversionCharSetInternal::s> FormatConvertImpl( + const StreamedWrapper<S>& v, FormatConversionSpecImpl conv, + FormatSinkImpl* out); const T& v_; }; diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/bind_test.cc b/third_party/abseil-cpp/absl/strings/internal/str_format/bind_test.cc index 64790a85fd..1eef9c4326 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/bind_test.cc +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/bind_test.cc @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include "absl/strings/internal/str_format/bind.h" #include <string.h> diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/checker.h b/third_party/abseil-cpp/absl/strings/internal/str_format/checker.h index 8993a79b95..2a2601eccf 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/checker.h +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/checker.h @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ #define ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ @@ -24,13 +38,6 @@ constexpr bool AllOf(bool b, T... t) { return b && AllOf(t...); } -template <typename Arg> -constexpr Conv ArgumentToConv() { - return decltype(str_format_internal::FormatConvertImpl( - std::declval<const Arg&>(), std::declval<const ConversionSpec&>(), - std::declval<FormatSinkImpl*>()))::kConv; -} - #ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER constexpr bool ContainsChar(const char* chars, char c) { @@ -39,14 +46,14 @@ constexpr bool ContainsChar(const char* chars, char c) { // A constexpr compatible list of Convs. struct ConvList { - const Conv* array; + const FormatConversionCharSet* array; int count; // We do the bound check here to avoid having to do it on the callers. - // Returning an empty Conv has the same effect as short circuiting because it - // will never match any conversion. - constexpr Conv operator[](int i) const { - return i < count ? array[i] : Conv{}; + // Returning an empty FormatConversionCharSet has the same effect as + // short circuiting because it will never match any conversion. + constexpr FormatConversionCharSet operator[](int i) const { + return i < count ? array[i] : FormatConversionCharSet{}; } constexpr ConvList without_front() const { @@ -57,7 +64,7 @@ struct ConvList { template <size_t count> struct ConvListT { // Make sure the array has size > 0. 
- Conv list[count ? count : 1]; + FormatConversionCharSet list[count ? count : 1]; }; constexpr char GetChar(string_view str, size_t index) { @@ -310,7 +317,7 @@ class FormatParser { ConvList args_; }; -template <Conv... C> +template <FormatConversionCharSet... C> constexpr bool ValidFormatImpl(string_view format) { return FormatParser(format, {ConvListT<sizeof...(C)>{{C...}}.list, sizeof...(C)}) diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/checker_test.cc b/third_party/abseil-cpp/absl/strings/internal/str_format/checker_test.cc index ea2a7681a6..7c70f47d68 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/checker_test.cc +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/checker_test.cc @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ #include <string> #include "gmock/gmock.h" @@ -9,18 +23,22 @@ ABSL_NAMESPACE_BEGIN namespace str_format_internal { namespace { -std::string ConvToString(Conv conv) { +std::string ConvToString(FormatConversionCharSet conv) { std::string out; -#define CONV_SET_CASE(c) \ - if (Contains(conv, Conv::c)) out += #c; +#define CONV_SET_CASE(c) \ + if (Contains(conv, FormatConversionCharSetInternal::c)) { \ + out += #c; \ + } ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(CONV_SET_CASE, ) #undef CONV_SET_CASE - if (Contains(conv, Conv::star)) out += "*"; + if (Contains(conv, FormatConversionCharSetInternal::kStar)) { + out += "*"; + } return out; } TEST(StrFormatChecker, ArgumentToConv) { - Conv conv = ArgumentToConv<std::string>(); + FormatConversionCharSet conv = ArgumentToConv<std::string>(); EXPECT_EQ(ConvToString(conv), "s"); conv = ArgumentToConv<const char*>(); diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/convert_test.cc b/third_party/abseil-cpp/absl/strings/internal/str_format/convert_test.cc index cbcd7caf46..91e0360901 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/convert_test.cc +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/convert_test.cc @@ -1,20 +1,46 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ #include <errno.h> #include <stdarg.h> #include <stdio.h> + #include <cctype> #include <cmath> +#include <limits> #include <string> +#include <thread> // NOLINT #include "gmock/gmock.h" #include "gtest/gtest.h" #include "absl/base/internal/raw_logging.h" #include "absl/strings/internal/str_format/bind.h" +#include "absl/strings/match.h" +#include "absl/types/optional.h" namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { namespace { +struct NativePrintfTraits { + bool hex_float_has_glibc_rounding; + bool hex_float_prefers_denormal_repr; + bool hex_float_uses_minimal_precision_when_not_specified; + bool hex_float_optimizes_leading_digit_bit_count; +}; + template <typename T, size_t N> size_t ArraySize(T (&)[N]) { return N; @@ -57,7 +83,7 @@ std::string Esc(const T &v) { return oss.str(); } -void StrAppend(std::string *dst, const char *format, va_list ap) { +void StrAppendV(std::string *dst, const char *format, va_list ap) { // First try with a small fixed size buffer static const int kSpaceLength = 1024; char space[kSpaceLength]; @@ -98,15 +124,79 @@ void StrAppend(std::string *dst, const char *format, va_list ap) { delete[] buf; } +void StrAppend(std::string *out, const char *format, ...) { + va_list ap; + va_start(ap, format); + StrAppendV(out, format, ap); + va_end(ap); +} + std::string StrPrint(const char *format, ...) { va_list ap; va_start(ap, format); std::string result; - StrAppend(&result, format, ap); + StrAppendV(&result, format, ap); va_end(ap); return result; } +NativePrintfTraits VerifyNativeImplementationImpl() { + NativePrintfTraits result; + + // >>> hex_float_has_glibc_rounding. To have glibc's rounding behavior we need + // to meet three requirements: + // + // - The threshold for rounding up is 8 (e.g., MSVC uses 9). + // - If the digits lower than the 8 are non-zero then we round up. + // - If the digits lower than the 8 are all zero then we round toward even. 
+ // + // The numbers below represent all the cases covering {below,at,above} the + // threshold (8) with both {zero,non-zero} lower bits and both {even,odd} + // preceding digits. + const double d0079 = 65657.0; // 0x1.0079p+16 + const double d0179 = 65913.0; // 0x1.0179p+16 + const double d0080 = 65664.0; // 0x1.0080p+16 + const double d0180 = 65920.0; // 0x1.0180p+16 + const double d0081 = 65665.0; // 0x1.0081p+16 + const double d0181 = 65921.0; // 0x1.0181p+16 + result.hex_float_has_glibc_rounding = + StartsWith(StrPrint("%.2a", d0079), "0x1.00") && + StartsWith(StrPrint("%.2a", d0179), "0x1.01") && + StartsWith(StrPrint("%.2a", d0080), "0x1.00") && + StartsWith(StrPrint("%.2a", d0180), "0x1.02") && + StartsWith(StrPrint("%.2a", d0081), "0x1.01") && + StartsWith(StrPrint("%.2a", d0181), "0x1.02"); + + // >>> hex_float_prefers_denormal_repr. Formatting `denormal` on glibc yields + // "0x0.0000000000001p-1022", whereas on std libs that don't use denormal + // representation it would either be 0x1p-1074 or 0x1.0000000000000p-1074. + const double denormal = std::numeric_limits<double>::denorm_min(); + result.hex_float_prefers_denormal_repr = + StartsWith(StrPrint("%a", denormal), "0x0.0000000000001"); + + // >>> hex_float_uses_minimal_precision_when_not_specified. Some (non-glibc) + // libs will format the following as "0x1.0079000000000p+16". + result.hex_float_uses_minimal_precision_when_not_specified = + (StrPrint("%a", d0079) == "0x1.0079p+16"); + + // >>> hex_float_optimizes_leading_digit_bit_count. The number 1.5, when + // formatted by glibc should yield "0x1.8p+0" for `double` and "0xcp-3" for + // `long double`, i.e., number of bits in the leading digit is adapted to the + // number of bits in the mantissa. 
+ const double d_15 = 1.5; + const long double ld_15 = 1.5; + result.hex_float_optimizes_leading_digit_bit_count = + StartsWith(StrPrint("%a", d_15), "0x1.8") && + StartsWith(StrPrint("%La", ld_15), "0xc"); + + return result; +} + +const NativePrintfTraits &VerifyNativeImplementation() { + static NativePrintfTraits native_traits = VerifyNativeImplementationImpl(); + return native_traits; +} + class FormatConvertTest : public ::testing::Test { }; template <typename T> @@ -139,6 +229,9 @@ TEST_F(FormatConvertTest, BasicString) { TestStringConvert(static_cast<const char*>("hello")); TestStringConvert(std::string("hello")); TestStringConvert(string_view("hello")); +#if defined(ABSL_HAVE_STD_STRING_VIEW) + TestStringConvert(std::string_view("hello")); +#endif // ABSL_HAVE_STD_STRING_VIEW } TEST_F(FormatConvertTest, NullString) { @@ -463,17 +556,130 @@ TEST_F(FormatConvertTest, Uint128) { } } -TEST_F(FormatConvertTest, Float) { -#ifdef _MSC_VER - // MSVC has a different rounding policy than us so we can't test our - // implementation against the native one there. - return; -#endif // _MSC_VER +template <typename Floating> +void TestWithMultipleFormatsHelper(const std::vector<Floating> &floats, + const std::set<Floating> &skip_verify) { + const NativePrintfTraits &native_traits = VerifyNativeImplementation(); + // Reserve the space to ensure we don't allocate memory in the output itself. 
+ std::string str_format_result; + str_format_result.reserve(1 << 20); + std::string string_printf_result; + string_printf_result.reserve(1 << 20); const char *const kFormats[] = { - "%", "%.3", "%8.5", "%9", "%.60", "%.30", "%03", "%+", - "% ", "%-10", "%#15.3", "%#.0", "%.0", "%1$*2$", "%1$.*2$"}; + "%", "%.3", "%8.5", "%500", "%.5000", "%.60", "%.30", "%03", + "%+", "% ", "%-10", "%#15.3", "%#.0", "%.0", "%1$*2$", "%1$.*2$"}; + + for (const char *fmt : kFormats) { + for (char f : {'f', 'F', // + 'g', 'G', // + 'a', 'A', // + 'e', 'E'}) { + std::string fmt_str = std::string(fmt) + f; + + if (fmt == absl::string_view("%.5000") && f != 'f' && f != 'F' && + f != 'a' && f != 'A') { + // This particular test takes way too long with snprintf. + // Disable for the case we are not implementing natively. + continue; + } + + if ((f == 'a' || f == 'A') && + !native_traits.hex_float_has_glibc_rounding) { + continue; + } + for (Floating d : floats) { + if (!native_traits.hex_float_prefers_denormal_repr && + (f == 'a' || f == 'A') && std::fpclassify(d) == FP_SUBNORMAL) { + continue; + } + int i = -10; + FormatArgImpl args[2] = {FormatArgImpl(d), FormatArgImpl(i)}; + UntypedFormatSpecImpl format(fmt_str); + + string_printf_result.clear(); + StrAppend(&string_printf_result, fmt_str.c_str(), d, i); + str_format_result.clear(); + + { + AppendPack(&str_format_result, format, absl::MakeSpan(args)); + } + +#ifdef _MSC_VER + // MSVC has a different rounding policy than us so we can't test our + // implementation against the native one there. + continue; +#elif defined(__APPLE__) + // Apple formats NaN differently (+nan) vs. (nan) + if (std::isnan(d)) continue; +#endif + if (string_printf_result != str_format_result && + skip_verify.find(d) == skip_verify.end()) { + // We use ASSERT_EQ here because failures are usually correlated and a + // bug would print way too many failed expectations causing the test + // to time out. 
+ ASSERT_EQ(string_printf_result, str_format_result) + << fmt_str << " " << StrPrint("%.18g", d) << " " + << StrPrint("%a", d) << " " << StrPrint("%.50f", d); + } + } + } + } +} + +TEST_F(FormatConvertTest, Float) { + std::vector<float> floats = {0.0f, + -0.0f, + .9999999f, + 9999999.f, + std::numeric_limits<float>::max(), + -std::numeric_limits<float>::max(), + std::numeric_limits<float>::min(), + -std::numeric_limits<float>::min(), + std::numeric_limits<float>::lowest(), + -std::numeric_limits<float>::lowest(), + std::numeric_limits<float>::epsilon(), + std::numeric_limits<float>::epsilon() + 1.0f, + std::numeric_limits<float>::infinity(), + -std::numeric_limits<float>::infinity(), + std::nanf("")}; + + // Some regression tests. + floats.push_back(0.999999989f); + + if (std::numeric_limits<float>::has_denorm != std::denorm_absent) { + floats.push_back(std::numeric_limits<float>::denorm_min()); + floats.push_back(-std::numeric_limits<float>::denorm_min()); + } + + for (float base : + {1.f, 12.f, 123.f, 1234.f, 12345.f, 123456.f, 1234567.f, 12345678.f, + 123456789.f, 1234567890.f, 12345678901.f, 12345678.f, 12345678.f}) { + for (int exp = -123; exp <= 123; ++exp) { + for (int sign : {1, -1}) { + floats.push_back(sign * std::ldexp(base, exp)); + } + } + } + + for (int exp = -300; exp <= 300; ++exp) { + const float all_ones_mantissa = 0xffffff; + floats.push_back(std::ldexp(all_ones_mantissa, exp)); + } + + // Remove duplicates to speed up the logic below. + std::sort(floats.begin(), floats.end()); + floats.erase(std::unique(floats.begin(), floats.end()), floats.end()); + + TestWithMultipleFormatsHelper(floats, {}); +} + +TEST_F(FormatConvertTest, Double) { + // For values that we know won't match the standard library implementation we + // skip verification, but still run the algorithm to catch asserts/sanitizer + // bugs. 
+ std::set<double> skip_verify; std::vector<double> doubles = {0.0, -0.0, .99999999999999, @@ -487,12 +693,8 @@ TEST_F(FormatConvertTest, Float) { std::numeric_limits<double>::epsilon(), std::numeric_limits<double>::epsilon() + 1, std::numeric_limits<double>::infinity(), - -std::numeric_limits<double>::infinity()}; - -#ifndef __APPLE__ - // Apple formats NaN differently (+nan) vs. (nan) - doubles.push_back(std::nan("")); -#endif + -std::numeric_limits<double>::infinity(), + std::nan("")}; // Some regression tests. doubles.push_back(0.99999999999999989); @@ -512,43 +714,366 @@ TEST_F(FormatConvertTest, Float) { } } - for (const char *fmt : kFormats) { - for (char f : {'f', 'F', // - 'g', 'G', // - 'a', 'A', // - 'e', 'E'}) { - std::string fmt_str = std::string(fmt) + f; - for (double d : doubles) { - int i = -10; - FormatArgImpl args[2] = {FormatArgImpl(d), FormatArgImpl(i)}; - UntypedFormatSpecImpl format(fmt_str); - // We use ASSERT_EQ here because failures are usually correlated and a - // bug would print way too many failed expectations causing the test to - // time out. - ASSERT_EQ(StrPrint(fmt_str.c_str(), d, i), - FormatPack(format, absl::MakeSpan(args))) - << fmt_str << " " << StrPrint("%.18g", d) << " " - << StrPrint("%.999f", d); - } + // Workaround libc bug. 
+ // https://sourceware.org/bugzilla/show_bug.cgi?id=22142 + const bool gcc_bug_22142 = + StrPrint("%f", std::numeric_limits<double>::max()) != + "1797693134862315708145274237317043567980705675258449965989174768031" + "5726078002853876058955863276687817154045895351438246423432132688946" + "4182768467546703537516986049910576551282076245490090389328944075868" + "5084551339423045832369032229481658085593321233482747978262041447231" + "68738177180919299881250404026184124858368.000000"; + + for (int exp = -300; exp <= 300; ++exp) { + const double all_ones_mantissa = 0x1fffffffffffff; + doubles.push_back(std::ldexp(all_ones_mantissa, exp)); + if (gcc_bug_22142) { + skip_verify.insert(doubles.back()); + } + } + + if (gcc_bug_22142) { + using L = std::numeric_limits<double>; + skip_verify.insert(L::max()); + skip_verify.insert(L::min()); // NOLINT + skip_verify.insert(L::denorm_min()); + skip_verify.insert(-L::max()); + skip_verify.insert(-L::min()); // NOLINT + skip_verify.insert(-L::denorm_min()); + } + + // Remove duplicates to speed up the logic below. + std::sort(doubles.begin(), doubles.end()); + doubles.erase(std::unique(doubles.begin(), doubles.end()), doubles.end()); + + TestWithMultipleFormatsHelper(doubles, skip_verify); +} + +TEST_F(FormatConvertTest, DoubleRound) { + std::string s; + const auto format = [&](const char *fmt, double d) -> std::string & { + s.clear(); + FormatArgImpl args[1] = {FormatArgImpl(d)}; + AppendPack(&s, UntypedFormatSpecImpl(fmt), absl::MakeSpan(args)); +#if !defined(_MSC_VER) + // MSVC has a different rounding policy than us so we can't test our + // implementation against the native one there. + EXPECT_EQ(StrPrint(fmt, d), s); +#endif // _MSC_VER + + return s; + }; + // All of these values have to be exactly represented. + // Otherwise we might not be testing what we think we are testing. + + // These values can fit in a 64bit "fast" representation. 
+ const double exact_value = 0.00000000000005684341886080801486968994140625; + assert(exact_value == std::pow(2, -44)); + // Round up at a 5xx. + EXPECT_EQ(format("%.13f", exact_value), "0.0000000000001"); + // Round up at a >5 + EXPECT_EQ(format("%.14f", exact_value), "0.00000000000006"); + // Round down at a <5 + EXPECT_EQ(format("%.16f", exact_value), "0.0000000000000568"); + // Nine handling + EXPECT_EQ(format("%.35f", exact_value), + "0.00000000000005684341886080801486969"); + EXPECT_EQ(format("%.36f", exact_value), + "0.000000000000056843418860808014869690"); + // Round down the last nine. + EXPECT_EQ(format("%.37f", exact_value), + "0.0000000000000568434188608080148696899"); + EXPECT_EQ(format("%.10f", 0.000003814697265625), "0.0000038147"); + // Round up the last nine + EXPECT_EQ(format("%.11f", 0.000003814697265625), "0.00000381470"); + EXPECT_EQ(format("%.12f", 0.000003814697265625), "0.000003814697"); + + // Round to even (down) + EXPECT_EQ(format("%.43f", exact_value), + "0.0000000000000568434188608080148696899414062"); + // Exact + EXPECT_EQ(format("%.44f", exact_value), + "0.00000000000005684341886080801486968994140625"); + // Round to even (up), let's make the last digits 75 instead of 25 + EXPECT_EQ(format("%.43f", exact_value + std::pow(2, -43)), + "0.0000000000001705302565824240446090698242188"); + // Exact, just to check. + EXPECT_EQ(format("%.44f", exact_value + std::pow(2, -43)), + "0.00000000000017053025658242404460906982421875"); + + // This value has to be small enough that it won't fit in the uint128 + // representation for printing. + const double small_exact_value = + 0.000000000000000000000000000000000000752316384526264005099991383822237233803945956334136013765601092018187046051025390625; // NOLINT + assert(small_exact_value == std::pow(2, -120)); + // Round up at a 5xx. 
+ EXPECT_EQ(format("%.37f", small_exact_value), + "0.0000000000000000000000000000000000008"); + // Round down at a <5 + EXPECT_EQ(format("%.38f", small_exact_value), + "0.00000000000000000000000000000000000075"); + // Round up at a >5 + EXPECT_EQ(format("%.41f", small_exact_value), + "0.00000000000000000000000000000000000075232"); + // Nine handling + EXPECT_EQ(format("%.55f", small_exact_value), + "0.0000000000000000000000000000000000007523163845262640051"); + EXPECT_EQ(format("%.56f", small_exact_value), + "0.00000000000000000000000000000000000075231638452626400510"); + EXPECT_EQ(format("%.57f", small_exact_value), + "0.000000000000000000000000000000000000752316384526264005100"); + EXPECT_EQ(format("%.58f", small_exact_value), + "0.0000000000000000000000000000000000007523163845262640051000"); + // Round down the last nine + EXPECT_EQ(format("%.59f", small_exact_value), + "0.00000000000000000000000000000000000075231638452626400509999"); + // Round up the last nine + EXPECT_EQ(format("%.79f", small_exact_value), + "0.000000000000000000000000000000000000" + "7523163845262640050999913838222372338039460"); + + // Round to even (down) + EXPECT_EQ(format("%.119f", small_exact_value), + "0.000000000000000000000000000000000000" + "75231638452626400509999138382223723380" + "394595633413601376560109201818704605102539062"); + // Exact + EXPECT_EQ(format("%.120f", small_exact_value), + "0.000000000000000000000000000000000000" + "75231638452626400509999138382223723380" + "3945956334136013765601092018187046051025390625"); + // Round to even (up), let make the last digits 75 instead of 25 + EXPECT_EQ(format("%.119f", small_exact_value + std::pow(2, -119)), + "0.000000000000000000000000000000000002" + "25694915357879201529997415146671170141" + "183786900240804129680327605456113815307617188"); + // Exact, just to check. 
+ EXPECT_EQ(format("%.120f", small_exact_value + std::pow(2, -119)), + "0.000000000000000000000000000000000002" + "25694915357879201529997415146671170141" + "1837869002408041296803276054561138153076171875"); +} + +TEST_F(FormatConvertTest, DoubleRoundA) { + const NativePrintfTraits &native_traits = VerifyNativeImplementation(); + std::string s; + const auto format = [&](const char *fmt, double d) -> std::string & { + s.clear(); + FormatArgImpl args[1] = {FormatArgImpl(d)}; + AppendPack(&s, UntypedFormatSpecImpl(fmt), absl::MakeSpan(args)); + if (native_traits.hex_float_has_glibc_rounding) { + EXPECT_EQ(StrPrint(fmt, d), s); + } + return s; + }; + + // 0x1.00018000p+100 + const double on_boundary_odd = 1267679614447900152596896153600.0; + EXPECT_EQ(format("%.0a", on_boundary_odd), "0x1p+100"); + EXPECT_EQ(format("%.1a", on_boundary_odd), "0x1.0p+100"); + EXPECT_EQ(format("%.2a", on_boundary_odd), "0x1.00p+100"); + EXPECT_EQ(format("%.3a", on_boundary_odd), "0x1.000p+100"); + EXPECT_EQ(format("%.4a", on_boundary_odd), "0x1.0002p+100"); // round + EXPECT_EQ(format("%.5a", on_boundary_odd), "0x1.00018p+100"); + EXPECT_EQ(format("%.6a", on_boundary_odd), "0x1.000180p+100"); + + // 0x1.00028000p-2 + const double on_boundary_even = 0.250009536743164062500; + EXPECT_EQ(format("%.0a", on_boundary_even), "0x1p-2"); + EXPECT_EQ(format("%.1a", on_boundary_even), "0x1.0p-2"); + EXPECT_EQ(format("%.2a", on_boundary_even), "0x1.00p-2"); + EXPECT_EQ(format("%.3a", on_boundary_even), "0x1.000p-2"); + EXPECT_EQ(format("%.4a", on_boundary_even), "0x1.0002p-2"); // no round + EXPECT_EQ(format("%.5a", on_boundary_even), "0x1.00028p-2"); + EXPECT_EQ(format("%.6a", on_boundary_even), "0x1.000280p-2"); + + // 0x1.00018001p+1 + const double slightly_over = 2.00004577683284878730773925781250; + EXPECT_EQ(format("%.0a", slightly_over), "0x1p+1"); + EXPECT_EQ(format("%.1a", slightly_over), "0x1.0p+1"); + EXPECT_EQ(format("%.2a", slightly_over), "0x1.00p+1"); + EXPECT_EQ(format("%.3a", 
slightly_over), "0x1.000p+1"); + EXPECT_EQ(format("%.4a", slightly_over), "0x1.0002p+1"); + EXPECT_EQ(format("%.5a", slightly_over), "0x1.00018p+1"); + EXPECT_EQ(format("%.6a", slightly_over), "0x1.000180p+1"); + + // 0x1.00017fffp+0 + const double slightly_under = 1.000022887950763106346130371093750; + EXPECT_EQ(format("%.0a", slightly_under), "0x1p+0"); + EXPECT_EQ(format("%.1a", slightly_under), "0x1.0p+0"); + EXPECT_EQ(format("%.2a", slightly_under), "0x1.00p+0"); + EXPECT_EQ(format("%.3a", slightly_under), "0x1.000p+0"); + EXPECT_EQ(format("%.4a", slightly_under), "0x1.0001p+0"); + EXPECT_EQ(format("%.5a", slightly_under), "0x1.00018p+0"); + EXPECT_EQ(format("%.6a", slightly_under), "0x1.000180p+0"); + EXPECT_EQ(format("%.7a", slightly_under), "0x1.0001800p+0"); + + // 0x1.1b3829ac28058p+3 + const double hex_value = 8.85060580848964661981881363317370414733886718750; + EXPECT_EQ(format("%.0a", hex_value), "0x1p+3"); + EXPECT_EQ(format("%.1a", hex_value), "0x1.2p+3"); + EXPECT_EQ(format("%.2a", hex_value), "0x1.1bp+3"); + EXPECT_EQ(format("%.3a", hex_value), "0x1.1b4p+3"); + EXPECT_EQ(format("%.4a", hex_value), "0x1.1b38p+3"); + EXPECT_EQ(format("%.5a", hex_value), "0x1.1b383p+3"); + EXPECT_EQ(format("%.6a", hex_value), "0x1.1b382ap+3"); + EXPECT_EQ(format("%.7a", hex_value), "0x1.1b3829bp+3"); + EXPECT_EQ(format("%.8a", hex_value), "0x1.1b3829acp+3"); + EXPECT_EQ(format("%.9a", hex_value), "0x1.1b3829ac3p+3"); + EXPECT_EQ(format("%.10a", hex_value), "0x1.1b3829ac28p+3"); + EXPECT_EQ(format("%.11a", hex_value), "0x1.1b3829ac280p+3"); + EXPECT_EQ(format("%.12a", hex_value), "0x1.1b3829ac2806p+3"); + EXPECT_EQ(format("%.13a", hex_value), "0x1.1b3829ac28058p+3"); + EXPECT_EQ(format("%.14a", hex_value), "0x1.1b3829ac280580p+3"); + EXPECT_EQ(format("%.15a", hex_value), "0x1.1b3829ac2805800p+3"); + EXPECT_EQ(format("%.16a", hex_value), "0x1.1b3829ac28058000p+3"); + EXPECT_EQ(format("%.17a", hex_value), "0x1.1b3829ac280580000p+3"); + EXPECT_EQ(format("%.18a", 
hex_value), "0x1.1b3829ac2805800000p+3"); + EXPECT_EQ(format("%.19a", hex_value), "0x1.1b3829ac28058000000p+3"); + EXPECT_EQ(format("%.20a", hex_value), "0x1.1b3829ac280580000000p+3"); + EXPECT_EQ(format("%.21a", hex_value), "0x1.1b3829ac2805800000000p+3"); + + // 0x1.0818283848586p+3 + const double hex_value2 = 8.2529488658208371987257123691961169242858886718750; + EXPECT_EQ(format("%.0a", hex_value2), "0x1p+3"); + EXPECT_EQ(format("%.1a", hex_value2), "0x1.1p+3"); + EXPECT_EQ(format("%.2a", hex_value2), "0x1.08p+3"); + EXPECT_EQ(format("%.3a", hex_value2), "0x1.082p+3"); + EXPECT_EQ(format("%.4a", hex_value2), "0x1.0818p+3"); + EXPECT_EQ(format("%.5a", hex_value2), "0x1.08183p+3"); + EXPECT_EQ(format("%.6a", hex_value2), "0x1.081828p+3"); + EXPECT_EQ(format("%.7a", hex_value2), "0x1.0818284p+3"); + EXPECT_EQ(format("%.8a", hex_value2), "0x1.08182838p+3"); + EXPECT_EQ(format("%.9a", hex_value2), "0x1.081828385p+3"); + EXPECT_EQ(format("%.10a", hex_value2), "0x1.0818283848p+3"); + EXPECT_EQ(format("%.11a", hex_value2), "0x1.08182838486p+3"); + EXPECT_EQ(format("%.12a", hex_value2), "0x1.081828384858p+3"); + EXPECT_EQ(format("%.13a", hex_value2), "0x1.0818283848586p+3"); + EXPECT_EQ(format("%.14a", hex_value2), "0x1.08182838485860p+3"); + EXPECT_EQ(format("%.15a", hex_value2), "0x1.081828384858600p+3"); + EXPECT_EQ(format("%.16a", hex_value2), "0x1.0818283848586000p+3"); + EXPECT_EQ(format("%.17a", hex_value2), "0x1.08182838485860000p+3"); + EXPECT_EQ(format("%.18a", hex_value2), "0x1.081828384858600000p+3"); + EXPECT_EQ(format("%.19a", hex_value2), "0x1.0818283848586000000p+3"); + EXPECT_EQ(format("%.20a", hex_value2), "0x1.08182838485860000000p+3"); + EXPECT_EQ(format("%.21a", hex_value2), "0x1.081828384858600000000p+3"); +} + +TEST_F(FormatConvertTest, LongDoubleRoundA) { + if (std::numeric_limits<long double>::digits % 4 != 0) { + // This test doesn't really make sense to run on platforms where a long + // double has a different mantissa size (mod 4) than Prod, 
since then the + // leading digit will be formatted differently. + return; + } + const NativePrintfTraits &native_traits = VerifyNativeImplementation(); + std::string s; + const auto format = [&](const char *fmt, long double d) -> std::string & { + s.clear(); + FormatArgImpl args[1] = {FormatArgImpl(d)}; + AppendPack(&s, UntypedFormatSpecImpl(fmt), absl::MakeSpan(args)); + if (native_traits.hex_float_has_glibc_rounding && + native_traits.hex_float_optimizes_leading_digit_bit_count) { + EXPECT_EQ(StrPrint(fmt, d), s); + } + return s; + }; + + // 0x8.8p+4 + const long double on_boundary_even = 136.0; + EXPECT_EQ(format("%.0La", on_boundary_even), "0x8p+4"); + EXPECT_EQ(format("%.1La", on_boundary_even), "0x8.8p+4"); + EXPECT_EQ(format("%.2La", on_boundary_even), "0x8.80p+4"); + EXPECT_EQ(format("%.3La", on_boundary_even), "0x8.800p+4"); + EXPECT_EQ(format("%.4La", on_boundary_even), "0x8.8000p+4"); + EXPECT_EQ(format("%.5La", on_boundary_even), "0x8.80000p+4"); + EXPECT_EQ(format("%.6La", on_boundary_even), "0x8.800000p+4"); + + // 0x9.8p+4 + const long double on_boundary_odd = 152.0; + EXPECT_EQ(format("%.0La", on_boundary_odd), "0xap+4"); + EXPECT_EQ(format("%.1La", on_boundary_odd), "0x9.8p+4"); + EXPECT_EQ(format("%.2La", on_boundary_odd), "0x9.80p+4"); + EXPECT_EQ(format("%.3La", on_boundary_odd), "0x9.800p+4"); + EXPECT_EQ(format("%.4La", on_boundary_odd), "0x9.8000p+4"); + EXPECT_EQ(format("%.5La", on_boundary_odd), "0x9.80000p+4"); + EXPECT_EQ(format("%.6La", on_boundary_odd), "0x9.800000p+4"); + + // 0x8.80001p+24 + const long double slightly_over = 142606352.0; + EXPECT_EQ(format("%.0La", slightly_over), "0x9p+24"); + EXPECT_EQ(format("%.1La", slightly_over), "0x8.8p+24"); + EXPECT_EQ(format("%.2La", slightly_over), "0x8.80p+24"); + EXPECT_EQ(format("%.3La", slightly_over), "0x8.800p+24"); + EXPECT_EQ(format("%.4La", slightly_over), "0x8.8000p+24"); + EXPECT_EQ(format("%.5La", slightly_over), "0x8.80001p+24"); + EXPECT_EQ(format("%.6La", slightly_over), 
"0x8.800010p+24"); + + // 0x8.7ffffp+24 + const long double slightly_under = 142606320.0; + EXPECT_EQ(format("%.0La", slightly_under), "0x8p+24"); + EXPECT_EQ(format("%.1La", slightly_under), "0x8.8p+24"); + EXPECT_EQ(format("%.2La", slightly_under), "0x8.80p+24"); + EXPECT_EQ(format("%.3La", slightly_under), "0x8.800p+24"); + EXPECT_EQ(format("%.4La", slightly_under), "0x8.8000p+24"); + EXPECT_EQ(format("%.5La", slightly_under), "0x8.7ffffp+24"); + EXPECT_EQ(format("%.6La", slightly_under), "0x8.7ffff0p+24"); + EXPECT_EQ(format("%.7La", slightly_under), "0x8.7ffff00p+24"); + + // 0xc.0828384858688000p+128 + const long double eights = 4094231060438608800781871108094404067328.0; + EXPECT_EQ(format("%.0La", eights), "0xcp+128"); + EXPECT_EQ(format("%.1La", eights), "0xc.1p+128"); + EXPECT_EQ(format("%.2La", eights), "0xc.08p+128"); + EXPECT_EQ(format("%.3La", eights), "0xc.083p+128"); + EXPECT_EQ(format("%.4La", eights), "0xc.0828p+128"); + EXPECT_EQ(format("%.5La", eights), "0xc.08284p+128"); + EXPECT_EQ(format("%.6La", eights), "0xc.082838p+128"); + EXPECT_EQ(format("%.7La", eights), "0xc.0828385p+128"); + EXPECT_EQ(format("%.8La", eights), "0xc.08283848p+128"); + EXPECT_EQ(format("%.9La", eights), "0xc.082838486p+128"); + EXPECT_EQ(format("%.10La", eights), "0xc.0828384858p+128"); + EXPECT_EQ(format("%.11La", eights), "0xc.08283848587p+128"); + EXPECT_EQ(format("%.12La", eights), "0xc.082838485868p+128"); + EXPECT_EQ(format("%.13La", eights), "0xc.0828384858688p+128"); + EXPECT_EQ(format("%.14La", eights), "0xc.08283848586880p+128"); + EXPECT_EQ(format("%.15La", eights), "0xc.082838485868800p+128"); + EXPECT_EQ(format("%.16La", eights), "0xc.0828384858688000p+128"); +} + +// We don't actually store the results. This is just to exercise the rest of the +// machinery. +struct NullSink { + friend void AbslFormatFlush(NullSink *sink, string_view str) {} +}; + +template <typename... T> +bool FormatWithNullSink(absl::string_view fmt, const T &... 
a) { + NullSink sink; + FormatArgImpl args[] = {FormatArgImpl(a)...}; + return FormatUntyped(&sink, UntypedFormatSpecImpl(fmt), absl::MakeSpan(args)); +} + +TEST_F(FormatConvertTest, ExtremeWidthPrecision) { + for (const char *fmt : {"f"}) { + for (double d : {1e-100, 1.0, 1e100}) { + constexpr int max = std::numeric_limits<int>::max(); + EXPECT_TRUE(FormatWithNullSink(std::string("%.*") + fmt, max, d)); + EXPECT_TRUE(FormatWithNullSink(std::string("%1.*") + fmt, max, d)); + EXPECT_TRUE(FormatWithNullSink(std::string("%*") + fmt, max, d)); + EXPECT_TRUE(FormatWithNullSink(std::string("%*.*") + fmt, max, max, d)); } } } TEST_F(FormatConvertTest, LongDouble) { - const char *const kFormats[] = {"%", "%.3", "%8.5", "%9", + const NativePrintfTraits &native_traits = VerifyNativeImplementation(); + const char *const kFormats[] = {"%", "%.3", "%8.5", "%9", "%.5000", "%.60", "%+", "% ", "%-10"}; - // This value is not representable in double, but it is in long double that - // uses the extended format. - // This is to verify that we are not truncating the value mistakenly through a - // double. - long double very_precise = 10000000000000000.25L; - std::vector<long double> doubles = { 0.0, -0.0, - very_precise, - 1 / very_precise, std::numeric_limits<long double>::max(), -std::numeric_limits<long double>::max(), std::numeric_limits<long double>::min(), @@ -556,28 +1081,73 @@ TEST_F(FormatConvertTest, LongDouble) { std::numeric_limits<long double>::infinity(), -std::numeric_limits<long double>::infinity()}; + for (long double base : {1.L, 12.L, 123.L, 1234.L, 12345.L, 123456.L, + 1234567.L, 12345678.L, 123456789.L, 1234567890.L, + 12345678901.L, 123456789012.L, 1234567890123.L, + // This value is not representable in double, but it + // is in long double that uses the extended format. + // This is to verify that we are not truncating the + // value mistakenly through a double. 
+ 10000000000000000.25L}) { + for (int exp : {-1000, -500, 0, 500, 1000}) { + for (int sign : {1, -1}) { + doubles.push_back(sign * std::ldexp(base, exp)); + doubles.push_back(sign / std::ldexp(base, exp)); + } + } + } + + // Regression tests + // + // Using a string literal because not all platforms support hex literals or it + // might be out of range. + doubles.push_back(std::strtold("-0xf.ffffffb5feafffbp-16324L", nullptr)); + for (const char *fmt : kFormats) { for (char f : {'f', 'F', // 'g', 'G', // 'a', 'A', // 'e', 'E'}) { std::string fmt_str = std::string(fmt) + 'L' + f; + + if (fmt == absl::string_view("%.5000") && f != 'f' && f != 'F' && + f != 'a' && f != 'A') { + // This particular test takes way too long with snprintf. + // Disable for the case we are not implementing natively. + continue; + } + + if (f == 'a' || f == 'A') { + if (!native_traits.hex_float_has_glibc_rounding || + !native_traits.hex_float_optimizes_leading_digit_bit_count) { + continue; + } + } + for (auto d : doubles) { FormatArgImpl arg(d); UntypedFormatSpecImpl format(fmt_str); + std::string result = FormatPack(format, {&arg, 1}); + +#ifdef _MSC_VER + // MSVC has a different rounding policy than us so we can't test our + // implementation against the native one there. + continue; +#endif // _MSC_VER + // We use ASSERT_EQ here because failures are usually correlated and a // bug would print way too many failed expectations causing the test to // time out. 
- ASSERT_EQ(StrPrint(fmt_str.c_str(), d), - FormatPack(format, {&arg, 1})) + ASSERT_EQ(StrPrint(fmt_str.c_str(), d), result) << fmt_str << " " << StrPrint("%.18Lg", d) << " " - << StrPrint("%.999Lf", d); + << StrPrint("%La", d) << " " << StrPrint("%.1080Lf", d); } } } } -TEST_F(FormatConvertTest, IntAsFloat) { +TEST_F(FormatConvertTest, IntAsDouble) { + const NativePrintfTraits &native_traits = VerifyNativeImplementation(); const int kMin = std::numeric_limits<int>::min(); const int kMax = std::numeric_limits<int>::max(); const int ia[] = { @@ -593,14 +1163,17 @@ TEST_F(FormatConvertTest, IntAsFloat) { const char *fmt; }; const double dx = static_cast<double>(fx); - const Expectation kExpect[] = { - { __LINE__, StrPrint("%f", dx), "%f" }, - { __LINE__, StrPrint("%12f", dx), "%12f" }, - { __LINE__, StrPrint("%.12f", dx), "%.12f" }, - { __LINE__, StrPrint("%12a", dx), "%12a" }, - { __LINE__, StrPrint("%.12a", dx), "%.12a" }, + std::vector<Expectation> expect = { + {__LINE__, StrPrint("%f", dx), "%f"}, + {__LINE__, StrPrint("%12f", dx), "%12f"}, + {__LINE__, StrPrint("%.12f", dx), "%.12f"}, + {__LINE__, StrPrint("%.12a", dx), "%.12a"}, }; - for (const Expectation &e : kExpect) { + if (native_traits.hex_float_uses_minimal_precision_when_not_specified) { + Expectation ex = {__LINE__, StrPrint("%12a", dx), "%12a"}; + expect.push_back(ex); + } + for (const Expectation &e : expect) { SCOPED_TRACE(e.line); SCOPED_TRACE(e.fmt); UntypedFormatSpecImpl format(e.fmt); @@ -645,6 +1218,25 @@ TEST_F(FormatConvertTest, ExpectedFailures) { EXPECT_TRUE(FormatFails("%*d", "")); } +// Sanity check to make sure that we are testing what we think we're testing on +// e.g. the x86_64+glibc platform. 
+TEST_F(FormatConvertTest, GlibcHasCorrectTraits) { +#if !defined(__GLIBC__) || !defined(__x86_64__) + return; +#endif + const NativePrintfTraits &native_traits = VerifyNativeImplementation(); + // If one of the following tests break then it is either because the above PP + // macro guards failed to exclude a new platform (likely) or because something + // has changed in the implemention of glibc sprintf float formatting behavior. + // If the latter, then the code that computes these flags needs to be + // revisited and/or possibly the StrFormat implementation. + EXPECT_TRUE(native_traits.hex_float_has_glibc_rounding); + EXPECT_TRUE(native_traits.hex_float_prefers_denormal_repr); + EXPECT_TRUE( + native_traits.hex_float_uses_minimal_precision_when_not_specified); + EXPECT_TRUE(native_traits.hex_float_optimizes_leading_digit_bit_count); +} + } // namespace } // namespace str_format_internal ABSL_NAMESPACE_END diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/extension.cc b/third_party/abseil-cpp/absl/strings/internal/str_format/extension.cc index 2e5bc2ce0b..484f6ebfc1 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/extension.cc +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/extension.cc @@ -23,26 +23,50 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { -std::string Flags::ToString() const { +std::string FlagsToString(Flags v) { std::string s; - s.append(left ? "-" : ""); - s.append(show_pos ? "+" : ""); - s.append(sign_col ? " " : ""); - s.append(alt ? "#" : ""); - s.append(zero ? "0" : ""); + s.append(FlagsContains(v, Flags::kLeft) ? "-" : ""); + s.append(FlagsContains(v, Flags::kShowPos) ? "+" : ""); + s.append(FlagsContains(v, Flags::kSignCol) ? " " : ""); + s.append(FlagsContains(v, Flags::kAlt) ? "#" : ""); + s.append(FlagsContains(v, Flags::kZero) ? 
"0" : ""); return s; } -bool FormatSinkImpl::PutPaddedString(string_view v, int w, int p, bool l) { +#define ABSL_INTERNAL_X_VAL(id) \ + constexpr absl::FormatConversionChar FormatConversionCharInternal::id; +ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, ) +#undef ABSL_INTERNAL_X_VAL +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr absl::FormatConversionChar FormatConversionCharInternal::kNone; + +#define ABSL_INTERNAL_CHAR_SET_CASE(c) \ + constexpr FormatConversionCharSet FormatConversionCharSetInternal::c; +ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, ) +#undef ABSL_INTERNAL_CHAR_SET_CASE + +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr FormatConversionCharSet FormatConversionCharSetInternal::kStar; +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr FormatConversionCharSet FormatConversionCharSetInternal::kIntegral; +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr FormatConversionCharSet FormatConversionCharSetInternal::kFloating; +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr FormatConversionCharSet FormatConversionCharSetInternal::kNumeric; +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr FormatConversionCharSet FormatConversionCharSetInternal::kPointer; + +bool FormatSinkImpl::PutPaddedString(string_view value, int width, + int precision, bool left) { size_t space_remaining = 0; - if (w >= 0) space_remaining = w; - size_t n = v.size(); - if (p >= 0) n = std::min(n, static_cast<size_t>(p)); - string_view shown(v.data(), n); + if (width >= 0) space_remaining = width; + size_t n = value.size(); + if (precision >= 0) n = std::min(n, static_cast<size_t>(precision)); + string_view shown(value.data(), n); space_remaining = Excess(shown.size(), space_remaining); - if (!l) Append(space_remaining, ' '); + if (!left) Append(space_remaining, ' '); Append(shown); - if (l) Append(space_remaining, ' '); + if (left) Append(space_remaining, ' 
'); return true; } diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/extension.h b/third_party/abseil-cpp/absl/strings/internal/str_format/extension.h index d1665753d1..55cbb56d0a 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/extension.h +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/extension.h @@ -24,11 +24,16 @@ #include "absl/base/config.h" #include "absl/base/port.h" +#include "absl/meta/type_traits.h" #include "absl/strings/internal/str_format/output.h" #include "absl/strings/string_view.h" namespace absl { ABSL_NAMESPACE_BEGIN + +enum class FormatConversionChar : uint8_t; +enum class FormatConversionCharSet : uint64_t; + namespace str_format_internal { class FormatRawSinkImpl { @@ -102,7 +107,7 @@ class FormatSinkImpl { size_t size() const { return size_; } // Put 'v' to 'sink' with specified width, precision, and left flag. - bool PutPaddedString(string_view v, int w, int p, bool l); + bool PutPaddedString(string_view v, int width, int precision, bool left); template <typename T> T Wrap() { @@ -123,23 +128,37 @@ class FormatSinkImpl { char buf_[1024]; }; -struct Flags { - bool basic : 1; // fastest conversion: no flags, width, or precision - bool left : 1; // "-" - bool show_pos : 1; // "+" - bool sign_col : 1; // " " - bool alt : 1; // "#" - bool zero : 1; // "0" - std::string ToString() const; - friend std::ostream& operator<<(std::ostream& os, const Flags& v) { - return os << v.ToString(); - } +enum class Flags : uint8_t { + kBasic = 0, + kLeft = 1 << 0, + kShowPos = 1 << 1, + kSignCol = 1 << 2, + kAlt = 1 << 3, + kZero = 1 << 4, + // This is not a real flag. It just exists to turn off kBasic when no other + // flags are set. This is for when width/precision are specified. 
+ kNonBasic = 1 << 5, }; +constexpr Flags operator|(Flags a, Flags b) { + return static_cast<Flags>(static_cast<uint8_t>(a) | static_cast<uint8_t>(b)); +} + +constexpr bool FlagsContains(Flags haystack, Flags needle) { + return (static_cast<uint8_t>(haystack) & static_cast<uint8_t>(needle)) == + static_cast<uint8_t>(needle); +} + +std::string FlagsToString(Flags v); + +inline std::ostream& operator<<(std::ostream& os, Flags v) { + return os << FlagsToString(v); +} + // clang-format off #define ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(X_VAL, X_SEP) \ /* text */ \ - X_VAL(c) X_SEP X_VAL(C) X_SEP X_VAL(s) X_SEP X_VAL(S) X_SEP \ + X_VAL(c) X_SEP X_VAL(s) X_SEP \ /* ints */ \ X_VAL(d) X_SEP X_VAL(i) X_SEP X_VAL(o) X_SEP \ X_VAL(u) X_SEP X_VAL(x) X_SEP X_VAL(X) X_SEP \ @@ -148,14 +167,39 @@ struct Flags { X_VAL(g) X_SEP X_VAL(G) X_SEP X_VAL(a) X_SEP X_VAL(A) X_SEP \ /* misc */ \ X_VAL(n) X_SEP X_VAL(p) +// clang-format on + +// This type should not be referenced, it exists only to provide labels +// internally that match the values declared in FormatConversionChar in +// str_format.h. This is meant to allow internal libraries to use the same +// declared interface type as the public interface +// (absl::StrFormatConversionChar) while keeping the definition in a public +// header. +// Internal libraries should use the form +// `FormatConversionCharInternal::c`, `FormatConversionCharInternal::kNone` for +// comparisons. Use in switch statements is not recommended due to a bug in how +// gcc 4.9 -Wswitch handles declared but undefined enums. 
+struct FormatConversionCharInternal { + FormatConversionCharInternal() = delete; -enum class FormatConversionChar : uint8_t { - c, C, s, S, // text + private: + // clang-format off + enum class Enum : uint8_t { + c, s, // text d, i, o, u, x, X, // int f, F, e, E, g, G, a, A, // float n, p, // misc - kNone, - none = kNone + kNone + }; + // clang-format on + public: +#define ABSL_INTERNAL_X_VAL(id) \ + static constexpr FormatConversionChar id = \ + static_cast<FormatConversionChar>(Enum::id); + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, ) +#undef ABSL_INTERNAL_X_VAL + static constexpr FormatConversionChar kNone = + static_cast<FormatConversionChar>(Enum::kNone); }; // clang-format on @@ -163,95 +207,56 @@ inline FormatConversionChar FormatConversionCharFromChar(char c) { switch (c) { #define ABSL_INTERNAL_X_VAL(id) \ case #id[0]: \ - return FormatConversionChar::id; + return FormatConversionCharInternal::id; ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, ) #undef ABSL_INTERNAL_X_VAL } - return FormatConversionChar::kNone; -} - -inline int FormatConversionCharRadix(FormatConversionChar c) { - switch (c) { - case FormatConversionChar::x: - case FormatConversionChar::X: - case FormatConversionChar::a: - case FormatConversionChar::A: - case FormatConversionChar::p: - return 16; - case FormatConversionChar::o: - return 8; - default: - return 10; - } + return FormatConversionCharInternal::kNone; } inline bool FormatConversionCharIsUpper(FormatConversionChar c) { - switch (c) { - case FormatConversionChar::X: - case FormatConversionChar::F: - case FormatConversionChar::E: - case FormatConversionChar::G: - case FormatConversionChar::A: - return true; - default: - return false; - } -} - -inline bool FormatConversionCharIsSigned(FormatConversionChar c) { - switch (c) { - case FormatConversionChar::d: - case FormatConversionChar::i: - return true; - default: - return false; - } -} - -inline bool FormatConversionCharIsIntegral(FormatConversionChar 
c) { - switch (c) { - case FormatConversionChar::d: - case FormatConversionChar::i: - case FormatConversionChar::u: - case FormatConversionChar::o: - case FormatConversionChar::x: - case FormatConversionChar::X: - return true; - default: - return false; + if (c == FormatConversionCharInternal::X || + c == FormatConversionCharInternal::F || + c == FormatConversionCharInternal::E || + c == FormatConversionCharInternal::G || + c == FormatConversionCharInternal::A) { + return true; + } else { + return false; } } inline bool FormatConversionCharIsFloat(FormatConversionChar c) { - switch (c) { - case FormatConversionChar::a: - case FormatConversionChar::e: - case FormatConversionChar::f: - case FormatConversionChar::g: - case FormatConversionChar::A: - case FormatConversionChar::E: - case FormatConversionChar::F: - case FormatConversionChar::G: - return true; - default: - return false; + if (c == FormatConversionCharInternal::a || + c == FormatConversionCharInternal::e || + c == FormatConversionCharInternal::f || + c == FormatConversionCharInternal::g || + c == FormatConversionCharInternal::A || + c == FormatConversionCharInternal::E || + c == FormatConversionCharInternal::F || + c == FormatConversionCharInternal::G) { + return true; + } else { + return false; } } inline char FormatConversionCharToChar(FormatConversionChar c) { - switch (c) { -#define ABSL_INTERNAL_X_VAL(e) \ - case FormatConversionChar::e: \ + if (c == FormatConversionCharInternal::kNone) { + return '\0'; + +#define ABSL_INTERNAL_X_VAL(e) \ + } else if (c == FormatConversionCharInternal::e) { \ return #e[0]; #define ABSL_INTERNAL_X_SEP - ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, - ABSL_INTERNAL_X_SEP) - case FormatConversionChar::kNone: - return '\0'; + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, + ABSL_INTERNAL_X_SEP) + } else { + return '\0'; + } + #undef ABSL_INTERNAL_X_VAL #undef ABSL_INTERNAL_X_SEP - } - return '\0'; } // The associated char. 
@@ -263,20 +268,24 @@ inline std::ostream& operator<<(std::ostream& os, FormatConversionChar v) { struct FormatConversionSpecImplFriend; -class FormatConversionSpec { +class FormatConversionSpecImpl { public: // Width and precison are not specified, no flags are set. - bool is_basic() const { return flags_.basic; } - bool has_left_flag() const { return flags_.left; } - bool has_show_pos_flag() const { return flags_.show_pos; } - bool has_sign_col_flag() const { return flags_.sign_col; } - bool has_alt_flag() const { return flags_.alt; } - bool has_zero_flag() const { return flags_.zero; } + bool is_basic() const { return flags_ == Flags::kBasic; } + bool has_left_flag() const { return FlagsContains(flags_, Flags::kLeft); } + bool has_show_pos_flag() const { + return FlagsContains(flags_, Flags::kShowPos); + } + bool has_sign_col_flag() const { + return FlagsContains(flags_, Flags::kSignCol); + } + bool has_alt_flag() const { return FlagsContains(flags_, Flags::kAlt); } + bool has_zero_flag() const { return FlagsContains(flags_, Flags::kZero); } FormatConversionChar conversion_char() const { // Keep this field first in the struct . It generates better code when // accessing it when ConversionSpec is passed by value in registers. - static_assert(offsetof(FormatConversionSpec, conv_) == 0, ""); + static_assert(offsetof(FormatConversionSpecImpl, conv_) == 0, ""); return conv_; } @@ -287,41 +296,65 @@ class FormatConversionSpec { // negative value. int precision() const { return precision_; } - // Deprecated (use has_x_flag() instead). 
- Flags flags() const { return flags_; } - // Deprecated - FormatConversionChar conv() const { return conversion_char(); } + template <typename T> + T Wrap() { + return T(*this); + } private: friend struct str_format_internal::FormatConversionSpecImplFriend; - FormatConversionChar conv_ = FormatConversionChar::kNone; + FormatConversionChar conv_ = FormatConversionCharInternal::kNone; Flags flags_; int width_; int precision_; }; struct FormatConversionSpecImplFriend final { - static void SetFlags(Flags f, FormatConversionSpec* conv) { + static void SetFlags(Flags f, FormatConversionSpecImpl* conv) { conv->flags_ = f; } static void SetConversionChar(FormatConversionChar c, - FormatConversionSpec* conv) { + FormatConversionSpecImpl* conv) { conv->conv_ = c; } - static void SetWidth(int w, FormatConversionSpec* conv) { conv->width_ = w; } - static void SetPrecision(int p, FormatConversionSpec* conv) { + static void SetWidth(int w, FormatConversionSpecImpl* conv) { + conv->width_ = w; + } + static void SetPrecision(int p, FormatConversionSpecImpl* conv) { conv->precision_ = p; } - static std::string FlagsToString(const FormatConversionSpec& spec) { - return spec.flags_.ToString(); + static std::string FlagsToString(const FormatConversionSpecImpl& spec) { + return str_format_internal::FlagsToString(spec.flags_); } }; -constexpr uint64_t FormatConversionCharToConvValue(char conv) { +// Type safe OR operator. +// We need this for two reasons: +// 1. operator| on enums makes them decay to integers and the result is an +// integer. We need the result to stay as an enum. +// 2. We use "enum class" which would not work even if we accepted the decay. +constexpr FormatConversionCharSet FormatConversionCharSetUnion( + FormatConversionCharSet a) { + return a; +} + +template <typename... CharSet> +constexpr FormatConversionCharSet FormatConversionCharSetUnion( + FormatConversionCharSet a, CharSet... 
rest) { + return static_cast<FormatConversionCharSet>( + static_cast<uint64_t>(a) | + static_cast<uint64_t>(FormatConversionCharSetUnion(rest...))); +} + +constexpr uint64_t FormatConversionCharToConvInt(FormatConversionChar c) { + return uint64_t{1} << (1 + static_cast<uint8_t>(c)); +} + +constexpr uint64_t FormatConversionCharToConvInt(char conv) { return -#define ABSL_INTERNAL_CHAR_SET_CASE(c) \ - conv == #c[0] \ - ? (uint64_t{1} << (1 + static_cast<uint8_t>(FormatConversionChar::c))) \ +#define ABSL_INTERNAL_CHAR_SET_CASE(c) \ + conv == #c[0] \ + ? FormatConversionCharToConvInt(FormatConversionCharInternal::c) \ : ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, ) #undef ABSL_INTERNAL_CHAR_SET_CASE @@ -330,28 +363,29 @@ constexpr uint64_t FormatConversionCharToConvValue(char conv) { : 0; } -enum class FormatConversionCharSet : uint64_t { -#define ABSL_INTERNAL_CHAR_SET_CASE(c) \ - c = FormatConversionCharToConvValue(#c[0]), +constexpr FormatConversionCharSet FormatConversionCharToConvValue(char conv) { + return static_cast<FormatConversionCharSet>( + FormatConversionCharToConvInt(conv)); +} + +struct FormatConversionCharSetInternal { +#define ABSL_INTERNAL_CHAR_SET_CASE(c) \ + static constexpr FormatConversionCharSet c = \ + FormatConversionCharToConvValue(#c[0]); ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, ) #undef ABSL_INTERNAL_CHAR_SET_CASE // Used for width/precision '*' specification. 
- kStar = FormatConversionCharToConvValue('*'), - // Some predefined values: - kIntegral = d | i | u | o | x | X, - kFloating = a | e | f | g | A | E | F | G, - kNumeric = kIntegral | kFloating, - kString = s, - kPointer = p, - - // The following are deprecated - star = kStar, - integral = kIntegral, - floating = kFloating, - numeric = kNumeric, - string = kString, - pointer = kPointer + static constexpr FormatConversionCharSet kStar = + FormatConversionCharToConvValue('*'); + + static constexpr FormatConversionCharSet kIntegral = + FormatConversionCharSetUnion(d, i, u, o, x, X); + static constexpr FormatConversionCharSet kFloating = + FormatConversionCharSetUnion(a, e, f, g, A, E, F, G); + static constexpr FormatConversionCharSet kNumeric = + FormatConversionCharSetUnion(kIntegral, kFloating); + static constexpr FormatConversionCharSet kPointer = p; }; // Type safe OR operator. @@ -361,18 +395,29 @@ enum class FormatConversionCharSet : uint64_t { // 2. We use "enum class" which would not work even if we accepted the decay. constexpr FormatConversionCharSet operator|(FormatConversionCharSet a, FormatConversionCharSet b) { - return FormatConversionCharSet(static_cast<uint64_t>(a) | - static_cast<uint64_t>(b)); + return FormatConversionCharSetUnion(a, b); } +// Overloaded conversion functions to support absl::ParsedFormat. // Get a conversion with a single character in it. -constexpr FormatConversionCharSet ConversionCharToConv(char c) { - return FormatConversionCharSet(FormatConversionCharToConvValue(c)); +constexpr FormatConversionCharSet ToFormatConversionCharSet(char c) { + return static_cast<FormatConversionCharSet>( + FormatConversionCharToConvValue(c)); } +// Get a conversion with a single character in it. +constexpr FormatConversionCharSet ToFormatConversionCharSet( + FormatConversionCharSet c) { + return c; +} + +template <typename T> +void ToFormatConversionCharSet(T) = delete; + // Checks whether `c` exists in `set`. 
constexpr bool Contains(FormatConversionCharSet set, char c) { - return (static_cast<uint64_t>(set) & FormatConversionCharToConvValue(c)) != 0; + return (static_cast<uint64_t>(set) & + static_cast<uint64_t>(FormatConversionCharToConvValue(c))) != 0; } // Checks whether all the characters in `c` are contained in `set` @@ -382,31 +427,16 @@ constexpr bool Contains(FormatConversionCharSet set, static_cast<uint64_t>(c); } -// Return type of the AbslFormatConvert() functions. -// The FormatConversionCharSet template parameter is used to inform the -// framework of what conversion characters are supported by that -// AbslFormatConvert routine. -template <FormatConversionCharSet C> -struct FormatConvertResult { - static constexpr FormatConversionCharSet kConv = C; - bool value; -}; - -template <FormatConversionCharSet C> -constexpr FormatConversionCharSet FormatConvertResult<C>::kConv; +// Checks whether `c` exists in `set`. +constexpr bool Contains(FormatConversionCharSet set, FormatConversionChar c) { + return (static_cast<uint64_t>(set) & FormatConversionCharToConvInt(c)) != 0; +} // Return capacity - used, clipped to a minimum of 0. inline size_t Excess(size_t used, size_t capacity) { return used < capacity ? capacity - used : 0; } -// Type alias for use during migration.
-using ConversionChar = FormatConversionChar; -using ConversionSpec = FormatConversionSpec; -using Conv = FormatConversionCharSet; -template <FormatConversionCharSet C> -using ConvertResult = FormatConvertResult<C>; - } // namespace str_format_internal ABSL_NAMESPACE_END diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/extension_test.cc b/third_party/abseil-cpp/absl/strings/internal/str_format/extension_test.cc index 4e23fefbd5..1c93fdb1c7 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/extension_test.cc +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/extension_test.cc @@ -19,9 +19,26 @@ #include <random> #include <string> +#include "gtest/gtest.h" #include "absl/strings/str_format.h" +#include "absl/strings/string_view.h" -#include "gtest/gtest.h" +namespace my_namespace { +class UserDefinedType { + public: + UserDefinedType() = default; + + void Append(absl::string_view str) { value_.append(str.data(), str.size()); } + const std::string& Value() const { return value_; } + + friend void AbslFormatFlush(UserDefinedType* x, absl::string_view str) { + x->Append(str); + } + + private: + std::string value_; +}; +} // namespace my_namespace namespace { @@ -63,4 +80,19 @@ TEST(FormatExtensionTest, SinkAppendChars) { EXPECT_EQ(actual, expected); } } + +TEST(FormatExtensionTest, VerifyEnumEquality) { +#define X_VAL(id) \ + EXPECT_EQ(absl::FormatConversionChar::id, \ + absl::str_format_internal::FormatConversionCharInternal::id); + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(X_VAL, ); +#undef X_VAL + +#define X_VAL(id) \ + EXPECT_EQ(absl::FormatConversionCharSet::id, \ + absl::str_format_internal::FormatConversionCharSetInternal::id); + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(X_VAL, ); +#undef X_VAL +} + } // namespace diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/float_conversion.cc b/third_party/abseil-cpp/absl/strings/internal/str_format/float_conversion.cc index d4c647c3ed..b1c4068475 100644 --- 
a/third_party/abseil-cpp/absl/strings/internal/str_format/float_conversion.cc +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/float_conversion.cc @@ -1,12 +1,38 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include "absl/strings/internal/str_format/float_conversion.h" #include <string.h> + #include <algorithm> #include <cassert> #include <cmath> +#include <limits> #include <string> +#include "absl/base/attributes.h" #include "absl/base/config.h" +#include "absl/base/optimization.h" +#include "absl/functional/function_ref.h" +#include "absl/meta/type_traits.h" +#include "absl/numeric/bits.h" +#include "absl/numeric/int128.h" +#include "absl/numeric/internal/representation.h" +#include "absl/strings/numbers.h" +#include "absl/types/optional.h" +#include "absl/types/span.h" namespace absl { ABSL_NAMESPACE_BEGIN @@ -14,13 +40,905 @@ namespace str_format_internal { namespace { -char *CopyStringTo(string_view v, char *out) { +using ::absl::numeric_internal::IsDoubleDouble; + +// The code below wants to avoid heap allocations. +// To do so it needs to allocate memory on the stack. +// `StackArray` will allocate memory on the stack in the form of a uint32_t +// array and call the provided callback with said memory. +// It will allocate memory in increments of 512 bytes. We could allocate the +// largest needed unconditionally, but that is more than we need in most of +// cases. 
This way we use less stack in the common cases. +class StackArray { + using Func = absl::FunctionRef<void(absl::Span<uint32_t>)>; + static constexpr size_t kStep = 512 / sizeof(uint32_t); + // 5 steps is 2560 bytes, which is enough to hold a long double with the + // largest/smallest exponents. + // The operations below will static_assert their particular maximum. + static constexpr size_t kNumSteps = 5; + + // We do not want this function to be inlined. + // Otherwise the caller will allocate the stack space unnecessarily for all + // the variants even though it only calls one. + template <size_t steps> + ABSL_ATTRIBUTE_NOINLINE static void RunWithCapacityImpl(Func f) { + uint32_t values[steps * kStep]{}; + f(absl::MakeSpan(values)); + } + + public: + static constexpr size_t kMaxCapacity = kStep * kNumSteps; + + static void RunWithCapacity(size_t capacity, Func f) { + assert(capacity <= kMaxCapacity); + const size_t step = (capacity + kStep - 1) / kStep; + assert(step <= kNumSteps); + switch (step) { + case 1: + return RunWithCapacityImpl<1>(f); + case 2: + return RunWithCapacityImpl<2>(f); + case 3: + return RunWithCapacityImpl<3>(f); + case 4: + return RunWithCapacityImpl<4>(f); + case 5: + return RunWithCapacityImpl<5>(f); + } + + assert(false && "Invalid capacity"); + } +}; + +// Calculates `10 * (*v) + carry` and stores the result in `*v` and returns +// the carry. +template <typename Int> +inline Int MultiplyBy10WithCarry(Int *v, Int carry) { + using BiggerInt = absl::conditional_t<sizeof(Int) == 4, uint64_t, uint128>; + BiggerInt tmp = 10 * static_cast<BiggerInt>(*v) + carry; + *v = static_cast<Int>(tmp); + return static_cast<Int>(tmp >> (sizeof(Int) * 8)); +} + +// Calculates `(2^64 * carry + *v) / 10`. +// Stores the quotient in `*v` and returns the remainder. 
+// Requires: `0 <= carry <= 9` +inline uint64_t DivideBy10WithCarry(uint64_t *v, uint64_t carry) { + constexpr uint64_t divisor = 10; + // 2^64 / divisor = chunk_quotient + chunk_remainder / divisor + constexpr uint64_t chunk_quotient = (uint64_t{1} << 63) / (divisor / 2); + constexpr uint64_t chunk_remainder = uint64_t{} - chunk_quotient * divisor; + + const uint64_t mod = *v % divisor; + const uint64_t next_carry = chunk_remainder * carry + mod; + *v = *v / divisor + carry * chunk_quotient + next_carry / divisor; + return next_carry % divisor; +} + +using MaxFloatType = + typename std::conditional<IsDoubleDouble(), double, long double>::type; + +// Generates the decimal representation for an integer of the form `v * 2^exp`, +// where `v` and `exp` are both positive integers. +// It generates the digits from the left (ie the most significant digit first) +// to allow for direct printing into the sink. +// +// Requires `0 <= exp` and `exp <= numeric_limits<MaxFloatType>::max_exponent`. +class BinaryToDecimal { + static constexpr int ChunksNeeded(int exp) { + // We will left shift a uint128 by `exp` bits, so we need `128+exp` total + // bits. Round up to 32. + // See constructor for details about adding `10%` to the value. + return (128 + exp + 31) / 32 * 11 / 10; + } + + public: + // Run the conversion for `v * 2^exp` and call `f(binary_to_decimal)`. + // This function will allocate enough stack space to perform the conversion. 
+ static void RunConversion(uint128 v, int exp, + absl::FunctionRef<void(BinaryToDecimal)> f) { + assert(exp > 0); + assert(exp <= std::numeric_limits<MaxFloatType>::max_exponent); + static_assert( + static_cast<int>(StackArray::kMaxCapacity) >= + ChunksNeeded(std::numeric_limits<MaxFloatType>::max_exponent), + ""); + + StackArray::RunWithCapacity( + ChunksNeeded(exp), + [=](absl::Span<uint32_t> input) { f(BinaryToDecimal(input, v, exp)); }); + } + + int TotalDigits() const { + return static_cast<int>((decimal_end_ - decimal_start_) * kDigitsPerChunk + + CurrentDigits().size()); + } + + // See the current block of digits. + absl::string_view CurrentDigits() const { + return absl::string_view(digits_ + kDigitsPerChunk - size_, size_); + } + + // Advance the current view of digits. + // Returns `false` when no more digits are available. + bool AdvanceDigits() { + if (decimal_start_ >= decimal_end_) return false; + + uint32_t w = data_[decimal_start_++]; + for (size_ = 0; size_ < kDigitsPerChunk; w /= 10) { + digits_[kDigitsPerChunk - ++size_] = w % 10 + '0'; + } + return true; + } + + private: + BinaryToDecimal(absl::Span<uint32_t> data, uint128 v, int exp) : data_(data) { + // We need to print the digits directly into the sink object without + // buffering them all first. To do this we need two things: + // - to know the total number of digits to do padding when necessary + // - to generate the decimal digits from the left. + // + // In order to do this, we do a two pass conversion. + // On the first pass we convert the binary representation of the value into + // a decimal representation in which each uint32_t chunk holds up to 9 + // decimal digits. In the second pass we take each decimal-holding-uint32_t + // value and generate the ascii decimal digits into `digits_`. + // + // The binary and decimal representations actually share the same memory + // region. 
As we go converting the chunks from binary to decimal we free + // them up and reuse them for the decimal representation. One caveat is that + // the decimal representation is around 7% less efficient in space than the + // binary one. We allocate an extra 10% memory to account for this. See + // ChunksNeeded for this calculation. + int chunk_index = exp / 32; + decimal_start_ = decimal_end_ = ChunksNeeded(exp); + const int offset = exp % 32; + // Left shift v by exp bits. + data_[chunk_index] = static_cast<uint32_t>(v << offset); + for (v >>= (32 - offset); v; v >>= 32) + data_[++chunk_index] = static_cast<uint32_t>(v); + + while (chunk_index >= 0) { + // While we have more than one chunk available, go in steps of 1e9. + // `data_[chunk_index]` holds the highest non-zero binary chunk, so keep + // the variable updated. + uint32_t carry = 0; + for (int i = chunk_index; i >= 0; --i) { + uint64_t tmp = uint64_t{data_[i]} + (uint64_t{carry} << 32); + data_[i] = static_cast<uint32_t>(tmp / uint64_t{1000000000}); + carry = static_cast<uint32_t>(tmp % uint64_t{1000000000}); + } + + // If the highest chunk is now empty, remove it from view. + if (data_[chunk_index] == 0) --chunk_index; + + --decimal_start_; + assert(decimal_start_ != chunk_index); + data_[decimal_start_] = carry; + } + + // Fill the first set of digits. The first chunk might not be complete, so + // handle differently. + for (uint32_t first = data_[decimal_start_++]; first != 0; first /= 10) { + digits_[kDigitsPerChunk - ++size_] = first % 10 + '0'; + } + } + + private: + static constexpr int kDigitsPerChunk = 9; + + int decimal_start_; + int decimal_end_; + + char digits_[kDigitsPerChunk]; + int size_ = 0; + + absl::Span<uint32_t> data_; +}; + +// Converts a value of the form `x * 2^-exp` into a sequence of decimal digits. +// Requires `-exp < 0` and +// `-exp >= limits<MaxFloatType>::min_exponent - limits<MaxFloatType>::digits`. 
+class FractionalDigitGenerator { + public: + // Run the conversion for `v * 2^-exp` and call `f(generator)`. + // This function will allocate enough stack space to perform the conversion. + static void RunConversion( + uint128 v, int exp, absl::FunctionRef<void(FractionalDigitGenerator)> f) { + using Limits = std::numeric_limits<MaxFloatType>; + assert(-exp < 0); + assert(-exp >= Limits::min_exponent - 128); + static_assert(StackArray::kMaxCapacity >= + (Limits::digits + 128 - Limits::min_exponent + 31) / 32, + ""); + StackArray::RunWithCapacity((Limits::digits + exp + 31) / 32, + [=](absl::Span<uint32_t> input) { + f(FractionalDigitGenerator(input, v, exp)); + }); + } + + // Returns true if there are any more non-zero digits left. + bool HasMoreDigits() const { return next_digit_ != 0 || chunk_index_ >= 0; } + + // Returns true if the remainder digits are greater than 5000... + bool IsGreaterThanHalf() const { + return next_digit_ > 5 || (next_digit_ == 5 && chunk_index_ >= 0); + } + // Returns true if the remainder digits are exactly 5000... + bool IsExactlyHalf() const { return next_digit_ == 5 && chunk_index_ < 0; } + + struct Digits { + int digit_before_nine; + int num_nines; + }; + + // Get the next set of digits. + // They are composed of a non-9 digit followed by a run of zero or more 9s. + Digits GetDigits() { + Digits digits{next_digit_, 0}; + + next_digit_ = GetOneDigit(); + while (next_digit_ == 9) { + ++digits.num_nines; + next_digit_ = GetOneDigit(); + } + + return digits; + } + + private: + // Return the next digit. + int GetOneDigit() { + if (chunk_index_ < 0) return 0; + + uint32_t carry = 0; + for (int i = chunk_index_; i >= 0; --i) { + carry = MultiplyBy10WithCarry(&data_[i], carry); + } + // If the lowest chunk is now empty, remove it from view.
+ if (data_[chunk_index_] == 0) --chunk_index_; + return carry; + } + + FractionalDigitGenerator(absl::Span<uint32_t> data, uint128 v, int exp) + : chunk_index_(exp / 32), data_(data) { + const int offset = exp % 32; + // Right shift `v` by `exp` bits. + data_[chunk_index_] = static_cast<uint32_t>(v << (32 - offset)); + v >>= offset; + // Make sure we don't overflow the data. We already calculated that + // non-zero bits fit, so we might not have space for leading zero bits. + for (int pos = chunk_index_; v; v >>= 32) + data_[--pos] = static_cast<uint32_t>(v); + + // Fill next_digit_, as GetDigits expects it to be populated always. + next_digit_ = GetOneDigit(); + } + + int next_digit_; + int chunk_index_; + absl::Span<uint32_t> data_; +}; + +// Count the number of leading zero bits. +int LeadingZeros(uint64_t v) { return countl_zero(v); } +int LeadingZeros(uint128 v) { + auto high = static_cast<uint64_t>(v >> 64); + auto low = static_cast<uint64_t>(v); + return high != 0 ? countl_zero(high) : 64 + countl_zero(low); +} + +// Round up the text digits starting at `p`. +// The buffer must have an extra digit that is known to not need rounding. +// This is done below by having an extra '0' digit on the left. +void RoundUp(char *p) { + while (*p == '9' || *p == '.') { + if (*p == '9') *p = '0'; + --p; + } + ++*p; +} + +// Check the previous digit and round up or down to follow the round-to-even +// policy. +void RoundToEven(char *p) { + if (*p == '.') --p; + if (*p % 2 == 1) RoundUp(p); +} + +// Simple integral decimal digit printing for values that fit in 64-bits. +// Returns the pointer to the last written digit. +char *PrintIntegralDigitsFromRightFast(uint64_t v, char *p) { + do { + *--p = DivideBy10WithCarry(&v, 0) + '0'; + } while (v != 0); + return p; +} + +// Simple integral decimal digit printing for values that fit in 128-bits. +// Returns the pointer to the last written digit. 
+char *PrintIntegralDigitsFromRightFast(uint128 v, char *p) { + auto high = static_cast<uint64_t>(v >> 64); + auto low = static_cast<uint64_t>(v); + + while (high != 0) { + uint64_t carry = DivideBy10WithCarry(&high, 0); + carry = DivideBy10WithCarry(&low, carry); + *--p = carry + '0'; + } + return PrintIntegralDigitsFromRightFast(low, p); +} + +// Simple fractional decimal digit printing for values that fit in 64-bits after +// shifting. +// Performs rounding if necessary to fit within `precision`. +// Returns the pointer to one after the last character written. +char *PrintFractionalDigitsFast(uint64_t v, char *start, int exp, + int precision) { + char *p = start; + v <<= (64 - exp); + while (precision > 0) { + if (!v) return p; + *p++ = MultiplyBy10WithCarry(&v, uint64_t{0}) + '0'; + --precision; + } + + // We need to round. + if (v < 0x8000000000000000) { + // We round down, so nothing to do. + } else if (v > 0x8000000000000000) { + // We round up. + RoundUp(p - 1); + } else { + RoundToEven(p - 1); + } + + assert(precision == 0); + // Precision can only be zero here. + return p; +} + +// Simple fractional decimal digit printing for values that fit in 128-bits +// after shifting. +// Performs rounding if necessary to fit within `precision`. +// Returns the pointer to one after the last character written. +char *PrintFractionalDigitsFast(uint128 v, char *start, int exp, + int precision) { + char *p = start; + v <<= (128 - exp); + auto high = static_cast<uint64_t>(v >> 64); + auto low = static_cast<uint64_t>(v); + + // While we have digits to print and `low` is not empty, do the long + // multiplication. + while (precision > 0 && low != 0) { + uint64_t carry = MultiplyBy10WithCarry(&low, uint64_t{0}); + carry = MultiplyBy10WithCarry(&high, carry); + + *p++ = carry + '0'; + --precision; + } + + // Now `low` is empty, so use a faster approach for the rest of the digits. + // This block is pretty much the same as the main loop for the 64-bit case + // above.
+ while (precision > 0) { + if (!high) return p; + *p++ = MultiplyBy10WithCarry(&high, uint64_t{0}) + '0'; + --precision; + } + + // We need to round. + if (high < 0x8000000000000000) { + // We round down, so nothing to do. + } else if (high > 0x8000000000000000 || low != 0) { + // We round up. + RoundUp(p - 1); + } else { + RoundToEven(p - 1); + } + + assert(precision == 0); + // Precision can only be zero here. + return p; +} + +struct FormatState { + char sign_char; + int precision; + const FormatConversionSpecImpl &conv; + FormatSinkImpl *sink; + + // In `alt` mode (flag #) we keep the `.` even if there are no fractional + // digits. In non-alt mode, we strip it. + bool ShouldPrintDot() const { return precision != 0 || conv.has_alt_flag(); } +}; + +struct Padding { + int left_spaces; + int zeros; + int right_spaces; +}; + +Padding ExtraWidthToPadding(size_t total_size, const FormatState &state) { + if (state.conv.width() < 0 || + static_cast<size_t>(state.conv.width()) <= total_size) { + return {0, 0, 0}; + } + int missing_chars = state.conv.width() - total_size; + if (state.conv.has_left_flag()) { + return {0, 0, missing_chars}; + } else if (state.conv.has_zero_flag()) { + return {0, missing_chars, 0}; + } else { + return {missing_chars, 0, 0}; + } +} + +void FinalPrint(const FormatState &state, absl::string_view data, + int padding_offset, int trailing_zeros, + absl::string_view data_postfix) { + if (state.conv.width() < 0) { + // No width specified. Fast-path. + if (state.sign_char != '\0') state.sink->Append(1, state.sign_char); + state.sink->Append(data); + state.sink->Append(trailing_zeros, '0'); + state.sink->Append(data_postfix); + return; + } + + auto padding = ExtraWidthToPadding((state.sign_char != '\0' ? 
1 : 0) + + data.size() + data_postfix.size() + + static_cast<size_t>(trailing_zeros), + state); + + state.sink->Append(padding.left_spaces, ' '); + if (state.sign_char != '\0') state.sink->Append(1, state.sign_char); + // Padding in general needs to be inserted somewhere in the middle of `data`. + state.sink->Append(data.substr(0, padding_offset)); + state.sink->Append(padding.zeros, '0'); + state.sink->Append(data.substr(padding_offset)); + state.sink->Append(trailing_zeros, '0'); + state.sink->Append(data_postfix); + state.sink->Append(padding.right_spaces, ' '); +} + +// Fastpath %f formatter for when the shifted value fits in a simple integral +// type. +// Prints `v*2^exp` with the options from `state`. +template <typename Int> +void FormatFFast(Int v, int exp, const FormatState &state) { + constexpr int input_bits = sizeof(Int) * 8; + + static constexpr size_t integral_size = + /* in case we need to round up an extra digit */ 1 + + /* decimal digits for uint128 */ 40 + 1; + char buffer[integral_size + /* . */ 1 + /* max digits uint128 */ 128]; + buffer[integral_size] = '.'; + char *const integral_digits_end = buffer + integral_size; + char *integral_digits_start; + char *const fractional_digits_start = buffer + integral_size + 1; + char *fractional_digits_end = fractional_digits_start; + + if (exp >= 0) { + const int total_bits = input_bits - LeadingZeros(v) + exp; + integral_digits_start = + total_bits <= 64 + ? PrintIntegralDigitsFromRightFast(static_cast<uint64_t>(v) << exp, + integral_digits_end) + : PrintIntegralDigitsFromRightFast(static_cast<uint128>(v) << exp, + integral_digits_end); + } else { + exp = -exp; + + integral_digits_start = PrintIntegralDigitsFromRightFast( + exp < input_bits ? v >> exp : 0, integral_digits_end); + // PrintFractionalDigits may pull a carried 1 all the way up through the + // integral portion. + integral_digits_start[-1] = '0'; + + fractional_digits_end = + exp <= 64 ? 
PrintFractionalDigitsFast(v, fractional_digits_start, exp, + state.precision) + : PrintFractionalDigitsFast(static_cast<uint128>(v), + fractional_digits_start, exp, + state.precision); + // There was a carry, so include the first digit too. + if (integral_digits_start[-1] != '0') --integral_digits_start; + } + + size_t size = fractional_digits_end - integral_digits_start; + + // In `alt` mode (flag #) we keep the `.` even if there are no fractional + // digits. In non-alt mode, we strip it. + if (!state.ShouldPrintDot()) --size; + FinalPrint(state, absl::string_view(integral_digits_start, size), + /*padding_offset=*/0, + static_cast<int>(state.precision - (fractional_digits_end - + fractional_digits_start)), + /*data_postfix=*/""); +} + +// Slow %f formatter for when the shifted value does not fit in a uint128, and +// `exp > 0`. +// Prints `v*2^exp` with the options from `state`. +// This one is guaranteed to not have fractional digits, so we don't have to +// worry about anything after the `.`. +void FormatFPositiveExpSlow(uint128 v, int exp, const FormatState &state) { + BinaryToDecimal::RunConversion(v, exp, [&](BinaryToDecimal btd) { + const size_t total_digits = + btd.TotalDigits() + + (state.ShouldPrintDot() ? static_cast<size_t>(state.precision) + 1 : 0); + + const auto padding = ExtraWidthToPadding( + total_digits + (state.sign_char != '\0' ? 1 : 0), state); + + state.sink->Append(padding.left_spaces, ' '); + if (state.sign_char != '\0') state.sink->Append(1, state.sign_char); + state.sink->Append(padding.zeros, '0'); + + do { + state.sink->Append(btd.CurrentDigits()); + } while (btd.AdvanceDigits()); + + if (state.ShouldPrintDot()) state.sink->Append(1, '.'); + state.sink->Append(state.precision, '0'); + state.sink->Append(padding.right_spaces, ' '); + }); +} + +// Slow %f formatter for when the shifted value does not fit in a uint128, and +// `exp < 0`. +// Prints `v*2^exp` with the options from `state`. 
+// This one is guaranteed to be < 1.0, so we don't have to worry about integral +// digits. +void FormatFNegativeExpSlow(uint128 v, int exp, const FormatState &state) { + const size_t total_digits = + /* 0 */ 1 + + (state.ShouldPrintDot() ? static_cast<size_t>(state.precision) + 1 : 0); + auto padding = + ExtraWidthToPadding(total_digits + (state.sign_char ? 1 : 0), state); + padding.zeros += 1; + state.sink->Append(padding.left_spaces, ' '); + if (state.sign_char != '\0') state.sink->Append(1, state.sign_char); + state.sink->Append(padding.zeros, '0'); + + if (state.ShouldPrintDot()) state.sink->Append(1, '.'); + + // Print digits + int digits_to_go = state.precision; + + FractionalDigitGenerator::RunConversion( + v, exp, [&](FractionalDigitGenerator digit_gen) { + // There are no digits to print here. + if (state.precision == 0) return; + + // We go one digit at a time, while keeping track of runs of nines. + // The runs of nines are used to perform rounding when necessary. + + while (digits_to_go > 0 && digit_gen.HasMoreDigits()) { + auto digits = digit_gen.GetDigits(); + + // Now we have a digit and a run of nines. + // See if we can print them all. + if (digits.num_nines + 1 < digits_to_go) { + // We don't have to round yet, so print them. + state.sink->Append(1, digits.digit_before_nine + '0'); + state.sink->Append(digits.num_nines, '9'); + digits_to_go -= digits.num_nines + 1; + + } else { + // We can't print all the nines, see where we have to truncate. + + bool round_up = false; + if (digits.num_nines + 1 > digits_to_go) { + // We round up at a nine. No need to print them. + round_up = true; + } else { + // We can fit all the nines, but truncate just after it. 
+ if (digit_gen.IsGreaterThanHalf()) { + round_up = true; + } else if (digit_gen.IsExactlyHalf()) { + // Round to even + round_up = + digits.num_nines != 0 || digits.digit_before_nine % 2 == 1; + } + } + + if (round_up) { + state.sink->Append(1, digits.digit_before_nine + '1'); + --digits_to_go; + // The rest will be zeros. + } else { + state.sink->Append(1, digits.digit_before_nine + '0'); + state.sink->Append(digits_to_go - 1, '9'); + digits_to_go = 0; + } + return; + } + } + }); + + state.sink->Append(digits_to_go, '0'); + state.sink->Append(padding.right_spaces, ' '); +} + +template <typename Int> +void FormatF(Int mantissa, int exp, const FormatState &state) { + if (exp >= 0) { + const int total_bits = sizeof(Int) * 8 - LeadingZeros(mantissa) + exp; + + // Fallback to the slow stack-based approach if we can't do it in a 64 or + // 128 bit state. + if (ABSL_PREDICT_FALSE(total_bits > 128)) { + return FormatFPositiveExpSlow(mantissa, exp, state); + } + } else { + // Fallback to the slow stack-based approach if we can't do it in a 64 or + // 128 bit state. + if (ABSL_PREDICT_FALSE(exp < -128)) { + return FormatFNegativeExpSlow(mantissa, -exp, state); + } + } + return FormatFFast(mantissa, exp, state); +} + +// Grab the group of four bits (nibble) from `n`. E.g., nibble 1 corresponds to +// bits 4-7. +template <typename Int> +uint8_t GetNibble(Int n, int nibble_index) { + constexpr Int mask_low_nibble = Int{0xf}; + int shift = nibble_index * 4; + n &= mask_low_nibble << shift; + return static_cast<uint8_t>((n >> shift) & 0xf); +} + +// Add one to the given nibble, applying carry to higher nibbles. Returns true +// if overflow, false otherwise. 
+template <typename Int> +bool IncrementNibble(int nibble_index, Int *n) { + constexpr int kShift = sizeof(Int) * 8 - 1; + constexpr int kNumNibbles = sizeof(Int) * 8 / 4; + Int before = *n >> kShift; + // Here we essentially want to take the number 1 and move it into the requested + // nibble, then add it to *n to effectively increment the nibble. However, + // ASan will complain if we try to shift the 1 beyond the limits of the Int, + // i.e., if the nibble_index is out of range. So therefore we check for this + // and if we are out of range we just add 0 which leaves *n unchanged, which + // seems like the reasonable thing to do in that case. + *n += ((nibble_index >= kNumNibbles) ? 0 : (Int{1} << (nibble_index * 4))); + Int after = *n >> kShift; + return (before && !after) || (nibble_index >= kNumNibbles); +} + +// Return a mask with 1's in the given nibble and all lower nibbles. +template <typename Int> +Int MaskUpToNibbleInclusive(int nibble_index) { + constexpr int kNumNibbles = sizeof(Int) * 8 / 4; + static const Int ones = ~Int{0}; + return ones >> std::max(0, 4 * (kNumNibbles - nibble_index - 1)); +} + +// Return a mask with 1's below the given nibble. +template <typename Int> +Int MaskUpToNibbleExclusive(int nibble_index) { + return nibble_index <= 0 ? 0 : MaskUpToNibbleInclusive<Int>(nibble_index - 1); +} + +template <typename Int> +Int MoveToNibble(uint8_t nibble, int nibble_index) { + return Int{nibble} << (4 * nibble_index); +} + +// Given mantissa size, find optimal # of mantissa bits to put in initial digit. +// +// In the hex representation we keep a single hex digit to the left of the dot. +// However, the question as to how many bits of the mantissa should be put into +// that hex digit in theory is arbitrary, but in practice it is optimal to +// choose based on the size of the mantissa.
E.g., for a `double`, there are 53 +// mantissa bits, so that means that we should put 1 bit to the left of the dot, +// thereby leaving 52 bits to the right, which is evenly divisible by four and +// thus all fractional digits represent actual precision. For a `long double`, +// on the other hand, there are 64 bits of mantissa, thus we can use all four +// bits for the initial hex digit and still have a number left over (60) that is +// a multiple of four. Once again, the goal is to have all fractional digits +// represent real precision. +template <typename Float> +constexpr int HexFloatLeadingDigitSizeInBits() { + return std::numeric_limits<Float>::digits % 4 > 0 + ? std::numeric_limits<Float>::digits % 4 + : 4; +} + +// This function captures the rounding behavior of glibc for hex float +// representations. E.g. when rounding 0x1.ab800000 to a precision of .2 +// ("%.2a") glibc will round up because it rounds toward the even number (since +// 0xb is an odd number, it will round up to 0xc). However, when rounding at a +// point that is not followed by 800000..., it disregards the parity and rounds +// up if > 8 and rounds down if < 8. +template <typename Int> +bool HexFloatNeedsRoundUp(Int mantissa, int final_nibble_displayed, + uint8_t leading) { + // If the last nibble (hex digit) to be displayed is the lowest one in the + // mantissa then that means that we don't have any further nibbles to inform + // rounding, so don't round. + if (final_nibble_displayed <= 0) { + return false; + } + int rounding_nibble_idx = final_nibble_displayed - 1; + constexpr int kTotalNibbles = sizeof(Int) * 8 / 4; + assert(final_nibble_displayed <= kTotalNibbles); + Int mantissa_up_to_rounding_nibble_inclusive = + mantissa & MaskUpToNibbleInclusive<Int>(rounding_nibble_idx); + Int eight = MoveToNibble<Int>(8, rounding_nibble_idx); + if (mantissa_up_to_rounding_nibble_inclusive != eight) { + return mantissa_up_to_rounding_nibble_inclusive > eight; + } + // Nibble in question == 8.
+ uint8_t round_if_odd = (final_nibble_displayed == kTotalNibbles) + ? leading + : GetNibble(mantissa, final_nibble_displayed); + return round_if_odd % 2 == 1; +} + +// Stores values associated with a Float type needed by the FormatA +// implementation in order to avoid templatizing that function by the Float +// type. +struct HexFloatTypeParams { + template <typename Float> + explicit HexFloatTypeParams(Float) + : min_exponent(std::numeric_limits<Float>::min_exponent - 1), + leading_digit_size_bits(HexFloatLeadingDigitSizeInBits<Float>()) { + assert(leading_digit_size_bits >= 1 && leading_digit_size_bits <= 4); + } + + int min_exponent; + int leading_digit_size_bits; +}; + +// Hex Float Rounding. First check if we need to round; if so, then we do that +// by manipulating (incrementing) the mantissa, that way we can later print the +// mantissa digits by iterating through them in the same way regardless of +// whether a rounding happened. +template <typename Int> +void FormatARound(bool precision_specified, const FormatState &state, + uint8_t *leading, Int *mantissa, int *exp) { + constexpr int kTotalNibbles = sizeof(Int) * 8 / 4; + // Index of the last nibble that we could display given precision. + int final_nibble_displayed = + precision_specified ? std::max(0, (kTotalNibbles - state.precision)) : 0; + if (HexFloatNeedsRoundUp(*mantissa, final_nibble_displayed, *leading)) { + // Need to round up. + bool overflow = IncrementNibble(final_nibble_displayed, mantissa); + *leading += (overflow ? 1 : 0); + if (ABSL_PREDICT_FALSE(*leading > 15)) { + // We have overflowed the leading digit. This would mean that we would + // need two hex digits to the left of the dot, which is not allowed. So + // adjust the mantissa and exponent so that the result is always 1.0eXXX. + *leading = 1; + *mantissa = 0; + *exp += 4; + } + } + // Now that we have handled a possible round-up we can go ahead and zero out + // all the nibbles of the mantissa that we won't need. 
+ if (precision_specified) { + *mantissa &= ~MaskUpToNibbleExclusive<Int>(final_nibble_displayed); + } +} + +template <typename Int> +void FormatANormalize(const HexFloatTypeParams float_traits, uint8_t *leading, + Int *mantissa, int *exp) { + constexpr int kIntBits = sizeof(Int) * 8; + static const Int kHighIntBit = Int{1} << (kIntBits - 1); + const int kLeadDigitBitsCount = float_traits.leading_digit_size_bits; + // Normalize mantissa so that highest bit set is in MSB position, unless we + // get interrupted by the exponent threshold. + while (*mantissa && !(*mantissa & kHighIntBit)) { + if (ABSL_PREDICT_FALSE(*exp - 1 < float_traits.min_exponent)) { + *mantissa >>= (float_traits.min_exponent - *exp); + *exp = float_traits.min_exponent; + return; + } + *mantissa <<= 1; + --*exp; + } + // Extract bits for leading digit then shift them away leaving the + // fractional part. + *leading = + static_cast<uint8_t>(*mantissa >> (kIntBits - kLeadDigitBitsCount)); + *exp -= (*mantissa != 0) ? kLeadDigitBitsCount : *exp; + *mantissa <<= kLeadDigitBitsCount; +} + +template <typename Int> +void FormatA(const HexFloatTypeParams float_traits, Int mantissa, int exp, + bool uppercase, const FormatState &state) { + // Int properties. + constexpr int kIntBits = sizeof(Int) * 8; + constexpr int kTotalNibbles = sizeof(Int) * 8 / 4; + // Did the user specify a precision explicitly? + const bool precision_specified = state.conv.precision() >= 0; + + // ========== Normalize/Denormalize ========== + exp += kIntBits; // make all digits fractional digits. + // This holds the (up to four) bits of leading digit, i.e., the '1' in the + // number 0x1.e6fp+2. It's always > 0 unless number is zero or denormal. + uint8_t leading = 0; + FormatANormalize(float_traits, &leading, &mantissa, &exp); + + // =============== Rounding ================== + // Check if we need to round; if so, then we do that by manipulating + // (incrementing) the mantissa before beginning to print characters. 
+ FormatARound(precision_specified, state, &leading, &mantissa, &exp); + + // ============= Format Result =============== + // This buffer holds the "0x1.ab1de3" portion of "0x1.ab1de3p+2". Compute the + // size with long double which is the largest of the floats. + constexpr size_t kBufSizeForHexFloatRepr = + 2 // 0x + + std::numeric_limits<MaxFloatType>::digits / 4 // number of hex digits + + 1 // round up + + 1; // "." (dot) + char digits_buffer[kBufSizeForHexFloatRepr]; + char *digits_iter = digits_buffer; + const char *const digits = + static_cast<const char *>("0123456789ABCDEF0123456789abcdef") + + (uppercase ? 0 : 16); + + // =============== Hex Prefix ================ + *digits_iter++ = '0'; + *digits_iter++ = uppercase ? 'X' : 'x'; + + // ========== Non-Fractional Digit =========== + *digits_iter++ = digits[leading]; + + // ================== Dot ==================== + // There are three reasons we might need a dot. Keep in mind that, at this + // point, the mantissa holds only the fractional part. + if ((precision_specified && state.precision > 0) || + (!precision_specified && mantissa > 0) || state.conv.has_alt_flag()) { + *digits_iter++ = '.'; + } + + // ============ Fractional Digits ============ + int digits_emitted = 0; + while (mantissa > 0) { + *digits_iter++ = digits[GetNibble(mantissa, kTotalNibbles - 1)]; + mantissa <<= 4; + ++digits_emitted; + } + int trailing_zeros = + precision_specified ? state.precision - digits_emitted : 0; + assert(trailing_zeros >= 0); + auto digits_result = string_view(digits_buffer, digits_iter - digits_buffer); + + // =============== Exponent ================== + constexpr size_t kBufSizeForExpDecRepr = + numbers_internal::kFastToBufferSize // required for FastIntToBuffer + + 1 // 'p' or 'P' + + 1; // '+' or '-' + char exp_buffer[kBufSizeForExpDecRepr]; + exp_buffer[0] = uppercase ? 'P' : 'p'; + exp_buffer[1] = exp >= 0 ? '+' : '-'; + numbers_internal::FastIntToBuffer(exp < 0 ?
-exp : exp, exp_buffer + 2); + + // ============ Assemble Result ============== + FinalPrint(state, // + digits_result, // 0xN.NNN... + 2, // offset in `data` to start padding if needed. + trailing_zeros, // num remaining mantissa padding zeros + exp_buffer); // exponent +} + +char *CopyStringTo(absl::string_view v, char *out) { std::memcpy(out, v.data(), v.size()); return out + v.size(); } template <typename Float> -bool FallbackToSnprintf(const Float v, const ConversionSpec &conv, +bool FallbackToSnprintf(const Float v, const FormatConversionSpecImpl &conv, FormatSinkImpl *sink) { int w = conv.width() >= 0 ? conv.width() : 0; int p = conv.precision() >= 0 ? conv.precision() : -1; @@ -33,17 +951,17 @@ bool FallbackToSnprintf(const Float v, const ConversionSpec &conv, if (std::is_same<long double, Float>()) { *fp++ = 'L'; } - *fp++ = FormatConversionCharToChar(conv.conv()); + *fp++ = FormatConversionCharToChar(conv.conversion_char()); *fp = 0; assert(fp < fmt + sizeof(fmt)); } std::string space(512, '\0'); - string_view result; + absl::string_view result; while (true) { int n = snprintf(&space[0], space.size(), fmt, w, p, v); if (n < 0) return false; if (static_cast<size_t>(n) < space.size()) { - result = string_view(space.data(), n); + result = absl::string_view(space.data(), n); break; } space.resize(n + 1); @@ -96,21 +1014,24 @@ enum class FormatStyle { Fixed, Precision }; // Otherwise, return false. template <typename Float> bool ConvertNonNumericFloats(char sign_char, Float v, - const ConversionSpec &conv, FormatSinkImpl *sink) { + const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { char text[4], *ptr = text; - if (sign_char) *ptr++ = sign_char; + if (sign_char != '\0') *ptr++ = sign_char; if (std::isnan(v)) { - ptr = std::copy_n(FormatConversionCharIsUpper(conv.conv()) ? "NAN" : "nan", - 3, ptr); + ptr = std::copy_n( + FormatConversionCharIsUpper(conv.conversion_char()) ? 
"NAN" : "nan", 3, + ptr); } else if (std::isinf(v)) { - ptr = std::copy_n(FormatConversionCharIsUpper(conv.conv()) ? "INF" : "inf", - 3, ptr); + ptr = std::copy_n( + FormatConversionCharIsUpper(conv.conversion_char()) ? "INF" : "inf", 3, + ptr); } else { return false; } return sink->PutPaddedString(string_view(text, ptr - text), conv.width(), -1, - conv.flags().left); + conv.has_left_flag()); } // Round up the last digit of the value. @@ -170,7 +1091,12 @@ constexpr bool CanFitMantissa() { template <typename Float> struct Decomposed { - Float mantissa; + using MantissaType = + absl::conditional_t<std::is_same<long double, Float>::value, uint128, + uint64_t>; + static_assert(std::numeric_limits<Float>::digits <= sizeof(MantissaType) * 8, + ""); + MantissaType mantissa; int exponent; }; @@ -181,7 +1107,8 @@ Decomposed<Float> Decompose(Float v) { Float m = std::frexp(v, &exp); m = std::ldexp(m, std::numeric_limits<Float>::digits); exp -= std::numeric_limits<Float>::digits; - return {m, exp}; + + return {static_cast<typename Decomposed<Float>::MantissaType>(m), exp}; } // Print 'digits' as decimal. @@ -350,31 +1277,32 @@ bool FloatToBuffer(Decomposed<Float> decomposed, int precision, Buffer *out, return false; } -void WriteBufferToSink(char sign_char, string_view str, - const ConversionSpec &conv, FormatSinkImpl *sink) { +void WriteBufferToSink(char sign_char, absl::string_view str, + const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { int left_spaces = 0, zeros = 0, right_spaces = 0; int missing_chars = conv.width() >= 0 ? 
std::max(conv.width() - static_cast<int>(str.size()) - static_cast<int>(sign_char != 0), 0) : 0; - if (conv.flags().left) { + if (conv.has_left_flag()) { right_spaces = missing_chars; - } else if (conv.flags().zero) { + } else if (conv.has_zero_flag()) { zeros = missing_chars; } else { left_spaces = missing_chars; } sink->Append(left_spaces, ' '); - if (sign_char) sink->Append(1, sign_char); + if (sign_char != '\0') sink->Append(1, sign_char); sink->Append(zeros, '0'); sink->Append(str); sink->Append(right_spaces, ' '); } template <typename Float> -bool FloatToSink(const Float v, const ConversionSpec &conv, +bool FloatToSink(const Float v, const FormatConversionSpecImpl &conv, FormatSinkImpl *sink) { // Print the sign or the sign column. Float abs_v = v; @@ -382,9 +1310,9 @@ bool FloatToSink(const Float v, const ConversionSpec &conv, if (std::signbit(abs_v)) { sign_char = '-'; abs_v = -abs_v; - } else if (conv.flags().show_pos) { + } else if (conv.has_show_pos_flag()) { sign_char = '+'; - } else if (conv.flags().sign_col) { + } else if (conv.has_sign_col_flag()) { sign_char = ' '; } @@ -401,89 +1329,91 @@ bool FloatToSink(const Float v, const ConversionSpec &conv, Buffer buffer; - switch (conv.conv()) { - case ConversionChar::f: - case ConversionChar::F: - if (!FloatToBuffer<FormatStyle::Fixed>(decomposed, precision, &buffer, - nullptr)) { - return FallbackToSnprintf(v, conv, sink); - } - if (!conv.flags().alt && buffer.back() == '.') buffer.pop_back(); - break; - - case ConversionChar::e: - case ConversionChar::E: - if (!FloatToBuffer<FormatStyle::Precision>(decomposed, precision, &buffer, - &exp)) { - return FallbackToSnprintf(v, conv, sink); - } - if (!conv.flags().alt && buffer.back() == '.') buffer.pop_back(); - PrintExponent(exp, FormatConversionCharIsUpper(conv.conv()) ? 
'E' : 'e', - &buffer); - break; - - case ConversionChar::g: - case ConversionChar::G: - precision = std::max(0, precision - 1); - if (!FloatToBuffer<FormatStyle::Precision>(decomposed, precision, &buffer, - &exp)) { - return FallbackToSnprintf(v, conv, sink); - } - if (precision + 1 > exp && exp >= -4) { - if (exp < 0) { - // Have 1.23456, needs 0.00123456 - // Move the first digit - buffer.begin[1] = *buffer.begin; - // Add some zeros - for (; exp < -1; ++exp) *buffer.begin-- = '0'; - *buffer.begin-- = '.'; - *buffer.begin = '0'; - } else if (exp > 0) { - // Have 1.23456, needs 1234.56 - // Move the '.' exp positions to the right. - std::rotate(buffer.begin + 1, buffer.begin + 2, - buffer.begin + exp + 2); - } - exp = 0; - } - if (!conv.flags().alt) { - while (buffer.back() == '0') buffer.pop_back(); - if (buffer.back() == '.') buffer.pop_back(); - } - if (exp) { - PrintExponent(exp, FormatConversionCharIsUpper(conv.conv()) ? 'E' : 'e', - &buffer); - } - break; + FormatConversionChar c = conv.conversion_char(); - case ConversionChar::a: - case ConversionChar::A: + if (c == FormatConversionCharInternal::f || + c == FormatConversionCharInternal::F) { + FormatF(decomposed.mantissa, decomposed.exponent, + {sign_char, precision, conv, sink}); + return true; + } else if (c == FormatConversionCharInternal::e || + c == FormatConversionCharInternal::E) { + if (!FloatToBuffer<FormatStyle::Precision>(decomposed, precision, &buffer, + &exp)) { return FallbackToSnprintf(v, conv, sink); - - default: - return false; + } + if (!conv.has_alt_flag() && buffer.back() == '.') buffer.pop_back(); + PrintExponent( + exp, FormatConversionCharIsUpper(conv.conversion_char()) ? 
'E' : 'e', + &buffer); + } else if (c == FormatConversionCharInternal::g || + c == FormatConversionCharInternal::G) { + precision = std::max(0, precision - 1); + if (!FloatToBuffer<FormatStyle::Precision>(decomposed, precision, &buffer, + &exp)) { + return FallbackToSnprintf(v, conv, sink); + } + if (precision + 1 > exp && exp >= -4) { + if (exp < 0) { + // Have 1.23456, needs 0.00123456 + // Move the first digit + buffer.begin[1] = *buffer.begin; + // Add some zeros + for (; exp < -1; ++exp) *buffer.begin-- = '0'; + *buffer.begin-- = '.'; + *buffer.begin = '0'; + } else if (exp > 0) { + // Have 1.23456, needs 1234.56 + // Move the '.' exp positions to the right. + std::rotate(buffer.begin + 1, buffer.begin + 2, buffer.begin + exp + 2); + } + exp = 0; + } + if (!conv.has_alt_flag()) { + while (buffer.back() == '0') buffer.pop_back(); + if (buffer.back() == '.') buffer.pop_back(); + } + if (exp) { + PrintExponent( + exp, FormatConversionCharIsUpper(conv.conversion_char()) ? 'E' : 'e', + &buffer); + } + } else if (c == FormatConversionCharInternal::a || + c == FormatConversionCharInternal::A) { + bool uppercase = (c == FormatConversionCharInternal::A); + FormatA(HexFloatTypeParams(Float{}), decomposed.mantissa, + decomposed.exponent, uppercase, {sign_char, precision, conv, sink}); + return true; + } else { + return false; } WriteBufferToSink(sign_char, - string_view(buffer.begin, buffer.end - buffer.begin), conv, - sink); + absl::string_view(buffer.begin, buffer.end - buffer.begin), + conv, sink); return true; } } // namespace -bool ConvertFloatImpl(long double v, const ConversionSpec &conv, +bool ConvertFloatImpl(long double v, const FormatConversionSpecImpl &conv, FormatSinkImpl *sink) { + if (IsDoubleDouble()) { + // This is the `double-double` representation of `long double`. We do not + // handle it natively. Fallback to snprintf. 
+ return FallbackToSnprintf(v, conv, sink); + } + return FloatToSink(v, conv, sink); } -bool ConvertFloatImpl(float v, const ConversionSpec &conv, +bool ConvertFloatImpl(float v, const FormatConversionSpecImpl &conv, FormatSinkImpl *sink) { - return FloatToSink(v, conv, sink); + return FloatToSink(static_cast<double>(v), conv, sink); } -bool ConvertFloatImpl(double v, const ConversionSpec &conv, +bool ConvertFloatImpl(double v, const FormatConversionSpecImpl &conv, FormatSinkImpl *sink) { return FloatToSink(v, conv, sink); } diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/float_conversion.h b/third_party/abseil-cpp/absl/strings/internal/str_format/float_conversion.h index 49a6a63630..71100e7142 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/float_conversion.h +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/float_conversion.h @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_ #define ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_ @@ -7,13 +21,13 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { -bool ConvertFloatImpl(float v, const ConversionSpec &conv, +bool ConvertFloatImpl(float v, const FormatConversionSpecImpl &conv, FormatSinkImpl *sink); -bool ConvertFloatImpl(double v, const ConversionSpec &conv, +bool ConvertFloatImpl(double v, const FormatConversionSpecImpl &conv, FormatSinkImpl *sink); -bool ConvertFloatImpl(long double v, const ConversionSpec &conv, +bool ConvertFloatImpl(long double v, const FormatConversionSpecImpl &conv, FormatSinkImpl *sink); } // namespace str_format_internal diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/output.h b/third_party/abseil-cpp/absl/strings/internal/str_format/output.h index 28b288b7dd..8030dae00f 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/output.h +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/output.h @@ -30,9 +30,6 @@ namespace absl { ABSL_NAMESPACE_BEGIN - -class Cord; - namespace str_format_internal { // RawSink implementation that writes into a char* buffer. @@ -77,12 +74,6 @@ inline void AbslFormatFlush(std::ostream* out, string_view s) { out->write(s.data(), s.size()); } -template <class AbslCord, typename = typename std::enable_if< - std::is_same<AbslCord, absl::Cord>::value>::type> -inline void AbslFormatFlush(AbslCord* out, string_view s) { - out->Append(s); -} - inline void AbslFormatFlush(FILERawSink* sink, string_view v) { sink->Write(v); } @@ -91,10 +82,11 @@ inline void AbslFormatFlush(BufferRawSink* sink, string_view v) { sink->Write(v); } +// This is a SFINAE to get a better compiler error message when the type +// is not supported. 
template <typename T> -auto InvokeFlush(T* out, string_view s) - -> decltype(str_format_internal::AbslFormatFlush(out, s)) { - str_format_internal::AbslFormatFlush(out, s); +auto InvokeFlush(T* out, string_view s) -> decltype(AbslFormatFlush(out, s)) { + AbslFormatFlush(out, s); } } // namespace str_format_internal diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/output_test.cc b/third_party/abseil-cpp/absl/strings/internal/str_format/output_test.cc index e54e6f70a5..ce2e91a0bb 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/output_test.cc +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/output_test.cc @@ -19,6 +19,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "absl/strings/cord.h" namespace absl { ABSL_NAMESPACE_BEGIN @@ -37,6 +38,12 @@ TEST(InvokeFlush, Stream) { EXPECT_EQ(str.str(), "ABCDEF"); } +TEST(InvokeFlush, Cord) { + absl::Cord str("ABC"); + str_format_internal::InvokeFlush(&str, "DEF"); + EXPECT_EQ(str, "ABCDEF"); +} + TEST(BufferRawSink, Limits) { char buf[16]; { @@ -70,4 +77,3 @@ TEST(BufferRawSink, Limits) { } // namespace ABSL_NAMESPACE_END } // namespace absl - diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/parser.cc b/third_party/abseil-cpp/absl/strings/internal/str_format/parser.cc index aab68db94b..2c9c07dacc 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/parser.cc +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/parser.cc @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include "absl/strings/internal/str_format/parser.h" #include <assert.h> @@ -17,63 +31,70 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { -using CC = ConversionChar; +using CC = FormatConversionCharInternal; using LM = LengthMod; +// Abbreviations to fit in the table below. +constexpr auto f_sign = Flags::kSignCol; +constexpr auto f_alt = Flags::kAlt; +constexpr auto f_pos = Flags::kShowPos; +constexpr auto f_left = Flags::kLeft; +constexpr auto f_zero = Flags::kZero; + ABSL_CONST_INIT const ConvTag kTags[256] = { - {}, {}, {}, {}, {}, {}, {}, {}, // 00-07 - {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f - {}, {}, {}, {}, {}, {}, {}, {}, // 10-17 - {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f - {}, {}, {}, {}, {}, {}, {}, {}, // 20-27 - {}, {}, {}, {}, {}, {}, {}, {}, // 28-2f - {}, {}, {}, {}, {}, {}, {}, {}, // 30-37 - {}, {}, {}, {}, {}, {}, {}, {}, // 38-3f - {}, CC::A, {}, CC::C, {}, CC::E, CC::F, CC::G, // @ABCDEFG - {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO - {}, {}, {}, CC::S, {}, {}, {}, {}, // PQRSTUVW - CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_ - {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg - LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno - CC::p, LM::q, {}, CC::s, LM::t, CC::u, {}, {}, // pqrstuvw - CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}! 
- {}, {}, {}, {}, {}, {}, {}, {}, // 80-87 - {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f - {}, {}, {}, {}, {}, {}, {}, {}, // 90-97 - {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f - {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7 - {}, {}, {}, {}, {}, {}, {}, {}, // a8-af - {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7 - {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf - {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7 - {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf - {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7 - {}, {}, {}, {}, {}, {}, {}, {}, // d8-df - {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7 - {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef - {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7 - {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff + {}, {}, {}, {}, {}, {}, {}, {}, // 00-07 + {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f + {}, {}, {}, {}, {}, {}, {}, {}, // 10-17 + {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f + f_sign, {}, {}, f_alt, {}, {}, {}, {}, // !"#$%&' + {}, {}, {}, f_pos, {}, f_left, {}, {}, // ()*+,-./ + f_zero, {}, {}, {}, {}, {}, {}, {}, // 01234567 + {}, {}, {}, {}, {}, {}, {}, {}, // 89:;<=>? + {}, CC::A, {}, {}, {}, CC::E, CC::F, CC::G, // @ABCDEFG + {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO + {}, {}, {}, {}, {}, {}, {}, {}, // PQRSTUVW + CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_ + {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg + LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno + CC::p, LM::q, {}, CC::s, LM::t, CC::u, {}, {}, // pqrstuvw + CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}! 
+ {}, {}, {}, {}, {}, {}, {}, {}, // 80-87 + {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f + {}, {}, {}, {}, {}, {}, {}, {}, // 90-97 + {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f + {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7 + {}, {}, {}, {}, {}, {}, {}, {}, // a8-af + {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7 + {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf + {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7 + {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf + {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7 + {}, {}, {}, {}, {}, {}, {}, {}, // d8-df + {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7 + {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef + {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7 + {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff }; namespace { bool CheckFastPathSetting(const UnboundConversion& conv) { - bool should_be_basic = !conv.flags.left && // - !conv.flags.show_pos && // - !conv.flags.sign_col && // - !conv.flags.alt && // - !conv.flags.zero && // - (conv.width.value() == -1) && - (conv.precision.value() == -1); - if (should_be_basic != conv.flags.basic) { + bool width_precision_needed = + conv.width.value() >= 0 || conv.precision.value() >= 0; + if (width_precision_needed && conv.flags == Flags::kBasic) { fprintf(stderr, "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d " "width=%d precision=%d\n", - conv.flags.basic, conv.flags.left, conv.flags.show_pos, - conv.flags.sign_col, conv.flags.alt, conv.flags.zero, - conv.width.value(), conv.precision.value()); + conv.flags == Flags::kBasic ? 1 : 0, + FlagsContains(conv.flags, Flags::kLeft) ? 1 : 0, + FlagsContains(conv.flags, Flags::kShowPos) ? 1 : 0, + FlagsContains(conv.flags, Flags::kSignCol) ? 1 : 0, + FlagsContains(conv.flags, Flags::kAlt) ? 1 : 0, + FlagsContains(conv.flags, Flags::kZero) ? 
1 : 0, conv.width.value(), + conv.precision.value()); + return false; } - return should_be_basic == conv.flags.basic; + return true; } template <bool is_positional> @@ -117,40 +138,21 @@ const char *ConsumeConversion(const char *pos, const char *const end, ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); // We should start with the basic flag on. - assert(conv->flags.basic); + assert(conv->flags == Flags::kBasic); // Any non alpha character makes this conversion not basic. // This includes flags (-+ #0), width (1-9, *) or precision (.). // All conversion characters and length modifiers are alpha characters. if (c < 'A') { - conv->flags.basic = false; - - for (; c <= '0';) { - // FIXME: We might be able to speed this up reusing the lookup table from - // above. It might require changing Flags to be a plain integer where we - // can |= a value. - switch (c) { - case '-': - conv->flags.left = true; - break; - case '+': - conv->flags.show_pos = true; - break; - case ' ': - conv->flags.sign_col = true; - break; - case '#': - conv->flags.alt = true; - break; - case '0': - conv->flags.zero = true; - break; - default: - goto flags_done; + while (c <= '0') { + auto tag = GetTagForChar(c); + if (tag.is_flags()) { + conv->flags = conv->flags | tag.as_flags(); + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else { + break; } - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); } -flags_done: if (c <= '9') { if (c >= '0') { @@ -159,12 +161,12 @@ flags_done: if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr; // Positional conversion. 
*next_arg = -1; - conv->flags = Flags(); - conv->flags.basic = true; return ConsumeConversion<true>(original_pos, end, conv, next_arg); } + conv->flags = conv->flags | Flags::kNonBasic; conv->width.set_value(maybe_width); } else if (c == '*') { + conv->flags = conv->flags | Flags::kNonBasic; ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); if (is_positional) { if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; @@ -178,6 +180,7 @@ flags_done: } if (c == '.') { + conv->flags = conv->flags | Flags::kNonBasic; ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); if (std::isdigit(c)) { conv->precision.set_value(parse_digits()); @@ -296,15 +299,17 @@ struct ParsedFormatBase::ParsedFormatConsumer { char* data_pos; }; -ParsedFormatBase::ParsedFormatBase(string_view format, bool allow_ignored, - std::initializer_list<Conv> convs) +ParsedFormatBase::ParsedFormatBase( + string_view format, bool allow_ignored, + std::initializer_list<FormatConversionCharSet> convs) : data_(format.empty() ? nullptr : new char[format.size()]) { has_error_ = !ParseFormatString(format, ParsedFormatConsumer(this)) || !MatchesConversions(allow_ignored, convs); } bool ParsedFormatBase::MatchesConversions( - bool allow_ignored, std::initializer_list<Conv> convs) const { + bool allow_ignored, + std::initializer_list<FormatConversionCharSet> convs) const { std::unordered_set<int> used; auto add_if_valid_conv = [&](int pos, char c) { if (static_cast<size_t>(pos) > convs.size() || diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/parser.h b/third_party/abseil-cpp/absl/strings/internal/str_format/parser.h index 45c90d1df0..ad8646edff 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/parser.h +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/parser.h @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ #define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ @@ -27,10 +41,7 @@ std::string LengthModToString(LengthMod v); // The analyzed properties of a single specified conversion. struct UnboundConversion { - UnboundConversion() - : flags() /* This is required to zero all the fields of flags. */ { - flags.basic = true; - } + UnboundConversion() {} class InputValue { public: @@ -65,9 +76,9 @@ struct UnboundConversion { InputValue width; InputValue precision; - Flags flags; + Flags flags = Flags::kBasic; LengthMod length_mod = LengthMod::none; - ConversionChar conv = FormatConversionChar::kNone; + FormatConversionChar conv = FormatConversionCharInternal::kNone; }; // Consume conversion spec prefix (not including '%') of [p, end) if valid. @@ -79,32 +90,43 @@ const char* ConsumeUnboundConversion(const char* p, const char* end, UnboundConversion* conv, int* next_arg); // Helper tag class for the table below. -// It allows fast `char -> ConversionChar/LengthMod` checking and +// It allows fast `char -> ConversionChar/LengthMod/Flags` checking and // conversions. class ConvTag { public: - constexpr ConvTag(ConversionChar conversion_char) // NOLINT - : tag_(static_cast<int8_t>(conversion_char)) {} - // We invert the length modifiers to make them negative so that we can easily - // test for them. 
+ constexpr ConvTag(FormatConversionChar conversion_char) // NOLINT + : tag_(static_cast<uint8_t>(conversion_char)) {} constexpr ConvTag(LengthMod length_mod) // NOLINT - : tag_(~static_cast<std::int8_t>(length_mod)) {} - // Everything else is -128, which is negative to make is_conv() simpler. - constexpr ConvTag() : tag_(-128) {} + : tag_(0x80 | static_cast<uint8_t>(length_mod)) {} + constexpr ConvTag(Flags flags) // NOLINT + : tag_(0xc0 | static_cast<uint8_t>(flags)) {} + constexpr ConvTag() : tag_(0xFF) {} + + bool is_conv() const { return (tag_ & 0x80) == 0; } + bool is_length() const { return (tag_ & 0xC0) == 0x80; } + bool is_flags() const { return (tag_ & 0xE0) == 0xC0; } - bool is_conv() const { return tag_ >= 0; } - bool is_length() const { return tag_ < 0 && tag_ != -128; } - ConversionChar as_conv() const { + FormatConversionChar as_conv() const { assert(is_conv()); - return static_cast<ConversionChar>(tag_); + assert(!is_length()); + assert(!is_flags()); + return static_cast<FormatConversionChar>(tag_); } LengthMod as_length() const { + assert(!is_conv()); assert(is_length()); - return static_cast<LengthMod>(~tag_); + assert(!is_flags()); + return static_cast<LengthMod>(tag_ & 0x3F); + } + Flags as_flags() const { + assert(!is_conv()); + assert(!is_length()); + assert(is_flags()); + return static_cast<Flags>(tag_ & 0x1F); } private: - std::int8_t tag_; + uint8_t tag_; }; extern const ConvTag kTags[256]; @@ -143,7 +165,7 @@ bool ParseFormatString(string_view src, Consumer consumer) { auto tag = GetTagForChar(percent[1]); if (tag.is_conv()) { if (ABSL_PREDICT_FALSE(next_arg < 0)) { - // This indicates an error in the format std::string. + // This indicates an error in the format string. // The only way to get `next_arg < 0` here is to have a positional // argument first which sets next_arg to -1 and then a non-positional // argument. 
@@ -186,8 +208,9 @@ constexpr bool EnsureConstexpr(string_view s) { class ParsedFormatBase { public: - explicit ParsedFormatBase(string_view format, bool allow_ignored, - std::initializer_list<Conv> convs); + explicit ParsedFormatBase( + string_view format, bool allow_ignored, + std::initializer_list<FormatConversionCharSet> convs); ParsedFormatBase(const ParsedFormatBase& other) { *this = other; } @@ -234,8 +257,9 @@ class ParsedFormatBase { private: // Returns whether the conversions match and if !allow_ignored it verifies // that all conversions are used by the format. - bool MatchesConversions(bool allow_ignored, - std::initializer_list<Conv> convs) const; + bool MatchesConversions( + bool allow_ignored, + std::initializer_list<FormatConversionCharSet> convs) const; struct ParsedFormatConsumer; @@ -280,14 +304,14 @@ class ParsedFormatBase { // This is the only API that allows the user to pass a runtime specified format // string. These factory functions will return NULL if the format does not match // the conversions requested by the user. -template <str_format_internal::Conv... C> +template <FormatConversionCharSet... 
C> class ExtendedParsedFormat : public str_format_internal::ParsedFormatBase { public: explicit ExtendedParsedFormat(string_view format) #ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER __attribute__(( enable_if(str_format_internal::EnsureConstexpr(format), - "Format std::string is not constexpr."), + "Format string is not constexpr."), enable_if(str_format_internal::ValidFormatImpl<C...>(format), "Format specified does not match the template arguments."))) #endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/parser_test.cc b/third_party/abseil-cpp/absl/strings/internal/str_format/parser_test.cc index 1b1ee030f1..fe0d296360 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/parser_test.cc +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/parser_test.cc @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ #include "absl/strings/internal/str_format/parser.h" #include <string.h> @@ -41,23 +55,23 @@ TEST(LengthModTest, Names) { TEST(ConversionCharTest, Names) { struct Expectation { - ConversionChar id; + FormatConversionChar id; char name; }; // clang-format off const Expectation kExpect[] = { -#define X(c) {ConversionChar::c, #c[0]} - X(c), X(C), X(s), X(S), // text +#define X(c) {FormatConversionCharInternal::c, #c[0]} + X(c), X(s), // text X(d), X(i), X(o), X(u), X(x), X(X), // int X(f), X(F), X(e), X(E), X(g), X(G), X(a), X(A), // float X(n), X(p), // misc #undef X - {ConversionChar::none, '\0'}, + {FormatConversionCharInternal::kNone, '\0'}, }; // clang-format on for (auto e : kExpect) { SCOPED_TRACE(e.name); - ConversionChar v = e.id; + FormatConversionChar v = e.id; EXPECT_EQ(e.name, FormatConversionCharToChar(v)); } } @@ -256,15 +270,22 @@ TEST_F(ConsumeUnboundConversionTest, Flags) { for (int k = 0; k < kNumFlags; ++k) if ((i >> k) & 1) fmt += kAllFlags[k]; // flag order shouldn't matter - if (rev == 1) { std::reverse(fmt.begin(), fmt.end()); } + if (rev == 1) { + std::reverse(fmt.begin(), fmt.end()); + } fmt += 'd'; SCOPED_TRACE(fmt); EXPECT_TRUE(Run(fmt.c_str())); - EXPECT_EQ(fmt.find('-') == std::string::npos, !o.flags.left); - EXPECT_EQ(fmt.find('+') == std::string::npos, !o.flags.show_pos); - EXPECT_EQ(fmt.find(' ') == std::string::npos, !o.flags.sign_col); - EXPECT_EQ(fmt.find('#') == std::string::npos, !o.flags.alt); - EXPECT_EQ(fmt.find('0') == std::string::npos, !o.flags.zero); + EXPECT_EQ(fmt.find('-') == std::string::npos, + !FlagsContains(o.flags, Flags::kLeft)); + EXPECT_EQ(fmt.find('+') == std::string::npos, + !FlagsContains(o.flags, Flags::kShowPos)); + EXPECT_EQ(fmt.find(' ') == std::string::npos, + !FlagsContains(o.flags, Flags::kSignCol)); + EXPECT_EQ(fmt.find('#') == std::string::npos, + !FlagsContains(o.flags, Flags::kAlt)); + EXPECT_EQ(fmt.find('0') == std::string::npos, + !FlagsContains(o.flags, Flags::kZero)); } } } @@ -274,14 +295,14 
@@ TEST_F(ConsumeUnboundConversionTest, BasicFlag) { for (const char* fmt : {"d", "llx", "G", "1$X"}) { SCOPED_TRACE(fmt); EXPECT_TRUE(Run(fmt)); - EXPECT_TRUE(o.flags.basic); + EXPECT_EQ(o.flags, Flags::kBasic); } // Flag is off for (const char* fmt : {"3d", ".llx", "-G", "1$#X"}) { SCOPED_TRACE(fmt); EXPECT_TRUE(Run(fmt)); - EXPECT_FALSE(o.flags.basic); + EXPECT_NE(o.flags, Flags::kBasic); } } @@ -349,7 +370,8 @@ TEST_F(ParsedFormatTest, ValueSemantics) { ParsedFormatBase p2 = p1; // copy construct (empty) EXPECT_EQ(SummarizeParsedFormat(p1), SummarizeParsedFormat(p2)); - p1 = ParsedFormatBase("hello%s", true, {Conv::s}); // move assign + p1 = ParsedFormatBase("hello%s", true, + {FormatConversionCharSetInternal::s}); // move assign EXPECT_EQ("[hello]{s:1$s}", SummarizeParsedFormat(p1)); ParsedFormatBase p3 = p1; // copy construct (nonempty) @@ -367,7 +389,7 @@ TEST_F(ParsedFormatTest, ValueSemantics) { struct ExpectParse { const char* in; - std::initializer_list<Conv> conv_set; + std::initializer_list<FormatConversionCharSet> conv_set; const char* out; }; @@ -377,9 +399,9 @@ TEST_F(ParsedFormatTest, Parsing) { const ExpectParse kExpect[] = { {"", {}, ""}, {"ab", {}, "[ab]"}, - {"a%d", {Conv::d}, "[a]{d:1$d}"}, - {"a%+d", {Conv::d}, "[a]{+d:1$d}"}, - {"a% d", {Conv::d}, "[a]{ d:1$d}"}, + {"a%d", {FormatConversionCharSetInternal::d}, "[a]{d:1$d}"}, + {"a%+d", {FormatConversionCharSetInternal::d}, "[a]{+d:1$d}"}, + {"a% d", {FormatConversionCharSetInternal::d}, "[a]{ d:1$d}"}, {"a%b %d", {}, "[a]!"}, // stop after error }; for (const auto& e : kExpect) { @@ -391,13 +413,13 @@ TEST_F(ParsedFormatTest, Parsing) { TEST_F(ParsedFormatTest, ParsingFlagOrder) { const ExpectParse kExpect[] = { - {"a%+ 0d", {Conv::d}, "[a]{+ 0d:1$d}"}, - {"a%+0 d", {Conv::d}, "[a]{+0 d:1$d}"}, - {"a%0+ d", {Conv::d}, "[a]{0+ d:1$d}"}, - {"a% +0d", {Conv::d}, "[a]{ +0d:1$d}"}, - {"a%0 +d", {Conv::d}, "[a]{0 +d:1$d}"}, - {"a% 0+d", {Conv::d}, "[a]{ 0+d:1$d}"}, - {"a%+ 0+d", {Conv::d}, "[a]{+ 
0+d:1$d}"}, + {"a%+ 0d", {FormatConversionCharSetInternal::d}, "[a]{+ 0d:1$d}"}, + {"a%+0 d", {FormatConversionCharSetInternal::d}, "[a]{+0 d:1$d}"}, + {"a%0+ d", {FormatConversionCharSetInternal::d}, "[a]{0+ d:1$d}"}, + {"a% +0d", {FormatConversionCharSetInternal::d}, "[a]{ +0d:1$d}"}, + {"a%0 +d", {FormatConversionCharSetInternal::d}, "[a]{0 +d:1$d}"}, + {"a% 0+d", {FormatConversionCharSetInternal::d}, "[a]{ 0+d:1$d}"}, + {"a%+ 0+d", {FormatConversionCharSetInternal::d}, "[a]{+ 0+d:1$d}"}, }; for (const auto& e : kExpect) { SCOPED_TRACE(e.in); diff --git a/third_party/abseil-cpp/absl/strings/internal/str_split_internal.h b/third_party/abseil-cpp/absl/strings/internal/str_split_internal.h index b54f6ebe09..e766421617 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_split_internal.h +++ b/third_party/abseil-cpp/absl/strings/internal/str_split_internal.h @@ -32,7 +32,7 @@ #include <array> #include <initializer_list> #include <iterator> -#include <map> +#include <tuple> #include <type_traits> #include <utility> #include <vector> @@ -51,9 +51,9 @@ ABSL_NAMESPACE_BEGIN namespace strings_internal { // This class is implicitly constructible from everything that absl::string_view -// is implicitly constructible from. If it's constructed from a temporary -// string, the data is moved into a data member so its lifetime matches that of -// the ConvertibleToStringView instance. +// is implicitly constructible from, except for rvalue strings. This means it +// can be used as a function parameter in places where passing a temporary +// string might cause memory lifetime issues. class ConvertibleToStringView { public: ConvertibleToStringView(const char* s) // NOLINT(runtime/explicit) @@ -64,42 +64,13 @@ class ConvertibleToStringView { ConvertibleToStringView(const std::string& s) // NOLINT(runtime/explicit) : value_(s) {} - // Matches rvalue strings and moves their data to a member. 
-ConvertibleToStringView(std::string&& s) // NOLINT(runtime/explicit) - : copy_(std::move(s)), value_(copy_) {} - - ConvertibleToStringView(const ConvertibleToStringView& other) - : copy_(other.copy_), - value_(other.IsSelfReferential() ? copy_ : other.value_) {} - - ConvertibleToStringView(ConvertibleToStringView&& other) { - StealMembers(std::move(other)); - } - - ConvertibleToStringView& operator=(ConvertibleToStringView other) { - StealMembers(std::move(other)); - return *this; - } + // Disable conversion from rvalue strings. + ConvertibleToStringView(std::string&& s) = delete; + ConvertibleToStringView(const std::string&& s) = delete; absl::string_view value() const { return value_; } private: - // Returns true if ctsp's value refers to its internal copy_ member. - bool IsSelfReferential() const { return value_.data() == copy_.data(); } - - void StealMembers(ConvertibleToStringView&& other) { - if (other.IsSelfReferential()) { - copy_ = std::move(other.copy_); - value_ = copy_; - other.value_ = other.copy_; - } else { - value_ = other.value_; - } - } - - // Holds the data moved from temporary std::string arguments. Declared first - // so that 'value' can refer to 'copy_'. - std::string copy_; absl::string_view value_; }; @@ -211,6 +182,13 @@ template <typename T> struct HasConstIterator<T, absl::void_t<typename T::const_iterator>> : std::true_type {}; +// HasEmplace<T>::value is true iff there exists a method T::emplace(). +template <typename T, typename = void> +struct HasEmplace : std::false_type {}; +template <typename T> +struct HasEmplace<T, absl::void_t<decltype(std::declval<T>().emplace())>> + : std::true_type {}; + // IsInitializerList<T>::value is true iff T is an std::initializer_list. More // details below in Splitter<> where this is used. std::false_type IsInitializerListDispatch(...); // default: No @@ -273,7 +251,11 @@ struct SplitterIsConvertibleTo // the split strings: only strings for which the predicate returns true will be // kept. 
A Predicate object is any unary functor that takes an absl::string_view // and returns bool. -template <typename Delimiter, typename Predicate> +// +// The StringType parameter can be either string_view or string, depending on +// whether the Splitter refers to a string stored elsewhere, or if the string +// resides inside the Splitter itself. +template <typename Delimiter, typename Predicate, typename StringType> class Splitter { public: using DelimiterType = Delimiter; @@ -281,12 +263,12 @@ class Splitter { using const_iterator = strings_internal::SplitIterator<Splitter>; using value_type = typename std::iterator_traits<const_iterator>::value_type; - Splitter(ConvertibleToStringView input_text, Delimiter d, Predicate p) + Splitter(StringType input_text, Delimiter d, Predicate p) : text_(std::move(input_text)), delimiter_(std::move(d)), predicate_(std::move(p)) {} - absl::string_view text() const { return text_.value(); } + absl::string_view text() const { return text_; } const Delimiter& delimiter() const { return delimiter_; } const Predicate& predicate() const { return predicate_; } @@ -336,7 +318,7 @@ class Splitter { Container operator()(const Splitter& splitter) const { Container c; auto it = std::inserter(c, c.end()); - for (const auto sp : splitter) { + for (const auto& sp : splitter) { *it++ = ValueType(sp); } return c; @@ -397,53 +379,46 @@ class Splitter { // value. 
template <typename Container, typename First, typename Second> struct ConvertToContainer<Container, std::pair<const First, Second>, true> { + using iterator = typename Container::iterator; + Container operator()(const Splitter& splitter) const { Container m; - typename Container::iterator it; + iterator it; bool insert = true; - for (const auto sp : splitter) { + for (const absl::string_view sv : splitter) { if (insert) { - it = Inserter<Container>::Insert(&m, First(sp), Second()); + it = InsertOrEmplace(&m, sv); } else { - it->second = Second(sp); + it->second = Second(sv); } insert = !insert; } return m; } - // Inserts the key and value into the given map, returning an iterator to - // the inserted item. Specialized for std::map and std::multimap to use - // emplace() and adapt emplace()'s return value. - template <typename Map> - struct Inserter { - using M = Map; - template <typename... Args> - static typename M::iterator Insert(M* m, Args&&... args) { - return m->insert(std::make_pair(std::forward<Args>(args)...)).first; - } - }; - - template <typename... Ts> - struct Inserter<std::map<Ts...>> { - using M = std::map<Ts...>; - template <typename... Args> - static typename M::iterator Insert(M* m, Args&&... args) { - return m->emplace(std::make_pair(std::forward<Args>(args)...)).first; - } - }; - - template <typename... Ts> - struct Inserter<std::multimap<Ts...>> { - using M = std::multimap<Ts...>; - template <typename... Args> - static typename M::iterator Insert(M* m, Args&&... args) { - return m->emplace(std::make_pair(std::forward<Args>(args)...)); - } - }; + // Inserts the key and an empty value into the map, returning an iterator to + // the inserted item. We use emplace() if available, otherwise insert(). 
+ template <typename M> + static absl::enable_if_t<HasEmplace<M>::value, iterator> InsertOrEmplace( + M* m, absl::string_view key) { + // Use piecewise_construct to support old versions of gcc in which pair + // constructor can't otherwise construct string from string_view. + return ToIter(m->emplace(std::piecewise_construct, std::make_tuple(key), + std::tuple<>())); + } + template <typename M> + static absl::enable_if_t<!HasEmplace<M>::value, iterator> InsertOrEmplace( + M* m, absl::string_view key) { + return ToIter(m->insert(std::make_pair(First(key), Second("")))); + } + + static iterator ToIter(std::pair<iterator, bool> pair) { + return pair.first; + } + static iterator ToIter(iterator iter) { return iter; } }; - ConvertibleToStringView text_; + StringType text_; Delimiter delimiter_; Predicate predicate_; }; diff --git a/third_party/abseil-cpp/absl/strings/internal/string_constant.h b/third_party/abseil-cpp/absl/strings/internal/string_constant.h new file mode 100644 index 0000000000..a11336b7f0 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/string_constant.h @@ -0,0 +1,64 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef ABSL_STRINGS_INTERNAL_STRING_CONSTANT_H_ +#define ABSL_STRINGS_INTERNAL_STRING_CONSTANT_H_ + +#include "absl/meta/type_traits.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// StringConstant<T> represents a compile time string constant. +// It can be accessed via its `absl::string_view value` static member. +// It is guaranteed that the `string_view` returned has constant `.data()`, +// constant `.size()` and constant `value[i]` for all `0 <= i < .size()` +// +// The `T` is an opaque type. It is guaranteed that different string constants +// will have different values of `T`. This allows users to associate the string +// constant with other static state at compile time. +// +// Instances should be made using the `MakeStringConstant()` factory function +// below. +template <typename T> +struct StringConstant { + static constexpr absl::string_view value = T{}(); + constexpr absl::string_view operator()() const { return value; } + + // Check to be sure `view` points to constant data. + // Otherwise, it can't be constant evaluated. + static_assert(value.empty() || 2 * value[0] != 1, + "The input string_view must point to constant data."); +}; + +template <typename T> +constexpr absl::string_view StringConstant<T>::value; // NOLINT + +// Factory function for `StringConstant` instances. +// It supports callables that have a constexpr default constructor and a +// constexpr operator(). +// It must return an `absl::string_view` or `const char*` pointing to constant +// data. This is validated at compile time. 
+template <typename T> +constexpr StringConstant<T> MakeStringConstant(T) { + return {}; +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STRING_CONSTANT_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/string_constant_test.cc b/third_party/abseil-cpp/absl/strings/internal/string_constant_test.cc new file mode 100644 index 0000000000..392833cf15 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/string_constant_test.cc @@ -0,0 +1,60 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/internal/string_constant.h" + +#include "absl/meta/type_traits.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace { + +using absl::strings_internal::MakeStringConstant; + +struct Callable { + constexpr absl::string_view operator()() const { + return absl::string_view("Callable", 8); + } +}; + +TEST(StringConstant, Traits) { + constexpr auto str = MakeStringConstant(Callable{}); + using T = decltype(str); + + EXPECT_TRUE(std::is_empty<T>::value); + EXPECT_TRUE(std::is_trivial<T>::value); + EXPECT_TRUE(absl::is_trivially_default_constructible<T>::value); + EXPECT_TRUE(absl::is_trivially_copy_constructible<T>::value); + EXPECT_TRUE(absl::is_trivially_move_constructible<T>::value); + EXPECT_TRUE(absl::is_trivially_destructible<T>::value); +} + +TEST(StringConstant, MakeFromCallable) { + constexpr auto str = MakeStringConstant(Callable{}); + using T = decltype(str); + EXPECT_EQ(Callable{}(), T::value); + EXPECT_EQ(Callable{}(), str()); +} + +TEST(StringConstant, MakeFromStringConstant) { + // We want to make sure the StringConstant itself is a valid input to the + // factory function. + constexpr auto str = MakeStringConstant(Callable{}); + constexpr auto str2 = MakeStringConstant(str); + using T = decltype(str2); + EXPECT_EQ(Callable{}(), T::value); + EXPECT_EQ(Callable{}(), str2()); +} + +} // namespace |