aboutsummaryrefslogtreecommitdiff
path: root/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree.cc
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/abseil-cpp/absl/strings/internal/cord_rep_btree.cc')
-rw-r--r--third_party/abseil-cpp/absl/strings/internal/cord_rep_btree.cc1128
1 files changed, 1128 insertions, 0 deletions
diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree.cc b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree.cc
new file mode 100644
index 0000000000..4404f33a12
--- /dev/null
+++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree.cc
@@ -0,0 +1,1128 @@
+// Copyright 2021 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/internal/cord_rep_btree.h"
+
+#include <cassert>
+#include <cstdint>
+#include <iostream>
+#include <string>
+
+#include "absl/base/attributes.h"
+#include "absl/base/config.h"
+#include "absl/base/internal/raw_logging.h"
+#include "absl/strings/internal/cord_internal.h"
+#include "absl/strings/internal/cord_rep_consume.h"
+#include "absl/strings/internal/cord_rep_flat.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/string_view.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace cord_internal {
+
+constexpr size_t CordRepBtree::kMaxCapacity; // NOLINT: needed for c++ < c++17
+
+namespace {
+
+using NodeStack = CordRepBtree * [CordRepBtree::kMaxDepth];
+using EdgeType = CordRepBtree::EdgeType;
+using OpResult = CordRepBtree::OpResult;
+using CopyResult = CordRepBtree::CopyResult;
+
+constexpr auto kFront = CordRepBtree::kFront;
+constexpr auto kBack = CordRepBtree::kBack;
+
+inline bool exhaustive_validation() {
+ return cord_btree_exhaustive_validation.load(std::memory_order_relaxed);
+}
+
+// Implementation of the various 'Dump' functions.
+// Prints the entire tree structure or 'rep'. External callers should
+// not specify 'depth' and leave it to its default (0) value.
+// Rep may be a CordRepBtree tree, or a SUBSTRING / EXTERNAL / FLAT node.
+void DumpAll(const CordRep* rep, bool include_contents, std::ostream& stream,
+ int depth = 0) {
+ // Allow for full height trees + substring -> flat / external nodes.
+ assert(depth <= CordRepBtree::kMaxDepth + 2);
+ std::string sharing = const_cast<CordRep*>(rep)->refcount.IsOne()
+ ? std::string("Private")
+ : absl::StrCat("Shared(", rep->refcount.Get(), ")");
+ std::string sptr = absl::StrCat("0x", absl::Hex(rep));
+
+ // Dumps the data contents of `rep` if `include_contents` is true.
+ // Always emits a new line character.
+ auto maybe_dump_data = [&stream, include_contents](const CordRep* r) {
+ if (include_contents) {
+ // Allow for up to 60 wide display of content data, which with some
+ // indentation and prefix / labels keeps us within roughly 80-100 wide.
+ constexpr size_t kMaxDataLength = 60;
+ stream << ", data = \""
+ << CordRepBtree::EdgeData(r).substr(0, kMaxDataLength)
+ << (r->length > kMaxDataLength ? "\"..." : "\"");
+ }
+ stream << '\n';
+ };
+
+ // For each level, we print the 'shared/private' state and the rep pointer,
+ // indented by two spaces per recursive depth.
+ stream << std::string(depth * 2, ' ') << sharing << " (" << sptr << ") ";
+
+ if (rep->IsBtree()) {
+ const CordRepBtree* node = rep->btree();
+ std::string label =
+ node->height() ? absl::StrCat("Node(", node->height(), ")") : "Leaf";
+ stream << label << ", len = " << node->length
+ << ", begin = " << node->begin() << ", end = " << node->end()
+ << "\n";
+ for (CordRep* edge : node->Edges()) {
+ DumpAll(edge, include_contents, stream, depth + 1);
+ }
+ } else if (rep->tag == SUBSTRING) {
+ const CordRepSubstring* substring = rep->substring();
+ stream << "Substring, len = " << rep->length
+ << ", start = " << substring->start;
+ maybe_dump_data(rep);
+ DumpAll(substring->child, include_contents, stream, depth + 1);
+ } else if (rep->tag >= FLAT) {
+ stream << "Flat, len = " << rep->length
+ << ", cap = " << rep->flat()->Capacity();
+ maybe_dump_data(rep);
+ } else if (rep->tag == EXTERNAL) {
+ stream << "Extn, len = " << rep->length;
+ maybe_dump_data(rep);
+ }
+}
+
+// TODO(b/192061034): add 'bytes to copy' logic to avoid large slop on substring
+// small data out of large reps, and general efficiency of 'always copy small
+// data'. Consider making this a cord rep internal library function.
+CordRepSubstring* CreateSubstring(CordRep* rep, size_t offset, size_t n) {
+ assert(n != 0);
+ assert(offset + n <= rep->length);
+ assert(offset != 0 || n != rep->length);
+
+ if (rep->tag == SUBSTRING) {
+ CordRepSubstring* substring = rep->substring();
+ offset += substring->start;
+ rep = CordRep::Ref(substring->child);
+ CordRep::Unref(substring);
+ }
+ CordRepSubstring* substring = new CordRepSubstring();
+ substring->length = n;
+ substring->tag = SUBSTRING;
+ substring->start = offset;
+ substring->child = rep;
+ return substring;
+}
+
+// TODO(b/192061034): consider making this a cord rep library function.
+inline CordRep* MakeSubstring(CordRep* rep, size_t offset, size_t n) {
+ if (n == rep->length) return rep;
+ if (n == 0) return CordRep::Unref(rep), nullptr;
+ return CreateSubstring(rep, offset, n);
+}
+
+// TODO(b/192061034): consider making this a cord rep library function.
+inline CordRep* MakeSubstring(CordRep* rep, size_t offset) {
+ if (offset == 0) return rep;
+ return CreateSubstring(rep, offset, rep->length - offset);
+}
+
+// Resizes `edge` to the provided `length`. Adopts a reference on `edge`.
+// This method directly returns `edge` if `length` equals `edge->length`.
+// If `is_mutable` is set to true, this function may return `edge` with
+// `edge->length` set to the new length depending on the type and size of
+// `edge`. Otherwise, this function returns a new CordRepSubstring value.
+// Requires `length > 0 && length <= edge->length`.
+CordRep* ResizeEdge(CordRep* edge, size_t length, bool is_mutable) {
+ assert(length > 0);
+ assert(length <= edge->length);
+ assert(CordRepBtree::IsDataEdge(edge));
+ if (length >= edge->length) return edge;
+
+ if (is_mutable && (edge->tag >= FLAT || edge->tag == SUBSTRING)) {
+ edge->length = length;
+ return edge;
+ }
+
+ return CreateSubstring(edge, 0, length);
+}
+
+template <EdgeType edge_type>
+inline absl::string_view Consume(absl::string_view s, size_t n) {
+ return edge_type == kBack ? s.substr(n) : s.substr(0, s.size() - n);
+}
+
+template <EdgeType edge_type>
+inline absl::string_view Consume(char* dst, absl::string_view s, size_t n) {
+ if (edge_type == kBack) {
+ memcpy(dst, s.data(), n);
+ return s.substr(n);
+ } else {
+ const size_t offset = s.size() - n;
+ memcpy(dst, s.data() + offset, n);
+ return s.substr(0, offset);
+ }
+}
+
+// Known issue / optimization weirdness: the store associated with the
+// decrement introduces traffic between cpus (even if the result of that
+// traffic does nothing), making this faster than a single call to
+// refcount.Decrement() checking the zero refcount condition.
+template <typename R, typename Fn>
+inline void FastUnref(R* r, Fn&& fn) {
+ if (r->refcount.IsOne()) {
+ fn(r);
+ } else if (!r->refcount.DecrementExpectHighRefcount()) {
+ fn(r);
+ }
+}
+
+// Deletes a leaf node data edge. Requires `rep` to be an EXTERNAL or FLAT
+// node, or a SUBSTRING of an EXTERNAL or FLAT node.
+void DeleteLeafEdge(CordRep* rep) {
+ for (;;) {
+ if (rep->tag >= FLAT) {
+ CordRepFlat::Delete(rep->flat());
+ return;
+ }
+ if (rep->tag == EXTERNAL) {
+ CordRepExternal::Delete(rep->external());
+ return;
+ }
+ assert(rep->tag == SUBSTRING);
+ CordRepSubstring* substring = rep->substring();
+ rep = substring->child;
+ assert(rep->tag == EXTERNAL || rep->tag >= FLAT);
+ delete substring;
+ if (rep->refcount.Decrement()) return;
+ }
+}
+
+// StackOperations contains the logic to build a left-most or right-most stack
+// (leg) down to the leaf level of a btree, and 'unwind' / 'Finalize' methods to
+// propagate node changes up the stack.
+template <EdgeType edge_type>
+struct StackOperations {
+ // Returns true if the node at 'depth' is mutable, i.e. has a refcount
+ // of one, carries no CRC, and all of its parent nodes have a refcount of one.
+ inline bool owned(int depth) const { return depth < share_depth; }
+
+ // Returns the node at 'depth'.
+ inline CordRepBtree* node(int depth) const { return stack[depth]; }
+
+ // Builds a `depth` levels deep stack starting at `tree` recording which nodes
+ // are private in the form of the 'share depth' where nodes are shared.
+ inline CordRepBtree* BuildStack(CordRepBtree* tree, int depth) {
+ assert(depth <= tree->height());
+ int current_depth = 0;
+ while (current_depth < depth && tree->refcount.IsMutable()) {
+ stack[current_depth++] = tree;
+ tree = tree->Edge(edge_type)->btree();
+ }
+ share_depth = current_depth + (tree->refcount.IsMutable() ? 1 : 0);
+ while (current_depth < depth) {
+ stack[current_depth++] = tree;
+ tree = tree->Edge(edge_type)->btree();
+ }
+ return tree;
+ }
+
+ // Builds a stack with the invariant that all nodes are private owned / not
+ // shared and carry no CRC data. This is used in iterative updates where a
+ // previous propagation guaranteed all nodes have this property.
+ inline void BuildOwnedStack(CordRepBtree* tree, int height) {
+ assert(height <= CordRepBtree::kMaxHeight);
+ int depth = 0;
+ while (depth < height) {
+ assert(tree->refcount.IsMutable());
+ stack[depth++] = tree;
+ tree = tree->Edge(edge_type)->btree();
+ }
+ assert(tree->refcount.IsMutable());
+ share_depth = depth + 1;
+ }
+
+ // Processes the final 'top level' result action for the tree.
+ // See the 'Action' enum for the various action implications.
+ static inline CordRepBtree* Finalize(CordRepBtree* tree, OpResult result) {
+ switch (result.action) {
+ case CordRepBtree::kPopped:
+ tree = edge_type == kBack ? CordRepBtree::New(tree, result.tree)
+ : CordRepBtree::New(result.tree, tree);
+ if (ABSL_PREDICT_FALSE(tree->height() > CordRepBtree::kMaxHeight)) {
+ tree = CordRepBtree::Rebuild(tree);
+ ABSL_RAW_CHECK(tree->height() <= CordRepBtree::kMaxHeight,
+ "Max height exceeded");
+ }
+ return tree;
+ case CordRepBtree::kCopied:
+ CordRep::Unref(tree);
+ ABSL_FALLTHROUGH_INTENDED;
+ case CordRepBtree::kSelf:
+ return result.tree;
+ }
+ ABSL_INTERNAL_UNREACHABLE;
+ return result.tree;
+ }
+
+ // Propagate the action result in 'result' up into all nodes of the stack
+ // starting at depth 'depth'. 'length' contains the extra length of data that
+ // was added at the lowest level, and is updated into all nodes of the stack.
+ // See the 'Action' enum for the various action implications.
+ // If 'propagate' is true, then any copied node values are updated into the
+ // stack, which is used for iterative processing on the same stack.
+ template <bool propagate = false>
+ inline CordRepBtree* Unwind(CordRepBtree* tree, int depth, size_t length,
+ OpResult result) {
+ // TODO(mvels): revisit the below code to check if 3 loops with 3
+ // (incremental) conditions is faster than 1 loop with a switch.
+ // Benchmarking and perf recordings indicate the loop with switch is
+ // fastest, likely because of indirect jumps on the tight case values and
+ // dense branches. But it's worth considering 3 loops, as the `action`
+ // transitions are mono directional. E.g.:
+ // while (action == kPopped) {
+ // ...
+ // }
+ // while (action == kCopied) {
+ // ...
+ // }
+ // ...
+ // We also found that an "if () do {}" loop here seems faster, possibly
+ // because it allows the branch predictor more granular heuristics on
+ // 'single leaf' (`depth` == 0) and 'single depth' (`depth` == 1) cases
+ // which appear to be the most common use cases.
+ if (depth != 0) {
+ do {
+ CordRepBtree* node = stack[--depth];
+ const bool owned = depth < share_depth;
+ switch (result.action) {
+ case CordRepBtree::kPopped:
+ assert(!propagate);
+ result = node->AddEdge<edge_type>(owned, result.tree, length);
+ break;
+ case CordRepBtree::kCopied:
+ result = node->SetEdge<edge_type>(owned, result.tree, length);
+ if (propagate) stack[depth] = result.tree;
+ break;
+ case CordRepBtree::kSelf:
+ node->length += length;
+ while (depth > 0) {
+ node = stack[--depth];
+ node->length += length;
+ }
+ return node;
+ }
+ } while (depth > 0);
+ }
+ return Finalize(tree, result);
+ }
+
+ // Invokes `Unwind` with `propagate=true` to update the stack node values.
+ inline CordRepBtree* Propagate(CordRepBtree* tree, int depth, size_t length,
+ OpResult result) {
+ return Unwind</*propagate=*/true>(tree, depth, length, result);
+ }
+
+ // `share_depth` contains the depth at which the nodes in the stack cannot
+ // be mutated. I.e., if the top most level is shared (i.e.:
+ // `!refcount.IsMutable()`), then `share_depth` is 0. If the 2nd node
+ // is shared (and implicitly all nodes below that) then `share_depth` is 1,
+ // etc. A `share_depth` greater than the depth of the stack indicates that
+ // none of the nodes in the stack are shared.
+ int share_depth;
+
+ NodeStack stack;
+};
+
+} // namespace
+
+void CordRepBtree::Dump(const CordRep* rep, absl::string_view label,
+ bool include_contents, std::ostream& stream) {
+ stream << "===================================\n";
+ if (!label.empty()) {
+ stream << label << '\n';
+ stream << "-----------------------------------\n";
+ }
+ if (rep) {
+ DumpAll(rep, include_contents, stream);
+ } else {
+ stream << "NULL\n";
+ }
+}
+
+void CordRepBtree::Dump(const CordRep* rep, absl::string_view label,
+ std::ostream& stream) {
+ Dump(rep, label, false, stream);
+}
+
+void CordRepBtree::Dump(const CordRep* rep, std::ostream& stream) {
+ Dump(rep, absl::string_view(), false, stream);
+}
+
+void CordRepBtree::DestroyLeaf(CordRepBtree* tree, size_t begin, size_t end) {
+ for (CordRep* edge : tree->Edges(begin, end)) {
+ FastUnref(edge, DeleteLeafEdge);
+ }
+ Delete(tree);
+}
+
+void CordRepBtree::DestroyNonLeaf(CordRepBtree* tree, size_t begin,
+ size_t end) {
+ for (CordRep* edge : tree->Edges(begin, end)) {
+ FastUnref(edge->btree(), Destroy);
+ }
+ Delete(tree);
+}
+
+bool CordRepBtree::IsValid(const CordRepBtree* tree, bool shallow) {
+#define NODE_CHECK_VALID(x) \
+ if (!(x)) { \
+ ABSL_RAW_LOG(ERROR, "CordRepBtree::CheckValid() FAILED: %s", #x); \
+ return false; \
+ }
+#define NODE_CHECK_EQ(x, y) \
+ if ((x) != (y)) { \
+ ABSL_RAW_LOG(ERROR, \
+ "CordRepBtree::CheckValid() FAILED: %s != %s (%s vs %s)", #x, \
+ #y, absl::StrCat(x).c_str(), absl::StrCat(y).c_str()); \
+ return false; \
+ }
+
+ NODE_CHECK_VALID(tree != nullptr);
+ NODE_CHECK_VALID(tree->IsBtree());
+ NODE_CHECK_VALID(tree->height() <= kMaxHeight);
+ NODE_CHECK_VALID(tree->begin() < tree->capacity());
+ NODE_CHECK_VALID(tree->end() <= tree->capacity());
+ NODE_CHECK_VALID(tree->begin() <= tree->end());
+ size_t child_length = 0;
+ for (CordRep* edge : tree->Edges()) {
+ NODE_CHECK_VALID(edge != nullptr);
+ if (tree->height() > 0) {
+ NODE_CHECK_VALID(edge->IsBtree());
+ NODE_CHECK_VALID(edge->btree()->height() == tree->height() - 1);
+ } else {
+ NODE_CHECK_VALID(IsDataEdge(edge));
+ }
+ child_length += edge->length;
+ }
+ NODE_CHECK_EQ(child_length, tree->length);
+ if ((!shallow || exhaustive_validation()) && tree->height() > 0) {
+ for (CordRep* edge : tree->Edges()) {
+ if (!IsValid(edge->btree(), shallow)) return false;
+ }
+ }
+ return true;
+
+#undef NODE_CHECK_VALID
+#undef NODE_CHECK_EQ
+}
+
+#ifndef NDEBUG
+
+CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree, bool shallow) {
+ if (!IsValid(tree, shallow)) {
+ Dump(tree, "CordRepBtree validation failed:", false, std::cout);
+ ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED");
+ }
+ return tree;
+}
+
+const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree,
+ bool shallow) {
+ if (!IsValid(tree, shallow)) {
+ Dump(tree, "CordRepBtree validation failed:", false, std::cout);
+ ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED");
+ }
+ return tree;
+}
+
+#endif // NDEBUG
+
+template <EdgeType edge_type>
+inline OpResult CordRepBtree::AddEdge(bool owned, CordRep* edge, size_t delta) {
+ if (size() >= kMaxCapacity) return {New(edge), kPopped};
+ OpResult result = ToOpResult(owned);
+ result.tree->Add<edge_type>(edge);
+ result.tree->length += delta;
+ return result;
+}
+
+template <EdgeType edge_type>
+OpResult CordRepBtree::SetEdge(bool owned, CordRep* edge, size_t delta) {
+ OpResult result;
+ const size_t idx = index(edge_type);
+ if (owned) {
+ result = {this, kSelf};
+ CordRep::Unref(edges_[idx]);
+ } else {
+ // Create a copy containing all unchanged edges. Unchanged edges are the
+ // open interval [begin, back) or [begin + 1, end) depending on `edge_type`.
+ // We conveniently cover both case using a constexpr `shift` being 0 or 1
+ // as `end :== back + 1`.
+ result = {CopyRaw(), kCopied};
+ constexpr int shift = edge_type == kFront ? 1 : 0;
+ for (CordRep* r : Edges(begin() + shift, back() + shift)) {
+ CordRep::Ref(r);
+ }
+ }
+ result.tree->edges_[idx] = edge;
+ result.tree->length += delta;
+ return result;
+}
+
+template <EdgeType edge_type>
+CordRepBtree* CordRepBtree::AddCordRep(CordRepBtree* tree, CordRep* rep) {
+ const int depth = tree->height();
+ const size_t length = rep->length;
+ StackOperations<edge_type> ops;
+ CordRepBtree* leaf = ops.BuildStack(tree, depth);
+ const OpResult result =
+ leaf->AddEdge<edge_type>(ops.owned(depth), rep, length);
+ return ops.Unwind(tree, depth, length, result);
+}
+
+template <>
+CordRepBtree* CordRepBtree::NewLeaf<kBack>(absl::string_view data,
+ size_t extra) {
+ CordRepBtree* leaf = CordRepBtree::New(0);
+ size_t length = 0;
+ size_t end = 0;
+ const size_t cap = leaf->capacity();
+ while (!data.empty() && end != cap) {
+ auto* flat = CordRepFlat::New(data.length() + extra);
+ flat->length = (std::min)(data.length(), flat->Capacity());
+ length += flat->length;
+ leaf->edges_[end++] = flat;
+ data = Consume<kBack>(flat->Data(), data, flat->length);
+ }
+ leaf->length = length;
+ leaf->set_end(end);
+ return leaf;
+}
+
+template <>
+CordRepBtree* CordRepBtree::NewLeaf<kFront>(absl::string_view data,
+ size_t extra) {
+ CordRepBtree* leaf = CordRepBtree::New(0);
+ size_t length = 0;
+ size_t begin = leaf->capacity();
+ leaf->set_end(leaf->capacity());
+ while (!data.empty() && begin != 0) {
+ auto* flat = CordRepFlat::New(data.length() + extra);
+ flat->length = (std::min)(data.length(), flat->Capacity());
+ length += flat->length;
+ leaf->edges_[--begin] = flat;
+ data = Consume<kFront>(flat->Data(), data, flat->length);
+ }
+ leaf->length = length;
+ leaf->set_begin(begin);
+ return leaf;
+}
+
+template <>
+absl::string_view CordRepBtree::AddData<kBack>(absl::string_view data,
+ size_t extra) {
+ assert(!data.empty());
+ assert(size() < capacity());
+ AlignBegin();
+ const size_t cap = capacity();
+ do {
+ CordRepFlat* flat = CordRepFlat::New(data.length() + extra);
+ const size_t n = (std::min)(data.length(), flat->Capacity());
+ flat->length = n;
+ edges_[fetch_add_end(1)] = flat;
+ data = Consume<kBack>(flat->Data(), data, n);
+ } while (!data.empty() && end() != cap);
+ return data;
+}
+
+template <>
+absl::string_view CordRepBtree::AddData<kFront>(absl::string_view data,
+ size_t extra) {
+ assert(!data.empty());
+ assert(size() < capacity());
+ AlignEnd();
+ do {
+ CordRepFlat* flat = CordRepFlat::New(data.length() + extra);
+ const size_t n = (std::min)(data.length(), flat->Capacity());
+ flat->length = n;
+ edges_[sub_fetch_begin(1)] = flat;
+ data = Consume<kFront>(flat->Data(), data, n);
+ } while (!data.empty() && begin() != 0);
+ return data;
+}
+
+template <EdgeType edge_type>
+CordRepBtree* CordRepBtree::AddData(CordRepBtree* tree, absl::string_view data,
+ size_t extra) {
+ if (ABSL_PREDICT_FALSE(data.empty())) return tree;
+
+ const size_t original_data_size = data.size();
+ int depth = tree->height();
+ StackOperations<edge_type> ops;
+ CordRepBtree* leaf = ops.BuildStack(tree, depth);
+
+ // If there is capacity in the last edge, append as much data
+ // as possible into this last edge.
+ if (leaf->size() < leaf->capacity()) {
+ OpResult result = leaf->ToOpResult(ops.owned(depth));
+ data = result.tree->AddData<edge_type>(data, extra);
+ if (data.empty()) {
+ result.tree->length += original_data_size;
+ return ops.Unwind(tree, depth, original_data_size, result);
+ }
+
+ // We added some data into this leaf, but not all. Propagate the added
+ // length to the top most node, and rebuild the stack with any newly copied
+ // or updated nodes. From this point on, the path (leg) from the top most
+ // node to the right-most node towards the leaf node is privately owned.
+ size_t delta = original_data_size - data.size();
+ assert(delta > 0);
+ result.tree->length += delta;
+ tree = ops.Propagate(tree, depth, delta, result);
+ ops.share_depth = depth + 1;
+ }
+
+ // We were unable to append all data into the existing right-most leaf node.
+ // This means all remaining data must be put into (a) new leaf node(s) which
+ // we append to the tree. To make this efficient, we iteratively build full
+ // leaf nodes from `data` until the created leaf contains all remaining data.
+ // We utilize the `Unwind` method to merge the created leaf into the first
+ // level towards root that has capacity. On each iteration with remaining
+ // data, we rebuild the stack in the knowledge that right-most nodes are
+ // privately owned after the first `Unwind` completes.
+ for (;;) {
+ OpResult result = {CordRepBtree::NewLeaf<edge_type>(data, extra), kPopped};
+ if (result.tree->length == data.size()) {
+ return ops.Unwind(tree, depth, result.tree->length, result);
+ }
+ data = Consume<edge_type>(data, result.tree->length);
+ tree = ops.Unwind(tree, depth, result.tree->length, result);
+ depth = tree->height();
+ ops.BuildOwnedStack(tree, depth);
+ }
+}
+
+template <EdgeType edge_type>
+CordRepBtree* CordRepBtree::Merge(CordRepBtree* dst, CordRepBtree* src) {
+ assert(dst->height() >= src->height());
+
+ // Capture source length as we may consume / destroy `src`.
+ const size_t length = src->length;
+
+ // We attempt to merge `src` at its corresponding height in `dst`.
+ const int depth = dst->height() - src->height();
+ StackOperations<edge_type> ops;
+ CordRepBtree* merge_node = ops.BuildStack(dst, depth);
+
+ // If there is enough space in `merge_node` for all edges from `src`, add all
+ // edges to this node, making a fresh copy as needed if not privately owned.
+ // If `merge_node` does not have capacity for `src`, we rely on `Unwind` and
+ // `Finalize` to merge `src` into the first level towards `root` where there
+ // is capacity for another edge, or create a new top level node.
+ OpResult result;
+ if (merge_node->size() + src->size() <= kMaxCapacity) {
+ result = merge_node->ToOpResult(ops.owned(depth));
+ result.tree->Add<edge_type>(src->Edges());
+ result.tree->length += src->length;
+ if (src->refcount.IsOne()) {
+ Delete(src);
+ } else {
+ for (CordRep* edge : src->Edges()) CordRep::Ref(edge);
+ CordRepBtree::Unref(src);
+ }
+ } else {
+ result = {src, kPopped};
+ }
+
+ // Unless we merged at the top level (i.e.: src and dst are equal height),
+ // unwind the result towards the top level, and finalize the result.
+ if (depth) {
+ return ops.Unwind(dst, depth, length, result);
+ }
+ return ops.Finalize(dst, result);
+}
+
+CopyResult CordRepBtree::CopySuffix(size_t offset) {
+ assert(offset < this->length);
+
+ // As long as `offset` starts inside the last edge, we can 'drop' the current
+ // depth. For the most extreme example: if offset references the last data
+ // edge in the tree, there is only a single edge / path from the top of the
+ // tree to that last edge, so we can drop all the nodes except that edge.
+ // The fast path check for this is `back->length >= length - offset`.
+ int height = this->height();
+ CordRepBtree* node = this;
+ size_t len = node->length - offset;
+ CordRep* back = node->Edge(kBack);
+ while (back->length >= len) {
+ offset = back->length - len;
+ if (--height < 0) {
+ return {MakeSubstring(CordRep::Ref(back), offset), height};
+ }
+ node = back->btree();
+ back = node->Edge(kBack);
+ }
+ if (offset == 0) return {CordRep::Ref(node), height};
+
+ // Offset does not point into the last edge, so we span at least two edges.
+ // Find the index of offset with `IndexBeyond` which provides us the edge
+ // 'beyond' the offset if offset is not a clean starting point of an edge.
+ Position pos = node->IndexBeyond(offset);
+ CordRepBtree* sub = node->CopyToEndFrom(pos.index, len);
+ const CopyResult result = {sub, height};
+
+ // `pos.n` contains a non zero value if the offset is not an exact starting
+ // point of an edge. In this case, `pos.n` contains the 'trailing' amount of
+ // bytes of the edge preceding that in `pos.index`. We need to iteratively
+ // adjust the preceding edge with the 'broken' offset until we have a perfect
+ // start of the edge.
+ while (pos.n != 0) {
+ assert(pos.index >= 1);
+ const size_t begin = pos.index - 1;
+ sub->set_begin(begin);
+ CordRep* const edge = node->Edge(begin);
+
+ len = pos.n;
+ offset = edge->length - len;
+
+ if (--height < 0) {
+ sub->edges_[begin] = MakeSubstring(CordRep::Ref(edge), offset, len);
+ return result;
+ }
+
+ node = edge->btree();
+ pos = node->IndexBeyond(offset);
+
+ CordRepBtree* nsub = node->CopyToEndFrom(pos.index, len);
+ sub->edges_[begin] = nsub;
+ sub = nsub;
+ }
+ sub->set_begin(pos.index);
+ return result;
+}
+
+CopyResult CordRepBtree::CopyPrefix(size_t n, bool allow_folding) {
+ assert(n > 0);
+ assert(n <= this->length);
+
+ // As long as `n` does not exceed the length of the first edge, we can 'drop'
+ // the current depth. For the most extreme example: if we'd copy a 1 byte
+ // prefix from a tree, there is only a single edge / path from the top of the
+ // tree to the single data edge containing this byte, so we can drop all the
+ // nodes except the data node.
+ int height = this->height();
+ CordRepBtree* node = this;
+ CordRep* front = node->Edge(kFront);
+ if (allow_folding) {
+ while (front->length >= n) {
+ if (--height < 0) return {MakeSubstring(CordRep::Ref(front), 0, n), -1};
+ node = front->btree();
+ front = node->Edge(kFront);
+ }
+ }
+ if (node->length == n) return {CordRep::Ref(node), height};
+
+ // `n` spans at least two nodes, find the end point of the span.
+ Position pos = node->IndexOf(n);
+
+ // Create a partial copy of the node up to `pos.index`, with a defined length
+ // of `n`. Any 'partial last edge' is added further below as needed.
+ CordRepBtree* sub = node->CopyBeginTo(pos.index, n);
+ const CopyResult result = {sub, height};
+
+ // `pos.n` contains the 'offset inside the edge for IndexOf(n)'. As long as
+ // this is not zero, we don't have a 'clean cut', so we need to make a
+ // (partial) copy of that last edge, and repeat this until pos.n is zero.
+ while (pos.n != 0) {
+ size_t end = pos.index;
+ n = pos.n;
+
+ CordRep* edge = node->Edge(pos.index);
+ if (--height < 0) {
+ sub->edges_[end++] = MakeSubstring(CordRep::Ref(edge), 0, n);
+ sub->set_end(end);
+ AssertValid(result.edge->btree());
+ return result;
+ }
+
+ node = edge->btree();
+ pos = node->IndexOf(n);
+ CordRepBtree* nsub = node->CopyBeginTo(pos.index, n);
+ sub->edges_[end++] = nsub;
+ sub->set_end(end);
+ sub = nsub;
+ }
+ sub->set_end(pos.index);
+ AssertValid(result.edge->btree());
+ return result;
+}
+
+CordRep* CordRepBtree::ExtractFront(CordRepBtree* tree) {
+ CordRep* front = tree->Edge(tree->begin());
+ if (tree->refcount.IsMutable()) {
+ Unref(tree->Edges(tree->begin() + 1, tree->end()));
+ CordRepBtree::Delete(tree);
+ } else {
+ CordRep::Ref(front);
+ CordRep::Unref(tree);
+ }
+ return front;
+}
+
+CordRepBtree* CordRepBtree::ConsumeBeginTo(CordRepBtree* tree, size_t end,
+ size_t new_length) {
+ assert(end <= tree->end());
+ if (tree->refcount.IsMutable()) {
+ Unref(tree->Edges(end, tree->end()));
+ tree->set_end(end);
+ tree->length = new_length;
+ } else {
+ CordRepBtree* old = tree;
+ tree = tree->CopyBeginTo(end, new_length);
+ CordRep::Unref(old);
+ }
+ return tree;
+}
+
+CordRep* CordRepBtree::RemoveSuffix(CordRepBtree* tree, size_t n) {
+ // Check input and deal with trivial cases 'Remove all/none'
+ assert(tree != nullptr);
+ assert(n <= tree->length);
+ const size_t len = tree->length;
+ if (ABSL_PREDICT_FALSE(n == 0)) {
+ return tree;
+ }
+ if (ABSL_PREDICT_FALSE(n >= len)) {
+ CordRepBtree::Unref(tree);
+ return nullptr;
+ }
+
+ size_t length = len - n;
+ int height = tree->height();
+ bool is_mutable = tree->refcount.IsMutable();
+
+ // Extract all top nodes which are reduced to size = 1
+ Position pos = tree->IndexOfLength(length);
+ while (pos.index == tree->begin()) {
+ CordRep* edge = ExtractFront(tree);
+ is_mutable &= edge->refcount.IsMutable();
+ if (height-- == 0) return ResizeEdge(edge, length, is_mutable);
+ tree = edge->btree();
+ pos = tree->IndexOfLength(length);
+ }
+
+ // Repeat the following sequence traversing down the tree:
+ // - Crop the top node to the 'last remaining edge' adjusting length.
+ // - Set the length for down edges to the partial length in that last edge.
+ // - Repeat this until the last edge is 'included in full'
+ // - If we hit the data edge level, resize and return the last data edge
+ CordRepBtree* top = tree = ConsumeBeginTo(tree, pos.index + 1, length);
+ CordRep* edge = tree->Edge(pos.index);
+ length = pos.n;
+ while (length != edge->length) {
+ // ConsumeBeginTo guarantees `tree` is a clean, privately owned copy.
+ assert(tree->refcount.IsMutable());
+ const bool edge_is_mutable = edge->refcount.IsMutable();
+
+ if (height-- == 0) {
+ tree->edges_[pos.index] = ResizeEdge(edge, length, edge_is_mutable);
+ return AssertValid(top);
+ }
+
+ if (!edge_is_mutable) {
+ // We can't 'in place' remove any suffixes down this edge.
+ // Replace this edge with a prefix copy instead.
+ tree->edges_[pos.index] = edge->btree()->CopyPrefix(length, false).edge;
+ CordRep::Unref(edge);
+ return AssertValid(top);
+ }
+
+ // Move down one level, rinse repeat.
+ tree = edge->btree();
+ pos = tree->IndexOfLength(length);
+ tree = ConsumeBeginTo(edge->btree(), pos.index + 1, length);
+ edge = tree->Edge(pos.index);
+ length = pos.n;
+ }
+
+ return AssertValid(top);
+}
+
+CordRep* CordRepBtree::SubTree(size_t offset, size_t n) {
+ assert(n <= this->length);
+ assert(offset <= this->length - n);
+ if (ABSL_PREDICT_FALSE(n == 0)) return nullptr;
+
+ CordRepBtree* node = this;
+ int height = node->height();
+ Position front = node->IndexOf(offset);
+ CordRep* left = node->edges_[front.index];
+ while (front.n + n <= left->length) {
+ if (--height < 0) return MakeSubstring(CordRep::Ref(left), front.n, n);
+ node = left->btree();
+ front = node->IndexOf(front.n);
+ left = node->edges_[front.index];
+ }
+
+ const Position back = node->IndexBefore(front, n);
+ CordRep* const right = node->edges_[back.index];
+ assert(back.index > front.index);
+
+ // Get partial suffix and prefix entries.
+ CopyResult prefix;
+ CopyResult suffix;
+ if (height > 0) {
+ // Copy prefix and suffix of the boundary nodes.
+ prefix = left->btree()->CopySuffix(front.n);
+ suffix = right->btree()->CopyPrefix(back.n);
+
+ // If there is an edge between the prefix and suffix edges, then the tree
+ // must remain at its previous (full) height. If we have no edges between
+ // prefix and suffix edges, then the tree must be as high as either the
+ // suffix or prefix edges (which are collapsed to their minimum heights).
+ if (front.index + 1 == back.index) {
+ height = (std::max)(prefix.height, suffix.height) + 1;
+ }
+
+ // Raise prefix and suffixes to the new tree height.
+ for (int h = prefix.height + 1; h < height; ++h) {
+ prefix.edge = CordRepBtree::New(prefix.edge);
+ }
+ for (int h = suffix.height + 1; h < height; ++h) {
+ suffix.edge = CordRepBtree::New(suffix.edge);
+ }
+ } else {
+ // Leaf node, simply take substrings for prefix and suffix.
+ prefix = CopyResult{MakeSubstring(CordRep::Ref(left), front.n), -1};
+ suffix = CopyResult{MakeSubstring(CordRep::Ref(right), 0, back.n), -1};
+ }
+
+ // Compose resulting tree.
+ CordRepBtree* sub = CordRepBtree::New(height);
+ size_t end = 0;
+ sub->edges_[end++] = prefix.edge;
+ for (CordRep* r : node->Edges(front.index + 1, back.index)) {
+ sub->edges_[end++] = CordRep::Ref(r);
+ }
+ sub->edges_[end++] = suffix.edge;
+ sub->set_end(end);
+ sub->length = n;
+ return AssertValid(sub);
+}
+
+CordRepBtree* CordRepBtree::MergeTrees(CordRepBtree* left,
+ CordRepBtree* right) {
+ return left->height() >= right->height() ? Merge<kBack>(left, right)
+ : Merge<kFront>(right, left);
+}
+
+bool CordRepBtree::IsFlat(absl::string_view* fragment) const {
+ if (height() == 0 && size() == 1) {
+ if (fragment) *fragment = Data(begin());
+ return true;
+ }
+ return false;
+}
+
+bool CordRepBtree::IsFlat(size_t offset, const size_t n,
+ absl::string_view* fragment) const {
+ assert(n <= this->length);
+ assert(offset <= this->length - n);
+ if (ABSL_PREDICT_FALSE(n == 0)) return false;
+ int height = this->height();
+ const CordRepBtree* node = this;
+ for (;;) {
+ const Position front = node->IndexOf(offset);
+ const CordRep* edge = node->Edge(front.index);
+ if (edge->length < front.n + n) return false;
+ if (--height < 0) {
+ if (fragment) *fragment = EdgeData(edge).substr(front.n, n);
+ return true;
+ }
+ offset = front.n;
+ node = node->Edge(front.index)->btree();
+ }
+}
+
+char CordRepBtree::GetCharacter(size_t offset) const {
+ assert(offset < length);
+ const CordRepBtree* node = this;
+ int height = node->height();
+ for (;;) {
+ Position front = node->IndexOf(offset);
+ if (--height < 0) return node->Data(front.index)[front.n];
+ offset = front.n;
+ node = node->Edge(front.index)->btree();
+ }
+}
+
+Span<char> CordRepBtree::GetAppendBufferSlow(size_t size) {
+ // The inlined version in `GetAppendBuffer()` deals with all heights <= 3.
+ assert(height() >= 4);
+ assert(refcount.IsMutable());
+
+ // Build a stack of nodes we may potentially need to update if we find a
+ // non-shared FLAT with capacity at the leaf level.
+ const int depth = height();
+ CordRepBtree* node = this;
+ CordRepBtree* stack[kMaxDepth];
+ for (int i = 0; i < depth; ++i) {
+ node = node->Edge(kBack)->btree();
+ if (!node->refcount.IsMutable()) return {};
+ stack[i] = node;
+ }
+
+ // Must be a privately owned, mutable flat.
+ CordRep* const edge = node->Edge(kBack);
+ if (!edge->refcount.IsMutable() || edge->tag < FLAT) return {};
+
+ // Must have capacity.
+ const size_t avail = edge->flat()->Capacity() - edge->length;
+ if (avail == 0) return {};
+
+ // Build span on remaining capacity.
+ size_t delta = (std::min)(size, avail);
+ Span<char> span = {edge->flat()->Data() + edge->length, delta};
+ edge->length += delta;
+ this->length += delta;
+ for (int i = 0; i < depth; ++i) {
+ stack[i]->length += delta;
+ }
+ return span;
+}
+
+CordRepBtree* CordRepBtree::CreateSlow(CordRep* rep) {
+ if (rep->IsBtree()) return rep->btree();
+
+ CordRepBtree* node = nullptr;
+ auto consume = [&node](CordRep* r, size_t offset, size_t length) {
+ r = MakeSubstring(r, offset, length);
+ if (node == nullptr) {
+ node = New(r);
+ } else {
+ node = CordRepBtree::AddCordRep<kBack>(node, r);
+ }
+ };
+ Consume(rep, consume);
+ return node;
+}
+
+CordRepBtree* CordRepBtree::AppendSlow(CordRepBtree* tree, CordRep* rep) {
+ if (ABSL_PREDICT_TRUE(rep->IsBtree())) {
+ return MergeTrees(tree, rep->btree());
+ }
+ auto consume = [&tree](CordRep* r, size_t offset, size_t length) {
+ r = MakeSubstring(r, offset, length);
+ tree = CordRepBtree::AddCordRep<kBack>(tree, r);
+ };
+ Consume(rep, consume);
+ return tree;
+}
+
+CordRepBtree* CordRepBtree::PrependSlow(CordRepBtree* tree, CordRep* rep) {
+ if (ABSL_PREDICT_TRUE(rep->IsBtree())) {
+ return MergeTrees(rep->btree(), tree);
+ }
+ auto consume = [&tree](CordRep* r, size_t offset, size_t length) {
+ r = MakeSubstring(r, offset, length);
+ tree = CordRepBtree::AddCordRep<kFront>(tree, r);
+ };
+ ReverseConsume(rep, consume);
+ return tree;
+}
+
+CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, absl::string_view data,
+ size_t extra) {
+ return CordRepBtree::AddData<kBack>(tree, data, extra);
+}
+
+CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, absl::string_view data,
+ size_t extra) {
+ return CordRepBtree::AddData<kFront>(tree, data, extra);
+}
+
+template CordRepBtree* CordRepBtree::AddCordRep<kFront>(CordRepBtree* tree,
+ CordRep* rep);
+template CordRepBtree* CordRepBtree::AddCordRep<kBack>(CordRepBtree* tree,
+ CordRep* rep);
+template CordRepBtree* CordRepBtree::AddData<kFront>(CordRepBtree* tree,
+ absl::string_view data,
+ size_t extra);
+template CordRepBtree* CordRepBtree::AddData<kBack>(CordRepBtree* tree,
+ absl::string_view data,
+ size_t extra);
+
+void CordRepBtree::Rebuild(CordRepBtree** stack, CordRepBtree* tree,
+ bool consume) {
+ bool owned = consume && tree->refcount.IsOne();
+ if (tree->height() == 0) {
+ for (CordRep* edge : tree->Edges()) {
+ if (!owned) edge = CordRep::Ref(edge);
+ size_t height = 0;
+ size_t length = edge->length;
+ CordRepBtree* node = stack[0];
+ OpResult result = node->AddEdge<kBack>(true, edge, length);
+ while (result.action == CordRepBtree::kPopped) {
+ stack[height] = result.tree;
+ if (stack[++height] == nullptr) {
+ result.action = CordRepBtree::kSelf;
+ stack[height] = CordRepBtree::New(node, result.tree);
+ } else {
+ node = stack[height];
+ result = node->AddEdge<kBack>(true, result.tree, length);
+ }
+ }
+ while (stack[++height] != nullptr) {
+ stack[height]->length += length;
+ }
+ }
+ } else {
+ for (CordRep* rep : tree->Edges()) {
+ Rebuild(stack, rep->btree(), owned);
+ }
+ }
+ if (consume) {
+ if (owned) {
+ CordRepBtree::Delete(tree);
+ } else {
+ CordRepBtree::Unref(tree);
+ }
+ }
+}
+
+CordRepBtree* CordRepBtree::Rebuild(CordRepBtree* tree) {
+ // Set up initial stack with empty leaf node.
+ CordRepBtree* node = CordRepBtree::New();
+ CordRepBtree* stack[CordRepBtree::kMaxDepth + 1] = {node};
+
+ // Recursively build the tree, consuming the input tree.
+ Rebuild(stack, tree, /* consume reference */ true);
+
+ // Return top most node
+ for (CordRepBtree* parent : stack) {
+ if (parent == nullptr) return node;
+ node = parent;
+ }
+
+ // Unreachable
+ assert(false);
+ return nullptr;
+}
+
+} // namespace cord_internal
+ABSL_NAMESPACE_END
+} // namespace absl