aboutsummaryrefslogtreecommitdiff
path: root/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree.h
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/abseil-cpp/absl/strings/internal/cord_rep_btree.h')
-rw-r--r--third_party/abseil-cpp/absl/strings/internal/cord_rep_btree.h939
1 files changed, 939 insertions, 0 deletions
diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree.h b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree.h
new file mode 100644
index 0000000000..bb38f0c3fe
--- /dev/null
+++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree.h
@@ -0,0 +1,939 @@
+// Copyright 2021 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
+#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
+
+#include <cassert>
+#include <cstdint>
+#include <iosfwd>
+
+#include "absl/base/config.h"
+#include "absl/base/internal/raw_logging.h"
+#include "absl/base/optimization.h"
+#include "absl/strings/internal/cord_internal.h"
+#include "absl/strings/internal/cord_rep_flat.h"
+#include "absl/strings/string_view.h"
+#include "absl/types/span.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace cord_internal {
+
+class CordRepBtreeNavigator;
+
+// CordRepBtree is as the name implies a btree implementation of a Cordrep tree.
+// Data is stored at the leaf level only, non leaf nodes contain down pointers
+// only. Allowed types of data edges are FLAT, EXTERNAL and SUBSTRINGs of FLAT
+// or EXTERNAL nodes. The implementation allows for data to be added to either
+// end of the tree only, it does not provide any 'insert' logic. This has the
+// benefit that we can expect good fill ratios: all nodes except the outer
+// 'legs' will have 100% fill ratios for trees built using Append/Prepend
+// methods. Merged trees will typically have a fill ratio well above 50% as in a
+// similar fashion, one side of the merged tree will typically have a 100% fill
+// ratio, and the 'open' end will average 50%. All operations are O(log(n)) or
+// better, and the tree never needs balancing.
+//
+// All methods accepting a CordRep* or CordRepBtree* adopt a reference on that
+// input unless explicitly stated otherwise. All functions returning a CordRep*
+// or CordRepBtree* instance transfer a reference back to the caller.
+// Simplified, callers both 'donate' and 'consume' a reference count on each
+// call, simplifying the API. An example of building a tree:
+//
+// CordRepBtree* tree = CordRepBtree::Create(MakeFlat("Hello"));
+// tree = CordRepBtree::Append(tree, MakeFlat("world"));
+//
+// In the above example, all inputs are consumed, making each call affecting
+// `tree` reference count neutral. The returned `tree` value can be different
+// from the input if the input is shared with other threads, or if the tree
+// grows in height, but callers typically never have to concern themselves with
+// that and trust that all methods DTRT at all times.
+class CordRepBtree : public CordRep {
+ public:
+ // EdgeType identifies `front` and `back` enum values.
+ // Various implementations in CordRepBtree such as `Add` and `Edge` are
+ // generic and templated on operating on either of the boundary edges.
+ // For more information on the possible edges contained in a CordRepBtree
+ // instance see the documentation for `edges_`.
+ enum class EdgeType { kFront, kBack };
+
+ // Convenience constants into `EdgeType`
+ static constexpr EdgeType kFront = EdgeType::kFront;
+ static constexpr EdgeType kBack = EdgeType::kBack;
+
+ // Maximum number of edges: based on experiments and performance data, we can
+ // pick suitable values resulting in optimum cacheline aligned values. The
+ // preferred values are based on 64-bit systems where we aim to align this
+ // class onto 64 bytes, i.e.: 6 = 64 bytes, 14 = 128 bytes, etc.
+ // TODO(b/192061034): experiment with alternative sizes.
+ static constexpr size_t kMaxCapacity = 6;
+
+ // Reasonable maximum height of the btree. We can expect a fill ratio of at
+ // least 50%: trees are always expanded at the front or back. Concatenating
+ // trees will then typically fold at the top most node, where the lower nodes
+ // are at least at capacity on one side of joined inputs. At a lower fill
+ // rate of 4 edges per node, we have capacity for ~16 million leaf nodes.
+ // We will fail / abort if an application ever exceeds this height, which
+ // should be extremely rare (near impossible) and be an indication of an
+ // application error: we do not assume it reasonable for any application to
+ // operate correctly with such monster trees.
+ // Another compelling reason for the number `12` is that any contextual stack
+ // required for navigation or insertion requires 12 words and 12 bytes, which
+ // fits inside 2 cache lines with some room to spare, and is reasonable as a
+ // local stack variable compared to Cord's current near 400 bytes stack use.
+ // The maximum `height` value of a node is then `kMaxDepth - 1` as node height
+ // values start with a value of 0 for leaf nodes.
+ static constexpr int kMaxDepth = 12;
+ static constexpr int kMaxHeight = kMaxDepth - 1;
+
+ // `Action` defines the action for unwinding changes done at the btree's leaf
+ // level that need to be propagated up to the parent node(s). Each operation
+ // on a node has an effect / action defined as follows:
+ // - kSelf
+ // The operation (add / update, etc) was performed directly on the node as
+ // the node is private to the current thread (i.e.: not shared directly or
+ // indirectly through a refcount > 1). Changes can be propagated directly to
+ // all parent nodes as all parent nodes are also then private to the current
+ // thread.
+ // - kCopied
+ // The operation (add / update, etc) was performed on a copy of the original
+ // node, as the node is (potentially) directly or indirectly shared with
+ // other threads. Changes need to be propagated into the parent nodes where
+ // the old down pointer must be unreffed and replaced with this new copy.
+ // Such changes to parent nodes may themselves require a copy if the parent
+ // node is also shared. A kCopied action can propagate all the way to the
+ // top node where we then must unref the `tree` input provided by the
+ // caller, and return the new copy.
+ // - kPopped
+ // The operation (typically add) could not be satisfied due to insufficient
+ // capacity in the targeted node, and a new 'leg' was created that needs to
+ // be added into the parent node. For example, adding a FLAT inside a leaf
+ // node that is at capacity will create a new leaf node containing that
+ // FLAT, that needs to be 'popped' up the btree. Such 'pop' actions can
+ // cascade up the tree if parent nodes are also at capacity. A 'Popped'
+ // action propagating all the way to the top of the tree will result in
+ // the tree becoming one level higher than the current tree through a final
+ // `CordRepBtree::New(tree, popped)` call, resulting in a new top node
+ // referencing the old tree and the new (fully popped upwards) 'leg'.
+ enum Action { kSelf, kCopied, kPopped };
+
+ // Result of an operation on a node. See the `Action` enum for details.
+ struct OpResult {
+ CordRepBtree* tree;
+ Action action;
+ };
+
+ // Return value of the CopyPrefix and CopySuffix methods which can
+ // return a node or data edge at any height inside the tree.
+ // A height of 0 defines the lowest (leaf) node, a height of -1 identifies
+ // `edge` as being a plain data node: EXTERNAL / FLAT or SUBSTRING thereof.
+ struct CopyResult {
+ CordRep* edge;
+ int height;
+ };
+
+ // Logical position inside a node:
+ // - index: index of the edge.
+ // - n: size or offset value depending on context.
+ struct Position {
+ size_t index;
+ size_t n;
+ };
+
+ // Creates a btree from the given input. Adopts a ref of `rep`.
+ // If the input `rep` is itself a btree, i.e., `IsBtree()`, then this
+ // function immediately returns `rep->btree()`. If the input is a valid data
+ // edge (see IsDataEdge()), then a new leaf node is returned containing `rep`
+ // as the sole data edge. Else, the input is assumed to be a (legacy) concat
+ // tree, and the input is consumed and transformed into a btree().
+ static CordRepBtree* Create(CordRep* rep);
+
+ // Destroys the provided tree. Should only be called by cord internal API's,
+ // typically after a ref_count.Decrement() on the last reference count.
+ static void Destroy(CordRepBtree* tree);
+
+ // Use CordRep::Unref() as we overload for absl::Span<CordRep* const>.
+ using CordRep::Unref;
+
+ // Unrefs all edges in `edges` which are assumed to be 'likely one'.
+ static void Unref(absl::Span<CordRep* const> edges);
+
+ // Appends / Prepends an existing CordRep instance to this tree.
+ // The below methods accept three types of input:
+ // 1) `rep` is a data node (See `IsDataNode` for valid data edges).
+ // `rep` is appended or prepended to this tree 'as is'.
+ // 2) `rep` is a BTREE.
+ // `rep` is merged into `tree` respecting the Append/Prepend order.
+ // 3) `rep` is some other (legacy) type.
+ // `rep` is converted in place and added to `tree`
+ // Requires `tree` and `rep` to be not null.
+ static CordRepBtree* Append(CordRepBtree* tree, CordRep* rep);
+ static CordRepBtree* Prepend(CordRepBtree* tree, CordRep* rep);
+
+ // Append/Prepend the data in `data` to this tree.
+ // The `extra` parameter defines how much extra capacity should be allocated
+ // for any additional FLAT being allocated. This is an optimization hint from
+ // the caller. For example, a caller may need to add 2 string_views of data
+ // "abc" and "defghi" which are not consecutive. The caller can in this case
+ // invoke `AddData(tree, "abc", 6)`, and any newly added flat is allocated
+ // where possible with at least 6 bytes of extra capacity beyond `length`.
+ // This helps avoiding data getting fragmented over multiple flats.
+ // There is no limit on the size of `data`. If `data` can not be stored inside
+ // a single flat, then the function will iteratively add flats until all data
+ // has been consumed and appended or prepended to the tree.
+ static CordRepBtree* Append(CordRepBtree* tree, string_view data,
+ size_t extra = 0);
+ static CordRepBtree* Prepend(CordRepBtree* tree, string_view data,
+ size_t extra = 0);
+
+ // Returns a new tree, containing `n` bytes of data from this instance
+ // starting at offset `offset`. Where possible, the returned tree shares
+ // (re-uses) data edges and nodes with this instance to minimize the
+ // combined memory footprint of both trees.
+ // Requires `offset + n <= length`. Returns `nullptr` if `n` is zero.
+ CordRep* SubTree(size_t offset, size_t n);
+
+ // Removes `n` trailing bytes from `tree`, and returns the resulting tree
+ // or data edge. Returns `tree` if n is zero, and nullptr if n == length.
+ // This function is logically identical to:
+ // result = tree->SubTree(0, tree->length - n);
+ // Unref(tree);
+ // return result;
+ // However, the actual implementation will as much as possible perform 'in
+ // place' modifications on the tree on all nodes and edges that are mutable.
+ // For example, in a fully privately owned tree with the last edge being a
+ // flat of length 12, RemoveSuffix(1) will simply set the length of that data
+ // edge to 11, and reduce the length of all nodes on the edge path by 1.
+ static CordRep* RemoveSuffix(CordRepBtree* tree, size_t n);
+
+ // Returns the character at the given offset.
+ char GetCharacter(size_t offset) const;
+
+ // Returns true if this node holds a single data edge, and if so, sets
+ // `fragment` to reference the contained data. `fragment` is an optional
+ // output parameter and allowed to be null.
+ bool IsFlat(absl::string_view* fragment) const;
+
+ // Returns true if the data of `n` bytes starting at offset `offset`
+ // is contained in a single data edge, and if so, sets fragment to reference
+ // the contained data. `fragment` is an optional output parameter and allowed
+ // to be null.
+ bool IsFlat(size_t offset, size_t n, absl::string_view* fragment) const;
+
+ // Returns a span (mutable range of bytes) of up to `size` bytes into the
+ // last FLAT data edge inside this tree under the following conditions:
+ // - none of the nodes down into the FLAT node are shared.
+ // - the last data edge in this tree is a non-shared FLAT.
+ // - the referenced FLAT has additional capacity available.
+ // If all these conditions are met, a non-empty span is returned, and the
+ // length of the flat node and involved tree nodes have been increased by
+ // `span.length()`. The caller is responsible for immediately assigning values
+ // to all uninitialized data reference by the returned span.
+ // Requires `this->refcount.IsMutable()`: this function forces the
+ // caller to do this fast path check on the top level node, as this is the
+ // most commonly shared node of a cord tree.
+ Span<char> GetAppendBuffer(size_t size);
+
+ // Returns the `height` of the tree. The height of a tree is limited to
+ // kMaxHeight. `height` is implemented as an `int` as in some places we
+ // use negative (-1) values for 'data edges'.
+ int height() const { return static_cast<int>(storage[0]); }
+
+ // Properties: begin, back, end, front/back boundary indexes.
+ size_t begin() const { return static_cast<size_t>(storage[1]); }
+ size_t back() const { return static_cast<size_t>(storage[2]) - 1; }
+ size_t end() const { return static_cast<size_t>(storage[2]); }
+ size_t index(EdgeType edge) const {
+ return edge == kFront ? begin() : back();
+ }
+
+ // Properties: size and capacity.
+ // `capacity` contains the current capacity of this instance, where
+ // `kMaxCapacity` contains the maximum capacity of a btree node.
+ // For now, `capacity` and `kMaxCapacity` return the same value, but this may
+ // change in the future if we see benefit in dynamically sizing 'small' nodes
+ // to 'large' nodes for large data trees.
+ size_t size() const { return end() - begin(); }
+ size_t capacity() const { return kMaxCapacity; }
+
+ // Edge access
+ inline CordRep* Edge(size_t index) const;
+ inline CordRep* Edge(EdgeType edge_type) const;
+ inline absl::Span<CordRep* const> Edges() const;
+ inline absl::Span<CordRep* const> Edges(size_t begin, size_t end) const;
+
+ // Returns reference to the data edge at `index`.
+ // Requires this instance to be a leaf node, and `index` to be valid index.
+ inline absl::string_view Data(size_t index) const;
+
+ static const char* EdgeDataPtr(const CordRep* r);
+ static absl::string_view EdgeData(const CordRep* r);
+
+ // Returns true if the provided rep is a FLAT, EXTERNAL or a SUBSTRING node
+ // holding a FLAT or EXTERNAL child rep.
+ static bool IsDataEdge(const CordRep* rep);
+
+ // Diagnostics: returns true if `tree` is valid and internally consistent.
+ // If `shallow` is false, then the provided top level node and all child nodes
+ // below it are recursively checked. If `shallow` is true, only the provided
+ // node in `tree` and the cumulative length, type and height of the direct
+ // child nodes of `tree` are checked. The value of `shallow` is ignored if the
+ // internal `cord_btree_exhaustive_validation` diagnostics variable is true,
+ // in which case the performed validations works as if `shallow` were false.
+ // This function is intended for debugging and testing purposes only.
+ static bool IsValid(const CordRepBtree* tree, bool shallow = false);
+
+ // Diagnostics: asserts that the provided tree is valid.
+ // `AssertValid()` performs a shallow validation by default. `shallow` can be
+ // set to false in which case an exhaustive validation is performed. This
+ // function is implemented in terms of calling `IsValid()` and asserting the
+ // return value to be true. See `IsValid()` for more information.
+ // This function is intended for debugging and testing purposes only.
+ static CordRepBtree* AssertValid(CordRepBtree* tree, bool shallow = true);
+ static const CordRepBtree* AssertValid(const CordRepBtree* tree,
+ bool shallow = true);
+
+ // Diagnostics: dump the contents of this tree to `stream`.
+ // This function is intended for debugging and testing purposes only.
+ static void Dump(const CordRep* rep, std::ostream& stream);
+ static void Dump(const CordRep* rep, absl::string_view label,
+ std::ostream& stream);
+ static void Dump(const CordRep* rep, absl::string_view label,
+ bool include_contents, std::ostream& stream);
+
+ // Adds the edge `edge` to this node if possible. `owned` indicates if the
+ // current node is potentially shared or not with other threads. Returns:
+ // - {kSelf, <this>}
+ // The edge was directly added to this node.
+ // - {kCopied, <node>}
+ // The edge was added to a copy of this node.
+ // - {kPopped, New(edge, height())}
+ // A new leg with the edge was created as this node has no extra capacity.
+ template <EdgeType edge_type>
+ inline OpResult AddEdge(bool owned, CordRep* edge, size_t delta);
+
+ // Replaces the front or back edge with the provided new edge. Returns:
+ // - {kSelf, <this>}
+ // The edge was directly set in this node. The old edge is unreffed.
+ // - {kCopied, <node>}
+ // A copy of this node was created with the new edge value.
+ // In both cases, the function adopts a reference on `edge`.
+ template <EdgeType edge_type>
+ OpResult SetEdge(bool owned, CordRep* edge, size_t delta);
+
+ // Creates a new empty node at the specified height.
+ static CordRepBtree* New(int height = 0);
+
+ // Creates a new node containing `rep`, with the height being computed
+ // automatically based on the type of `rep`.
+ static CordRepBtree* New(CordRep* rep);
+
+ // Creates a new node containing both `front` and `back` at height
+ // `front.height() + 1`. Requires `back.height() == front.height()`.
+ static CordRepBtree* New(CordRepBtree* front, CordRepBtree* back);
+
+ // Creates a fully balanced tree from the provided tree by rebuilding a new
+ // tree from all data edges in the input. This function is automatically
+ // invoked internally when the tree exceeds the maximum height.
+ static CordRepBtree* Rebuild(CordRepBtree* tree);
+
+ private:
+ CordRepBtree() = default;
+ ~CordRepBtree() = default;
+
+ // Initializes the main properties `tag`, `begin`, `end`, `height`.
+ inline void InitInstance(int height, size_t begin = 0, size_t end = 0);
+
+ // Direct property access begin / end
+ void set_begin(size_t begin) { storage[1] = static_cast<uint8_t>(begin); }
+ void set_end(size_t end) { storage[2] = static_cast<uint8_t>(end); }
+
+ // Decreases the value of `begin` by `n`, and returns the new value. Notice
+ // how this returns the new value unlike atomic::fetch_add which returns the
+ // old value. This is because this is used to prepend edges at 'begin - 1'.
+ size_t sub_fetch_begin(size_t n) {
+ storage[1] -= static_cast<uint8_t>(n);
+ return storage[1];
+ }
+
+ // Increases the value of `end` by `n`, and returns the previous value. This
+ // function is typically used to append edges at 'end'.
+ size_t fetch_add_end(size_t n) {
+ const uint8_t current = storage[2];
+ storage[2] = static_cast<uint8_t>(current + n);
+ return current;
+ }
+
+ // Returns the index of the last edge starting on, or before `offset`, with
+ // `n` containing the relative offset of `offset` inside that edge.
+ // Requires `offset` < length.
+ Position IndexOf(size_t offset) const;
+
+ // Returns the index of the last edge starting before `offset`, with `n`
+ // containing the relative offset of `offset` inside that edge.
+ // This function is useful to find the edges for some span of bytes ending at
+ // `offset` (i.e., `n` bytes). For example:
+ //
+ // Position pos = IndexBefore(n)
+ // edges = Edges(begin(), pos.index) // All full edges (may be empty)
+ // last = Sub(Edge(pos.index), 0, pos.n) // Last partial edge (may be empty)
+ //
+ // Requires 0 < `offset` <= length.
+ Position IndexBefore(size_t offset) const;
+
+ // Returns the index of the edge ending at (or on) length `length`, and the
+ // number of bytes inside that edge up to `length`. For example, if we have a
+ // Node with 2 edges, one of 10 and one of 20 long, then IndexOfLength(27)
+ // will return {1, 17}, and IndexOfLength(10) will return {0, 10}.
+ Position IndexOfLength(size_t n) const;
+
+ // Identical to the above function except starting from the position `front`.
+ // This function is equivalent to `IndexBefore(front.n + offset)`, with
+ // the difference that this function is optimized to start at `front.index`.
+ Position IndexBefore(Position front, size_t offset) const;
+
+ // Returns the index of the edge directly beyond the edge containing offset
+ // `offset`, with `n` containing the distance of that edge from `offset`.
+ // This function is useful for iteratively finding suffix nodes and remaining
+ // partial bytes in left-most suffix nodes as for example in CopySuffix.
+ // Requires `offset` < length.
+ Position IndexBeyond(size_t offset) const;
+
+ // Destruction
+ static void DestroyLeaf(CordRepBtree* tree, size_t begin, size_t end);
+ static void DestroyNonLeaf(CordRepBtree* tree, size_t begin, size_t end);
+ static void DestroyTree(CordRepBtree* tree, size_t begin, size_t end);
+ static void Delete(CordRepBtree* tree) { delete tree; }
+
+ // Creates a new leaf node containing as much data as possible from `data`.
+ // The data is added either forwards or reversed depending on `edge_type`.
+ // Callers must check the length of the returned node to determine if all data
+ // was copied or not.
+ // See the `Append/Prepend` function for the meaning and purpose of `extra`.
+ template <EdgeType edge_type>
+ static CordRepBtree* NewLeaf(absl::string_view data, size_t extra);
+
+ // Creates a raw copy of this Btree node, copying all properties, but
+ // without adding any references to existing edges.
+ CordRepBtree* CopyRaw() const;
+
+ // Creates a full copy of this Btree node, adding a reference on all edges.
+ CordRepBtree* Copy() const;
+
+ // Creates a partial copy of this Btree node, copying all edges up to `end`,
+ // adding a reference on each copied edge, and sets the length of the newly
+ // created copy to `new_length`.
+ CordRepBtree* CopyBeginTo(size_t end, size_t new_length) const;
+
+ // Returns a tree containing the edges [tree->begin(), end) and length
+ // of `new_length`. This method consumes a reference on the provided
+ // tree, and logically performs the following operation:
+ // result = tree->CopyBeginTo(end, new_length);
+ // CordRep::Unref(tree);
+ // return result;
+ static CordRepBtree* ConsumeBeginTo(CordRepBtree* tree, size_t end,
+ size_t new_length);
+
+ // Creates a partial copy of this Btree node, copying all edges starting at
+ // `begin`, adding a reference on each copied edge, and sets the length of
+ // the newly created copy to `new_length`.
+ CordRepBtree* CopyToEndFrom(size_t begin, size_t new_length) const;
+
+ // Extracts and returns the front edge from the provided tree.
+ // This method consumes a reference on the provided tree, and logically
+ // performs the following operation:
+ // edge = CordRep::Ref(tree->Edge(kFront));
+ // CordRep::Unref(tree);
+ // return edge;
+ static CordRep* ExtractFront(CordRepBtree* tree);
+
+ // Returns a tree containing the result of appending `right` to `left`.
+ static CordRepBtree* MergeTrees(CordRepBtree* left, CordRepBtree* right);
+
+ // Fallback functions for `Create()`, `Append()` and `Prepend()` which
+ // deal with legacy / non conforming input, i.e.: CONCAT trees.
+ static CordRepBtree* CreateSlow(CordRep* rep);
+ static CordRepBtree* AppendSlow(CordRepBtree*, CordRep* rep);
+ static CordRepBtree* PrependSlow(CordRepBtree*, CordRep* rep);
+
+ // Recursively rebuilds `tree` into `stack`. If 'consume` is set to true, the
+ // function will consume a reference on `tree`. `stack` is a null terminated
+ // array containing the new tree's state, with the current leaf node at
+ // stack[0], and parent nodes above that, or null for 'top of tree'.
+ static void Rebuild(CordRepBtree** stack, CordRepBtree* tree, bool consume);
+
+ // Aligns existing edges to start at index 0, to allow for a new edge to be
+ // added to the back of the current edges.
+ inline void AlignBegin();
+
+ // Aligns existing edges to end at `capacity`, to allow for a new edge to be
+ // added in front of the current edges.
+ inline void AlignEnd();
+
+ // Adds the provided edge to this node.
+ // Requires this node to have capacity for the edge. Realigns / moves
+ // existing edges as needed to prepend or append the new edge.
+ template <EdgeType edge_type>
+ inline void Add(CordRep* rep);
+
+ // Adds the provided edges to this node.
+ // Requires this node to have capacity for the edges. Realigns / moves
+ // existing edges as needed to prepend or append the new edges.
+ template <EdgeType edge_type>
+ inline void Add(absl::Span<CordRep* const>);
+
+ // Adds data from `data` to this node until either all data has been consumed,
+ // or there is no more capacity for additional flat nodes inside this node.
+ // Requires the current node to be a leaf node, data to be non empty, and the
+ // current node to have capacity for at least one more data edge.
+ // Returns any remaining data from `data` that was not added, which is
+ // depending on the edge type (front / back) either the remaining prefix of
+ // suffix of the input.
+ // See the `Append/Prepend` function for the meaning and purpose of `extra`.
+ template <EdgeType edge_type>
+ absl::string_view AddData(absl::string_view data, size_t extra);
+
+ // Replace the front or back edge with the provided value.
+ // Adopts a reference on `edge` and unrefs the old edge.
+ template <EdgeType edge_type>
+ inline void SetEdge(CordRep* edge);
+
+ // Returns a partial copy of the current tree containing the first `n` bytes
+ // of data. `CopyResult` contains both the resulting edge and its height. The
+ // resulting tree may be less high than the current tree, or even be a single
+ // matching data edge if `allow_folding` is set to true.
+ // For example, if `n == 1`, then the result will be the single data edge, and
+ // height will be set to -1 (one below the owning leaf node). If n == 0, this
+ // function returns null. Requires `n <= length`
+ CopyResult CopyPrefix(size_t n, bool allow_folding = true);
+
+ // Returns a partial copy of the current tree containing all data starting
+ // after `offset`. `CopyResult` contains both the resulting edge and its
+ // height. The resulting tree may be less high than the current tree, or even
+ // be a single matching data edge. For example, if `n == length - 1`, then the
+ // result will be a single data edge, and height will be set to -1 (one below
+ // the owning leaf node).
+ // Requires `offset < length`
+ CopyResult CopySuffix(size_t offset);
+
+ // Returns a OpResult value of {this, kSelf} or {Copy(), kCopied}
+ // depending on the value of `owned`.
+ inline OpResult ToOpResult(bool owned);
+
+ // Adds `rep` to the specified tree, returning the modified tree.
+ template <EdgeType edge_type>
+ static CordRepBtree* AddCordRep(CordRepBtree* tree, CordRep* rep);
+
+ // Adds `data` to the specified tree, returning the modified tree.
+ // See the `Append/Prepend` function for the meaning and purpose of `extra`.
+ template <EdgeType edge_type>
+ static CordRepBtree* AddData(CordRepBtree* tree, absl::string_view data,
+ size_t extra = 0);
+
+ // Merges `src` into `dst` with `src` being added either before (kFront) or
+ // after (kBack) `dst`. Requires the height of `dst` to be greater than or
+ // equal to the height of `src`.
+ template <EdgeType edge_type>
+ static CordRepBtree* Merge(CordRepBtree* dst, CordRepBtree* src);
+
+ // Fallback version of GetAppendBuffer for large trees: GetAppendBuffer()
+ // implements an inlined version for trees of limited height (3 levels),
+ // GetAppendBufferSlow implements the logic for large trees.
+ Span<char> GetAppendBufferSlow(size_t size);
+
+ // `edges_` contains all edges starting from this instance.
+ // These are explicitly `child` edges only, a cord btree (or any cord tree in
+ // that respect) does not store `parent` pointers anywhere: multiple trees /
+ // parents can reference the same shared child edge. The type of these edges
+ // depends on the height of the node. `Leaf nodes` (height == 0) contain `data
+ // edges` (external or flat nodes, or sub-strings thereof). All other nodes
+ // (height > 0) contain pointers to BTREE nodes with a height of `height - 1`.
+ CordRep* edges_[kMaxCapacity];
+
+ friend class CordRepBtreeTestPeer;
+ friend class CordRepBtreeNavigator;
+};
+
+inline CordRepBtree* CordRep::btree() {
+ assert(IsBtree());
+ return static_cast<CordRepBtree*>(this);
+}
+
+inline const CordRepBtree* CordRep::btree() const {
+ assert(IsBtree());
+ return static_cast<const CordRepBtree*>(this);
+}
+
+inline void CordRepBtree::InitInstance(int height, size_t begin, size_t end) {
+ tag = BTREE;
+ storage[0] = static_cast<uint8_t>(height);
+ storage[1] = static_cast<uint8_t>(begin);
+ storage[2] = static_cast<uint8_t>(end);
+}
+
+inline CordRep* CordRepBtree::Edge(size_t index) const {
+ assert(index >= begin());
+ assert(index < end());
+ return edges_[index];
+}
+
+inline CordRep* CordRepBtree::Edge(EdgeType edge_type) const {
+ return edges_[edge_type == kFront ? begin() : back()];
+}
+
+inline absl::Span<CordRep* const> CordRepBtree::Edges() const {
+ return {edges_ + begin(), size()};
+}
+
+inline absl::Span<CordRep* const> CordRepBtree::Edges(size_t begin,
+ size_t end) const {
+ assert(begin <= end);
+ assert(begin >= this->begin());
+ assert(end <= this->end());
+ return {edges_ + begin, static_cast<size_t>(end - begin)};
+}
+
+inline const char* CordRepBtree::EdgeDataPtr(const CordRep* r) {
+ assert(IsDataEdge(r));
+ size_t offset = 0;
+ if (r->tag == SUBSTRING) {
+ offset = r->substring()->start;
+ r = r->substring()->child;
+ }
+ return (r->tag >= FLAT ? r->flat()->Data() : r->external()->base) + offset;
+}
+
+inline absl::string_view CordRepBtree::EdgeData(const CordRep* r) {
+ return absl::string_view(EdgeDataPtr(r), r->length);
+}
+
+inline absl::string_view CordRepBtree::Data(size_t index) const {
+ assert(height() == 0);
+ return EdgeData(Edge(index));
+}
+
+inline bool CordRepBtree::IsDataEdge(const CordRep* rep) {
+ // The fast path is that `rep` is an EXTERNAL or FLAT node, making the below
+ // if a single, well predicted branch. We then repeat the FLAT or EXTERNAL
+ // check in the slow path the SUBSTRING check to optimize for the hot path.
+ if (rep->tag == EXTERNAL || rep->tag >= FLAT) return true;
+ if (rep->tag == SUBSTRING) rep = rep->substring()->child;
+ return rep->tag == EXTERNAL || rep->tag >= FLAT;
+}
+
+inline CordRepBtree* CordRepBtree::New(int height) {
+ CordRepBtree* tree = new CordRepBtree;
+ tree->length = 0;
+ tree->InitInstance(height);
+ return tree;
+}
+
+inline CordRepBtree* CordRepBtree::New(CordRep* rep) {
+ CordRepBtree* tree = new CordRepBtree;
+ int height = rep->IsBtree() ? rep->btree()->height() + 1 : 0;
+ tree->length = rep->length;
+ tree->InitInstance(height, /*begin=*/0, /*end=*/1);
+ tree->edges_[0] = rep;
+ return tree;
+}
+
+inline CordRepBtree* CordRepBtree::New(CordRepBtree* front,
+ CordRepBtree* back) {
+ assert(front->height() == back->height());
+ CordRepBtree* tree = new CordRepBtree;
+ tree->length = front->length + back->length;
+ tree->InitInstance(front->height() + 1, /*begin=*/0, /*end=*/2);
+ tree->edges_[0] = front;
+ tree->edges_[1] = back;
+ return tree;
+}
+
+inline void CordRepBtree::DestroyTree(CordRepBtree* tree, size_t begin,
+ size_t end) {
+ if (tree->height() == 0) {
+ DestroyLeaf(tree, begin, end);
+ } else {
+ DestroyNonLeaf(tree, begin, end);
+ }
+}
+
+inline void CordRepBtree::Destroy(CordRepBtree* tree) {
+ DestroyTree(tree, tree->begin(), tree->end());
+}
+
+inline void CordRepBtree::Unref(absl::Span<CordRep* const> edges) {
+ for (CordRep* edge : edges) {
+ if (ABSL_PREDICT_FALSE(!edge->refcount.Decrement())) {
+ CordRep::Destroy(edge);
+ }
+ }
+}
+
+inline CordRepBtree* CordRepBtree::CopyRaw() const {
+ auto* tree = static_cast<CordRepBtree*>(::operator new(sizeof(CordRepBtree)));
+ memcpy(static_cast<void*>(tree), this, sizeof(CordRepBtree));
+ new (&tree->refcount) RefcountAndFlags;
+ return tree;
+}
+
+inline CordRepBtree* CordRepBtree::Copy() const {
+ CordRepBtree* tree = CopyRaw();
+ for (CordRep* rep : Edges()) CordRep::Ref(rep);
+ return tree;
+}
+
+inline CordRepBtree* CordRepBtree::CopyToEndFrom(size_t begin,
+ size_t new_length) const {
+ assert(begin >= this->begin());
+ assert(begin <= this->end());
+ CordRepBtree* tree = CopyRaw();
+ tree->length = new_length;
+ tree->set_begin(begin);
+ for (CordRep* edge : tree->Edges()) CordRep::Ref(edge);
+ return tree;
+}
+
+inline CordRepBtree* CordRepBtree::CopyBeginTo(size_t end,
+ size_t new_length) const {
+ assert(end <= capacity());
+ assert(end >= this->begin());
+ CordRepBtree* tree = CopyRaw();
+ tree->length = new_length;
+ tree->set_end(end);
+ for (CordRep* edge : tree->Edges()) CordRep::Ref(edge);
+ return tree;
+}
+
+inline void CordRepBtree::AlignBegin() {
+ // The below code itself does not need to be fast as typically we have
+ // mono-directional append/prepend calls, and `begin` / `end` are typically
+ // adjusted no more than once. But we want to avoid potential register clobber
+ // effects, making the compiler emit register save/store/spills, and minimize
+ // the size of code.
+ const size_t delta = begin();
+ if (ABSL_PREDICT_FALSE(delta != 0)) {
+ const size_t new_end = end() - delta;
+ set_begin(0);
+ set_end(new_end);
+ // TODO(mvels): we can write this using 2 loads / 2 stores depending on
+ // total size for the kMaxCapacity = 6 case. I.e., we can branch (switch) on
+ // size, and then do overlapping load/store of up to 4 pointers (inlined as
+ // XMM, YMM or ZMM load/store) and up to 2 pointers (XMM / YMM), which is a)
+ // compact and b) not clobbering any registers.
+ ABSL_INTERNAL_ASSUME(new_end <= kMaxCapacity);
+#ifdef __clang__
+#pragma unroll 1
+#endif
+ for (size_t i = 0; i < new_end; ++i) {
+ edges_[i] = edges_[i + delta];
+ }
+ }
+}
+
+inline void CordRepBtree::AlignEnd() {
+ // See comments in `AlignBegin` for motivation on the hand-rolled for loops.
+ const size_t delta = capacity() - end();
+ if (delta != 0) {
+ const size_t new_begin = begin() + delta;
+ const size_t new_end = end() + delta;
+ set_begin(new_begin);
+ set_end(new_end);
+ ABSL_INTERNAL_ASSUME(new_end <= kMaxCapacity);
+#ifdef __clang__
+#pragma unroll 1
+#endif
+ for (size_t i = new_end - 1; i >= new_begin; --i) {
+ edges_[i] = edges_[i - delta];
+ }
+ }
+}
+
+template <>
+inline void CordRepBtree::Add<CordRepBtree::kBack>(CordRep* rep) {
+ AlignBegin();
+ edges_[fetch_add_end(1)] = rep;
+}
+
+template <>
+inline void CordRepBtree::Add<CordRepBtree::kBack>(
+ absl::Span<CordRep* const> edges) {
+ AlignBegin();
+ size_t new_end = end();
+ for (CordRep* edge : edges) edges_[new_end++] = edge;
+ set_end(new_end);
+}
+
+template <>
+inline void CordRepBtree::Add<CordRepBtree::kFront>(CordRep* rep) {
+ AlignEnd();
+ edges_[sub_fetch_begin(1)] = rep;
+}
+
+template <>
+inline void CordRepBtree::Add<CordRepBtree::kFront>(
+ absl::Span<CordRep* const> edges) {
+ AlignEnd();
+ size_t new_begin = begin() - edges.size();
+ set_begin(new_begin);
+ for (CordRep* edge : edges) edges_[new_begin++] = edge;
+}
+
+template <CordRepBtree::EdgeType edge_type>
+inline void CordRepBtree::SetEdge(CordRep* edge) {
+ const int idx = edge_type == kFront ? begin() : back();
+ CordRep::Unref(edges_[idx]);
+ edges_[idx] = edge;
+}
+
+inline CordRepBtree::OpResult CordRepBtree::ToOpResult(bool owned) {
+ return owned ? OpResult{this, kSelf} : OpResult{Copy(), kCopied};
+}
+
+inline CordRepBtree::Position CordRepBtree::IndexOf(size_t offset) const {
+ assert(offset < length);
+ size_t index = begin();
+ while (offset >= edges_[index]->length) offset -= edges_[index++]->length;
+ return {index, offset};
+}
+
+inline CordRepBtree::Position CordRepBtree::IndexBefore(size_t offset) const {
+ assert(offset > 0);
+ assert(offset <= length);
+ size_t index = begin();
+ while (offset > edges_[index]->length) offset -= edges_[index++]->length;
+ return {index, offset};
+}
+
+inline CordRepBtree::Position CordRepBtree::IndexBefore(Position front,
+ size_t offset) const {
+ size_t index = front.index;
+ offset = offset + front.n;
+ while (offset > edges_[index]->length) offset -= edges_[index++]->length;
+ return {index, offset};
+}
+
+inline CordRepBtree::Position CordRepBtree::IndexOfLength(size_t n) const {
+ assert(n <= length);
+ size_t index = back();
+ size_t strip = length - n;
+ while (strip >= edges_[index]->length) strip -= edges_[index--]->length;
+ return {index, edges_[index]->length - strip};
+}
+
+inline CordRepBtree::Position CordRepBtree::IndexBeyond(
+ const size_t offset) const {
+ // We need to find the edge which `starting offset` is beyond (>=)`offset`.
+ // For this we can't use the `offset -= length` logic of IndexOf. Instead, we
+ // track the offset of the `current edge` in `off`, which we increase as we
+ // iterate over the edges until we find the matching edge.
+ size_t off = 0;
+ size_t index = begin();
+ while (offset > off) off += edges_[index++]->length;
+ return {index, off - offset};
+}
+
+inline CordRepBtree* CordRepBtree::Create(CordRep* rep) {
+ if (IsDataEdge(rep)) return New(rep);
+ return CreateSlow(rep);
+}
+
+inline Span<char> CordRepBtree::GetAppendBuffer(size_t size) {
+ assert(refcount.IsMutable());
+ CordRepBtree* tree = this;
+ const int height = this->height();
+ CordRepBtree* n1 = tree;
+ CordRepBtree* n2 = tree;
+ CordRepBtree* n3 = tree;
+ switch (height) {
+ case 3:
+ tree = tree->Edge(kBack)->btree();
+ if (!tree->refcount.IsMutable()) return {};
+ n2 = tree;
+ ABSL_FALLTHROUGH_INTENDED;
+ case 2:
+ tree = tree->Edge(kBack)->btree();
+ if (!tree->refcount.IsMutable()) return {};
+ n1 = tree;
+ ABSL_FALLTHROUGH_INTENDED;
+ case 1:
+ tree = tree->Edge(kBack)->btree();
+ if (!tree->refcount.IsMutable()) return {};
+ ABSL_FALLTHROUGH_INTENDED;
+ case 0:
+ CordRep* edge = tree->Edge(kBack);
+ if (!edge->refcount.IsMutable()) return {};
+ if (edge->tag < FLAT) return {};
+ size_t avail = edge->flat()->Capacity() - edge->length;
+ if (avail == 0) return {};
+ size_t delta = (std::min)(size, avail);
+ Span<char> span = {edge->flat()->Data() + edge->length, delta};
+ edge->length += delta;
+ switch (height) {
+ case 3:
+ n3->length += delta;
+ ABSL_FALLTHROUGH_INTENDED;
+ case 2:
+ n2->length += delta;
+ ABSL_FALLTHROUGH_INTENDED;
+ case 1:
+ n1->length += delta;
+ ABSL_FALLTHROUGH_INTENDED;
+ case 0:
+ tree->length += delta;
+ return span;
+ }
+ break;
+ }
+ return GetAppendBufferSlow(size);
+}
+
+extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kBack>(
+ CordRepBtree* tree, CordRep* rep);
+
+extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kFront>(
+ CordRepBtree* tree, CordRep* rep);
+
+inline CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, CordRep* rep) {
+ if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) {
+ return CordRepBtree::AddCordRep<kBack>(tree, rep);
+ }
+ return AppendSlow(tree, rep);
+}
+
+inline CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, CordRep* rep) {
+ if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) {
+ return CordRepBtree::AddCordRep<kFront>(tree, rep);
+ }
+ return PrependSlow(tree, rep);
+}
+
+#ifdef NDEBUG
+
+inline CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree,
+ bool /* shallow */) {
+ return tree;
+}
+
+inline const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree,
+ bool /* shallow */) {
+ return tree;
+}
+
+#endif
+
+} // namespace cord_internal
+ABSL_NAMESPACE_END
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_