diff options
Diffstat (limited to 'clangd/index/dex/Iterator.cpp')
-rw-r--r-- | clangd/index/dex/Iterator.cpp | 244 |
1 files changed, 244 insertions, 0 deletions
diff --git a/clangd/index/dex/Iterator.cpp b/clangd/index/dex/Iterator.cpp new file mode 100644 index 00000000..25107f92 --- /dev/null +++ b/clangd/index/dex/Iterator.cpp @@ -0,0 +1,244 @@ +//===--- Iterator.cpp - Query Symbol Retrieval ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Iterator.h" +#include <algorithm> +#include <cassert> +#include <numeric> + +namespace clang { +namespace clangd { +namespace dex { + +namespace { + +/// Implements Iterator over a PostingList. DocumentIterator is the most basic +/// iterator: it doesn't have any children (hence it is the leaf of iterator +/// tree) and is simply a wrapper around PostingList::const_iterator. +class DocumentIterator : public Iterator { +public: + DocumentIterator(PostingListRef Documents) + : Documents(Documents), Index(std::begin(Documents)) {} + + bool reachedEnd() const override { return Index == std::end(Documents); } + + /// Advances cursor to the next item. + void advance() override { + assert(!reachedEnd() && "DocumentIterator can't advance at the end."); + ++Index; + } + + /// Applies binary search to advance cursor to the next item with DocID equal + /// or higher than the given one. + void advanceTo(DocID ID) override { + assert(!reachedEnd() && "DocumentIterator can't advance at the end."); + Index = std::lower_bound(Index, std::end(Documents), ID); + } + + DocID peek() const override { + assert(!reachedEnd() && "DocumentIterator can't call peek() at the end."); + return *Index; + } + + llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { + OS << '['; + auto Separator = ""; + for (const auto &ID : Documents) { + OS << Separator << ID; + Separator = ", "; + } + OS << ']'; + return OS; + } + +private: + PostingListRef Documents; + PostingListRef::const_iterator Index; +}; + +/// Implements Iterator over the intersection of other iterators. +/// +/// AndIterator iterates through common items among all children. It becomes +/// exhausted as soon as any child becomes exhausted. After each mutation, the +/// iterator restores the invariant: all children must point to the same item. +class AndIterator : public Iterator { +public: + AndIterator(std::vector<std::unique_ptr<Iterator>> AllChildren) + : Children(std::move(AllChildren)) { + assert(!Children.empty() && "AndIterator should have at least one child."); + // Establish invariants. + sync(); + } + + bool reachedEnd() const override { return ReachedEnd; } + + /// Advances all children to the next common item. + void advance() override { + assert(!reachedEnd() && "AndIterator can't call advance() at the end."); + Children.front()->advance(); + sync(); + } + + /// Advances all children to the next common item with DocumentID >= ID. + void advanceTo(DocID ID) override { + assert(!reachedEnd() && "AndIterator can't call advanceTo() at the end."); + Children.front()->advanceTo(ID); + sync(); + } + + DocID peek() const override { return Children.front()->peek(); } + + llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { + OS << "(& "; + auto Separator = ""; + for (const auto &Child : Children) { + OS << Separator << *Child; + Separator = " "; + } + OS << ')'; + return OS; + } + +private: + /// Restores class invariants: each child will point to the same element after + /// sync. + void sync() { + ReachedEnd |= Children.front()->reachedEnd(); + if (ReachedEnd) + return; + auto SyncID = Children.front()->peek(); + // Indicates whether any child needs to be advanced to new SyncID. + bool NeedsAdvance = false; + do { + NeedsAdvance = false; + for (auto &Child : Children) { + Child->advanceTo(SyncID); + ReachedEnd |= Child->reachedEnd(); + // If any child reaches end And iterator can not match any other items. + // In this case, just terminate the process. + if (ReachedEnd) + return; + // If any child goes beyond given ID (i.e. ID is not the common item), + // all children should be advanced to the next common item. + // FIXME(kbobyrev): This is not a very optimized version; after costs + // are introduced, cycle should break whenever ID exceeds current one + // and cheapest children should be advanced over again. + if (Child->peek() > SyncID) { + SyncID = Child->peek(); + NeedsAdvance = true; + } + } + } while (NeedsAdvance); + } + + /// AndIterator owns its children and ensures that all of them point to the + /// same element. As soon as one child gets exhausted, AndIterator can no + /// longer advance and has reached its end. + std::vector<std::unique_ptr<Iterator>> Children; + /// Indicates whether any child is exhausted. It is cheaper to maintain and + /// update the field, rather than traversing the whole subtree in each + /// reachedEnd() call. + bool ReachedEnd = false; +}; + +/// Implements Iterator over the union of other iterators. +/// +/// OrIterator iterates through all items which can be pointed to by at least +/// one child. To preserve the sorted order, this iterator always advances the +/// child with smallest Child->peek() value. OrIterator becomes exhausted as +/// soon as all of its children are exhausted. +class OrIterator : public Iterator { +public: + OrIterator(std::vector<std::unique_ptr<Iterator>> AllChildren) + : Children(std::move(AllChildren)) { + assert(Children.size() > 0 && "Or Iterator must have at least one child."); + } + + /// Returns true if all children are exhausted. + bool reachedEnd() const override { + return std::all_of(begin(Children), end(Children), + [](const std::unique_ptr<Iterator> &Child) { + return Child->reachedEnd(); + }); + } + + /// Moves each child pointing to the smallest DocID to the next item. + void advance() override { + assert(!reachedEnd() && + "OrIterator must have at least one child to advance()."); + const auto SmallestID = peek(); + for (const auto &Child : Children) + if (!Child->reachedEnd() && Child->peek() == SmallestID) + Child->advance(); + } + + /// Advances each child to the next existing element with DocumentID >= ID. + void advanceTo(DocID ID) override { + assert(!reachedEnd() && "Can't advance iterator after it reached the end."); + for (const auto &Child : Children) + if (!Child->reachedEnd()) + Child->advanceTo(ID); + } + + /// Returns the element under cursor of the child with smallest Child->peek() + /// value. + DocID peek() const override { + assert(!reachedEnd() && + "OrIterator must have at least one child to peek()."); + DocID Result = std::numeric_limits<DocID>::max(); + + for (const auto &Child : Children) + if (!Child->reachedEnd()) + Result = std::min(Result, Child->peek()); + + return Result; + } + + llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { + OS << "(| "; + auto Separator = ""; + for (const auto &Child : Children) { + OS << Separator << *Child; + Separator = " "; + } + OS << ')'; + return OS; + } + +private: + // FIXME(kbobyrev): Would storing Children in min-heap be faster? + std::vector<std::unique_ptr<Iterator>> Children; +}; + +} // end namespace + +std::vector<DocID> consume(Iterator &It) { + std::vector<DocID> Result; + for (; !It.reachedEnd(); It.advance()) + Result.push_back(It.peek()); + return Result; +} + +std::unique_ptr<Iterator> create(PostingListRef Documents) { + return llvm::make_unique<DocumentIterator>(Documents); +} + +std::unique_ptr<Iterator> +createAnd(std::vector<std::unique_ptr<Iterator>> Children) { + return llvm::make_unique<AndIterator>(move(Children)); +} + +std::unique_ptr<Iterator> +createOr(std::vector<std::unique_ptr<Iterator>> Children) { + return llvm::make_unique<OrIterator>(move(Children)); +} + +} // namespace dex +} // namespace clangd +} // namespace clang |