diff options
Diffstat (limited to 'clangd/FileDistance.cpp')
-rw-r--r-- | clangd/FileDistance.cpp | 171 |
1 files changed, 171 insertions, 0 deletions
diff --git a/clangd/FileDistance.cpp b/clangd/FileDistance.cpp new file mode 100644 index 00000000..a0ce25b4 --- /dev/null +++ b/clangd/FileDistance.cpp @@ -0,0 +1,171 @@ +//===--- FileDistance.cpp - File contents container -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The FileDistance structure allows calculating the minimum distance to paths +// in a single tree. +// We simply walk up the path's ancestors until we find a node whose cost is +// known, and add the cost of walking back down. Initialization ensures this +// gives the correct path to the roots. +// We cache the results, so that the runtime is O(|A|), where A is the set of +// all distinct ancestors of visited paths. +// +// Example after initialization with /=2, /bar=0, DownCost = 1: +// / = 2 +// /bar = 0 +// +// After querying /foo/bar and /bar/foo: +// / = 2 +// /bar = 0 +// /bar/foo = 1 +// /foo = 3 +// /foo/bar = 4 +// +// URIDistance creates FileDistance lazily for each URI scheme encountered. In +// practice this is a small constant factor. +// +//===-------------------------------------------------------------------------// + +#include "FileDistance.h" +#include "Logger.h" +#include "llvm/ADT/STLExtras.h" +#include <queue> + +namespace clang { +namespace clangd { +using namespace llvm; + +// Convert a path into the canonical form. +// Canonical form is either "/", or "/segment" * N: +// C:\foo\bar --> /c:/foo/bar +// /foo/ --> /foo +// a/b/c --> /a/b/c +static SmallString<128> canonicalize(StringRef Path) { + SmallString<128> Result = Path.rtrim('/'); + native(Result, sys::path::Style::posix); + if (Result.empty() || Result.front() != '/') + Result.insert(Result.begin(), '/'); + return Result; +} + +constexpr const unsigned FileDistance::kUnreachable; + +FileDistance::FileDistance(StringMap<SourceParams> Sources, + const FileDistanceOptions &Opts) + : Opts(Opts) { + llvm::DenseMap<hash_code, SmallVector<hash_code, 4>> DownEdges; + // Compute the best distance following only up edges. + // Keep track of down edges, in case we can use them to improve on this. + for (const auto &S : Sources) { + auto Canonical = canonicalize(S.getKey()); + dlog("Source {0} = {1}, MaxUp = {2}", Canonical, S.second.Cost, + S.second.MaxUpTraversals); + // Walk up to ancestors of this source, assigning cost. + StringRef Rest = Canonical; + llvm::hash_code Hash = hash_value(Rest); + for (unsigned I = 0; !Rest.empty(); ++I) { + Rest = parent_path(Rest, sys::path::Style::posix); + auto NextHash = hash_value(Rest); + auto &Down = DownEdges[NextHash]; + if (std::find(Down.begin(), Down.end(), Hash) == Down.end()) + DownEdges[NextHash].push_back(Hash); + // We can't just break after MaxUpTraversals, must still set DownEdges. + if (I > S.getValue().MaxUpTraversals) { + if (Cache.find(Hash) != Cache.end()) + break; + } else { + unsigned Cost = S.getValue().Cost + I * Opts.UpCost; + auto R = Cache.try_emplace(Hash, Cost); + if (!R.second) { + if (Cost < R.first->second) { + R.first->second = Cost; + } else { + // If we're not the best way to get to this path, stop assigning. + break; + } + } + } + Hash = NextHash; + } + } + // Now propagate scores parent -> child if that's an improvement. + // BFS ensures we propagate down chains (must visit parents before children). + std::queue<hash_code> Next; + for (auto Child : DownEdges.lookup(hash_value(llvm::StringRef("")))) + Next.push(Child); + while (!Next.empty()) { + auto ParentCost = Cache.lookup(Next.front()); + for (auto Child : DownEdges.lookup(Next.front())) { + auto &ChildCost = + Cache.try_emplace(Child, kUnreachable).first->getSecond(); + if (ParentCost + Opts.DownCost < ChildCost) + ChildCost = ParentCost + Opts.DownCost; + Next.push(Child); + } + Next.pop(); + } +} + +unsigned FileDistance::distance(StringRef Path) { + auto Canonical = canonicalize(Path); + unsigned Cost = kUnreachable; + SmallVector<hash_code, 16> Ancestors; + // Walk up ancestors until we find a path we know the distance for. + for (StringRef Rest = Canonical; !Rest.empty(); + Rest = parent_path(Rest, sys::path::Style::posix)) { + auto Hash = hash_value(Rest); + auto It = Cache.find(Hash); + if (It != Cache.end()) { + Cost = It->second; + break; + } + Ancestors.push_back(Hash); + } + // Now we know the costs for (known node, queried node]. + // Fill these in, walking down the directory tree. + for (hash_code Hash : reverse(Ancestors)) { + if (Cost != kUnreachable) + Cost += Opts.DownCost; + Cache.try_emplace(Hash, Cost); + } + dlog("distance({0} = {1})", Path, Cost); + return Cost; +} + +unsigned URIDistance::distance(llvm::StringRef URI) { + auto R = Cache.try_emplace(llvm::hash_value(URI), FileDistance::kUnreachable); + if (!R.second) + return R.first->getSecond(); + if (auto U = clangd::URI::parse(URI)) { + dlog("distance({0} = {1})", URI, U->body()); + R.first->second = forScheme(U->scheme()).distance(U->body()); + } else { + log("URIDistance::distance() of unparseable {0}: {1}", URI, U.takeError()); + } + return R.first->second; +} + +FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) { + auto &Delegate = ByScheme[Scheme]; + if (!Delegate) { + llvm::StringMap<SourceParams> SchemeSources; + for (const auto &Source : Sources) { + if (auto U = clangd::URI::create(Source.getKey(), Scheme)) + SchemeSources.try_emplace(U->body(), Source.getValue()); + else + consumeError(U.takeError()); + } + dlog("FileDistance for scheme {0}: {1}/{2} sources", Scheme, + SchemeSources.size(), Sources.size()); + Delegate.reset(new FileDistance(std::move(SchemeSources), Opts)); + } + return *Delegate; +} + +} // namespace clangd +} // namespace clang |