aboutsummaryrefslogtreecommitdiff
path: root/clangd/FileDistance.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clangd/FileDistance.cpp')
-rw-r--r--clangd/FileDistance.cpp171
1 files changed, 171 insertions, 0 deletions
diff --git a/clangd/FileDistance.cpp b/clangd/FileDistance.cpp
new file mode 100644
index 00000000..a0ce25b4
--- /dev/null
+++ b/clangd/FileDistance.cpp
@@ -0,0 +1,171 @@
+//===--- FileDistance.cpp - File contents container -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The FileDistance structure allows calculating the minimum distance to paths
+// in a single tree.
+// We simply walk up the path's ancestors until we find a node whose cost is
+// known, and add the cost of walking back down. Initialization ensures this
+// gives the correct path to the roots.
+// We cache the results, so that the runtime is O(|A|), where A is the set of
+// all distinct ancestors of visited paths.
+//
+// Example after initialization with /=2, /bar=0, DownCost = 1:
+// / = 2
+// /bar = 0
+//
+// After querying /foo/bar and /bar/foo:
+// / = 2
+// /bar = 0
+// /bar/foo = 1
+// /foo = 3
+// /foo/bar = 4
+//
+// URIDistance creates FileDistance lazily for each URI scheme encountered. In
+// practice this is a small constant factor.
+//
+//===-------------------------------------------------------------------------//
+
+#include "FileDistance.h"
+#include "Logger.h"
+#include "llvm/ADT/STLExtras.h"
+#include <queue>
+
+namespace clang {
+namespace clangd {
+using namespace llvm;
+
+// Convert a path into the canonical form.
+// Canonical form is either "/", or "/segment" * N:
+// C:\foo\bar --> /c:/foo/bar
+// /foo/ --> /foo
+// a/b/c --> /a/b/c
+static SmallString<128> canonicalize(StringRef Path) {
+ SmallString<128> Result = Path.rtrim('/');
+ native(Result, sys::path::Style::posix);
+ if (Result.empty() || Result.front() != '/')
+ Result.insert(Result.begin(), '/');
+ return Result;
+}
+
+constexpr const unsigned FileDistance::kUnreachable;
+
+FileDistance::FileDistance(StringMap<SourceParams> Sources,
+ const FileDistanceOptions &Opts)
+ : Opts(Opts) {
+ llvm::DenseMap<hash_code, SmallVector<hash_code, 4>> DownEdges;
+ // Compute the best distance following only up edges.
+ // Keep track of down edges, in case we can use them to improve on this.
+ for (const auto &S : Sources) {
+ auto Canonical = canonicalize(S.getKey());
+ dlog("Source {0} = {1}, MaxUp = {2}", Canonical, S.second.Cost,
+ S.second.MaxUpTraversals);
+ // Walk up to ancestors of this source, assigning cost.
+ StringRef Rest = Canonical;
+ llvm::hash_code Hash = hash_value(Rest);
+ for (unsigned I = 0; !Rest.empty(); ++I) {
+ Rest = parent_path(Rest, sys::path::Style::posix);
+ auto NextHash = hash_value(Rest);
+ auto &Down = DownEdges[NextHash];
+ if (std::find(Down.begin(), Down.end(), Hash) == Down.end())
+ DownEdges[NextHash].push_back(Hash);
+ // We can't just break after MaxUpTraversals, must still set DownEdges.
+ if (I > S.getValue().MaxUpTraversals) {
+ if (Cache.find(Hash) != Cache.end())
+ break;
+ } else {
+ unsigned Cost = S.getValue().Cost + I * Opts.UpCost;
+ auto R = Cache.try_emplace(Hash, Cost);
+ if (!R.second) {
+ if (Cost < R.first->second) {
+ R.first->second = Cost;
+ } else {
+ // If we're not the best way to get to this path, stop assigning.
+ break;
+ }
+ }
+ }
+ Hash = NextHash;
+ }
+ }
+ // Now propagate scores parent -> child if that's an improvement.
+ // BFS ensures we propagate down chains (must visit parents before children).
+ std::queue<hash_code> Next;
+ for (auto Child : DownEdges.lookup(hash_value(llvm::StringRef(""))))
+ Next.push(Child);
+ while (!Next.empty()) {
+ auto ParentCost = Cache.lookup(Next.front());
+ for (auto Child : DownEdges.lookup(Next.front())) {
+ auto &ChildCost =
+ Cache.try_emplace(Child, kUnreachable).first->getSecond();
+ if (ParentCost + Opts.DownCost < ChildCost)
+ ChildCost = ParentCost + Opts.DownCost;
+ Next.push(Child);
+ }
+ Next.pop();
+ }
+}
+
+unsigned FileDistance::distance(StringRef Path) {
+ auto Canonical = canonicalize(Path);
+ unsigned Cost = kUnreachable;
+ SmallVector<hash_code, 16> Ancestors;
+ // Walk up ancestors until we find a path we know the distance for.
+ for (StringRef Rest = Canonical; !Rest.empty();
+ Rest = parent_path(Rest, sys::path::Style::posix)) {
+ auto Hash = hash_value(Rest);
+ auto It = Cache.find(Hash);
+ if (It != Cache.end()) {
+ Cost = It->second;
+ break;
+ }
+ Ancestors.push_back(Hash);
+ }
+ // Now we know the costs for (known node, queried node].
+ // Fill these in, walking down the directory tree.
+ for (hash_code Hash : reverse(Ancestors)) {
+ if (Cost != kUnreachable)
+ Cost += Opts.DownCost;
+ Cache.try_emplace(Hash, Cost);
+ }
+ dlog("distance({0} = {1})", Path, Cost);
+ return Cost;
+}
+
+unsigned URIDistance::distance(llvm::StringRef URI) {
+ auto R = Cache.try_emplace(llvm::hash_value(URI), FileDistance::kUnreachable);
+ if (!R.second)
+ return R.first->getSecond();
+ if (auto U = clangd::URI::parse(URI)) {
+ dlog("distance({0} = {1})", URI, U->body());
+ R.first->second = forScheme(U->scheme()).distance(U->body());
+ } else {
+ log("URIDistance::distance() of unparseable {0}: {1}", URI, U.takeError());
+ }
+ return R.first->second;
+}
+
+FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) {
+ auto &Delegate = ByScheme[Scheme];
+ if (!Delegate) {
+ llvm::StringMap<SourceParams> SchemeSources;
+ for (const auto &Source : Sources) {
+ if (auto U = clangd::URI::create(Source.getKey(), Scheme))
+ SchemeSources.try_emplace(U->body(), Source.getValue());
+ else
+ consumeError(U.takeError());
+ }
+ dlog("FileDistance for scheme {0}: {1}/{2} sources", Scheme,
+ SchemeSources.size(), Sources.size());
+ Delegate.reset(new FileDistance(std::move(SchemeSources), Opts));
+ }
+ return *Delegate;
+}
+
+} // namespace clangd
+} // namespace clang