aboutsummaryrefslogtreecommitdiff
path: root/clangd/FileDistance.h
diff options
context:
space:
mode:
Diffstat (limited to 'clangd/FileDistance.h')
-rw-r--r--clangd/FileDistance.h109
1 files changed, 109 insertions, 0 deletions
diff --git a/clangd/FileDistance.h b/clangd/FileDistance.h
new file mode 100644
index 00000000..f07804fc
--- /dev/null
+++ b/clangd/FileDistance.h
@@ -0,0 +1,109 @@
+//===--- FileDistance.h - File proximity scoring -----------------*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This library measures the distance between file paths.
+// It's used for ranking symbols, e.g. in code completion.
+// |foo/bar.h -> foo/bar.h| = 0.
+// |foo/bar.h -> foo/baz.h| < |foo/bar.h -> baz.h|.
+// This is an edit-distance, where edits go up or down the directory tree.
+// It's not symmetrical, the costs of going up and down may not match.
+//
+// Dealing with multiple sources:
+// In practice we care about the distance from a source file, but files near
+// its main-header and #included files are considered "close".
+// So we start with a set of (anchor, cost) pairs, and call the distance to a
+// path the minimum of `cost + |source -> path|`.
+//
+// We allow each source to limit the number of up-traversals paths may start
+// with. Up-traversals may reach things that are not "semantically near".
+//
+// Symbol URI schemes:
+// Symbol locations may be represented by URIs rather than file paths directly.
+// In this case we want to perform distance computations in URI space rather
+// than in file-space, without performing redundant conversions.
+// Therefore we have a lookup structure that accepts URIs, so that intermediate
+// calculations for the same scheme can be reused.
+//
+// Caveats:
+// Assuming up and down traversals each have uniform costs is simplistic.
+// Often there are "semantic roots" whose children are almost unrelated.
+// (e.g. /usr/include/, or / in an umbrella repository). We ignore this.
+//
+//===----------------------------------------------------------------------===//
+
+#include "URI.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/StringSaver.h"
+
+namespace clang {
+namespace clangd {
+
+struct FileDistanceOptions {
+ unsigned UpCost = 2; // |foo/bar.h -> foo|
+ unsigned DownCost = 1; // |foo -> foo/bar.h|
+ unsigned IncludeCost = 2; // |foo.cc -> included_header.h|
+};
+
+struct SourceParams {
+ // Base cost for paths starting at this source.
+ unsigned Cost = 0;
+ // Limits the number of upwards traversals allowed from this source.
+ unsigned MaxUpTraversals = std::numeric_limits<unsigned>::max();
+};
+
+// Supports lookups to find the minimum distance to a file from any source.
+// This object should be reused, it memoizes intermediate computations.
+class FileDistance {
+public:
+ static constexpr unsigned kUnreachable = std::numeric_limits<unsigned>::max();
+
+ FileDistance(llvm::StringMap<SourceParams> Sources,
+ const FileDistanceOptions &Opts = {});
+
+ // Computes the minimum distance from any source to the file path.
+ unsigned distance(llvm::StringRef Path);
+
+private:
+ // Costs computed so far. Always contains sources and their ancestors.
+ // We store hash codes only. Collisions are rare and consequences aren't dire.
+ llvm::DenseMap<llvm::hash_code, unsigned> Cache;
+ FileDistanceOptions Opts;
+};
+
+// Supports lookups like FileDistance, but the lookup keys are URIs.
+// We convert each of the sources to the scheme of the URI and do a FileDistance
+// comparison on the bodies.
+class URIDistance {
+public:
+ URIDistance(llvm::StringMap<SourceParams> Sources,
+ const FileDistanceOptions &Opts = {})
+ : Sources(Sources), Opts(Opts) {}
+
+ // Computes the minimum distance from any source to the URI.
+ // Only sources that can be mapped into the URI's scheme are considered.
+ unsigned distance(llvm::StringRef URI);
+
+private:
+ // Returns the FileDistance for a URI scheme, creating it if needed.
+ FileDistance &forScheme(llvm::StringRef Scheme);
+
+ // We cache the results using the original strings so we can skip URI parsing.
+ llvm::DenseMap<llvm::hash_code, unsigned> Cache;
+ llvm::StringMap<SourceParams> Sources;
+ llvm::StringMap<std::unique_ptr<FileDistance>> ByScheme;
+ FileDistanceOptions Opts;
+};
+
+} // namespace clangd
+} // namespace clang