diff options
Diffstat (limited to 'clangd/FileDistance.h')
-rw-r--r-- | clangd/FileDistance.h | 109 |
1 files changed, 109 insertions, 0 deletions
diff --git a/clangd/FileDistance.h b/clangd/FileDistance.h new file mode 100644 index 00000000..f07804fc --- /dev/null +++ b/clangd/FileDistance.h @@ -0,0 +1,109 @@ +//===--- FileDistance.h - File proximity scoring -----------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This library measures the distance between file paths. +// It's used for ranking symbols, e.g. in code completion. +// |foo/bar.h -> foo/bar.h| = 0. +// |foo/bar.h -> foo/baz.h| < |foo/bar.h -> baz.h|. +// This is an edit-distance, where edits go up or down the directory tree. +// It's not symmetrical, the costs of going up and down may not match. +// +// Dealing with multiple sources: +// In practice we care about the distance from a source file, but files near +// its main-header and #included files are considered "close". +// So we start with a set of (anchor, cost) pairs, and call the distance to a +// path the minimum of `cost + |source -> path|`. +// +// We allow each source to limit the number of up-traversals paths may start +// with. Up-traversals may reach things that are not "semantically near". +// +// Symbol URI schemes: +// Symbol locations may be represented by URIs rather than file paths directly. +// In this case we want to perform distance computations in URI space rather +// than in file-space, without performing redundant conversions. +// Therefore we have a lookup structure that accepts URIs, so that intermediate +// calculations for the same scheme can be reused. +// +// Caveats: +// Assuming up and down traversals each have uniform costs is simplistic. +// Often there are "semantic roots" whose children are almost unrelated. +// (e.g. /usr/include/, or / in an umbrella repository). We ignore this. +// +//===----------------------------------------------------------------------===// + +#include "URI.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/StringSaver.h" + +namespace clang { +namespace clangd { + +struct FileDistanceOptions { + unsigned UpCost = 2; // |foo/bar.h -> foo| + unsigned DownCost = 1; // |foo -> foo/bar.h| + unsigned IncludeCost = 2; // |foo.cc -> included_header.h| +}; + +struct SourceParams { + // Base cost for paths starting at this source. + unsigned Cost = 0; + // Limits the number of upwards traversals allowed from this source. + unsigned MaxUpTraversals = std::numeric_limits<unsigned>::max(); +}; + +// Supports lookups to find the minimum distance to a file from any source. +// This object should be reused, it memoizes intermediate computations. +class FileDistance { +public: + static constexpr unsigned kUnreachable = std::numeric_limits<unsigned>::max(); + + FileDistance(llvm::StringMap<SourceParams> Sources, + const FileDistanceOptions &Opts = {}); + + // Computes the minimum distance from any source to the file path. + unsigned distance(llvm::StringRef Path); + +private: + // Costs computed so far. Always contains sources and their ancestors. + // We store hash codes only. Collisions are rare and consequences aren't dire. + llvm::DenseMap<llvm::hash_code, unsigned> Cache; + FileDistanceOptions Opts; +}; + +// Supports lookups like FileDistance, but the lookup keys are URIs. +// We convert each of the sources to the scheme of the URI and do a FileDistance +// comparison on the bodies. +class URIDistance { +public: + URIDistance(llvm::StringMap<SourceParams> Sources, + const FileDistanceOptions &Opts = {}) + : Sources(Sources), Opts(Opts) {} + + // Computes the minimum distance from any source to the URI. + // Only sources that can be mapped into the URI's scheme are considered. + unsigned distance(llvm::StringRef URI); + +private: + // Returns the FileDistance for a URI scheme, creating it if needed. + FileDistance &forScheme(llvm::StringRef Scheme); + + // We cache the results using the original strings so we can skip URI parsing. + llvm::DenseMap<llvm::hash_code, unsigned> Cache; + llvm::StringMap<SourceParams> Sources; + llvm::StringMap<std::unique_ptr<FileDistance>> ByScheme; + FileDistanceOptions Opts; +}; + +} // namespace clangd +} // namespace clang |