xref: /freebsd/contrib/llvm-project/clang/lib/Tooling/FileMatchTrie.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //===- FileMatchTrie.cpp --------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file contains the implementation of a FileMatchTrie.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "clang/Tooling/FileMatchTrie.h"
14 #include "llvm/ADT/StringMap.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/Support/FileSystem.h"
17 #include "llvm/Support/Path.h"
18 #include <string>
19 #include <vector>
20 
21 using namespace clang;
22 using namespace tooling;
23 
24 namespace {
25 
26 /// Default \c PathComparator using \c llvm::sys::fs::equivalent().
27 struct DefaultPathComparator : public PathComparator {
28   bool equivalent(StringRef FileA, StringRef FileB) const override {
29     return FileA == FileB || llvm::sys::fs::equivalent(FileA, FileB);
30   }
31 };
32 
33 } // namespace
34 
35 namespace clang {
36 namespace tooling {
37 
38 /// A node of the \c FileMatchTrie.
39 ///
40 /// Each node has storage for up to one path and a map mapping a path segment to
41 /// child nodes. The trie starts with an empty root node.
42 class FileMatchTrieNode {
43 public:
44   /// Inserts 'NewPath' into this trie. \c ConsumedLength denotes
45   /// the number of \c NewPath's trailing characters already consumed during
46   /// recursion.
47   ///
48   /// An insert of a path
49   /// 'p'starts at the root node and does the following:
50   /// - If the node is empty, insert 'p' into its storage and abort.
51   /// - If the node has a path 'p2' but no children, take the last path segment
52   ///   's' of 'p2', put a new child into the map at 's' an insert the rest of
53   ///   'p2' there.
54   /// - Insert a new child for the last segment of 'p' and insert the rest of
55   ///   'p' there.
56   ///
57   /// An insert operation is linear in the number of a path's segments.
58   void insert(StringRef NewPath, unsigned ConsumedLength = 0) {
59     // We cannot put relative paths into the FileMatchTrie as then a path can be
60     // a postfix of another path, violating a core assumption of the trie.
61     if (llvm::sys::path::is_relative(NewPath))
62       return;
63     if (Path.empty()) {
64       // This is an empty leaf. Store NewPath and return.
65       Path = std::string(NewPath);
66       return;
67     }
68     if (Children.empty()) {
69       // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'.
70       if (NewPath == Path)
71           return;
72       // Make this a node and create a child-leaf with 'Path'.
73       StringRef Element(llvm::sys::path::filename(
74           StringRef(Path).drop_back(ConsumedLength)));
75       Children[Element].Path = Path;
76     }
77     StringRef Element(llvm::sys::path::filename(
78           StringRef(NewPath).drop_back(ConsumedLength)));
79     Children[Element].insert(NewPath, ConsumedLength + Element.size() + 1);
80   }
81 
82   /// Tries to find the node under this \c FileMatchTrieNode that best
83   /// matches 'FileName'.
84   ///
85   /// If multiple paths fit 'FileName' equally well, \c IsAmbiguous is set to
86   /// \c true and an empty string is returned. If no path fits 'FileName', an
87   /// empty string is returned. \c ConsumedLength denotes the number of
88   /// \c Filename's trailing characters already consumed during recursion.
89   ///
90   /// To find the best matching node for a given path 'p', the
91   /// \c findEquivalent() function is called recursively for each path segment
92   /// (back to front) of 'p' until a node 'n' is reached that does not ..
93   /// - .. have children. In this case it is checked
94   ///   whether the stored path is equivalent to 'p'. If yes, the best match is
95   ///   found. Otherwise continue with the parent node as if this node did not
96   ///   exist.
97   /// - .. a child matching the next path segment. In this case, all children of
98   ///   'n' are an equally good match for 'p'. All children are of 'n' are found
99   ///   recursively and their equivalence to 'p' is determined. If none are
100   ///   equivalent, continue with the parent node as if 'n' didn't exist. If one
101   ///   is equivalent, the best match is found. Otherwise, report and ambigiuity
102   ///   error.
103   StringRef findEquivalent(const PathComparator& Comparator,
104                            StringRef FileName,
105                            bool &IsAmbiguous,
106                            unsigned ConsumedLength = 0) const {
107     // Note: we support only directory symlinks for performance reasons.
108     if (Children.empty()) {
109       // As far as we do not support file symlinks, compare
110       // basenames here to avoid request to file system.
111       if (llvm::sys::path::filename(Path) ==
112               llvm::sys::path::filename(FileName) &&
113           Comparator.equivalent(StringRef(Path), FileName))
114         return StringRef(Path);
115       return {};
116     }
117     StringRef Element(llvm::sys::path::filename(FileName.drop_back(
118         ConsumedLength)));
119     llvm::StringMap<FileMatchTrieNode>::const_iterator MatchingChild =
120         Children.find(Element);
121     if (MatchingChild != Children.end()) {
122       StringRef Result = MatchingChild->getValue().findEquivalent(
123           Comparator, FileName, IsAmbiguous,
124           ConsumedLength + Element.size() + 1);
125       if (!Result.empty() || IsAmbiguous)
126         return Result;
127     }
128 
129     // If `ConsumedLength` is zero, this is the root and we have no filename
130     // match. Give up in this case, we don't try to find symlinks with
131     // different names.
132     if (ConsumedLength == 0)
133       return {};
134 
135     std::vector<StringRef> AllChildren;
136     getAll(AllChildren, MatchingChild);
137     StringRef Result;
138     for (const auto &Child : AllChildren) {
139       if (Comparator.equivalent(Child, FileName)) {
140         if (Result.empty()) {
141           Result = Child;
142         } else {
143           IsAmbiguous = true;
144           return {};
145         }
146       }
147     }
148     return Result;
149   }
150 
151 private:
152   /// Gets all paths under this FileMatchTrieNode.
153   void getAll(std::vector<StringRef> &Results,
154               llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const {
155     if (Path.empty())
156       return;
157     if (Children.empty()) {
158       Results.push_back(StringRef(Path));
159       return;
160     }
161     for (llvm::StringMap<FileMatchTrieNode>::const_iterator
162          It = Children.begin(), E = Children.end();
163          It != E; ++It) {
164       if (It == Except)
165         continue;
166       It->getValue().getAll(Results, Children.end());
167     }
168   }
169 
170   // The stored absolute path in this node. Only valid for leaf nodes, i.e.
171   // nodes where Children.empty().
172   std::string Path;
173 
174   // The children of this node stored in a map based on the next path segment.
175   llvm::StringMap<FileMatchTrieNode> Children;
176 };
177 
178 } // namespace tooling
179 } // namespace clang
180 
181 FileMatchTrie::FileMatchTrie()
182     : Root(new FileMatchTrieNode), Comparator(new DefaultPathComparator()) {}
183 
184 FileMatchTrie::FileMatchTrie(PathComparator *Comparator)
185     : Root(new FileMatchTrieNode), Comparator(Comparator) {}
186 
187 FileMatchTrie::~FileMatchTrie() {
188   delete Root;
189 }
190 
191 void FileMatchTrie::insert(StringRef NewPath) {
192   Root->insert(NewPath);
193 }
194 
195 StringRef FileMatchTrie::findEquivalent(StringRef FileName,
196                                         raw_ostream &Error) const {
197   if (llvm::sys::path::is_relative(FileName)) {
198     Error << "Cannot resolve relative paths";
199     return {};
200   }
201   bool IsAmbiguous = false;
202   StringRef Result = Root->findEquivalent(*Comparator, FileName, IsAmbiguous);
203   if (IsAmbiguous)
204     Error << "Path is ambiguous";
205   return Result;
206 }
207