1 //===- FileMatchTrie.cpp --------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the implementation of a FileMatchTrie. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "clang/Tooling/FileMatchTrie.h" 14 #include "llvm/ADT/StringMap.h" 15 #include "llvm/ADT/StringRef.h" 16 #include "llvm/Support/FileSystem.h" 17 #include "llvm/Support/Path.h" 18 #include <string> 19 #include <vector> 20 21 using namespace clang; 22 using namespace tooling; 23 24 namespace { 25 26 /// Default \c PathComparator using \c llvm::sys::fs::equivalent(). 27 struct DefaultPathComparator : public PathComparator { 28 bool equivalent(StringRef FileA, StringRef FileB) const override { 29 return FileA == FileB || llvm::sys::fs::equivalent(FileA, FileB); 30 } 31 }; 32 33 } // namespace 34 35 namespace clang { 36 namespace tooling { 37 38 /// A node of the \c FileMatchTrie. 39 /// 40 /// Each node has storage for up to one path and a map mapping a path segment to 41 /// child nodes. The trie starts with an empty root node. 42 class FileMatchTrieNode { 43 public: 44 /// Inserts 'NewPath' into this trie. \c ConsumedLength denotes 45 /// the number of \c NewPath's trailing characters already consumed during 46 /// recursion. 47 /// 48 /// An insert of a path 49 /// 'p'starts at the root node and does the following: 50 /// - If the node is empty, insert 'p' into its storage and abort. 51 /// - If the node has a path 'p2' but no children, take the last path segment 52 /// 's' of 'p2', put a new child into the map at 's' an insert the rest of 53 /// 'p2' there. 54 /// - Insert a new child for the last segment of 'p' and insert the rest of 55 /// 'p' there. 56 /// 57 /// An insert operation is linear in the number of a path's segments. 58 void insert(StringRef NewPath, unsigned ConsumedLength = 0) { 59 // We cannot put relative paths into the FileMatchTrie as then a path can be 60 // a postfix of another path, violating a core assumption of the trie. 61 if (llvm::sys::path::is_relative(NewPath)) 62 return; 63 if (Path.empty()) { 64 // This is an empty leaf. Store NewPath and return. 65 Path = std::string(NewPath); 66 return; 67 } 68 if (Children.empty()) { 69 // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'. 70 if (NewPath == Path) 71 return; 72 // Make this a node and create a child-leaf with 'Path'. 73 StringRef Element(llvm::sys::path::filename( 74 StringRef(Path).drop_back(ConsumedLength))); 75 Children[Element].Path = Path; 76 } 77 StringRef Element(llvm::sys::path::filename( 78 StringRef(NewPath).drop_back(ConsumedLength))); 79 Children[Element].insert(NewPath, ConsumedLength + Element.size() + 1); 80 } 81 82 /// Tries to find the node under this \c FileMatchTrieNode that best 83 /// matches 'FileName'. 84 /// 85 /// If multiple paths fit 'FileName' equally well, \c IsAmbiguous is set to 86 /// \c true and an empty string is returned. If no path fits 'FileName', an 87 /// empty string is returned. \c ConsumedLength denotes the number of 88 /// \c Filename's trailing characters already consumed during recursion. 89 /// 90 /// To find the best matching node for a given path 'p', the 91 /// \c findEquivalent() function is called recursively for each path segment 92 /// (back to front) of 'p' until a node 'n' is reached that does not .. 93 /// - .. have children. In this case it is checked 94 /// whether the stored path is equivalent to 'p'. If yes, the best match is 95 /// found. Otherwise continue with the parent node as if this node did not 96 /// exist. 97 /// - .. a child matching the next path segment. In this case, all children of 98 /// 'n' are an equally good match for 'p'. All children are of 'n' are found 99 /// recursively and their equivalence to 'p' is determined. If none are 100 /// equivalent, continue with the parent node as if 'n' didn't exist. If one 101 /// is equivalent, the best match is found. Otherwise, report and ambigiuity 102 /// error. 103 StringRef findEquivalent(const PathComparator& Comparator, 104 StringRef FileName, 105 bool &IsAmbiguous, 106 unsigned ConsumedLength = 0) const { 107 // Note: we support only directory symlinks for performance reasons. 108 if (Children.empty()) { 109 // As far as we do not support file symlinks, compare 110 // basenames here to avoid request to file system. 111 if (llvm::sys::path::filename(Path) == 112 llvm::sys::path::filename(FileName) && 113 Comparator.equivalent(StringRef(Path), FileName)) 114 return StringRef(Path); 115 return {}; 116 } 117 StringRef Element(llvm::sys::path::filename(FileName.drop_back( 118 ConsumedLength))); 119 llvm::StringMap<FileMatchTrieNode>::const_iterator MatchingChild = 120 Children.find(Element); 121 if (MatchingChild != Children.end()) { 122 StringRef Result = MatchingChild->getValue().findEquivalent( 123 Comparator, FileName, IsAmbiguous, 124 ConsumedLength + Element.size() + 1); 125 if (!Result.empty() || IsAmbiguous) 126 return Result; 127 } 128 129 // If `ConsumedLength` is zero, this is the root and we have no filename 130 // match. Give up in this case, we don't try to find symlinks with 131 // different names. 132 if (ConsumedLength == 0) 133 return {}; 134 135 std::vector<StringRef> AllChildren; 136 getAll(AllChildren, MatchingChild); 137 StringRef Result; 138 for (const auto &Child : AllChildren) { 139 if (Comparator.equivalent(Child, FileName)) { 140 if (Result.empty()) { 141 Result = Child; 142 } else { 143 IsAmbiguous = true; 144 return {}; 145 } 146 } 147 } 148 return Result; 149 } 150 151 private: 152 /// Gets all paths under this FileMatchTrieNode. 153 void getAll(std::vector<StringRef> &Results, 154 llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const { 155 if (Path.empty()) 156 return; 157 if (Children.empty()) { 158 Results.push_back(StringRef(Path)); 159 return; 160 } 161 for (llvm::StringMap<FileMatchTrieNode>::const_iterator 162 It = Children.begin(), E = Children.end(); 163 It != E; ++It) { 164 if (It == Except) 165 continue; 166 It->getValue().getAll(Results, Children.end()); 167 } 168 } 169 170 // The stored absolute path in this node. Only valid for leaf nodes, i.e. 171 // nodes where Children.empty(). 172 std::string Path; 173 174 // The children of this node stored in a map based on the next path segment. 175 llvm::StringMap<FileMatchTrieNode> Children; 176 }; 177 178 } // namespace tooling 179 } // namespace clang 180 181 FileMatchTrie::FileMatchTrie() 182 : Root(new FileMatchTrieNode), Comparator(new DefaultPathComparator()) {} 183 184 FileMatchTrie::FileMatchTrie(PathComparator *Comparator) 185 : Root(new FileMatchTrieNode), Comparator(Comparator) {} 186 187 FileMatchTrie::~FileMatchTrie() { 188 delete Root; 189 } 190 191 void FileMatchTrie::insert(StringRef NewPath) { 192 Root->insert(NewPath); 193 } 194 195 StringRef FileMatchTrie::findEquivalent(StringRef FileName, 196 raw_ostream &Error) const { 197 if (llvm::sys::path::is_relative(FileName)) { 198 Error << "Cannot resolve relative paths"; 199 return {}; 200 } 201 bool IsAmbiguous = false; 202 StringRef Result = Root->findEquivalent(*Comparator, FileName, IsAmbiguous); 203 if (IsAmbiguous) 204 Error << "Path is ambiguous"; 205 return Result; 206 } 207