1 //===- FileMatchTrie.cpp --------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the implementation of a FileMatchTrie. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "clang/Tooling/FileMatchTrie.h" 14 #include "llvm/ADT/StringMap.h" 15 #include "llvm/ADT/StringRef.h" 16 #include "llvm/Support/FileSystem.h" 17 #include "llvm/Support/Path.h" 18 #include "llvm/Support/raw_ostream.h" 19 #include <string> 20 #include <vector> 21 22 using namespace clang; 23 using namespace tooling; 24 25 namespace { 26 27 /// Default \c PathComparator using \c llvm::sys::fs::equivalent(). 28 struct DefaultPathComparator : public PathComparator { 29 bool equivalent(StringRef FileA, StringRef FileB) const override { 30 return FileA == FileB || llvm::sys::fs::equivalent(FileA, FileB); 31 } 32 }; 33 34 } // namespace 35 36 namespace clang { 37 namespace tooling { 38 39 /// A node of the \c FileMatchTrie. 40 /// 41 /// Each node has storage for up to one path and a map mapping a path segment to 42 /// child nodes. The trie starts with an empty root node. 43 class FileMatchTrieNode { 44 public: 45 /// Inserts 'NewPath' into this trie. \c ConsumedLength denotes 46 /// the number of \c NewPath's trailing characters already consumed during 47 /// recursion. 48 /// 49 /// An insert of a path 50 /// 'p'starts at the root node and does the following: 51 /// - If the node is empty, insert 'p' into its storage and abort. 52 /// - If the node has a path 'p2' but no children, take the last path segment 53 /// 's' of 'p2', put a new child into the map at 's' an insert the rest of 54 /// 'p2' there. 55 /// - Insert a new child for the last segment of 'p' and insert the rest of 56 /// 'p' there. 57 /// 58 /// An insert operation is linear in the number of a path's segments. 59 void insert(StringRef NewPath, unsigned ConsumedLength = 0) { 60 // We cannot put relative paths into the FileMatchTrie as then a path can be 61 // a postfix of another path, violating a core assumption of the trie. 62 if (llvm::sys::path::is_relative(NewPath)) 63 return; 64 if (Path.empty()) { 65 // This is an empty leaf. Store NewPath and return. 66 Path = std::string(NewPath); 67 return; 68 } 69 if (Children.empty()) { 70 // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'. 71 if (NewPath == Path) 72 return; 73 // Make this a node and create a child-leaf with 'Path'. 74 StringRef Element(llvm::sys::path::filename( 75 StringRef(Path).drop_back(ConsumedLength))); 76 Children[Element].Path = Path; 77 } 78 StringRef Element(llvm::sys::path::filename( 79 StringRef(NewPath).drop_back(ConsumedLength))); 80 Children[Element].insert(NewPath, ConsumedLength + Element.size() + 1); 81 } 82 83 /// Tries to find the node under this \c FileMatchTrieNode that best 84 /// matches 'FileName'. 85 /// 86 /// If multiple paths fit 'FileName' equally well, \c IsAmbiguous is set to 87 /// \c true and an empty string is returned. If no path fits 'FileName', an 88 /// empty string is returned. \c ConsumedLength denotes the number of 89 /// \c Filename's trailing characters already consumed during recursion. 90 /// 91 /// To find the best matching node for a given path 'p', the 92 /// \c findEquivalent() function is called recursively for each path segment 93 /// (back to front) of 'p' until a node 'n' is reached that does not .. 94 /// - .. have children. In this case it is checked 95 /// whether the stored path is equivalent to 'p'. If yes, the best match is 96 /// found. Otherwise continue with the parent node as if this node did not 97 /// exist. 98 /// - .. a child matching the next path segment. In this case, all children of 99 /// 'n' are an equally good match for 'p'. All children are of 'n' are found 100 /// recursively and their equivalence to 'p' is determined. If none are 101 /// equivalent, continue with the parent node as if 'n' didn't exist. If one 102 /// is equivalent, the best match is found. Otherwise, report and ambigiuity 103 /// error. 104 StringRef findEquivalent(const PathComparator& Comparator, 105 StringRef FileName, 106 bool &IsAmbiguous, 107 unsigned ConsumedLength = 0) const { 108 if (Children.empty()) { 109 if (Comparator.equivalent(StringRef(Path), FileName)) 110 return StringRef(Path); 111 return {}; 112 } 113 StringRef Element(llvm::sys::path::filename(FileName.drop_back( 114 ConsumedLength))); 115 llvm::StringMap<FileMatchTrieNode>::const_iterator MatchingChild = 116 Children.find(Element); 117 if (MatchingChild != Children.end()) { 118 StringRef Result = MatchingChild->getValue().findEquivalent( 119 Comparator, FileName, IsAmbiguous, 120 ConsumedLength + Element.size() + 1); 121 if (!Result.empty() || IsAmbiguous) 122 return Result; 123 } 124 std::vector<StringRef> AllChildren; 125 getAll(AllChildren, MatchingChild); 126 StringRef Result; 127 for (const auto &Child : AllChildren) { 128 if (Comparator.equivalent(Child, FileName)) { 129 if (Result.empty()) { 130 Result = Child; 131 } else { 132 IsAmbiguous = true; 133 return {}; 134 } 135 } 136 } 137 return Result; 138 } 139 140 private: 141 /// Gets all paths under this FileMatchTrieNode. 142 void getAll(std::vector<StringRef> &Results, 143 llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const { 144 if (Path.empty()) 145 return; 146 if (Children.empty()) { 147 Results.push_back(StringRef(Path)); 148 return; 149 } 150 for (llvm::StringMap<FileMatchTrieNode>::const_iterator 151 It = Children.begin(), E = Children.end(); 152 It != E; ++It) { 153 if (It == Except) 154 continue; 155 It->getValue().getAll(Results, Children.end()); 156 } 157 } 158 159 // The stored absolute path in this node. Only valid for leaf nodes, i.e. 160 // nodes where Children.empty(). 161 std::string Path; 162 163 // The children of this node stored in a map based on the next path segment. 164 llvm::StringMap<FileMatchTrieNode> Children; 165 }; 166 167 } // namespace tooling 168 } // namespace clang 169 170 FileMatchTrie::FileMatchTrie() 171 : Root(new FileMatchTrieNode), Comparator(new DefaultPathComparator()) {} 172 173 FileMatchTrie::FileMatchTrie(PathComparator *Comparator) 174 : Root(new FileMatchTrieNode), Comparator(Comparator) {} 175 176 FileMatchTrie::~FileMatchTrie() { 177 delete Root; 178 } 179 180 void FileMatchTrie::insert(StringRef NewPath) { 181 Root->insert(NewPath); 182 } 183 184 StringRef FileMatchTrie::findEquivalent(StringRef FileName, 185 raw_ostream &Error) const { 186 if (llvm::sys::path::is_relative(FileName)) { 187 Error << "Cannot resolve relative paths"; 188 return {}; 189 } 190 bool IsAmbiguous = false; 191 StringRef Result = Root->findEquivalent(*Comparator, FileName, IsAmbiguous); 192 if (IsAmbiguous) 193 Error << "Path is ambiguous"; 194 return Result; 195 } 196