1 //===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a glob pattern matcher. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Support/GlobPattern.h" 14 #include "llvm/ADT/ArrayRef.h" 15 #include "llvm/ADT/Optional.h" 16 #include "llvm/ADT/StringRef.h" 17 #include "llvm/Support/Errc.h" 18 19 using namespace llvm; 20 21 static bool hasWildcard(StringRef S) { 22 return S.find_first_of("?*[\\") != StringRef::npos; 23 } 24 25 // Expands character ranges and returns a bitmap. 26 // For example, "a-cf-hz" is expanded to "abcfghz". 27 static Expected<BitVector> expand(StringRef S, StringRef Original) { 28 BitVector BV(256, false); 29 30 // Expand X-Y. 31 for (;;) { 32 if (S.size() < 3) 33 break; 34 35 uint8_t Start = S[0]; 36 uint8_t End = S[2]; 37 38 // If it doesn't start with something like X-Y, 39 // consume the first character and proceed. 40 if (S[1] != '-') { 41 BV[Start] = true; 42 S = S.substr(1); 43 continue; 44 } 45 46 // It must be in the form of X-Y. 47 // Validate it and then interpret the range. 48 if (Start > End) 49 return make_error<StringError>("invalid glob pattern: " + Original, 50 errc::invalid_argument); 51 52 for (int C = Start; C <= End; ++C) 53 BV[(uint8_t)C] = true; 54 S = S.substr(3); 55 } 56 57 for (char C : S) 58 BV[(uint8_t)C] = true; 59 return BV; 60 } 61 62 // This is a scanner for the glob pattern. 63 // A glob pattern token is one of "*", "?", "\", "[<chars>]", "[^<chars>]" 64 // (which is a negative form of "[<chars>]"), "[!<chars>]" (which is 65 // equivalent to "[^<chars>]"), or a non-meta character. 66 // This function returns the first token in S. 67 static Expected<BitVector> scan(StringRef &S, StringRef Original) { 68 switch (S[0]) { 69 case '*': 70 S = S.substr(1); 71 // '*' is represented by an empty bitvector. 72 // All other bitvectors are 256-bit long. 73 return BitVector(); 74 case '?': 75 S = S.substr(1); 76 return BitVector(256, true); 77 case '[': { 78 // ']' is allowed as the first character of a character class. '[]' is 79 // invalid. So, just skip the first character. 80 size_t End = S.find(']', 2); 81 if (End == StringRef::npos) 82 return make_error<StringError>("invalid glob pattern: " + Original, 83 errc::invalid_argument); 84 85 StringRef Chars = S.substr(1, End - 1); 86 S = S.substr(End + 1); 87 if (Chars.startswith("^") || Chars.startswith("!")) { 88 Expected<BitVector> BV = expand(Chars.substr(1), Original); 89 if (!BV) 90 return BV.takeError(); 91 return BV->flip(); 92 } 93 return expand(Chars, Original); 94 } 95 case '\\': 96 // Eat this character and fall through below to treat it like a non-meta 97 // character. 98 S = S.substr(1); 99 LLVM_FALLTHROUGH; 100 default: 101 BitVector BV(256, false); 102 BV[(uint8_t)S[0]] = true; 103 S = S.substr(1); 104 return BV; 105 } 106 } 107 108 Expected<GlobPattern> GlobPattern::create(StringRef S) { 109 GlobPattern Pat; 110 111 // S doesn't contain any metacharacter, 112 // so the regular string comparison should work. 113 if (!hasWildcard(S)) { 114 Pat.Exact = S; 115 return Pat; 116 } 117 118 // S is something like "foo*", and the "* is not escaped. We can use 119 // startswith(). 120 if (S.endswith("*") && !S.endswith("\\*") && !hasWildcard(S.drop_back())) { 121 Pat.Prefix = S.drop_back(); 122 return Pat; 123 } 124 125 // S is something like "*foo". We can use endswith(). 126 if (S.startswith("*") && !hasWildcard(S.drop_front())) { 127 Pat.Suffix = S.drop_front(); 128 return Pat; 129 } 130 131 // Otherwise, we need to do real glob pattern matching. 132 // Parse the pattern now. 133 StringRef Original = S; 134 while (!S.empty()) { 135 Expected<BitVector> BV = scan(S, Original); 136 if (!BV) 137 return BV.takeError(); 138 Pat.Tokens.push_back(*BV); 139 } 140 return Pat; 141 } 142 143 bool GlobPattern::match(StringRef S) const { 144 if (Exact) 145 return S == *Exact; 146 if (Prefix) 147 return S.startswith(*Prefix); 148 if (Suffix) 149 return S.endswith(*Suffix); 150 return matchOne(Tokens, S); 151 } 152 153 // Runs glob pattern Pats against string S. 154 bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const { 155 for (;;) { 156 if (Pats.empty()) 157 return S.empty(); 158 159 // If Pats[0] is '*', try to match Pats[1..] against all possible 160 // tail strings of S to see at least one pattern succeeds. 161 if (Pats[0].size() == 0) { 162 Pats = Pats.slice(1); 163 if (Pats.empty()) 164 // Fast path. If a pattern is '*', it matches anything. 165 return true; 166 for (size_t I = 0, E = S.size(); I < E; ++I) 167 if (matchOne(Pats, S.substr(I))) 168 return true; 169 return false; 170 } 171 172 // If Pats[0] is not '*', it must consume one character. 173 if (S.empty() || !Pats[0][(uint8_t)S[0]]) 174 return false; 175 Pats = Pats.slice(1); 176 S = S.substr(1); 177 } 178 } 179