1 //===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a glob pattern matcher. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Support/GlobPattern.h" 14 #include "llvm/ADT/ArrayRef.h" 15 #include "llvm/ADT/StringRef.h" 16 #include "llvm/Support/Errc.h" 17 18 using namespace llvm; 19 20 static bool hasWildcard(StringRef S) { 21 return S.find_first_of("?*[\\") != StringRef::npos; 22 } 23 24 // Expands character ranges and returns a bitmap. 25 // For example, "a-cf-hz" is expanded to "abcfghz". 26 static Expected<BitVector> expand(StringRef S, StringRef Original) { 27 BitVector BV(256, false); 28 29 // Expand X-Y. 30 for (;;) { 31 if (S.size() < 3) 32 break; 33 34 uint8_t Start = S[0]; 35 uint8_t End = S[2]; 36 37 // If it doesn't start with something like X-Y, 38 // consume the first character and proceed. 39 if (S[1] != '-') { 40 BV[Start] = true; 41 S = S.substr(1); 42 continue; 43 } 44 45 // It must be in the form of X-Y. 46 // Validate it and then interpret the range. 47 if (Start > End) 48 return make_error<StringError>("invalid glob pattern: " + Original, 49 errc::invalid_argument); 50 51 for (int C = Start; C <= End; ++C) 52 BV[(uint8_t)C] = true; 53 S = S.substr(3); 54 } 55 56 for (char C : S) 57 BV[(uint8_t)C] = true; 58 return BV; 59 } 60 61 // This is a scanner for the glob pattern. 62 // A glob pattern token is one of "*", "?", "\", "[<chars>]", "[^<chars>]" 63 // (which is a negative form of "[<chars>]"), "[!<chars>]" (which is 64 // equivalent to "[^<chars>]"), or a non-meta character. 65 // This function returns the first token in S. 66 static Expected<BitVector> scan(StringRef &S, StringRef Original) { 67 switch (S[0]) { 68 case '*': 69 S = S.substr(1); 70 // '*' is represented by an empty bitvector. 71 // All other bitvectors are 256-bit long. 72 return BitVector(); 73 case '?': 74 S = S.substr(1); 75 return BitVector(256, true); 76 case '[': { 77 // ']' is allowed as the first character of a character class. '[]' is 78 // invalid. So, just skip the first character. 79 size_t End = S.find(']', 2); 80 if (End == StringRef::npos) 81 return make_error<StringError>("invalid glob pattern: " + Original, 82 errc::invalid_argument); 83 84 StringRef Chars = S.substr(1, End - 1); 85 S = S.substr(End + 1); 86 if (Chars.startswith("^") || Chars.startswith("!")) { 87 Expected<BitVector> BV = expand(Chars.substr(1), Original); 88 if (!BV) 89 return BV.takeError(); 90 return BV->flip(); 91 } 92 return expand(Chars, Original); 93 } 94 case '\\': 95 // Eat this character and fall through below to treat it like a non-meta 96 // character. 97 S = S.substr(1); 98 [[fallthrough]]; 99 default: 100 BitVector BV(256, false); 101 BV[(uint8_t)S[0]] = true; 102 S = S.substr(1); 103 return BV; 104 } 105 } 106 107 Expected<GlobPattern> GlobPattern::create(StringRef S) { 108 GlobPattern Pat; 109 110 // S doesn't contain any metacharacter, 111 // so the regular string comparison should work. 112 if (!hasWildcard(S)) { 113 Pat.Exact = S; 114 return Pat; 115 } 116 117 // S is something like "foo*", and the "* is not escaped. We can use 118 // startswith(). 119 if (S.endswith("*") && !S.endswith("\\*") && !hasWildcard(S.drop_back())) { 120 Pat.Prefix = S.drop_back(); 121 return Pat; 122 } 123 124 // S is something like "*foo". We can use endswith(). 125 if (S.startswith("*") && !hasWildcard(S.drop_front())) { 126 Pat.Suffix = S.drop_front(); 127 return Pat; 128 } 129 130 // Otherwise, we need to do real glob pattern matching. 131 // Parse the pattern now. 132 StringRef Original = S; 133 while (!S.empty()) { 134 Expected<BitVector> BV = scan(S, Original); 135 if (!BV) 136 return BV.takeError(); 137 Pat.Tokens.push_back(*BV); 138 } 139 return Pat; 140 } 141 142 bool GlobPattern::match(StringRef S) const { 143 if (Exact) 144 return S == *Exact; 145 if (Prefix) 146 return S.startswith(*Prefix); 147 if (Suffix) 148 return S.endswith(*Suffix); 149 return matchOne(Tokens, S); 150 } 151 152 // Runs glob pattern Pats against string S. 153 bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const { 154 for (;;) { 155 if (Pats.empty()) 156 return S.empty(); 157 158 // If Pats[0] is '*', try to match Pats[1..] against all possible 159 // tail strings of S to see at least one pattern succeeds. 160 if (Pats[0].size() == 0) { 161 Pats = Pats.slice(1); 162 if (Pats.empty()) 163 // Fast path. If a pattern is '*', it matches anything. 164 return true; 165 for (size_t I = 0, E = S.size(); I < E; ++I) 166 if (matchOne(Pats, S.substr(I))) 167 return true; 168 return false; 169 } 170 171 // If Pats[0] is not '*', it must consume one character. 172 if (S.empty() || !Pats[0][(uint8_t)S[0]]) 173 return false; 174 Pats = Pats.slice(1); 175 S = S.substr(1); 176 } 177 } 178