//===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements a glob pattern matcher. // //===----------------------------------------------------------------------===// #include "llvm/Support/GlobPattern.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Errc.h" using namespace llvm; static bool hasWildcard(StringRef S) { return S.find_first_of("?*[\\") != StringRef::npos; } // Expands character ranges and returns a bitmap. // For example, "a-cf-hz" is expanded to "abcfghz". static Expected expand(StringRef S, StringRef Original) { BitVector BV(256, false); // Expand X-Y. for (;;) { if (S.size() < 3) break; uint8_t Start = S[0]; uint8_t End = S[2]; // If it doesn't start with something like X-Y, // consume the first character and proceed. if (S[1] != '-') { BV[Start] = true; S = S.substr(1); continue; } // It must be in the form of X-Y. // Validate it and then interpret the range. if (Start > End) return make_error("invalid glob pattern: " + Original, errc::invalid_argument); for (int C = Start; C <= End; ++C) BV[(uint8_t)C] = true; S = S.substr(3); } for (char C : S) BV[(uint8_t)C] = true; return BV; } // This is a scanner for the glob pattern. // A glob pattern token is one of "*", "?", "\", "[]", "[^]" // (which is a negative form of "[]"), "[!]" (which is // equivalent to "[^]"), or a non-meta character. // This function returns the first token in S. static Expected scan(StringRef &S, StringRef Original) { switch (S[0]) { case '*': S = S.substr(1); // '*' is represented by an empty bitvector. // All other bitvectors are 256-bit long. return BitVector(); case '?': S = S.substr(1); return BitVector(256, true); case '[': { // ']' is allowed as the first character of a character class. '[]' is // invalid. So, just skip the first character. size_t End = S.find(']', 2); if (End == StringRef::npos) return make_error("invalid glob pattern: " + Original, errc::invalid_argument); StringRef Chars = S.substr(1, End - 1); S = S.substr(End + 1); if (Chars.startswith("^") || Chars.startswith("!")) { Expected BV = expand(Chars.substr(1), Original); if (!BV) return BV.takeError(); return BV->flip(); } return expand(Chars, Original); } case '\\': // Eat this character and fall through below to treat it like a non-meta // character. S = S.substr(1); [[fallthrough]]; default: BitVector BV(256, false); BV[(uint8_t)S[0]] = true; S = S.substr(1); return BV; } } Expected GlobPattern::create(StringRef S) { GlobPattern Pat; // S doesn't contain any metacharacter, // so the regular string comparison should work. if (!hasWildcard(S)) { Pat.Exact = S; return Pat; } // S is something like "foo*", and the "* is not escaped. We can use // startswith(). if (S.endswith("*") && !S.endswith("\\*") && !hasWildcard(S.drop_back())) { Pat.Prefix = S.drop_back(); return Pat; } // S is something like "*foo". We can use endswith(). if (S.startswith("*") && !hasWildcard(S.drop_front())) { Pat.Suffix = S.drop_front(); return Pat; } // Otherwise, we need to do real glob pattern matching. // Parse the pattern now. StringRef Original = S; while (!S.empty()) { Expected BV = scan(S, Original); if (!BV) return BV.takeError(); Pat.Tokens.push_back(*BV); } return Pat; } bool GlobPattern::match(StringRef S) const { if (Exact) return S == *Exact; if (Prefix) return S.startswith(*Prefix); if (Suffix) return S.endswith(*Suffix); return matchOne(Tokens, S); } // Runs glob pattern Pats against string S. bool GlobPattern::matchOne(ArrayRef Pats, StringRef S) const { for (;;) { if (Pats.empty()) return S.empty(); // If Pats[0] is '*', try to match Pats[1..] against all possible // tail strings of S to see at least one pattern succeeds. if (Pats[0].size() == 0) { Pats = Pats.slice(1); if (Pats.empty()) // Fast path. If a pattern is '*', it matches anything. return true; for (size_t I = 0, E = S.size(); I < E; ++I) if (matchOne(Pats, S.substr(I))) return true; return false; } // If Pats[0] is not '*', it must consume one character. if (S.empty() || !Pats[0][(uint8_t)S[0]]) return false; Pats = Pats.slice(1); S = S.substr(1); } }