1 //===-- GlobPattern.h - glob pattern matcher implementation -*- C++ -*-----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a glob pattern matcher. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_SUPPORT_GLOBPATTERN_H 14 #define LLVM_SUPPORT_GLOBPATTERN_H 15 16 #include "llvm/ADT/BitVector.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/Support/Error.h" 20 #include <optional> 21 22 namespace llvm { 23 24 /// This class implements a glob pattern matcher similar to the one found in 25 /// bash, but with some key differences. Namely, that \p "*" matches all 26 /// characters and does not exclude path separators. 27 /// 28 /// * \p "?" matches a single character. 29 /// * \p "*" matches zero or more characters. 30 /// * \p "[<chars>]" matches one character in the bracket. Character ranges, 31 /// e.g., \p "[a-z]", and negative sets via \p "[^ab]" or \p "[!ab]" are also 32 /// supported. 33 /// * \p "{<glob>,...}" matches one of the globs in the list. Nested brace 34 /// expansions are not supported. If \p MaxSubPatterns is empty then 35 /// brace expansions are not supported and characters \p "{,}" are treated as 36 /// literals. 37 /// * \p "\" escapes the next character so it is treated as a literal. 38 /// 39 /// 40 /// Some known edge cases are: 41 /// * \p "]" is allowed as the first character in a character class, i.e., 42 /// \p "[]]" is valid and matches the literal \p "]". 43 /// * The empty character class, i.e., \p "[]", is invalid. 44 /// * Empty or singleton brace expansions, e.g., \p "{}", \p "{a}", are invalid. 45 /// * \p "}" and \p "," that are not inside a brace expansion are taken as 46 /// literals, e.g., \p ",}" is valid but \p "{" is not. 47 /// 48 /// 49 /// For example, \p "*[/\\]foo.{c,cpp}" will match (unix or windows) paths to 50 /// all files named \p "foo.c" or \p "foo.cpp". 51 class GlobPattern { 52 public: 53 /// \param Pat the pattern to match against 54 /// \param MaxSubPatterns if provided limit the number of allowed subpatterns 55 /// created from expanding braces otherwise disable 56 /// brace expansion 57 static Expected<GlobPattern> 58 create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {}); 59 /// \returns \p true if \p S matches this glob pattern 60 bool match(StringRef S) const; 61 62 // Returns true for glob pattern "*". Can be used to avoid expensive 63 // preparation/acquisition of the input for match(). 64 bool isTrivialMatchAll() const { 65 if (!Prefix.empty()) 66 return false; 67 if (SubGlobs.size() != 1) 68 return false; 69 return SubGlobs[0].getPat() == "*"; 70 } 71 72 private: 73 StringRef Prefix; 74 75 struct SubGlobPattern { 76 /// \param Pat the pattern to match against 77 static Expected<SubGlobPattern> create(StringRef Pat); 78 /// \returns \p true if \p S matches this glob pattern 79 bool match(StringRef S) const; 80 StringRef getPat() const { return StringRef(Pat.data(), Pat.size()); } 81 82 // Brackets with their end position and matched bytes. 83 struct Bracket { 84 size_t NextOffset; 85 BitVector Bytes; 86 }; 87 SmallVector<Bracket, 0> Brackets; 88 SmallVector<char, 0> Pat; 89 }; 90 SmallVector<SubGlobPattern, 1> SubGlobs; 91 }; 92 } 93 94 #endif // LLVM_SUPPORT_GLOBPATTERN_H 95