1 //===-- GlobPattern.h - glob pattern matcher implementation -*- C++ -*-----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a glob pattern matcher. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_SUPPORT_GLOBPATTERN_H 14 #define LLVM_SUPPORT_GLOBPATTERN_H 15 16 #include "llvm/ADT/BitVector.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/Support/Compiler.h" 20 #include "llvm/Support/Error.h" 21 #include <optional> 22 23 namespace llvm { 24 25 /// This class implements a glob pattern matcher similar to the one found in 26 /// bash, but with some key differences. Namely, that `*` matches all 27 /// characters and does not exclude path separators. 28 /// 29 /// * `?` matches a single character. 30 /// * `*` matches zero or more characters. 31 /// * `[<chars>]` matches one character in the bracket. Character ranges, 32 /// e.g., `[a-z]`, and negative sets via `[^ab]` or `[!ab]` are also 33 /// supported. 34 /// * `{<glob>,...}` matches one of the globs in the list. Nested brace 35 /// expansions are not supported. If \p MaxSubPatterns is empty then 36 /// brace expansions are not supported and characters `{,}` are treated as 37 /// literals. 38 /// * `\` escapes the next character so it is treated as a literal. 39 /// 40 /// Some known edge cases are: 41 /// * The literal `]` is allowed as the first character in a character class, 42 /// i.e., `[]]` is valid and matches the literal `]`. 43 /// * The empty character class, i.e., `[]`, is invalid. 44 /// * Empty or singleton brace expansions, e.g., `{}`, `{a}`, are invalid. 45 /// * The literals `}` and `,` that are not inside a brace expansion are taken 46 /// as literals, e.g., `,}` is valid but `{` is not. 47 /// 48 /// Examples: 49 /// * `*[/\\]foo.{c,cpp}` will match (unix or windows) paths to files named 50 /// `foo.c` or `foo.cpp`. 51 /// * `_Z{N,NK,}S[tabsoid]*` will match mangled C++ standard library functions. 52 class GlobPattern { 53 public: 54 /// \param Pat the pattern to match against 55 /// \param MaxSubPatterns if provided limit the number of allowed subpatterns 56 /// created from expanding braces otherwise disable 57 /// brace expansion 58 LLVM_ABI static Expected<GlobPattern> 59 create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {}); 60 /// \returns \p true if \p S matches this glob pattern 61 LLVM_ABI bool match(StringRef S) const; 62 63 // Returns true for glob pattern "*". Can be used to avoid expensive 64 // preparation/acquisition of the input for match(). isTrivialMatchAll()65 bool isTrivialMatchAll() const { 66 if (!Prefix.empty()) 67 return false; 68 if (SubGlobs.size() != 1) 69 return false; 70 return SubGlobs[0].getPat() == "*"; 71 } 72 73 private: 74 StringRef Prefix; 75 76 struct SubGlobPattern { 77 /// \param Pat the pattern to match against 78 LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat); 79 /// \returns \p true if \p S matches this glob pattern 80 LLVM_ABI bool match(StringRef S) const; getPatSubGlobPattern81 StringRef getPat() const { return StringRef(Pat.data(), Pat.size()); } 82 83 // Brackets with their end position and matched bytes. 84 struct Bracket { 85 size_t NextOffset; 86 BitVector Bytes; 87 }; 88 SmallVector<Bracket, 0> Brackets; 89 SmallVector<char, 0> Pat; 90 }; 91 SmallVector<SubGlobPattern, 1> SubGlobs; 92 }; 93 } 94 95 #endif // LLVM_SUPPORT_GLOBPATTERN_H 96