xref: /freebsd/contrib/llvm-project/llvm/include/llvm/Support/GlobPattern.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- GlobPattern.h - glob pattern matcher implementation -*- C++ -*-----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a glob pattern matcher.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_SUPPORT_GLOBPATTERN_H
14 #define LLVM_SUPPORT_GLOBPATTERN_H
15 
16 #include "llvm/ADT/BitVector.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/Support/Compiler.h"
20 #include "llvm/Support/Error.h"
21 #include <optional>
22 
23 namespace llvm {
24 
/// This class implements a glob pattern matcher similar to the one found in
/// bash, but with some key differences; notably, `*` matches all characters,
/// including path separators.
28 ///
29 /// * `?` matches a single character.
30 /// * `*` matches zero or more characters.
31 /// * `[<chars>]` matches one character in the bracket. Character ranges,
32 ///   e.g., `[a-z]`, and negative sets via `[^ab]` or `[!ab]` are also
33 ///   supported.
34 /// * `{<glob>,...}` matches one of the globs in the list. Nested brace
35 ///   expansions are not supported. If \p MaxSubPatterns is empty then
36 ///   brace expansions are not supported and characters `{,}` are treated as
37 ///   literals.
38 /// * `\` escapes the next character so it is treated as a literal.
39 ///
40 /// Some known edge cases are:
41 /// * The literal `]` is allowed as the first character in a character class,
42 ///    i.e., `[]]` is valid and matches the literal `]`.
43 /// * The empty character class, i.e., `[]`, is invalid.
44 /// * Empty or singleton brace expansions, e.g., `{}`, `{a}`, are invalid.
45 /// * The literals `}` and `,` that are not inside a brace expansion are taken
46 ///   as literals, e.g., `,}` is valid but `{` is not.
47 ///
48 /// Examples:
49 /// * `*[/\\]foo.{c,cpp}` will match (unix or windows) paths to files named
50 ///   `foo.c` or `foo.cpp`.
51 /// * `_Z{N,NK,}S[tabsoid]*` will match mangled C++ standard library functions.
52 class GlobPattern {
53 public:
54   /// \param Pat the pattern to match against
55   /// \param MaxSubPatterns if provided limit the number of allowed subpatterns
56   ///                       created from expanding braces otherwise disable
57   ///                       brace expansion
58   LLVM_ABI static Expected<GlobPattern>
59   create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {});
60   /// \returns \p true if \p S matches this glob pattern
61   LLVM_ABI bool match(StringRef S) const;
62 
63   // Returns true for glob pattern "*". Can be used to avoid expensive
64   // preparation/acquisition of the input for match().
isTrivialMatchAll()65   bool isTrivialMatchAll() const {
66     if (!Prefix.empty())
67       return false;
68     if (SubGlobs.size() != 1)
69       return false;
70     return SubGlobs[0].getPat() == "*";
71   }
72 
73 private:
74   StringRef Prefix;
75 
76   struct SubGlobPattern {
77     /// \param Pat the pattern to match against
78     LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat);
79     /// \returns \p true if \p S matches this glob pattern
80     LLVM_ABI bool match(StringRef S) const;
getPatSubGlobPattern81     StringRef getPat() const { return StringRef(Pat.data(), Pat.size()); }
82 
83     // Brackets with their end position and matched bytes.
84     struct Bracket {
85       size_t NextOffset;
86       BitVector Bytes;
87     };
88     SmallVector<Bracket, 0> Brackets;
89     SmallVector<char, 0> Pat;
90   };
91   SmallVector<SubGlobPattern, 1> SubGlobs;
92 };
93 }
94 
95 #endif // LLVM_SUPPORT_GLOBPATTERN_H
96