xref: /freebsd/contrib/llvm-project/llvm/lib/Support/GlobPattern.cpp (revision 2f513db72b034fd5ef7f080b11be5c711c15186a)
1 //===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a glob pattern matcher.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Support/GlobPattern.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/Optional.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/Support/Errc.h"
18 
19 using namespace llvm;
20 
21 static bool hasWildcard(StringRef S) {
22   return S.find_first_of("?*[") != StringRef::npos;
23 }
24 
25 // Expands character ranges and returns a bitmap.
26 // For example, "a-cf-hz" is expanded to "abcfghz".
27 static Expected<BitVector> expand(StringRef S, StringRef Original) {
28   BitVector BV(256, false);
29 
30   // Expand X-Y.
31   for (;;) {
32     if (S.size() < 3)
33       break;
34 
35     uint8_t Start = S[0];
36     uint8_t End = S[2];
37 
38     // If it doesn't start with something like X-Y,
39     // consume the first character and proceed.
40     if (S[1] != '-') {
41       BV[Start] = true;
42       S = S.substr(1);
43       continue;
44     }
45 
46     // It must be in the form of X-Y.
47     // Validate it and then interpret the range.
48     if (Start > End)
49       return make_error<StringError>("invalid glob pattern: " + Original,
50                                      errc::invalid_argument);
51 
52     for (int C = Start; C <= End; ++C)
53       BV[(uint8_t)C] = true;
54     S = S.substr(3);
55   }
56 
57   for (char C : S)
58     BV[(uint8_t)C] = true;
59   return BV;
60 }
61 
62 // This is a scanner for the glob pattern.
63 // A glob pattern token is one of "*", "?", "[<chars>]", "[^<chars>]"
64 // (which is a negative form of "[<chars>]"), or a non-meta character.
65 // This function returns the first token in S.
66 static Expected<BitVector> scan(StringRef &S, StringRef Original) {
67   switch (S[0]) {
68   case '*':
69     S = S.substr(1);
70     // '*' is represented by an empty bitvector.
71     // All other bitvectors are 256-bit long.
72     return BitVector();
73   case '?':
74     S = S.substr(1);
75     return BitVector(256, true);
76   case '[': {
77     size_t End = S.find(']', 1);
78     if (End == StringRef::npos)
79       return make_error<StringError>("invalid glob pattern: " + Original,
80                                      errc::invalid_argument);
81 
82     StringRef Chars = S.substr(1, End - 1);
83     S = S.substr(End + 1);
84     if (Chars.startswith("^")) {
85       Expected<BitVector> BV = expand(Chars.substr(1), Original);
86       if (!BV)
87         return BV.takeError();
88       return BV->flip();
89     }
90     return expand(Chars, Original);
91   }
92   default:
93     BitVector BV(256, false);
94     BV[(uint8_t)S[0]] = true;
95     S = S.substr(1);
96     return BV;
97   }
98 }
99 
100 Expected<GlobPattern> GlobPattern::create(StringRef S) {
101   GlobPattern Pat;
102 
103   // S doesn't contain any metacharacter,
104   // so the regular string comparison should work.
105   if (!hasWildcard(S)) {
106     Pat.Exact = S;
107     return Pat;
108   }
109 
110   // S is something like "foo*". We can use startswith().
111   if (S.endswith("*") && !hasWildcard(S.drop_back())) {
112     Pat.Prefix = S.drop_back();
113     return Pat;
114   }
115 
116   // S is something like "*foo". We can use endswith().
117   if (S.startswith("*") && !hasWildcard(S.drop_front())) {
118     Pat.Suffix = S.drop_front();
119     return Pat;
120   }
121 
122   // Otherwise, we need to do real glob pattern matching.
123   // Parse the pattern now.
124   StringRef Original = S;
125   while (!S.empty()) {
126     Expected<BitVector> BV = scan(S, Original);
127     if (!BV)
128       return BV.takeError();
129     Pat.Tokens.push_back(*BV);
130   }
131   return Pat;
132 }
133 
134 bool GlobPattern::match(StringRef S) const {
135   if (Exact)
136     return S == *Exact;
137   if (Prefix)
138     return S.startswith(*Prefix);
139   if (Suffix)
140     return S.endswith(*Suffix);
141   return matchOne(Tokens, S);
142 }
143 
144 // Runs glob pattern Pats against string S.
145 bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
146   for (;;) {
147     if (Pats.empty())
148       return S.empty();
149 
150     // If Pats[0] is '*', try to match Pats[1..] against all possible
151     // tail strings of S to see at least one pattern succeeds.
152     if (Pats[0].size() == 0) {
153       Pats = Pats.slice(1);
154       if (Pats.empty())
155         // Fast path. If a pattern is '*', it matches anything.
156         return true;
157       for (size_t I = 0, E = S.size(); I < E; ++I)
158         if (matchOne(Pats, S.substr(I)))
159           return true;
160       return false;
161     }
162 
163     // If Pats[0] is not '*', it must consume one character.
164     if (S.empty() || !Pats[0][(uint8_t)S[0]])
165       return false;
166     Pats = Pats.slice(1);
167     S = S.substr(1);
168   }
169 }
170