xref: /freebsd/contrib/llvm-project/llvm/lib/Support/GlobPattern.cpp (revision 994c943a0098e38bff96f9f9169cbb38542c39c1)
1  //===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This file implements a glob pattern matcher.
10  //
11  //===----------------------------------------------------------------------===//
12  
13  #include "llvm/Support/GlobPattern.h"
14  #include "llvm/ADT/ArrayRef.h"
15  #include "llvm/ADT/StringRef.h"
16  #include "llvm/Support/Errc.h"
17  
18  using namespace llvm;
19  
20  static bool hasWildcard(StringRef S) {
21    return S.find_first_of("?*[\\") != StringRef::npos;
22  }
23  
24  // Expands character ranges and returns a bitmap.
25  // For example, "a-cf-hz" is expanded to "abcfghz".
26  static Expected<BitVector> expand(StringRef S, StringRef Original) {
27    BitVector BV(256, false);
28  
29    // Expand X-Y.
30    for (;;) {
31      if (S.size() < 3)
32        break;
33  
34      uint8_t Start = S[0];
35      uint8_t End = S[2];
36  
37      // If it doesn't start with something like X-Y,
38      // consume the first character and proceed.
39      if (S[1] != '-') {
40        BV[Start] = true;
41        S = S.substr(1);
42        continue;
43      }
44  
45      // It must be in the form of X-Y.
46      // Validate it and then interpret the range.
47      if (Start > End)
48        return make_error<StringError>("invalid glob pattern: " + Original,
49                                       errc::invalid_argument);
50  
51      for (int C = Start; C <= End; ++C)
52        BV[(uint8_t)C] = true;
53      S = S.substr(3);
54    }
55  
56    for (char C : S)
57      BV[(uint8_t)C] = true;
58    return BV;
59  }
60  
61  // This is a scanner for the glob pattern.
62  // A glob pattern token is one of "*", "?", "\", "[<chars>]", "[^<chars>]"
63  // (which is a negative form of "[<chars>]"), "[!<chars>]" (which is
64  // equivalent to "[^<chars>]"), or a non-meta character.
65  // This function returns the first token in S.
66  static Expected<BitVector> scan(StringRef &S, StringRef Original) {
67    switch (S[0]) {
68    case '*':
69      S = S.substr(1);
70      // '*' is represented by an empty bitvector.
71      // All other bitvectors are 256-bit long.
72      return BitVector();
73    case '?':
74      S = S.substr(1);
75      return BitVector(256, true);
76    case '[': {
77      // ']' is allowed as the first character of a character class. '[]' is
78      // invalid. So, just skip the first character.
79      size_t End = S.find(']', 2);
80      if (End == StringRef::npos)
81        return make_error<StringError>("invalid glob pattern: " + Original,
82                                       errc::invalid_argument);
83  
84      StringRef Chars = S.substr(1, End - 1);
85      S = S.substr(End + 1);
86      if (Chars.startswith("^") || Chars.startswith("!")) {
87        Expected<BitVector> BV = expand(Chars.substr(1), Original);
88        if (!BV)
89          return BV.takeError();
90        return BV->flip();
91      }
92      return expand(Chars, Original);
93    }
94    case '\\':
95      // Eat this character and fall through below to treat it like a non-meta
96      // character.
97      S = S.substr(1);
98      [[fallthrough]];
99    default:
100      BitVector BV(256, false);
101      BV[(uint8_t)S[0]] = true;
102      S = S.substr(1);
103      return BV;
104    }
105  }
106  
107  Expected<GlobPattern> GlobPattern::create(StringRef S) {
108    GlobPattern Pat;
109  
110    // S doesn't contain any metacharacter,
111    // so the regular string comparison should work.
112    if (!hasWildcard(S)) {
113      Pat.Exact = S;
114      return Pat;
115    }
116  
117    // S is something like "foo*", and the "* is not escaped. We can use
118    // startswith().
119    if (S.endswith("*") && !S.endswith("\\*") && !hasWildcard(S.drop_back())) {
120      Pat.Prefix = S.drop_back();
121      return Pat;
122    }
123  
124    // S is something like "*foo". We can use endswith().
125    if (S.startswith("*") && !hasWildcard(S.drop_front())) {
126      Pat.Suffix = S.drop_front();
127      return Pat;
128    }
129  
130    // Otherwise, we need to do real glob pattern matching.
131    // Parse the pattern now.
132    StringRef Original = S;
133    while (!S.empty()) {
134      Expected<BitVector> BV = scan(S, Original);
135      if (!BV)
136        return BV.takeError();
137      Pat.Tokens.push_back(*BV);
138    }
139    return Pat;
140  }
141  
142  bool GlobPattern::match(StringRef S) const {
143    if (Exact)
144      return S == *Exact;
145    if (Prefix)
146      return S.startswith(*Prefix);
147    if (Suffix)
148      return S.endswith(*Suffix);
149    return matchOne(Tokens, S);
150  }
151  
152  // Runs glob pattern Pats against string S.
153  bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
154    for (;;) {
155      if (Pats.empty())
156        return S.empty();
157  
158      // If Pats[0] is '*', try to match Pats[1..] against all possible
159      // tail strings of S to see at least one pattern succeeds.
160      if (Pats[0].size() == 0) {
161        Pats = Pats.slice(1);
162        if (Pats.empty())
163          // Fast path. If a pattern is '*', it matches anything.
164          return true;
165        for (size_t I = 0, E = S.size(); I < E; ++I)
166          if (matchOne(Pats, S.substr(I)))
167            return true;
168        return false;
169      }
170  
171      // If Pats[0] is not '*', it must consume one character.
172      if (S.empty() || !Pats[0][(uint8_t)S[0]])
173        return false;
174      Pats = Pats.slice(1);
175      S = S.substr(1);
176    }
177  }
178