xref: /freebsd/contrib/llvm-project/llvm/lib/FileCheck/FileCheck.cpp (revision ada4cd3f7710d9759e391e84ad21b7763062bdbc)
1  //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // FileCheck does a line-by-line check of a file that validates whether it
10  // contains the expected content.  This is useful for regression tests etc.
11  //
12  // This file implements most of the API that will be used by the FileCheck utility
13  // as well as various unittests.
14  //===----------------------------------------------------------------------===//
15  
16  #include "llvm/FileCheck/FileCheck.h"
17  #include "FileCheckImpl.h"
18  #include "llvm/ADT/STLExtras.h"
19  #include "llvm/ADT/StringExtras.h"
20  #include "llvm/ADT/StringSet.h"
21  #include "llvm/ADT/Twine.h"
22  #include "llvm/Support/CheckedArithmetic.h"
23  #include "llvm/Support/FormatVariadic.h"
24  #include <cstdint>
25  #include <list>
26  #include <set>
27  #include <tuple>
28  #include <utility>
29  
30  using namespace llvm;
31  
32  StringRef ExpressionFormat::toString() const {
33    switch (Value) {
34    case Kind::NoFormat:
35      return StringRef("<none>");
36    case Kind::Unsigned:
37      return StringRef("%u");
38    case Kind::Signed:
39      return StringRef("%d");
40    case Kind::HexUpper:
41      return StringRef("%X");
42    case Kind::HexLower:
43      return StringRef("%x");
44    }
45    llvm_unreachable("unknown expression format");
46  }
47  
48  Expected<std::string> ExpressionFormat::getWildcardRegex() const {
49    StringRef AlternateFormPrefix = AlternateForm ? StringRef("0x") : StringRef();
50  
51    auto CreatePrecisionRegex = [&](StringRef S) {
52      return (Twine(AlternateFormPrefix) + S + Twine('{') + Twine(Precision) +
53              "}")
54          .str();
55    };
56  
57    switch (Value) {
58    case Kind::Unsigned:
59      if (Precision)
60        return CreatePrecisionRegex("([1-9][0-9]*)?[0-9]");
61      return std::string("[0-9]+");
62    case Kind::Signed:
63      if (Precision)
64        return CreatePrecisionRegex("-?([1-9][0-9]*)?[0-9]");
65      return std::string("-?[0-9]+");
66    case Kind::HexUpper:
67      if (Precision)
68        return CreatePrecisionRegex("([1-9A-F][0-9A-F]*)?[0-9A-F]");
69      return (Twine(AlternateFormPrefix) + Twine("[0-9A-F]+")).str();
70    case Kind::HexLower:
71      if (Precision)
72        return CreatePrecisionRegex("([1-9a-f][0-9a-f]*)?[0-9a-f]");
73      return (Twine(AlternateFormPrefix) + Twine("[0-9a-f]+")).str();
74    default:
75      return createStringError(std::errc::invalid_argument,
76                               "trying to match value with invalid format");
77    }
78  }
79  
80  Expected<std::string>
81  ExpressionFormat::getMatchingString(ExpressionValue IntegerValue) const {
82    APInt IntValue = IntegerValue.getAPIntValue();
83    // Error out for values that cannot be represented by the appropriate 64-bit
84    // integer (e.g. int64_t for a signed format) to keep the getter of
85    // ExpressionValue as an APInt an NFC.
86    if (Value == Kind::Signed) {
87      if (!IntValue.isSignedIntN(64))
88        return make_error<OverflowError>();
89    } else {
90      if (!IntValue.isIntN(64))
91        return make_error<OverflowError>();
92    }
93  
94    unsigned Radix;
95    bool UpperCase = false;
96    SmallString<8> AbsoluteValueStr;
97    StringRef SignPrefix = IntValue.isNegative() ? "-" : "";
98    switch (Value) {
99    case Kind::Unsigned:
100    case Kind::Signed:
101      Radix = 10;
102      break;
103    case Kind::HexUpper:
104      UpperCase = true;
105      Radix = 16;
106      break;
107    case Kind::HexLower:
108      Radix = 16;
109      UpperCase = false;
110      break;
111    default:
112      return createStringError(std::errc::invalid_argument,
113                               "trying to match value with invalid format");
114    }
115    IntValue.abs().toString(AbsoluteValueStr, Radix, /*Signed=*/false,
116                            /*formatAsCLiteral=*/false,
117                            /*UpperCase=*/UpperCase);
118  
119    StringRef AlternateFormPrefix = AlternateForm ? StringRef("0x") : StringRef();
120  
121    if (Precision > AbsoluteValueStr.size()) {
122      unsigned LeadingZeros = Precision - AbsoluteValueStr.size();
123      return (Twine(SignPrefix) + Twine(AlternateFormPrefix) +
124              std::string(LeadingZeros, '0') + AbsoluteValueStr)
125          .str();
126    }
127  
128    return (Twine(SignPrefix) + Twine(AlternateFormPrefix) + AbsoluteValueStr)
129        .str();
130  }
131  
132  Expected<ExpressionValue>
133  ExpressionFormat::valueFromStringRepr(StringRef StrVal,
134                                        const SourceMgr &SM) const {
135    bool ValueIsSigned = Value == Kind::Signed;
136    // Both the FileCheck utility and library only call this method with a valid
137    // value in StrVal. This is guaranteed by the regex returned by
138    // getWildcardRegex() above. Only underflow and overflow errors can thus
139    // occur. However new uses of this method could be added in the future so
140    // the error message does not make assumptions about StrVal.
141    StringRef IntegerParseErrorStr = "unable to represent numeric value";
142    if (ValueIsSigned) {
143      int64_t SignedValue;
144  
145      if (StrVal.getAsInteger(10, SignedValue))
146        return ErrorDiagnostic::get(SM, StrVal, IntegerParseErrorStr);
147  
148      return ExpressionValue(SignedValue);
149    }
150  
151    bool Hex = Value == Kind::HexUpper || Value == Kind::HexLower;
152    uint64_t UnsignedValue;
153    bool MissingFormPrefix = AlternateForm && !StrVal.consume_front("0x");
154    (void)MissingFormPrefix;
155    assert(!MissingFormPrefix && "missing alternate form prefix");
156    if (StrVal.getAsInteger(Hex ? 16 : 10, UnsignedValue))
157      return ErrorDiagnostic::get(SM, StrVal, IntegerParseErrorStr);
158  
159    return ExpressionValue(UnsignedValue);
160  }
161  
162  Expected<ExpressionValue> llvm::operator+(const ExpressionValue &LeftOperand,
163                                            const ExpressionValue &RightOperand) {
164    bool Overflow;
165    APInt Result = LeftOperand.getAPIntValue().sadd_ov(
166        RightOperand.getAPIntValue(), Overflow);
167    if (Overflow ||
168        (Result.isNegative() && !Result.isSignedIntN(Result.getBitWidth() - 1)))
169      return make_error<OverflowError>();
170  
171    if (Result.isNegative())
172      return ExpressionValue(Result.getSExtValue());
173    else
174      return ExpressionValue(Result.getZExtValue());
175  }
176  
177  Expected<ExpressionValue> llvm::operator-(const ExpressionValue &LeftOperand,
178                                            const ExpressionValue &RightOperand) {
179    bool Overflow;
180    APInt Result = LeftOperand.getAPIntValue().ssub_ov(
181        RightOperand.getAPIntValue(), Overflow);
182    if (Overflow ||
183        (Result.isNegative() && !Result.isSignedIntN(Result.getBitWidth() - 1)))
184      return make_error<OverflowError>();
185  
186    if (Result.isNegative())
187      return ExpressionValue(Result.getSExtValue());
188    else
189      return ExpressionValue(Result.getZExtValue());
190  }
191  
192  Expected<ExpressionValue> llvm::operator*(const ExpressionValue &LeftOperand,
193                                            const ExpressionValue &RightOperand) {
194    bool Overflow;
195    APInt Result = LeftOperand.getAPIntValue().smul_ov(
196        RightOperand.getAPIntValue(), Overflow);
197    if (Overflow ||
198        (Result.isNegative() && !Result.isSignedIntN(Result.getBitWidth() - 1)))
199      return make_error<OverflowError>();
200  
201    if (Result.isNegative())
202      return ExpressionValue(Result.getSExtValue());
203    else
204      return ExpressionValue(Result.getZExtValue());
205  }
206  
207  Expected<ExpressionValue> llvm::operator/(const ExpressionValue &LeftOperand,
208                                            const ExpressionValue &RightOperand) {
209    // Check for division by zero.
210    if (RightOperand.getAPIntValue().isZero())
211      return make_error<OverflowError>();
212  
213    bool Overflow;
214    APInt Result = LeftOperand.getAPIntValue().sdiv_ov(
215        RightOperand.getAPIntValue(), Overflow);
216    if (Overflow ||
217        (Result.isNegative() && !Result.isSignedIntN(Result.getBitWidth() - 1)))
218      return make_error<OverflowError>();
219  
220    if (Result.isNegative())
221      return ExpressionValue(Result.getSExtValue());
222    else
223      return ExpressionValue(Result.getZExtValue());
224  }
225  
226  Expected<ExpressionValue> llvm::max(const ExpressionValue &LeftOperand,
227                                      const ExpressionValue &RightOperand) {
228    return LeftOperand.getAPIntValue().slt(RightOperand.getAPIntValue())
229               ? RightOperand
230               : LeftOperand;
231  }
232  
233  Expected<ExpressionValue> llvm::min(const ExpressionValue &LeftOperand,
234                                      const ExpressionValue &RightOperand) {
235    if (cantFail(max(LeftOperand, RightOperand)).getAPIntValue() ==
236        LeftOperand.getAPIntValue())
237      return RightOperand;
238  
239    return LeftOperand;
240  }
241  
242  Expected<ExpressionValue> NumericVariableUse::eval() const {
243    std::optional<ExpressionValue> Value = Variable->getValue();
244    if (Value)
245      return *Value;
246  
247    return make_error<UndefVarError>(getExpressionStr());
248  }
249  
250  Expected<ExpressionValue> BinaryOperation::eval() const {
251    Expected<ExpressionValue> LeftOp = LeftOperand->eval();
252    Expected<ExpressionValue> RightOp = RightOperand->eval();
253  
254    // Bubble up any error (e.g. undefined variables) in the recursive
255    // evaluation.
256    if (!LeftOp || !RightOp) {
257      Error Err = Error::success();
258      if (!LeftOp)
259        Err = joinErrors(std::move(Err), LeftOp.takeError());
260      if (!RightOp)
261        Err = joinErrors(std::move(Err), RightOp.takeError());
262      return std::move(Err);
263    }
264  
265    return EvalBinop(*LeftOp, *RightOp);
266  }
267  
268  Expected<ExpressionFormat>
269  BinaryOperation::getImplicitFormat(const SourceMgr &SM) const {
270    Expected<ExpressionFormat> LeftFormat = LeftOperand->getImplicitFormat(SM);
271    Expected<ExpressionFormat> RightFormat = RightOperand->getImplicitFormat(SM);
272    if (!LeftFormat || !RightFormat) {
273      Error Err = Error::success();
274      if (!LeftFormat)
275        Err = joinErrors(std::move(Err), LeftFormat.takeError());
276      if (!RightFormat)
277        Err = joinErrors(std::move(Err), RightFormat.takeError());
278      return std::move(Err);
279    }
280  
281    if (*LeftFormat != ExpressionFormat::Kind::NoFormat &&
282        *RightFormat != ExpressionFormat::Kind::NoFormat &&
283        *LeftFormat != *RightFormat)
284      return ErrorDiagnostic::get(
285          SM, getExpressionStr(),
286          "implicit format conflict between '" + LeftOperand->getExpressionStr() +
287              "' (" + LeftFormat->toString() + ") and '" +
288              RightOperand->getExpressionStr() + "' (" + RightFormat->toString() +
289              "), need an explicit format specifier");
290  
291    return *LeftFormat != ExpressionFormat::Kind::NoFormat ? *LeftFormat
292                                                           : *RightFormat;
293  }
294  
295  Expected<std::string> NumericSubstitution::getResult() const {
296    assert(ExpressionPointer->getAST() != nullptr &&
297           "Substituting empty expression");
298    Expected<ExpressionValue> EvaluatedValue =
299        ExpressionPointer->getAST()->eval();
300    if (!EvaluatedValue)
301      return EvaluatedValue.takeError();
302    ExpressionFormat Format = ExpressionPointer->getFormat();
303    return Format.getMatchingString(*EvaluatedValue);
304  }
305  
306  Expected<std::string> StringSubstitution::getResult() const {
307    // Look up the value and escape it so that we can put it into the regex.
308    Expected<StringRef> VarVal = Context->getPatternVarValue(FromStr);
309    if (!VarVal)
310      return VarVal.takeError();
311    return Regex::escape(*VarVal);
312  }
313  
314  bool Pattern::isValidVarNameStart(char C) { return C == '_' || isAlpha(C); }
315  
316  Expected<Pattern::VariableProperties>
317  Pattern::parseVariable(StringRef &Str, const SourceMgr &SM) {
318    if (Str.empty())
319      return ErrorDiagnostic::get(SM, Str, "empty variable name");
320  
321    size_t I = 0;
322    bool IsPseudo = Str[0] == '@';
323  
324    // Global vars start with '$'.
325    if (Str[0] == '$' || IsPseudo)
326      ++I;
327  
328    if (!isValidVarNameStart(Str[I++]))
329      return ErrorDiagnostic::get(SM, Str, "invalid variable name");
330  
331    for (size_t E = Str.size(); I != E; ++I)
332      // Variable names are composed of alphanumeric characters and underscores.
333      if (Str[I] != '_' && !isAlnum(Str[I]))
334        break;
335  
336    StringRef Name = Str.take_front(I);
337    Str = Str.substr(I);
338    return VariableProperties {Name, IsPseudo};
339  }
340  
341  // StringRef holding all characters considered as horizontal whitespaces by
342  // FileCheck input canonicalization.
343  constexpr StringLiteral SpaceChars = " \t";
344  
345  // Parsing helper function that strips the first character in S and returns it.
346  static char popFront(StringRef &S) {
347    char C = S.front();
348    S = S.drop_front();
349    return C;
350  }
351  
352  char OverflowError::ID = 0;
353  char UndefVarError::ID = 0;
354  char ErrorDiagnostic::ID = 0;
355  char NotFoundError::ID = 0;
356  char ErrorReported::ID = 0;
357  
358  Expected<NumericVariable *> Pattern::parseNumericVariableDefinition(
359      StringRef &Expr, FileCheckPatternContext *Context,
360      std::optional<size_t> LineNumber, ExpressionFormat ImplicitFormat,
361      const SourceMgr &SM) {
362    Expected<VariableProperties> ParseVarResult = parseVariable(Expr, SM);
363    if (!ParseVarResult)
364      return ParseVarResult.takeError();
365    StringRef Name = ParseVarResult->Name;
366  
367    if (ParseVarResult->IsPseudo)
368      return ErrorDiagnostic::get(
369          SM, Name, "definition of pseudo numeric variable unsupported");
370  
371    // Detect collisions between string and numeric variables when the latter
372    // is created later than the former.
373    if (Context->DefinedVariableTable.contains(Name))
374      return ErrorDiagnostic::get(
375          SM, Name, "string variable with name '" + Name + "' already exists");
376  
377    Expr = Expr.ltrim(SpaceChars);
378    if (!Expr.empty())
379      return ErrorDiagnostic::get(
380          SM, Expr, "unexpected characters after numeric variable name");
381  
382    NumericVariable *DefinedNumericVariable;
383    auto VarTableIter = Context->GlobalNumericVariableTable.find(Name);
384    if (VarTableIter != Context->GlobalNumericVariableTable.end()) {
385      DefinedNumericVariable = VarTableIter->second;
386      if (DefinedNumericVariable->getImplicitFormat() != ImplicitFormat)
387        return ErrorDiagnostic::get(
388            SM, Expr, "format different from previous variable definition");
389    } else
390      DefinedNumericVariable =
391          Context->makeNumericVariable(Name, ImplicitFormat, LineNumber);
392  
393    return DefinedNumericVariable;
394  }
395  
396  Expected<std::unique_ptr<NumericVariableUse>> Pattern::parseNumericVariableUse(
397      StringRef Name, bool IsPseudo, std::optional<size_t> LineNumber,
398      FileCheckPatternContext *Context, const SourceMgr &SM) {
399    if (IsPseudo && !Name.equals("@LINE"))
400      return ErrorDiagnostic::get(
401          SM, Name, "invalid pseudo numeric variable '" + Name + "'");
402  
403    // Numeric variable definitions and uses are parsed in the order in which
404    // they appear in the CHECK patterns. For each definition, the pointer to the
405    // class instance of the corresponding numeric variable definition is stored
406    // in GlobalNumericVariableTable in parsePattern. Therefore, if the pointer
407    // we get below is null, it means no such variable was defined before. When
408    // that happens, we create a dummy variable so that parsing can continue. All
409    // uses of undefined variables, whether string or numeric, are then diagnosed
410    // in printNoMatch() after failing to match.
411    auto VarTableIter = Context->GlobalNumericVariableTable.find(Name);
412    NumericVariable *NumericVariable;
413    if (VarTableIter != Context->GlobalNumericVariableTable.end())
414      NumericVariable = VarTableIter->second;
415    else {
416      NumericVariable = Context->makeNumericVariable(
417          Name, ExpressionFormat(ExpressionFormat::Kind::Unsigned));
418      Context->GlobalNumericVariableTable[Name] = NumericVariable;
419    }
420  
421    std::optional<size_t> DefLineNumber = NumericVariable->getDefLineNumber();
422    if (DefLineNumber && LineNumber && *DefLineNumber == *LineNumber)
423      return ErrorDiagnostic::get(
424          SM, Name,
425          "numeric variable '" + Name +
426              "' defined earlier in the same CHECK directive");
427  
428    return std::make_unique<NumericVariableUse>(Name, NumericVariable);
429  }
430  
431  Expected<std::unique_ptr<ExpressionAST>> Pattern::parseNumericOperand(
432      StringRef &Expr, AllowedOperand AO, bool MaybeInvalidConstraint,
433      std::optional<size_t> LineNumber, FileCheckPatternContext *Context,
434      const SourceMgr &SM) {
435    if (Expr.startswith("(")) {
436      if (AO != AllowedOperand::Any)
437        return ErrorDiagnostic::get(
438            SM, Expr, "parenthesized expression not permitted here");
439      return parseParenExpr(Expr, LineNumber, Context, SM);
440    }
441  
442    if (AO == AllowedOperand::LineVar || AO == AllowedOperand::Any) {
443      // Try to parse as a numeric variable use.
444      Expected<Pattern::VariableProperties> ParseVarResult =
445          parseVariable(Expr, SM);
446      if (ParseVarResult) {
447        // Try to parse a function call.
448        if (Expr.ltrim(SpaceChars).startswith("(")) {
449          if (AO != AllowedOperand::Any)
450            return ErrorDiagnostic::get(SM, ParseVarResult->Name,
451                                        "unexpected function call");
452  
453          return parseCallExpr(Expr, ParseVarResult->Name, LineNumber, Context,
454                               SM);
455        }
456  
457        return parseNumericVariableUse(ParseVarResult->Name,
458                                       ParseVarResult->IsPseudo, LineNumber,
459                                       Context, SM);
460      }
461  
462      if (AO == AllowedOperand::LineVar)
463        return ParseVarResult.takeError();
464      // Ignore the error and retry parsing as a literal.
465      consumeError(ParseVarResult.takeError());
466    }
467  
468    // Otherwise, parse it as a literal.
469    int64_t SignedLiteralValue;
470    uint64_t UnsignedLiteralValue;
471    StringRef SaveExpr = Expr;
472    // Accept both signed and unsigned literal, default to signed literal.
473    if (!Expr.consumeInteger((AO == AllowedOperand::LegacyLiteral) ? 10 : 0,
474                             UnsignedLiteralValue))
475      return std::make_unique<ExpressionLiteral>(SaveExpr.drop_back(Expr.size()),
476                                                 UnsignedLiteralValue);
477    Expr = SaveExpr;
478    if (AO == AllowedOperand::Any && !Expr.consumeInteger(0, SignedLiteralValue))
479      return std::make_unique<ExpressionLiteral>(SaveExpr.drop_back(Expr.size()),
480                                                 SignedLiteralValue);
481  
482    return ErrorDiagnostic::get(
483        SM, Expr,
484        Twine("invalid ") +
485            (MaybeInvalidConstraint ? "matching constraint or " : "") +
486            "operand format");
487  }
488  
489  Expected<std::unique_ptr<ExpressionAST>>
490  Pattern::parseParenExpr(StringRef &Expr, std::optional<size_t> LineNumber,
491                          FileCheckPatternContext *Context, const SourceMgr &SM) {
492    Expr = Expr.ltrim(SpaceChars);
493    assert(Expr.startswith("("));
494  
495    // Parse right operand.
496    Expr.consume_front("(");
497    Expr = Expr.ltrim(SpaceChars);
498    if (Expr.empty())
499      return ErrorDiagnostic::get(SM, Expr, "missing operand in expression");
500  
501    // Note: parseNumericOperand handles nested opening parentheses.
502    Expected<std::unique_ptr<ExpressionAST>> SubExprResult = parseNumericOperand(
503        Expr, AllowedOperand::Any, /*MaybeInvalidConstraint=*/false, LineNumber,
504        Context, SM);
505    Expr = Expr.ltrim(SpaceChars);
506    while (SubExprResult && !Expr.empty() && !Expr.startswith(")")) {
507      StringRef OrigExpr = Expr;
508      SubExprResult = parseBinop(OrigExpr, Expr, std::move(*SubExprResult), false,
509                                 LineNumber, Context, SM);
510      Expr = Expr.ltrim(SpaceChars);
511    }
512    if (!SubExprResult)
513      return SubExprResult;
514  
515    if (!Expr.consume_front(")")) {
516      return ErrorDiagnostic::get(SM, Expr,
517                                  "missing ')' at end of nested expression");
518    }
519    return SubExprResult;
520  }
521  
522  Expected<std::unique_ptr<ExpressionAST>>
523  Pattern::parseBinop(StringRef Expr, StringRef &RemainingExpr,
524                      std::unique_ptr<ExpressionAST> LeftOp,
525                      bool IsLegacyLineExpr, std::optional<size_t> LineNumber,
526                      FileCheckPatternContext *Context, const SourceMgr &SM) {
527    RemainingExpr = RemainingExpr.ltrim(SpaceChars);
528    if (RemainingExpr.empty())
529      return std::move(LeftOp);
530  
531    // Check if this is a supported operation and select a function to perform
532    // it.
533    SMLoc OpLoc = SMLoc::getFromPointer(RemainingExpr.data());
534    char Operator = popFront(RemainingExpr);
535    binop_eval_t EvalBinop;
536    switch (Operator) {
537    case '+':
538      EvalBinop = operator+;
539      break;
540    case '-':
541      EvalBinop = operator-;
542      break;
543    default:
544      return ErrorDiagnostic::get(
545          SM, OpLoc, Twine("unsupported operation '") + Twine(Operator) + "'");
546    }
547  
548    // Parse right operand.
549    RemainingExpr = RemainingExpr.ltrim(SpaceChars);
550    if (RemainingExpr.empty())
551      return ErrorDiagnostic::get(SM, RemainingExpr,
552                                  "missing operand in expression");
553    // The second operand in a legacy @LINE expression is always a literal.
554    AllowedOperand AO =
555        IsLegacyLineExpr ? AllowedOperand::LegacyLiteral : AllowedOperand::Any;
556    Expected<std::unique_ptr<ExpressionAST>> RightOpResult =
557        parseNumericOperand(RemainingExpr, AO, /*MaybeInvalidConstraint=*/false,
558                            LineNumber, Context, SM);
559    if (!RightOpResult)
560      return RightOpResult;
561  
562    Expr = Expr.drop_back(RemainingExpr.size());
563    return std::make_unique<BinaryOperation>(Expr, EvalBinop, std::move(LeftOp),
564                                             std::move(*RightOpResult));
565  }
566  
567  Expected<std::unique_ptr<ExpressionAST>>
568  Pattern::parseCallExpr(StringRef &Expr, StringRef FuncName,
569                         std::optional<size_t> LineNumber,
570                         FileCheckPatternContext *Context, const SourceMgr &SM) {
571    Expr = Expr.ltrim(SpaceChars);
572    assert(Expr.startswith("("));
573  
574    auto OptFunc = StringSwitch<binop_eval_t>(FuncName)
575                       .Case("add", operator+)
576                       .Case("div", operator/)
577                       .Case("max", max)
578                       .Case("min", min)
579                       .Case("mul", operator*)
580                       .Case("sub", operator-)
581                       .Default(nullptr);
582  
583    if (!OptFunc)
584      return ErrorDiagnostic::get(
585          SM, FuncName, Twine("call to undefined function '") + FuncName + "'");
586  
587    Expr.consume_front("(");
588    Expr = Expr.ltrim(SpaceChars);
589  
590    // Parse call arguments, which are comma separated.
591    SmallVector<std::unique_ptr<ExpressionAST>, 4> Args;
592    while (!Expr.empty() && !Expr.startswith(")")) {
593      if (Expr.startswith(","))
594        return ErrorDiagnostic::get(SM, Expr, "missing argument");
595  
596      // Parse the argument, which is an arbitary expression.
597      StringRef OuterBinOpExpr = Expr;
598      Expected<std::unique_ptr<ExpressionAST>> Arg = parseNumericOperand(
599          Expr, AllowedOperand::Any, /*MaybeInvalidConstraint=*/false, LineNumber,
600          Context, SM);
601      while (Arg && !Expr.empty()) {
602        Expr = Expr.ltrim(SpaceChars);
603        // Have we reached an argument terminator?
604        if (Expr.startswith(",") || Expr.startswith(")"))
605          break;
606  
607        // Arg = Arg <op> <expr>
608        Arg = parseBinop(OuterBinOpExpr, Expr, std::move(*Arg), false, LineNumber,
609                         Context, SM);
610      }
611  
612      // Prefer an expression error over a generic invalid argument message.
613      if (!Arg)
614        return Arg.takeError();
615      Args.push_back(std::move(*Arg));
616  
617      // Have we parsed all available arguments?
618      Expr = Expr.ltrim(SpaceChars);
619      if (!Expr.consume_front(","))
620        break;
621  
622      Expr = Expr.ltrim(SpaceChars);
623      if (Expr.startswith(")"))
624        return ErrorDiagnostic::get(SM, Expr, "missing argument");
625    }
626  
627    if (!Expr.consume_front(")"))
628      return ErrorDiagnostic::get(SM, Expr,
629                                  "missing ')' at end of call expression");
630  
631    const unsigned NumArgs = Args.size();
632    if (NumArgs == 2)
633      return std::make_unique<BinaryOperation>(Expr, *OptFunc, std::move(Args[0]),
634                                               std::move(Args[1]));
635  
636    // TODO: Support more than binop_eval_t.
637    return ErrorDiagnostic::get(SM, FuncName,
638                                Twine("function '") + FuncName +
639                                    Twine("' takes 2 arguments but ") +
640                                    Twine(NumArgs) + " given");
641  }
642  
643  Expected<std::unique_ptr<Expression>> Pattern::parseNumericSubstitutionBlock(
644      StringRef Expr, std::optional<NumericVariable *> &DefinedNumericVariable,
645      bool IsLegacyLineExpr, std::optional<size_t> LineNumber,
646      FileCheckPatternContext *Context, const SourceMgr &SM) {
647    std::unique_ptr<ExpressionAST> ExpressionASTPointer = nullptr;
648    StringRef DefExpr = StringRef();
649    DefinedNumericVariable = std::nullopt;
650    ExpressionFormat ExplicitFormat = ExpressionFormat();
651    unsigned Precision = 0;
652  
653    // Parse format specifier (NOTE: ',' is also an argument seperator).
654    size_t FormatSpecEnd = Expr.find(',');
655    size_t FunctionStart = Expr.find('(');
656    if (FormatSpecEnd != StringRef::npos && FormatSpecEnd < FunctionStart) {
657      StringRef FormatExpr = Expr.take_front(FormatSpecEnd);
658      Expr = Expr.drop_front(FormatSpecEnd + 1);
659      FormatExpr = FormatExpr.trim(SpaceChars);
660      if (!FormatExpr.consume_front("%"))
661        return ErrorDiagnostic::get(
662            SM, FormatExpr,
663            "invalid matching format specification in expression");
664  
665      // Parse alternate form flag.
666      SMLoc AlternateFormFlagLoc = SMLoc::getFromPointer(FormatExpr.data());
667      bool AlternateForm = FormatExpr.consume_front("#");
668  
669      // Parse precision.
670      if (FormatExpr.consume_front(".")) {
671        if (FormatExpr.consumeInteger(10, Precision))
672          return ErrorDiagnostic::get(SM, FormatExpr,
673                                      "invalid precision in format specifier");
674      }
675  
676      if (!FormatExpr.empty()) {
677        // Check for unknown matching format specifier and set matching format in
678        // class instance representing this expression.
679        SMLoc FmtLoc = SMLoc::getFromPointer(FormatExpr.data());
680        switch (popFront(FormatExpr)) {
681        case 'u':
682          ExplicitFormat =
683              ExpressionFormat(ExpressionFormat::Kind::Unsigned, Precision);
684          break;
685        case 'd':
686          ExplicitFormat =
687              ExpressionFormat(ExpressionFormat::Kind::Signed, Precision);
688          break;
689        case 'x':
690          ExplicitFormat = ExpressionFormat(ExpressionFormat::Kind::HexLower,
691                                            Precision, AlternateForm);
692          break;
693        case 'X':
694          ExplicitFormat = ExpressionFormat(ExpressionFormat::Kind::HexUpper,
695                                            Precision, AlternateForm);
696          break;
697        default:
698          return ErrorDiagnostic::get(SM, FmtLoc,
699                                      "invalid format specifier in expression");
700        }
701      }
702  
703      if (AlternateForm && ExplicitFormat != ExpressionFormat::Kind::HexLower &&
704          ExplicitFormat != ExpressionFormat::Kind::HexUpper)
705        return ErrorDiagnostic::get(
706            SM, AlternateFormFlagLoc,
707            "alternate form only supported for hex values");
708  
709      FormatExpr = FormatExpr.ltrim(SpaceChars);
710      if (!FormatExpr.empty())
711        return ErrorDiagnostic::get(
712            SM, FormatExpr,
713            "invalid matching format specification in expression");
714    }
715  
716    // Save variable definition expression if any.
717    size_t DefEnd = Expr.find(':');
718    if (DefEnd != StringRef::npos) {
719      DefExpr = Expr.substr(0, DefEnd);
720      Expr = Expr.substr(DefEnd + 1);
721    }
722  
723    // Parse matching constraint.
724    Expr = Expr.ltrim(SpaceChars);
725    bool HasParsedValidConstraint = false;
726    if (Expr.consume_front("=="))
727      HasParsedValidConstraint = true;
728  
729    // Parse the expression itself.
730    Expr = Expr.ltrim(SpaceChars);
731    if (Expr.empty()) {
732      if (HasParsedValidConstraint)
733        return ErrorDiagnostic::get(
734            SM, Expr, "empty numeric expression should not have a constraint");
735    } else {
736      Expr = Expr.rtrim(SpaceChars);
737      StringRef OuterBinOpExpr = Expr;
738      // The first operand in a legacy @LINE expression is always the @LINE
739      // pseudo variable.
740      AllowedOperand AO =
741          IsLegacyLineExpr ? AllowedOperand::LineVar : AllowedOperand::Any;
742      Expected<std::unique_ptr<ExpressionAST>> ParseResult = parseNumericOperand(
743          Expr, AO, !HasParsedValidConstraint, LineNumber, Context, SM);
744      while (ParseResult && !Expr.empty()) {
745        ParseResult = parseBinop(OuterBinOpExpr, Expr, std::move(*ParseResult),
746                                 IsLegacyLineExpr, LineNumber, Context, SM);
747        // Legacy @LINE expressions only allow 2 operands.
748        if (ParseResult && IsLegacyLineExpr && !Expr.empty())
749          return ErrorDiagnostic::get(
750              SM, Expr,
751              "unexpected characters at end of expression '" + Expr + "'");
752      }
753      if (!ParseResult)
754        return ParseResult.takeError();
755      ExpressionASTPointer = std::move(*ParseResult);
756    }
757  
758    // Select format of the expression, i.e. (i) its explicit format, if any,
759    // otherwise (ii) its implicit format, if any, otherwise (iii) the default
760    // format (unsigned). Error out in case of conflicting implicit format
761    // without explicit format.
762    ExpressionFormat Format;
763    if (ExplicitFormat)
764      Format = ExplicitFormat;
765    else if (ExpressionASTPointer) {
766      Expected<ExpressionFormat> ImplicitFormat =
767          ExpressionASTPointer->getImplicitFormat(SM);
768      if (!ImplicitFormat)
769        return ImplicitFormat.takeError();
770      Format = *ImplicitFormat;
771    }
772    if (!Format)
773      Format = ExpressionFormat(ExpressionFormat::Kind::Unsigned, Precision);
774  
775    std::unique_ptr<Expression> ExpressionPointer =
776        std::make_unique<Expression>(std::move(ExpressionASTPointer), Format);
777  
778    // Parse the numeric variable definition.
779    if (DefEnd != StringRef::npos) {
780      DefExpr = DefExpr.ltrim(SpaceChars);
781      Expected<NumericVariable *> ParseResult = parseNumericVariableDefinition(
782          DefExpr, Context, LineNumber, ExpressionPointer->getFormat(), SM);
783  
784      if (!ParseResult)
785        return ParseResult.takeError();
786      DefinedNumericVariable = *ParseResult;
787    }
788  
789    return std::move(ExpressionPointer);
790  }
791  
/// Parses the check pattern \p PatternStr that follows a directive with
/// prefix \p Prefix and records the result in this Pattern: either a fixed
/// string (FixedStr) or a regular expression built up in RegExStr, together
/// with the string/numeric variable definitions and the substitutions found
/// in the pattern.
///
/// Diagnostics are printed through \p SM. \p Req supplies the MatchFullLines,
/// IgnoreCase and NoCanonicalizeWhiteSpace options consulted here.
///
/// \returns true if an error was diagnosed, false on success.
bool Pattern::parsePattern(StringRef PatternStr, StringRef Prefix,
                           SourceMgr &SM, const FileCheckRequest &Req) {
  // Full-line matching is never applied to CHECK-NOT patterns.
  bool MatchFullLinesHere = Req.MatchFullLines && CheckTy != Check::CheckNot;
  IgnoreCase = Req.IgnoreCase;

  // Remember where the pattern starts; used for the diagnostics below.
  PatternLoc = SMLoc::getFromPointer(PatternStr.data());

  if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines))
    // Ignore trailing whitespace.
    while (!PatternStr.empty() &&
           (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
      PatternStr = PatternStr.substr(0, PatternStr.size() - 1);

  // Check that there is something on the line.
  if (PatternStr.empty() && CheckTy != Check::CheckEmpty) {
    SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
                    "found empty check string with prefix '" + Prefix + ":'");
    return true;
  }

  // Conversely, an empty check must not be followed by any text.
  if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) {
    SM.PrintMessage(
        PatternLoc, SourceMgr::DK_Error,
        "found non-empty check string for empty check with prefix '" + Prefix +
            ":'");
    return true;
  }

  // An empty check is implemented as a regex that consumes the preceding
  // newline and then requires an end of line.
  if (CheckTy == Check::CheckEmpty) {
    RegExStr = "(\n$)";
    return false;
  }

  // If literal check, set fixed string.
  if (CheckTy.isLiteralMatch()) {
    FixedStr = PatternStr;
    return false;
  }

  // Check to see if this is a fixed string, or if it has regex pieces.
  if (!MatchFullLinesHere &&
      (PatternStr.size() < 2 ||
       (!PatternStr.contains("{{") && !PatternStr.contains("[[")))) {
    FixedStr = PatternStr;
    return false;
  }

  // Full-line matches are anchored at the start of the line; unless
  // whitespace canonicalization is disabled, leading spaces are tolerated.
  if (MatchFullLinesHere) {
    RegExStr += '^';
    if (!Req.NoCanonicalizeWhiteSpace)
      RegExStr += " *";
  }

  // Paren value #0 is for the fully matched string.  Any new parenthesized
  // values add from there.
  unsigned CurParen = 1;

  // Otherwise, there is at least one regex piece.  Build up the regex pattern
  // by escaping scary characters in fixed strings, building up one big regex.
  while (!PatternStr.empty()) {
    // RegEx matches.
    if (PatternStr.startswith("{{")) {
      // This is the start of a regex match.  Scan for the }}.
      size_t End = PatternStr.find("}}");
      if (End == StringRef::npos) {
        SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
                        SourceMgr::DK_Error,
                        "found start of regex string with no end '}}'");
        return true;
      }

      // Enclose {{}} patterns in parens just like [[]] even though we're not
      // capturing the result for any purpose.  This is required in case the
      // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
      // want this to turn into: "abc(x|z)def" not "abcx|zdef".
      RegExStr += '(';
      ++CurParen;

      if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
        return true;
      RegExStr += ')';

      PatternStr = PatternStr.substr(End + 2);
      continue;
    }

    // String and numeric substitution blocks. Pattern substitution blocks come
    // in two forms: [[foo:.*]] and [[foo]]. The former matches .* (or some
    // other regex) and assigns it to the string variable 'foo'. The latter
    // substitutes foo's value. Numeric substitution blocks recognize the same
    // form as string ones, but start with a '#' sign after the double
    // brackets. They also accept a combined form which sets a numeric variable
    // to the evaluation of an expression. Both string and numeric variable
    // names must satisfy the regular expression "[a-zA-Z_][0-9a-zA-Z_]*" to be
    // valid, as this helps catch some common errors. If there are extra '['s
    // before the "[[", treat them literally.
    if (PatternStr.startswith("[[") && !PatternStr.startswith("[[[")) {
      StringRef UnparsedPatternStr = PatternStr.substr(2);
      // Find the closing bracket pair ending the match.  End is going to be an
      // offset relative to the beginning of the match string.
      size_t End = FindRegexVarEnd(UnparsedPatternStr, SM);
      StringRef MatchStr = UnparsedPatternStr.substr(0, End);
      bool IsNumBlock = MatchStr.consume_front("#");

      // FindRegexVarEnd() returns npos when no terminating "]]" was found.
      if (End == StringRef::npos) {
        SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
                        SourceMgr::DK_Error,
                        "Invalid substitution block, no ]] found");
        return true;
      }
      // Strip the substitution block we are parsing. End points to the start
      // of the "]]" closing the expression so account for it in computing the
      // index of the first unparsed character.
      PatternStr = UnparsedPatternStr.substr(End + 2);

      bool IsDefinition = false;
      bool SubstNeeded = false;
      // Whether the substitution block is a legacy use of @LINE with string
      // substitution block syntax.
      bool IsLegacyLineExpr = false;
      StringRef DefName;
      StringRef SubstStr;
      StringRef MatchRegexp;
      std::string WildcardRegexp;
      // Offset in RegExStr at which a substitution's value is to be inserted.
      // It is bumped past the opening '(' below when the block is also a
      // definition.
      size_t SubstInsertIdx = RegExStr.size();

      // Parse string variable or legacy @LINE expression.
      if (!IsNumBlock) {
        // Whitespace is rejected anywhere before the ':' (or in the whole
        // block when there is no ':').
        size_t VarEndIdx = MatchStr.find(':');
        size_t SpacePos = MatchStr.substr(0, VarEndIdx).find_first_of(" \t");
        if (SpacePos != StringRef::npos) {
          SM.PrintMessage(SMLoc::getFromPointer(MatchStr.data() + SpacePos),
                          SourceMgr::DK_Error, "unexpected whitespace");
          return true;
        }

        // Get the name (e.g. "foo") and verify it is well formed.
        StringRef OrigMatchStr = MatchStr;
        Expected<Pattern::VariableProperties> ParseVarResult =
            parseVariable(MatchStr, SM);
        if (!ParseVarResult) {
          logAllUnhandledErrors(ParseVarResult.takeError(), errs());
          return true;
        }
        StringRef Name = ParseVarResult->Name;
        bool IsPseudo = ParseVarResult->IsPseudo;

        // A ':' in the block makes it a definition; otherwise it is a use.
        IsDefinition = (VarEndIdx != StringRef::npos);
        SubstNeeded = !IsDefinition;
        if (IsDefinition) {
          // Pseudo variables cannot be defined, and the name must be
          // immediately followed by the ':'.
          if ((IsPseudo || !MatchStr.consume_front(":"))) {
            SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
                            SourceMgr::DK_Error,
                            "invalid name in string variable definition");
            return true;
          }

          // Detect collisions between string and numeric variables when the
          // former is created later than the latter.
          if (Context->GlobalNumericVariableTable.contains(Name)) {
            SM.PrintMessage(
                SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
                "numeric variable with name '" + Name + "' already exists");
            return true;
          }
          DefName = Name;
          MatchRegexp = MatchStr;
        } else {
          if (IsPseudo) {
            // A pseudo variable used with string syntax is handed to the
            // numeric block parser below, starting again from the original
            // unconsumed text.
            MatchStr = OrigMatchStr;
            IsLegacyLineExpr = IsNumBlock = true;
          } else {
            // Nothing may follow the variable name in a plain string use.
            if (!MatchStr.empty()) {
              SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
                              SourceMgr::DK_Error,
                              "invalid name in string variable use");
              return true;
            }
            SubstStr = Name;
          }
        }
      }

      // Parse numeric substitution block.
      std::unique_ptr<Expression> ExpressionPointer;
      std::optional<NumericVariable *> DefinedNumericVariable;
      if (IsNumBlock) {
        Expected<std::unique_ptr<Expression>> ParseResult =
            parseNumericSubstitutionBlock(MatchStr, DefinedNumericVariable,
                                          IsLegacyLineExpr, LineNumber, Context,
                                          SM);
        if (!ParseResult) {
          logAllUnhandledErrors(ParseResult.takeError(), errs());
          return true;
        }
        ExpressionPointer = std::move(*ParseResult);
        // Only blocks that carry an expression need a substitution; the
        // others match a wildcard regex derived from the expression format.
        SubstNeeded = ExpressionPointer->getAST() != nullptr;
        if (DefinedNumericVariable) {
          IsDefinition = true;
          DefName = (*DefinedNumericVariable)->getName();
        }
        if (SubstNeeded)
          SubstStr = MatchStr;
        else {
          ExpressionFormat Format = ExpressionPointer->getFormat();
          WildcardRegexp = cantFail(Format.getWildcardRegex());
          MatchRegexp = WildcardRegexp;
        }
      }

      // Handle variable definition: [[<def>:(...)]] and [[#(...)<def>:(...)]].
      if (IsDefinition) {
        RegExStr += '(';
        ++SubstInsertIdx;

        if (IsNumBlock) {
          NumericVariableMatch NumericVariableDefinition = {
              *DefinedNumericVariable, CurParen};
          NumericVariableDefs[DefName] = NumericVariableDefinition;
          // This store is done here rather than in match() to allow
          // parseNumericVariableUse() to get the pointer to the class instance
          // of the right variable definition corresponding to a given numeric
          // variable use.
          Context->GlobalNumericVariableTable[DefName] =
              *DefinedNumericVariable;
        } else {
          VariableDefs[DefName] = CurParen;
          // Mark string variable as defined to detect collisions between
          // string and numeric variables in parseNumericVariableUse() and
          // defineCmdlineVariables() when the latter is created later than the
          // former. We cannot reuse GlobalVariableTable for this by populating
          // it with an empty string since we would then lose the ability to
          // detect the use of an undefined variable in match().
          Context->DefinedVariableTable[DefName] = true;
        }

        ++CurParen;
      }

      if (!MatchRegexp.empty() && AddRegExToRegEx(MatchRegexp, CurParen, SM))
        return true;

      if (IsDefinition)
        RegExStr += ')';

      // Handle substitutions: [[foo]] and [[#<foo expr>]].
      if (SubstNeeded) {
        // Handle substitution of string variables that were defined earlier on
        // the same line by emitting a backreference. Expressions do not
        // support substituting a numeric variable defined on the same line.
        if (!IsNumBlock && VariableDefs.find(SubstStr) != VariableDefs.end()) {
          unsigned CaptureParenGroup = VariableDefs[SubstStr];
          // AddBackrefToRegEx() only supports the single-digit groups 1-9.
          if (CaptureParenGroup < 1 || CaptureParenGroup > 9) {
            SM.PrintMessage(SMLoc::getFromPointer(SubstStr.data()),
                            SourceMgr::DK_Error,
                            "Can't back-reference more than 9 variables");
            return true;
          }
          AddBackrefToRegEx(CaptureParenGroup);
        } else {
          // Handle substitution of string variables ([[<var>]]) defined in
          // previous CHECK patterns, and substitution of expressions.
          Substitution *Substitution =
              IsNumBlock
                  ? Context->makeNumericSubstitution(
                        SubstStr, std::move(ExpressionPointer), SubstInsertIdx)
                  : Context->makeStringSubstitution(SubstStr, SubstInsertIdx);
          Substitutions.push_back(Substitution);
        }
      }

      continue;
    }

    // Handle fixed string matches.
    // Find the end, which is the start of the next regex. The search starts
    // at offset 1 so that at least one character is always consumed as
    // literal text (e.g. the first '[' of a "[[[").
    size_t FixedMatchEnd =
        std::min(PatternStr.find("{{", 1), PatternStr.find("[[", 1));
    RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
    PatternStr = PatternStr.substr(FixedMatchEnd);
  }

  // Full-line matches are anchored at the end of the line as well, again
  // tolerating spaces unless whitespace canonicalization is disabled.
  if (MatchFullLinesHere) {
    if (!Req.NoCanonicalizeWhiteSpace)
      RegExStr += " *";
    RegExStr += '$';
  }

  return false;
}
1082  
1083  bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
1084    Regex R(RS);
1085    std::string Error;
1086    if (!R.isValid(Error)) {
1087      SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
1088                      "invalid regex: " + Error);
1089      return true;
1090    }
1091  
1092    RegExStr += RS.str();
1093    CurParen += R.getNumMatches();
1094    return false;
1095  }
1096  
1097  void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
1098    assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
1099    std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum);
1100    RegExStr += Backref;
1101  }
1102  
1103  Pattern::MatchResult Pattern::match(StringRef Buffer,
1104                                      const SourceMgr &SM) const {
1105    // If this is the EOF pattern, match it immediately.
1106    if (CheckTy == Check::CheckEOF)
1107      return MatchResult(Buffer.size(), 0, Error::success());
1108  
1109    // If this is a fixed string pattern, just match it now.
1110    if (!FixedStr.empty()) {
1111      size_t Pos =
1112          IgnoreCase ? Buffer.find_insensitive(FixedStr) : Buffer.find(FixedStr);
1113      if (Pos == StringRef::npos)
1114        return make_error<NotFoundError>();
1115      return MatchResult(Pos, /*MatchLen=*/FixedStr.size(), Error::success());
1116    }
1117  
1118    // Regex match.
1119  
1120    // If there are substitutions, we need to create a temporary string with the
1121    // actual value.
1122    StringRef RegExToMatch = RegExStr;
1123    std::string TmpStr;
1124    if (!Substitutions.empty()) {
1125      TmpStr = RegExStr;
1126      if (LineNumber)
1127        Context->LineVariable->setValue(ExpressionValue(*LineNumber));
1128  
1129      size_t InsertOffset = 0;
1130      // Substitute all string variables and expressions whose values are only
1131      // now known. Use of string variables defined on the same line are handled
1132      // by back-references.
1133      Error Errs = Error::success();
1134      for (const auto &Substitution : Substitutions) {
1135        // Substitute and check for failure (e.g. use of undefined variable).
1136        Expected<std::string> Value = Substitution->getResult();
1137        if (!Value) {
1138          // Convert to an ErrorDiagnostic to get location information. This is
1139          // done here rather than printMatch/printNoMatch since now we know which
1140          // substitution block caused the overflow.
1141          Errs = joinErrors(std::move(Errs),
1142                            handleErrors(
1143                                Value.takeError(),
1144                                [&](const OverflowError &E) {
1145                                  return ErrorDiagnostic::get(
1146                                      SM, Substitution->getFromString(),
1147                                      "unable to substitute variable or "
1148                                      "numeric expression: overflow error");
1149                                },
1150                                [&SM](const UndefVarError &E) {
1151                                  return ErrorDiagnostic::get(SM, E.getVarName(),
1152                                                              E.message());
1153                                }));
1154          continue;
1155        }
1156  
1157        // Plop it into the regex at the adjusted offset.
1158        TmpStr.insert(TmpStr.begin() + Substitution->getIndex() + InsertOffset,
1159                      Value->begin(), Value->end());
1160        InsertOffset += Value->size();
1161      }
1162      if (Errs)
1163        return std::move(Errs);
1164  
1165      // Match the newly constructed regex.
1166      RegExToMatch = TmpStr;
1167    }
1168  
1169    SmallVector<StringRef, 4> MatchInfo;
1170    unsigned int Flags = Regex::Newline;
1171    if (IgnoreCase)
1172      Flags |= Regex::IgnoreCase;
1173    if (!Regex(RegExToMatch, Flags).match(Buffer, &MatchInfo))
1174      return make_error<NotFoundError>();
1175  
1176    // Successful regex match.
1177    assert(!MatchInfo.empty() && "Didn't get any match");
1178    StringRef FullMatch = MatchInfo[0];
1179  
1180    // If this defines any string variables, remember their values.
1181    for (const auto &VariableDef : VariableDefs) {
1182      assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
1183      Context->GlobalVariableTable[VariableDef.first] =
1184          MatchInfo[VariableDef.second];
1185    }
1186  
1187    // Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after
1188    // the required preceding newline, which is consumed by the pattern in the
1189    // case of CHECK-EMPTY but not CHECK-NEXT.
1190    size_t MatchStartSkip = CheckTy == Check::CheckEmpty;
1191    Match TheMatch;
1192    TheMatch.Pos = FullMatch.data() - Buffer.data() + MatchStartSkip;
1193    TheMatch.Len = FullMatch.size() - MatchStartSkip;
1194  
1195    // If this defines any numeric variables, remember their values.
1196    for (const auto &NumericVariableDef : NumericVariableDefs) {
1197      const NumericVariableMatch &NumericVariableMatch =
1198          NumericVariableDef.getValue();
1199      unsigned CaptureParenGroup = NumericVariableMatch.CaptureParenGroup;
1200      assert(CaptureParenGroup < MatchInfo.size() && "Internal paren error");
1201      NumericVariable *DefinedNumericVariable =
1202          NumericVariableMatch.DefinedNumericVariable;
1203  
1204      StringRef MatchedValue = MatchInfo[CaptureParenGroup];
1205      ExpressionFormat Format = DefinedNumericVariable->getImplicitFormat();
1206      Expected<ExpressionValue> Value =
1207          Format.valueFromStringRepr(MatchedValue, SM);
1208      if (!Value)
1209        return MatchResult(TheMatch, Value.takeError());
1210      DefinedNumericVariable->setValue(*Value, MatchedValue);
1211    }
1212  
1213    return MatchResult(TheMatch, Error::success());
1214  }
1215  
1216  unsigned Pattern::computeMatchDistance(StringRef Buffer) const {
1217    // Just compute the number of matching characters. For regular expressions, we
1218    // just compare against the regex itself and hope for the best.
1219    //
1220    // FIXME: One easy improvement here is have the regex lib generate a single
1221    // example regular expression which matches, and use that as the example
1222    // string.
1223    StringRef ExampleString(FixedStr);
1224    if (ExampleString.empty())
1225      ExampleString = RegExStr;
1226  
1227    // Only compare up to the first line in the buffer, or the string size.
1228    StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
1229    BufferPrefix = BufferPrefix.split('\n').first;
1230    return BufferPrefix.edit_distance(ExampleString);
1231  }
1232  
1233  void Pattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer,
1234                                   SMRange Range,
1235                                   FileCheckDiag::MatchType MatchTy,
1236                                   std::vector<FileCheckDiag> *Diags) const {
1237    // Print what we know about substitutions.
1238    if (!Substitutions.empty()) {
1239      for (const auto &Substitution : Substitutions) {
1240        SmallString<256> Msg;
1241        raw_svector_ostream OS(Msg);
1242  
1243        Expected<std::string> MatchedValue = Substitution->getResult();
1244        // Substitution failures are handled in printNoMatch().
1245        if (!MatchedValue) {
1246          consumeError(MatchedValue.takeError());
1247          continue;
1248        }
1249  
1250        OS << "with \"";
1251        OS.write_escaped(Substitution->getFromString()) << "\" equal to \"";
1252        OS.write_escaped(*MatchedValue) << "\"";
1253  
1254        // We report only the start of the match/search range to suggest we are
1255        // reporting the substitutions as set at the start of the match/search.
1256        // Indicating a non-zero-length range might instead seem to imply that the
1257        // substitution matches or was captured from exactly that range.
1258        if (Diags)
1259          Diags->emplace_back(SM, CheckTy, getLoc(), MatchTy,
1260                              SMRange(Range.Start, Range.Start), OS.str());
1261        else
1262          SM.PrintMessage(Range.Start, SourceMgr::DK_Note, OS.str());
1263      }
1264    }
1265  }
1266  
1267  void Pattern::printVariableDefs(const SourceMgr &SM,
1268                                  FileCheckDiag::MatchType MatchTy,
1269                                  std::vector<FileCheckDiag> *Diags) const {
1270    if (VariableDefs.empty() && NumericVariableDefs.empty())
1271      return;
1272    // Build list of variable captures.
1273    struct VarCapture {
1274      StringRef Name;
1275      SMRange Range;
1276    };
1277    SmallVector<VarCapture, 2> VarCaptures;
1278    for (const auto &VariableDef : VariableDefs) {
1279      VarCapture VC;
1280      VC.Name = VariableDef.first;
1281      StringRef Value = Context->GlobalVariableTable[VC.Name];
1282      SMLoc Start = SMLoc::getFromPointer(Value.data());
1283      SMLoc End = SMLoc::getFromPointer(Value.data() + Value.size());
1284      VC.Range = SMRange(Start, End);
1285      VarCaptures.push_back(VC);
1286    }
1287    for (const auto &VariableDef : NumericVariableDefs) {
1288      VarCapture VC;
1289      VC.Name = VariableDef.getKey();
1290      std::optional<StringRef> StrValue =
1291          VariableDef.getValue().DefinedNumericVariable->getStringValue();
1292      if (!StrValue)
1293        continue;
1294      SMLoc Start = SMLoc::getFromPointer(StrValue->data());
1295      SMLoc End = SMLoc::getFromPointer(StrValue->data() + StrValue->size());
1296      VC.Range = SMRange(Start, End);
1297      VarCaptures.push_back(VC);
1298    }
1299    // Sort variable captures by the order in which they matched the input.
1300    // Ranges shouldn't be overlapping, so we can just compare the start.
1301    llvm::sort(VarCaptures, [](const VarCapture &A, const VarCapture &B) {
1302      if (&A == &B)
1303        return false;
1304      assert(A.Range.Start != B.Range.Start &&
1305             "unexpected overlapping variable captures");
1306      return A.Range.Start.getPointer() < B.Range.Start.getPointer();
1307    });
1308    // Create notes for the sorted captures.
1309    for (const VarCapture &VC : VarCaptures) {
1310      SmallString<256> Msg;
1311      raw_svector_ostream OS(Msg);
1312      OS << "captured var \"" << VC.Name << "\"";
1313      if (Diags)
1314        Diags->emplace_back(SM, CheckTy, getLoc(), MatchTy, VC.Range, OS.str());
1315      else
1316        SM.PrintMessage(VC.Range.Start, SourceMgr::DK_Note, OS.str(), VC.Range);
1317    }
1318  }
1319  
1320  static SMRange ProcessMatchResult(FileCheckDiag::MatchType MatchTy,
1321                                    const SourceMgr &SM, SMLoc Loc,
1322                                    Check::FileCheckType CheckTy,
1323                                    StringRef Buffer, size_t Pos, size_t Len,
1324                                    std::vector<FileCheckDiag> *Diags,
1325                                    bool AdjustPrevDiags = false) {
1326    SMLoc Start = SMLoc::getFromPointer(Buffer.data() + Pos);
1327    SMLoc End = SMLoc::getFromPointer(Buffer.data() + Pos + Len);
1328    SMRange Range(Start, End);
1329    if (Diags) {
1330      if (AdjustPrevDiags) {
1331        SMLoc CheckLoc = Diags->rbegin()->CheckLoc;
1332        for (auto I = Diags->rbegin(), E = Diags->rend();
1333             I != E && I->CheckLoc == CheckLoc; ++I)
1334          I->MatchTy = MatchTy;
1335      } else
1336        Diags->emplace_back(SM, CheckTy, Loc, MatchTy, Range);
1337    }
1338    return Range;
1339  }
1340  
1341  void Pattern::printFuzzyMatch(const SourceMgr &SM, StringRef Buffer,
1342                                std::vector<FileCheckDiag> *Diags) const {
1343    // Attempt to find the closest/best fuzzy match.  Usually an error happens
1344    // because some string in the output didn't exactly match. In these cases, we
1345    // would like to show the user a best guess at what "should have" matched, to
1346    // save them having to actually check the input manually.
1347    size_t NumLinesForward = 0;
1348    size_t Best = StringRef::npos;
1349    double BestQuality = 0;
1350  
1351    // Use an arbitrary 4k limit on how far we will search.
1352    for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
1353      if (Buffer[i] == '\n')
1354        ++NumLinesForward;
1355  
1356      // Patterns have leading whitespace stripped, so skip whitespace when
1357      // looking for something which looks like a pattern.
1358      if (Buffer[i] == ' ' || Buffer[i] == '\t')
1359        continue;
1360  
1361      // Compute the "quality" of this match as an arbitrary combination of the
1362      // match distance and the number of lines skipped to get to this match.
1363      unsigned Distance = computeMatchDistance(Buffer.substr(i));
1364      double Quality = Distance + (NumLinesForward / 100.);
1365  
1366      if (Quality < BestQuality || Best == StringRef::npos) {
1367        Best = i;
1368        BestQuality = Quality;
1369      }
1370    }
1371  
1372    // Print the "possible intended match here" line if we found something
1373    // reasonable and not equal to what we showed in the "scanning from here"
1374    // line.
1375    if (Best && Best != StringRef::npos && BestQuality < 50) {
1376      SMRange MatchRange =
1377          ProcessMatchResult(FileCheckDiag::MatchFuzzy, SM, getLoc(),
1378                             getCheckTy(), Buffer, Best, 0, Diags);
1379      SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note,
1380                      "possible intended match here");
1381  
1382      // FIXME: If we wanted to be really friendly we would show why the match
1383      // failed, as it can be hard to spot simple one character differences.
1384    }
1385  }
1386  
1387  Expected<StringRef>
1388  FileCheckPatternContext::getPatternVarValue(StringRef VarName) {
1389    auto VarIter = GlobalVariableTable.find(VarName);
1390    if (VarIter == GlobalVariableTable.end())
1391      return make_error<UndefVarError>(VarName);
1392  
1393    return VarIter->second;
1394  }
1395  
1396  template <class... Types>
1397  NumericVariable *FileCheckPatternContext::makeNumericVariable(Types... args) {
1398    NumericVariables.push_back(std::make_unique<NumericVariable>(args...));
1399    return NumericVariables.back().get();
1400  }
1401  
1402  Substitution *
1403  FileCheckPatternContext::makeStringSubstitution(StringRef VarName,
1404                                                  size_t InsertIdx) {
1405    Substitutions.push_back(
1406        std::make_unique<StringSubstitution>(this, VarName, InsertIdx));
1407    return Substitutions.back().get();
1408  }
1409  
1410  Substitution *FileCheckPatternContext::makeNumericSubstitution(
1411      StringRef ExpressionStr, std::unique_ptr<Expression> Expression,
1412      size_t InsertIdx) {
1413    Substitutions.push_back(std::make_unique<NumericSubstitution>(
1414        this, ExpressionStr, std::move(Expression), InsertIdx));
1415    return Substitutions.back().get();
1416  }
1417  
1418  size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
1419    // Offset keeps track of the current offset within the input Str
1420    size_t Offset = 0;
1421    // [...] Nesting depth
1422    size_t BracketDepth = 0;
1423  
1424    while (!Str.empty()) {
1425      if (Str.startswith("]]") && BracketDepth == 0)
1426        return Offset;
1427      if (Str[0] == '\\') {
1428        // Backslash escapes the next char within regexes, so skip them both.
1429        Str = Str.substr(2);
1430        Offset += 2;
1431      } else {
1432        switch (Str[0]) {
1433        default:
1434          break;
1435        case '[':
1436          BracketDepth++;
1437          break;
1438        case ']':
1439          if (BracketDepth == 0) {
1440            SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
1441                            SourceMgr::DK_Error,
1442                            "missing closing \"]\" for regex variable");
1443            exit(1);
1444          }
1445          BracketDepth--;
1446          break;
1447        }
1448        Str = Str.substr(1);
1449        Offset++;
1450      }
1451    }
1452  
1453    return StringRef::npos;
1454  }
1455  
1456  StringRef FileCheck::CanonicalizeFile(MemoryBuffer &MB,
1457                                        SmallVectorImpl<char> &OutputBuffer) {
1458    OutputBuffer.reserve(MB.getBufferSize());
1459  
1460    for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
1461         Ptr != End; ++Ptr) {
1462      // Eliminate trailing dosish \r.
1463      if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
1464        continue;
1465      }
1466  
1467      // If current char is not a horizontal whitespace or if horizontal
1468      // whitespace canonicalization is disabled, dump it to output as is.
1469      if (Req.NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) {
1470        OutputBuffer.push_back(*Ptr);
1471        continue;
1472      }
1473  
1474      // Otherwise, add one space and advance over neighboring space.
1475      OutputBuffer.push_back(' ');
1476      while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t'))
1477        ++Ptr;
1478    }
1479  
1480    // Add a null byte and then return all but that byte.
1481    OutputBuffer.push_back('\0');
1482    return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
1483  }
1484  
1485  FileCheckDiag::FileCheckDiag(const SourceMgr &SM,
1486                               const Check::FileCheckType &CheckTy,
1487                               SMLoc CheckLoc, MatchType MatchTy,
1488                               SMRange InputRange, StringRef Note)
1489      : CheckTy(CheckTy), CheckLoc(CheckLoc), MatchTy(MatchTy), Note(Note) {
1490    auto Start = SM.getLineAndColumn(InputRange.Start);
1491    auto End = SM.getLineAndColumn(InputRange.End);
1492    InputStartLine = Start.first;
1493    InputStartCol = Start.second;
1494    InputEndLine = End.first;
1495    InputEndCol = End.second;
1496  }
1497  
1498  static bool IsPartOfWord(char c) {
1499    return (isAlnum(c) || c == '-' || c == '_');
1500  }
1501  
1502  Check::FileCheckType &Check::FileCheckType::setCount(int C) {
1503    assert(Count > 0 && "zero and negative counts are not supported");
1504    assert((C == 1 || Kind == CheckPlain) &&
1505           "count supported only for plain CHECK directives");
1506    Count = C;
1507    return *this;
1508  }
1509  
1510  std::string Check::FileCheckType::getModifiersDescription() const {
1511    if (Modifiers.none())
1512      return "";
1513    std::string Ret;
1514    raw_string_ostream OS(Ret);
1515    OS << '{';
1516    if (isLiteralMatch())
1517      OS << "LITERAL";
1518    OS << '}';
1519    return OS.str();
1520  }
1521  
1522  std::string Check::FileCheckType::getDescription(StringRef Prefix) const {
1523    // Append directive modifiers.
1524    auto WithModifiers = [this, Prefix](StringRef Str) -> std::string {
1525      return (Prefix + Str + getModifiersDescription()).str();
1526    };
1527  
1528    switch (Kind) {
1529    case Check::CheckNone:
1530      return "invalid";
1531    case Check::CheckMisspelled:
1532      return "misspelled";
1533    case Check::CheckPlain:
1534      if (Count > 1)
1535        return WithModifiers("-COUNT");
1536      return WithModifiers("");
1537    case Check::CheckNext:
1538      return WithModifiers("-NEXT");
1539    case Check::CheckSame:
1540      return WithModifiers("-SAME");
1541    case Check::CheckNot:
1542      return WithModifiers("-NOT");
1543    case Check::CheckDAG:
1544      return WithModifiers("-DAG");
1545    case Check::CheckLabel:
1546      return WithModifiers("-LABEL");
1547    case Check::CheckEmpty:
1548      return WithModifiers("-EMPTY");
1549    case Check::CheckComment:
1550      return std::string(Prefix);
1551    case Check::CheckEOF:
1552      return "implicit EOF";
1553    case Check::CheckBadNot:
1554      return "bad NOT";
1555    case Check::CheckBadCount:
1556      return "bad COUNT";
1557    }
1558    llvm_unreachable("unknown FileCheckType");
1559  }
1560  
1561  static std::pair<Check::FileCheckType, StringRef>
1562  FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix,
1563                bool &Misspelled) {
1564    if (Buffer.size() <= Prefix.size())
1565      return {Check::CheckNone, StringRef()};
1566  
1567    StringRef Rest = Buffer.drop_front(Prefix.size());
1568    // Check for comment.
1569    if (llvm::is_contained(Req.CommentPrefixes, Prefix)) {
1570      if (Rest.consume_front(":"))
1571        return {Check::CheckComment, Rest};
1572      // Ignore a comment prefix if it has a suffix like "-NOT".
1573      return {Check::CheckNone, StringRef()};
1574    }
1575  
1576    auto ConsumeModifiers = [&](Check::FileCheckType Ret)
1577        -> std::pair<Check::FileCheckType, StringRef> {
1578      if (Rest.consume_front(":"))
1579        return {Ret, Rest};
1580      if (!Rest.consume_front("{"))
1581        return {Check::CheckNone, StringRef()};
1582  
1583      // Parse the modifiers, speparated by commas.
1584      do {
1585        // Allow whitespace in modifiers list.
1586        Rest = Rest.ltrim();
1587        if (Rest.consume_front("LITERAL"))
1588          Ret.setLiteralMatch();
1589        else
1590          return {Check::CheckNone, Rest};
1591        // Allow whitespace in modifiers list.
1592        Rest = Rest.ltrim();
1593      } while (Rest.consume_front(","));
1594      if (!Rest.consume_front("}:"))
1595        return {Check::CheckNone, Rest};
1596      return {Ret, Rest};
1597    };
1598  
1599    // Verify that the prefix is followed by directive modifiers or a colon.
1600    if (Rest.consume_front(":"))
1601      return {Check::CheckPlain, Rest};
1602    if (Rest.front() == '{')
1603      return ConsumeModifiers(Check::CheckPlain);
1604  
1605    if (Rest.consume_front("_"))
1606      Misspelled = true;
1607    else if (!Rest.consume_front("-"))
1608      return {Check::CheckNone, StringRef()};
1609  
1610    if (Rest.consume_front("COUNT-")) {
1611      int64_t Count;
1612      if (Rest.consumeInteger(10, Count))
1613        // Error happened in parsing integer.
1614        return {Check::CheckBadCount, Rest};
1615      if (Count <= 0 || Count > INT32_MAX)
1616        return {Check::CheckBadCount, Rest};
1617      if (Rest.front() != ':' && Rest.front() != '{')
1618        return {Check::CheckBadCount, Rest};
1619      return ConsumeModifiers(
1620          Check::FileCheckType(Check::CheckPlain).setCount(Count));
1621    }
1622  
1623    // You can't combine -NOT with another suffix.
1624    if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
1625        Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
1626        Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") ||
1627        Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:"))
1628      return {Check::CheckBadNot, Rest};
1629  
1630    if (Rest.consume_front("NEXT"))
1631      return ConsumeModifiers(Check::CheckNext);
1632  
1633    if (Rest.consume_front("SAME"))
1634      return ConsumeModifiers(Check::CheckSame);
1635  
1636    if (Rest.consume_front("NOT"))
1637      return ConsumeModifiers(Check::CheckNot);
1638  
1639    if (Rest.consume_front("DAG"))
1640      return ConsumeModifiers(Check::CheckDAG);
1641  
1642    if (Rest.consume_front("LABEL"))
1643      return ConsumeModifiers(Check::CheckLabel);
1644  
1645    if (Rest.consume_front("EMPTY"))
1646      return ConsumeModifiers(Check::CheckEmpty);
1647  
1648    return {Check::CheckNone, Rest};
1649  }
1650  
1651  static std::pair<Check::FileCheckType, StringRef>
1652  FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix) {
1653    bool Misspelled = false;
1654    auto Res = FindCheckType(Req, Buffer, Prefix, Misspelled);
1655    if (Res.first != Check::CheckNone && Misspelled)
1656      return {Check::CheckMisspelled, Res.second};
1657    return Res;
1658  }
1659  
1660  // From the given position, find the next character after the word.
1661  static size_t SkipWord(StringRef Str, size_t Loc) {
1662    while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
1663      ++Loc;
1664    return Loc;
1665  }
1666  
1667  /// Searches the buffer for the first prefix in the prefix regular expression.
1668  ///
1669  /// This searches the buffer using the provided regular expression, however it
1670  /// enforces constraints beyond that:
1671  /// 1) The found prefix must not be a suffix of something that looks like
1672  ///    a valid prefix.
1673  /// 2) The found prefix must be followed by a valid check type suffix using \c
1674  ///    FindCheckType above.
1675  ///
1676  /// \returns a pair of StringRefs into the Buffer, which combines:
1677  ///   - the first match of the regular expression to satisfy these two is
1678  ///   returned,
1679  ///     otherwise an empty StringRef is returned to indicate failure.
1680  ///   - buffer rewound to the location right after parsed suffix, for parsing
1681  ///     to continue from
1682  ///
1683  /// If this routine returns a valid prefix, it will also shrink \p Buffer to
1684  /// start at the beginning of the returned prefix, increment \p LineNumber for
1685  /// each new line consumed from \p Buffer, and set \p CheckTy to the type of
1686  /// check found by examining the suffix.
1687  ///
1688  /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
1689  /// is unspecified.
1690  static std::pair<StringRef, StringRef>
1691  FindFirstMatchingPrefix(const FileCheckRequest &Req, Regex &PrefixRE,
1692                          StringRef &Buffer, unsigned &LineNumber,
1693                          Check::FileCheckType &CheckTy) {
1694    SmallVector<StringRef, 2> Matches;
1695  
1696    while (!Buffer.empty()) {
1697      // Find the first (longest) match using the RE.
1698      if (!PrefixRE.match(Buffer, &Matches))
1699        // No match at all, bail.
1700        return {StringRef(), StringRef()};
1701  
1702      StringRef Prefix = Matches[0];
1703      Matches.clear();
1704  
1705      assert(Prefix.data() >= Buffer.data() &&
1706             Prefix.data() < Buffer.data() + Buffer.size() &&
1707             "Prefix doesn't start inside of buffer!");
1708      size_t Loc = Prefix.data() - Buffer.data();
1709      StringRef Skipped = Buffer.substr(0, Loc);
1710      Buffer = Buffer.drop_front(Loc);
1711      LineNumber += Skipped.count('\n');
1712  
1713      // Check that the matched prefix isn't a suffix of some other check-like
1714      // word.
1715      // FIXME: This is a very ad-hoc check. it would be better handled in some
1716      // other way. Among other things it seems hard to distinguish between
1717      // intentional and unintentional uses of this feature.
1718      if (Skipped.empty() || !IsPartOfWord(Skipped.back())) {
1719        // Now extract the type.
1720        StringRef AfterSuffix;
1721        std::tie(CheckTy, AfterSuffix) = FindCheckType(Req, Buffer, Prefix);
1722  
1723        // If we've found a valid check type for this prefix, we're done.
1724        if (CheckTy != Check::CheckNone)
1725          return {Prefix, AfterSuffix};
1726      }
1727  
1728      // If we didn't successfully find a prefix, we need to skip this invalid
1729      // prefix and continue scanning. We directly skip the prefix that was
1730      // matched and any additional parts of that check-like word.
1731      Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size()));
1732    }
1733  
1734    // We ran out of buffer while skipping partial matches so give up.
1735    return {StringRef(), StringRef()};
1736  }
1737  
1738  void FileCheckPatternContext::createLineVariable() {
1739    assert(!LineVariable && "@LINE pseudo numeric variable already created");
1740    StringRef LineName = "@LINE";
1741    LineVariable = makeNumericVariable(
1742        LineName, ExpressionFormat(ExpressionFormat::Kind::Unsigned));
1743    GlobalNumericVariableTable[LineName] = LineVariable;
1744  }
1745  
1746  FileCheck::FileCheck(FileCheckRequest Req)
1747      : Req(Req), PatternContext(std::make_unique<FileCheckPatternContext>()),
1748        CheckStrings(std::make_unique<std::vector<FileCheckString>>()) {}
1749  
1750  FileCheck::~FileCheck() = default;
1751  
1752  bool FileCheck::readCheckFile(
1753      SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
1754      std::pair<unsigned, unsigned> *ImpPatBufferIDRange) {
1755    if (ImpPatBufferIDRange)
1756      ImpPatBufferIDRange->first = ImpPatBufferIDRange->second = 0;
1757  
1758    Error DefineError =
1759        PatternContext->defineCmdlineVariables(Req.GlobalDefines, SM);
1760    if (DefineError) {
1761      logAllUnhandledErrors(std::move(DefineError), errs());
1762      return true;
1763    }
1764  
1765    PatternContext->createLineVariable();
1766  
1767    std::vector<Pattern> ImplicitNegativeChecks;
1768    for (StringRef PatternString : Req.ImplicitCheckNot) {
1769      // Create a buffer with fake command line content in order to display the
1770      // command line option responsible for the specific implicit CHECK-NOT.
1771      std::string Prefix = "-implicit-check-not='";
1772      std::string Suffix = "'";
1773      std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
1774          (Prefix + PatternString + Suffix).str(), "command line");
1775  
1776      StringRef PatternInBuffer =
1777          CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
1778      unsigned BufferID = SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
1779      if (ImpPatBufferIDRange) {
1780        if (ImpPatBufferIDRange->first == ImpPatBufferIDRange->second) {
1781          ImpPatBufferIDRange->first = BufferID;
1782          ImpPatBufferIDRange->second = BufferID + 1;
1783        } else {
1784          assert(BufferID == ImpPatBufferIDRange->second &&
1785                 "expected consecutive source buffer IDs");
1786          ++ImpPatBufferIDRange->second;
1787        }
1788      }
1789  
1790      ImplicitNegativeChecks.push_back(
1791          Pattern(Check::CheckNot, PatternContext.get()));
1792      ImplicitNegativeChecks.back().parsePattern(PatternInBuffer,
1793                                                 "IMPLICIT-CHECK", SM, Req);
1794    }
1795  
1796    std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
1797  
1798    // LineNumber keeps track of the line on which CheckPrefix instances are
1799    // found.
1800    unsigned LineNumber = 1;
1801  
1802    std::set<StringRef> PrefixesNotFound(Req.CheckPrefixes.begin(),
1803                                         Req.CheckPrefixes.end());
1804    const size_t DistinctPrefixes = PrefixesNotFound.size();
1805    while (true) {
1806      Check::FileCheckType CheckTy;
1807  
1808      // See if a prefix occurs in the memory buffer.
1809      StringRef UsedPrefix;
1810      StringRef AfterSuffix;
1811      std::tie(UsedPrefix, AfterSuffix) =
1812          FindFirstMatchingPrefix(Req, PrefixRE, Buffer, LineNumber, CheckTy);
1813      if (UsedPrefix.empty())
1814        break;
1815      if (CheckTy != Check::CheckComment)
1816        PrefixesNotFound.erase(UsedPrefix);
1817  
1818      assert(UsedPrefix.data() == Buffer.data() &&
1819             "Failed to move Buffer's start forward, or pointed prefix outside "
1820             "of the buffer!");
1821      assert(AfterSuffix.data() >= Buffer.data() &&
1822             AfterSuffix.data() < Buffer.data() + Buffer.size() &&
1823             "Parsing after suffix doesn't start inside of buffer!");
1824  
1825      // Location to use for error messages.
1826      const char *UsedPrefixStart = UsedPrefix.data();
1827  
1828      // Skip the buffer to the end of parsed suffix (or just prefix, if no good
1829      // suffix was processed).
1830      Buffer = AfterSuffix.empty() ? Buffer.drop_front(UsedPrefix.size())
1831                                   : AfterSuffix;
1832  
1833      // Complain about misspelled directives.
1834      if (CheckTy == Check::CheckMisspelled) {
1835        StringRef UsedDirective(UsedPrefix.data(),
1836                                AfterSuffix.data() - UsedPrefix.data());
1837        SM.PrintMessage(SMLoc::getFromPointer(UsedDirective.data()),
1838                        SourceMgr::DK_Error,
1839                        "misspelled directive '" + UsedDirective + "'");
1840        return true;
1841      }
1842  
1843      // Complain about useful-looking but unsupported suffixes.
1844      if (CheckTy == Check::CheckBadNot) {
1845        SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
1846                        "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
1847        return true;
1848      }
1849  
1850      // Complain about invalid count specification.
1851      if (CheckTy == Check::CheckBadCount) {
1852        SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
1853                        "invalid count in -COUNT specification on prefix '" +
1854                            UsedPrefix + "'");
1855        return true;
1856      }
1857  
1858      // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
1859      // leading whitespace.
1860      if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines))
1861        Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
1862  
1863      // Scan ahead to the end of line.
1864      size_t EOL = Buffer.find_first_of("\n\r");
1865  
1866      // Remember the location of the start of the pattern, for diagnostics.
1867      SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
1868  
1869      // Extract the pattern from the buffer.
1870      StringRef PatternBuffer = Buffer.substr(0, EOL);
1871      Buffer = Buffer.substr(EOL);
1872  
1873      // If this is a comment, we're done.
1874      if (CheckTy == Check::CheckComment)
1875        continue;
1876  
1877      // Parse the pattern.
1878      Pattern P(CheckTy, PatternContext.get(), LineNumber);
1879      if (P.parsePattern(PatternBuffer, UsedPrefix, SM, Req))
1880        return true;
1881  
1882      // Verify that CHECK-LABEL lines do not define or use variables
1883      if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
1884        SM.PrintMessage(
1885            SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error,
1886            "found '" + UsedPrefix + "-LABEL:'"
1887                                     " with variable definition or use");
1888        return true;
1889      }
1890  
1891      // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them.
1892      if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame ||
1893           CheckTy == Check::CheckEmpty) &&
1894          CheckStrings->empty()) {
1895        StringRef Type = CheckTy == Check::CheckNext
1896                             ? "NEXT"
1897                             : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME";
1898        SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
1899                        SourceMgr::DK_Error,
1900                        "found '" + UsedPrefix + "-" + Type +
1901                            "' without previous '" + UsedPrefix + ": line");
1902        return true;
1903      }
1904  
1905      // Handle CHECK-DAG/-NOT.
1906      if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
1907        DagNotMatches.push_back(P);
1908        continue;
1909      }
1910  
1911      // Okay, add the string we captured to the output vector and move on.
1912      CheckStrings->emplace_back(P, UsedPrefix, PatternLoc);
1913      std::swap(DagNotMatches, CheckStrings->back().DagNotStrings);
1914      DagNotMatches = ImplicitNegativeChecks;
1915    }
1916  
1917    // When there are no used prefixes we report an error except in the case that
1918    // no prefix is specified explicitly but -implicit-check-not is specified.
1919    const bool NoPrefixesFound = PrefixesNotFound.size() == DistinctPrefixes;
1920    const bool SomePrefixesUnexpectedlyNotUsed =
1921        !Req.AllowUnusedPrefixes && !PrefixesNotFound.empty();
1922    if ((NoPrefixesFound || SomePrefixesUnexpectedlyNotUsed) &&
1923        (ImplicitNegativeChecks.empty() || !Req.IsDefaultCheckPrefix)) {
1924      errs() << "error: no check strings found with prefix"
1925             << (PrefixesNotFound.size() > 1 ? "es " : " ");
1926      bool First = true;
1927      for (StringRef MissingPrefix : PrefixesNotFound) {
1928        if (!First)
1929          errs() << ", ";
1930        errs() << "\'" << MissingPrefix << ":'";
1931        First = false;
1932      }
1933      errs() << '\n';
1934      return true;
1935    }
1936  
1937    // Add an EOF pattern for any trailing --implicit-check-not/CHECK-DAG/-NOTs,
1938    // and use the first prefix as a filler for the error message.
1939    if (!DagNotMatches.empty()) {
1940      CheckStrings->emplace_back(
1941          Pattern(Check::CheckEOF, PatternContext.get(), LineNumber + 1),
1942          *Req.CheckPrefixes.begin(), SMLoc::getFromPointer(Buffer.data()));
1943      std::swap(DagNotMatches, CheckStrings->back().DagNotStrings);
1944    }
1945  
1946    return false;
1947  }
1948  
1949  /// Returns either (1) \c ErrorSuccess if there was no error or (2)
1950  /// \c ErrorReported if an error was reported, such as an unexpected match.
1951  static Error printMatch(bool ExpectedMatch, const SourceMgr &SM,
1952                          StringRef Prefix, SMLoc Loc, const Pattern &Pat,
1953                          int MatchedCount, StringRef Buffer,
1954                          Pattern::MatchResult MatchResult,
1955                          const FileCheckRequest &Req,
1956                          std::vector<FileCheckDiag> *Diags) {
1957    // Suppress some verbosity if there's no error.
1958    bool HasError = !ExpectedMatch || MatchResult.TheError;
1959    bool PrintDiag = true;
1960    if (!HasError) {
1961      if (!Req.Verbose)
1962        return ErrorReported::reportedOrSuccess(HasError);
1963      if (!Req.VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF)
1964        return ErrorReported::reportedOrSuccess(HasError);
1965      // Due to their verbosity, we don't print verbose diagnostics here if we're
1966      // gathering them for Diags to be rendered elsewhere, but we always print
1967      // other diagnostics.
1968      PrintDiag = !Diags;
1969    }
1970  
1971    // Add "found" diagnostic, substitutions, and variable definitions to Diags.
1972    FileCheckDiag::MatchType MatchTy = ExpectedMatch
1973                                           ? FileCheckDiag::MatchFoundAndExpected
1974                                           : FileCheckDiag::MatchFoundButExcluded;
1975    SMRange MatchRange = ProcessMatchResult(MatchTy, SM, Loc, Pat.getCheckTy(),
1976                                            Buffer, MatchResult.TheMatch->Pos,
1977                                            MatchResult.TheMatch->Len, Diags);
1978    if (Diags) {
1979      Pat.printSubstitutions(SM, Buffer, MatchRange, MatchTy, Diags);
1980      Pat.printVariableDefs(SM, MatchTy, Diags);
1981    }
1982    if (!PrintDiag) {
1983      assert(!HasError && "expected to report more diagnostics for error");
1984      return ErrorReported::reportedOrSuccess(HasError);
1985    }
1986  
1987    // Print the match.
1988    std::string Message = formatv("{0}: {1} string found in input",
1989                                  Pat.getCheckTy().getDescription(Prefix),
1990                                  (ExpectedMatch ? "expected" : "excluded"))
1991                              .str();
1992    if (Pat.getCount() > 1)
1993      Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str();
1994    SM.PrintMessage(
1995        Loc, ExpectedMatch ? SourceMgr::DK_Remark : SourceMgr::DK_Error, Message);
1996    SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, "found here",
1997                    {MatchRange});
1998  
1999    // Print additional information, which can be useful even if there are errors.
2000    Pat.printSubstitutions(SM, Buffer, MatchRange, MatchTy, nullptr);
2001    Pat.printVariableDefs(SM, MatchTy, nullptr);
2002  
2003    // Print errors and add them to Diags.  We report these errors after the match
2004    // itself because we found them after the match.  If we had found them before
2005    // the match, we'd be in printNoMatch.
2006    handleAllErrors(std::move(MatchResult.TheError),
2007                    [&](const ErrorDiagnostic &E) {
2008                      E.log(errs());
2009                      if (Diags) {
2010                        Diags->emplace_back(SM, Pat.getCheckTy(), Loc,
2011                                            FileCheckDiag::MatchFoundErrorNote,
2012                                            E.getRange(), E.getMessage().str());
2013                      }
2014                    });
2015    return ErrorReported::reportedOrSuccess(HasError);
2016  }
2017  
2018  /// Returns either (1) \c ErrorSuccess if there was no error, or (2)
2019  /// \c ErrorReported if an error was reported, such as an expected match not
2020  /// found.
2021  static Error printNoMatch(bool ExpectedMatch, const SourceMgr &SM,
2022                            StringRef Prefix, SMLoc Loc, const Pattern &Pat,
2023                            int MatchedCount, StringRef Buffer, Error MatchError,
2024                            bool VerboseVerbose,
2025                            std::vector<FileCheckDiag> *Diags) {
2026    // Print any pattern errors, and record them to be added to Diags later.
2027    bool HasError = ExpectedMatch;
2028    bool HasPatternError = false;
2029    FileCheckDiag::MatchType MatchTy = ExpectedMatch
2030                                           ? FileCheckDiag::MatchNoneButExpected
2031                                           : FileCheckDiag::MatchNoneAndExcluded;
2032    SmallVector<std::string, 4> ErrorMsgs;
2033    handleAllErrors(
2034        std::move(MatchError),
2035        [&](const ErrorDiagnostic &E) {
2036          HasError = HasPatternError = true;
2037          MatchTy = FileCheckDiag::MatchNoneForInvalidPattern;
2038          E.log(errs());
2039          if (Diags)
2040            ErrorMsgs.push_back(E.getMessage().str());
2041        },
2042        // NotFoundError is why printNoMatch was invoked.
2043        [](const NotFoundError &E) {});
2044  
2045    // Suppress some verbosity if there's no error.
2046    bool PrintDiag = true;
2047    if (!HasError) {
2048      if (!VerboseVerbose)
2049        return ErrorReported::reportedOrSuccess(HasError);
2050      // Due to their verbosity, we don't print verbose diagnostics here if we're
2051      // gathering them for Diags to be rendered elsewhere, but we always print
2052      // other diagnostics.
2053      PrintDiag = !Diags;
2054    }
2055  
2056    // Add "not found" diagnostic, substitutions, and pattern errors to Diags.
2057    //
2058    // We handle Diags a little differently than the errors we print directly:
2059    // we add the "not found" diagnostic to Diags even if there are pattern
2060    // errors.  The reason is that we need to attach pattern errors as notes
2061    // somewhere in the input, and the input search range from the "not found"
2062    // diagnostic is all we have to anchor them.
2063    SMRange SearchRange = ProcessMatchResult(MatchTy, SM, Loc, Pat.getCheckTy(),
2064                                             Buffer, 0, Buffer.size(), Diags);
2065    if (Diags) {
2066      SMRange NoteRange = SMRange(SearchRange.Start, SearchRange.Start);
2067      for (StringRef ErrorMsg : ErrorMsgs)
2068        Diags->emplace_back(SM, Pat.getCheckTy(), Loc, MatchTy, NoteRange,
2069                            ErrorMsg);
2070      Pat.printSubstitutions(SM, Buffer, SearchRange, MatchTy, Diags);
2071    }
2072    if (!PrintDiag) {
2073      assert(!HasError && "expected to report more diagnostics for error");
2074      return ErrorReported::reportedOrSuccess(HasError);
2075    }
2076  
2077    // Print "not found" diagnostic, except that's implied if we already printed a
2078    // pattern error.
2079    if (!HasPatternError) {
2080      std::string Message = formatv("{0}: {1} string not found in input",
2081                                    Pat.getCheckTy().getDescription(Prefix),
2082                                    (ExpectedMatch ? "expected" : "excluded"))
2083                                .str();
2084      if (Pat.getCount() > 1)
2085        Message +=
2086            formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str();
2087      SM.PrintMessage(Loc,
2088                      ExpectedMatch ? SourceMgr::DK_Error : SourceMgr::DK_Remark,
2089                      Message);
2090      SM.PrintMessage(SearchRange.Start, SourceMgr::DK_Note,
2091                      "scanning from here");
2092    }
2093  
2094    // Print additional information, which can be useful even after a pattern
2095    // error.
2096    Pat.printSubstitutions(SM, Buffer, SearchRange, MatchTy, nullptr);
2097    if (ExpectedMatch)
2098      Pat.printFuzzyMatch(SM, Buffer, Diags);
2099    return ErrorReported::reportedOrSuccess(HasError);
2100  }
2101  
2102  /// Returns either (1) \c ErrorSuccess if there was no error, or (2)
2103  /// \c ErrorReported if an error was reported.
2104  static Error reportMatchResult(bool ExpectedMatch, const SourceMgr &SM,
2105                                 StringRef Prefix, SMLoc Loc, const Pattern &Pat,
2106                                 int MatchedCount, StringRef Buffer,
2107                                 Pattern::MatchResult MatchResult,
2108                                 const FileCheckRequest &Req,
2109                                 std::vector<FileCheckDiag> *Diags) {
2110    if (MatchResult.TheMatch)
2111      return printMatch(ExpectedMatch, SM, Prefix, Loc, Pat, MatchedCount, Buffer,
2112                        std::move(MatchResult), Req, Diags);
2113    return printNoMatch(ExpectedMatch, SM, Prefix, Loc, Pat, MatchedCount, Buffer,
2114                        std::move(MatchResult.TheError), Req.VerboseVerbose,
2115                        Diags);
2116  }
2117  
2118  /// Counts the number of newlines in the specified range.
2119  static unsigned CountNumNewlinesBetween(StringRef Range,
2120                                          const char *&FirstNewLine) {
2121    unsigned NumNewLines = 0;
2122    while (true) {
2123      // Scan for newline.
2124      Range = Range.substr(Range.find_first_of("\n\r"));
2125      if (Range.empty())
2126        return NumNewLines;
2127  
2128      ++NumNewLines;
2129  
2130      // Handle \n\r and \r\n as a single newline.
2131      if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') &&
2132          (Range[0] != Range[1]))
2133        Range = Range.substr(1);
2134      Range = Range.substr(1);
2135  
2136      if (NumNewLines == 1)
2137        FirstNewLine = Range.begin();
2138    }
2139  }
2140  
2141  size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer,
2142                                bool IsLabelScanMode, size_t &MatchLen,
2143                                FileCheckRequest &Req,
2144                                std::vector<FileCheckDiag> *Diags) const {
2145    size_t LastPos = 0;
2146    std::vector<const Pattern *> NotStrings;
2147  
2148    // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
2149    // bounds; we have not processed variable definitions within the bounded block
2150    // yet so cannot handle any final CHECK-DAG yet; this is handled when going
2151    // over the block again (including the last CHECK-LABEL) in normal mode.
2152    if (!IsLabelScanMode) {
2153      // Match "dag strings" (with mixed "not strings" if any).
2154      LastPos = CheckDag(SM, Buffer, NotStrings, Req, Diags);
2155      if (LastPos == StringRef::npos)
2156        return StringRef::npos;
2157    }
2158  
2159    // Match itself from the last position after matching CHECK-DAG.
2160    size_t LastMatchEnd = LastPos;
2161    size_t FirstMatchPos = 0;
2162    // Go match the pattern Count times. Majority of patterns only match with
2163    // count 1 though.
2164    assert(Pat.getCount() != 0 && "pattern count can not be zero");
2165    for (int i = 1; i <= Pat.getCount(); i++) {
2166      StringRef MatchBuffer = Buffer.substr(LastMatchEnd);
2167      // get a match at current start point
2168      Pattern::MatchResult MatchResult = Pat.match(MatchBuffer, SM);
2169  
2170      // report
2171      if (Error Err = reportMatchResult(/*ExpectedMatch=*/true, SM, Prefix, Loc,
2172                                        Pat, i, MatchBuffer,
2173                                        std::move(MatchResult), Req, Diags)) {
2174        cantFail(handleErrors(std::move(Err), [&](const ErrorReported &E) {}));
2175        return StringRef::npos;
2176      }
2177  
2178      size_t MatchPos = MatchResult.TheMatch->Pos;
2179      if (i == 1)
2180        FirstMatchPos = LastPos + MatchPos;
2181  
2182      // move start point after the match
2183      LastMatchEnd += MatchPos + MatchResult.TheMatch->Len;
2184    }
2185    // Full match len counts from first match pos.
2186    MatchLen = LastMatchEnd - FirstMatchPos;
2187  
2188    // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
2189    // or CHECK-NOT
2190    if (!IsLabelScanMode) {
2191      size_t MatchPos = FirstMatchPos - LastPos;
2192      StringRef MatchBuffer = Buffer.substr(LastPos);
2193      StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
2194  
2195      // If this check is a "CHECK-NEXT", verify that the previous match was on
2196      // the previous line (i.e. that there is one newline between them).
2197      if (CheckNext(SM, SkippedRegion)) {
2198        ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc,
2199                           Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen,
2200                           Diags, Req.Verbose);
2201        return StringRef::npos;
2202      }
2203  
2204      // If this check is a "CHECK-SAME", verify that the previous match was on
2205      // the same line (i.e. that there is no newline between them).
2206      if (CheckSame(SM, SkippedRegion)) {
2207        ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc,
2208                           Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen,
2209                           Diags, Req.Verbose);
2210        return StringRef::npos;
2211      }
2212  
2213      // If this match had "not strings", verify that they don't exist in the
2214      // skipped region.
2215      if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags))
2216        return StringRef::npos;
2217    }
2218  
2219    return FirstMatchPos;
2220  }
2221  
2222  bool FileCheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
2223    if (Pat.getCheckTy() != Check::CheckNext &&
2224        Pat.getCheckTy() != Check::CheckEmpty)
2225      return false;
2226  
2227    Twine CheckName =
2228        Prefix +
2229        Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT");
2230  
2231    // Count the number of newlines between the previous match and this one.
2232    const char *FirstNewLine = nullptr;
2233    unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
2234  
2235    if (NumNewLines == 0) {
2236      SM.PrintMessage(Loc, SourceMgr::DK_Error,
2237                      CheckName + ": is on the same line as previous match");
2238      SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
2239                      "'next' match was here");
2240      SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
2241                      "previous match ended here");
2242      return true;
2243    }
2244  
2245    if (NumNewLines != 1) {
2246      SM.PrintMessage(Loc, SourceMgr::DK_Error,
2247                      CheckName +
2248                          ": is not on the line after the previous match");
2249      SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
2250                      "'next' match was here");
2251      SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
2252                      "previous match ended here");
2253      SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
2254                      "non-matching line after previous match is here");
2255      return true;
2256    }
2257  
2258    return false;
2259  }
2260  
2261  bool FileCheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
2262    if (Pat.getCheckTy() != Check::CheckSame)
2263      return false;
2264  
2265    // Count the number of newlines between the previous match and this one.
2266    const char *FirstNewLine = nullptr;
2267    unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
2268  
2269    if (NumNewLines != 0) {
2270      SM.PrintMessage(Loc, SourceMgr::DK_Error,
2271                      Prefix +
2272                          "-SAME: is not on the same line as the previous match");
2273      SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
2274                      "'next' match was here");
2275      SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
2276                      "previous match ended here");
2277      return true;
2278    }
2279  
2280    return false;
2281  }
2282  
2283  bool FileCheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
2284                                 const std::vector<const Pattern *> &NotStrings,
2285                                 const FileCheckRequest &Req,
2286                                 std::vector<FileCheckDiag> *Diags) const {
2287    bool DirectiveFail = false;
2288    for (const Pattern *Pat : NotStrings) {
2289      assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
2290      Pattern::MatchResult MatchResult = Pat->match(Buffer, SM);
2291      if (Error Err = reportMatchResult(/*ExpectedMatch=*/false, SM, Prefix,
2292                                        Pat->getLoc(), *Pat, 1, Buffer,
2293                                        std::move(MatchResult), Req, Diags)) {
2294        cantFail(handleErrors(std::move(Err), [&](const ErrorReported &E) {}));
2295        DirectiveFail = true;
2296        continue;
2297      }
2298    }
2299    return DirectiveFail;
2300  }
2301  
2302  size_t FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
2303                                   std::vector<const Pattern *> &NotStrings,
2304                                   const FileCheckRequest &Req,
2305                                   std::vector<FileCheckDiag> *Diags) const {
2306    if (DagNotStrings.empty())
2307      return 0;
2308  
2309    // The start of the search range.
2310    size_t StartPos = 0;
2311  
2312    struct MatchRange {
2313      size_t Pos;
2314      size_t End;
2315    };
2316    // A sorted list of ranges for non-overlapping CHECK-DAG matches.  Match
2317    // ranges are erased from this list once they are no longer in the search
2318    // range.
2319    std::list<MatchRange> MatchRanges;
2320  
2321    // We need PatItr and PatEnd later for detecting the end of a CHECK-DAG
2322    // group, so we don't use a range-based for loop here.
2323    for (auto PatItr = DagNotStrings.begin(), PatEnd = DagNotStrings.end();
2324         PatItr != PatEnd; ++PatItr) {
2325      const Pattern &Pat = *PatItr;
2326      assert((Pat.getCheckTy() == Check::CheckDAG ||
2327              Pat.getCheckTy() == Check::CheckNot) &&
2328             "Invalid CHECK-DAG or CHECK-NOT!");
2329  
2330      if (Pat.getCheckTy() == Check::CheckNot) {
2331        NotStrings.push_back(&Pat);
2332        continue;
2333      }
2334  
2335      assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
2336  
2337      // CHECK-DAG always matches from the start.
2338      size_t MatchLen = 0, MatchPos = StartPos;
2339  
2340      // Search for a match that doesn't overlap a previous match in this
2341      // CHECK-DAG group.
2342      for (auto MI = MatchRanges.begin(), ME = MatchRanges.end(); true; ++MI) {
2343        StringRef MatchBuffer = Buffer.substr(MatchPos);
2344        Pattern::MatchResult MatchResult = Pat.match(MatchBuffer, SM);
2345        // With a group of CHECK-DAGs, a single mismatching means the match on
2346        // that group of CHECK-DAGs fails immediately.
2347        if (MatchResult.TheError || Req.VerboseVerbose) {
2348          if (Error Err = reportMatchResult(/*ExpectedMatch=*/true, SM, Prefix,
2349                                            Pat.getLoc(), Pat, 1, MatchBuffer,
2350                                            std::move(MatchResult), Req, Diags)) {
2351            cantFail(
2352                handleErrors(std::move(Err), [&](const ErrorReported &E) {}));
2353            return StringRef::npos;
2354          }
2355        }
2356        MatchLen = MatchResult.TheMatch->Len;
2357        // Re-calc it as the offset relative to the start of the original
2358        // string.
2359        MatchPos += MatchResult.TheMatch->Pos;
2360        MatchRange M{MatchPos, MatchPos + MatchLen};
2361        if (Req.AllowDeprecatedDagOverlap) {
2362          // We don't need to track all matches in this mode, so we just maintain
2363          // one match range that encompasses the current CHECK-DAG group's
2364          // matches.
2365          if (MatchRanges.empty())
2366            MatchRanges.insert(MatchRanges.end(), M);
2367          else {
2368            auto Block = MatchRanges.begin();
2369            Block->Pos = std::min(Block->Pos, M.Pos);
2370            Block->End = std::max(Block->End, M.End);
2371          }
2372          break;
2373        }
2374        // Iterate previous matches until overlapping match or insertion point.
2375        bool Overlap = false;
2376        for (; MI != ME; ++MI) {
2377          if (M.Pos < MI->End) {
2378            // !Overlap => New match has no overlap and is before this old match.
2379            // Overlap => New match overlaps this old match.
2380            Overlap = MI->Pos < M.End;
2381            break;
2382          }
2383        }
2384        if (!Overlap) {
2385          // Insert non-overlapping match into list.
2386          MatchRanges.insert(MI, M);
2387          break;
2388        }
2389        if (Req.VerboseVerbose) {
2390          // Due to their verbosity, we don't print verbose diagnostics here if
2391          // we're gathering them for a different rendering, but we always print
2392          // other diagnostics.
2393          if (!Diags) {
2394            SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos);
2395            SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End);
2396            SMRange OldRange(OldStart, OldEnd);
2397            SM.PrintMessage(OldStart, SourceMgr::DK_Note,
2398                            "match discarded, overlaps earlier DAG match here",
2399                            {OldRange});
2400          } else {
2401            SMLoc CheckLoc = Diags->rbegin()->CheckLoc;
2402            for (auto I = Diags->rbegin(), E = Diags->rend();
2403                 I != E && I->CheckLoc == CheckLoc; ++I)
2404              I->MatchTy = FileCheckDiag::MatchFoundButDiscarded;
2405          }
2406        }
2407        MatchPos = MI->End;
2408      }
2409      if (!Req.VerboseVerbose)
2410        cantFail(printMatch(
2411            /*ExpectedMatch=*/true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer,
2412            Pattern::MatchResult(MatchPos, MatchLen, Error::success()), Req,
2413            Diags));
2414  
2415      // Handle the end of a CHECK-DAG group.
2416      if (std::next(PatItr) == PatEnd ||
2417          std::next(PatItr)->getCheckTy() == Check::CheckNot) {
2418        if (!NotStrings.empty()) {
2419          // If there are CHECK-NOTs between two CHECK-DAGs or from CHECK to
2420          // CHECK-DAG, verify that there are no 'not' strings occurred in that
2421          // region.
2422          StringRef SkippedRegion =
2423              Buffer.slice(StartPos, MatchRanges.begin()->Pos);
2424          if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags))
2425            return StringRef::npos;
2426          // Clear "not strings".
2427          NotStrings.clear();
2428        }
2429        // All subsequent CHECK-DAGs and CHECK-NOTs should be matched from the
2430        // end of this CHECK-DAG group's match range.
2431        StartPos = MatchRanges.rbegin()->End;
2432        // Don't waste time checking for (impossible) overlaps before that.
2433        MatchRanges.clear();
2434      }
2435    }
2436  
2437    return StartPos;
2438  }
2439  
2440  static bool ValidatePrefixes(StringRef Kind, StringSet<> &UniquePrefixes,
2441                               ArrayRef<StringRef> SuppliedPrefixes) {
2442    for (StringRef Prefix : SuppliedPrefixes) {
2443      if (Prefix.empty()) {
2444        errs() << "error: supplied " << Kind << " prefix must not be the empty "
2445               << "string\n";
2446        return false;
2447      }
2448      static const Regex Validator("^[a-zA-Z0-9_-]*$");
2449      if (!Validator.match(Prefix)) {
2450        errs() << "error: supplied " << Kind << " prefix must start with a "
2451               << "letter and contain only alphanumeric characters, hyphens, and "
2452               << "underscores: '" << Prefix << "'\n";
2453        return false;
2454      }
2455      if (!UniquePrefixes.insert(Prefix).second) {
2456        errs() << "error: supplied " << Kind << " prefix must be unique among "
2457               << "check and comment prefixes: '" << Prefix << "'\n";
2458        return false;
2459      }
2460    }
2461    return true;
2462  }
2463  
// Prefixes used when the request supplies none. Both the strings and the
// pointers to them are const so that the tables cannot be modified.
static const char *const DefaultCheckPrefixes[] = {"CHECK"};
static const char *const DefaultCommentPrefixes[] = {"COM", "RUN"};
2466  
2467  bool FileCheck::ValidateCheckPrefixes() {
2468    StringSet<> UniquePrefixes;
2469    // Add default prefixes to catch user-supplied duplicates of them below.
2470    if (Req.CheckPrefixes.empty()) {
2471      for (const char *Prefix : DefaultCheckPrefixes)
2472        UniquePrefixes.insert(Prefix);
2473    }
2474    if (Req.CommentPrefixes.empty()) {
2475      for (const char *Prefix : DefaultCommentPrefixes)
2476        UniquePrefixes.insert(Prefix);
2477    }
2478    // Do not validate the default prefixes, or diagnostics about duplicates might
2479    // incorrectly indicate that they were supplied by the user.
2480    if (!ValidatePrefixes("check", UniquePrefixes, Req.CheckPrefixes))
2481      return false;
2482    if (!ValidatePrefixes("comment", UniquePrefixes, Req.CommentPrefixes))
2483      return false;
2484    return true;
2485  }
2486  
2487  Regex FileCheck::buildCheckPrefixRegex() {
2488    if (Req.CheckPrefixes.empty()) {
2489      for (const char *Prefix : DefaultCheckPrefixes)
2490        Req.CheckPrefixes.push_back(Prefix);
2491      Req.IsDefaultCheckPrefix = true;
2492    }
2493    if (Req.CommentPrefixes.empty()) {
2494      for (const char *Prefix : DefaultCommentPrefixes)
2495        Req.CommentPrefixes.push_back(Prefix);
2496    }
2497  
2498    // We already validated the contents of CheckPrefixes and CommentPrefixes so
2499    // just concatenate them as alternatives.
2500    SmallString<32> PrefixRegexStr;
2501    for (size_t I = 0, E = Req.CheckPrefixes.size(); I != E; ++I) {
2502      if (I != 0)
2503        PrefixRegexStr.push_back('|');
2504      PrefixRegexStr.append(Req.CheckPrefixes[I]);
2505    }
2506    for (StringRef Prefix : Req.CommentPrefixes) {
2507      PrefixRegexStr.push_back('|');
2508      PrefixRegexStr.append(Prefix);
2509    }
2510  
2511    return Regex(PrefixRegexStr);
2512  }
2513  
2514  Error FileCheckPatternContext::defineCmdlineVariables(
2515      ArrayRef<StringRef> CmdlineDefines, SourceMgr &SM) {
2516    assert(GlobalVariableTable.empty() && GlobalNumericVariableTable.empty() &&
2517           "Overriding defined variable with command-line variable definitions");
2518  
2519    if (CmdlineDefines.empty())
2520      return Error::success();
2521  
2522    // Create a string representing the vector of command-line definitions. Each
2523    // definition is on its own line and prefixed with a definition number to
2524    // clarify which definition a given diagnostic corresponds to.
2525    unsigned I = 0;
2526    Error Errs = Error::success();
2527    std::string CmdlineDefsDiag;
2528    SmallVector<std::pair<size_t, size_t>, 4> CmdlineDefsIndices;
2529    for (StringRef CmdlineDef : CmdlineDefines) {
2530      std::string DefPrefix = ("Global define #" + Twine(++I) + ": ").str();
2531      size_t EqIdx = CmdlineDef.find('=');
2532      if (EqIdx == StringRef::npos) {
2533        CmdlineDefsIndices.push_back(std::make_pair(CmdlineDefsDiag.size(), 0));
2534        continue;
2535      }
2536      // Numeric variable definition.
2537      if (CmdlineDef[0] == '#') {
2538        // Append a copy of the command-line definition adapted to use the same
2539        // format as in the input file to be able to reuse
2540        // parseNumericSubstitutionBlock.
2541        CmdlineDefsDiag += (DefPrefix + CmdlineDef + " (parsed as: [[").str();
2542        std::string SubstitutionStr = std::string(CmdlineDef);
2543        SubstitutionStr[EqIdx] = ':';
2544        CmdlineDefsIndices.push_back(
2545            std::make_pair(CmdlineDefsDiag.size(), SubstitutionStr.size()));
2546        CmdlineDefsDiag += (SubstitutionStr + Twine("]])\n")).str();
2547      } else {
2548        CmdlineDefsDiag += DefPrefix;
2549        CmdlineDefsIndices.push_back(
2550            std::make_pair(CmdlineDefsDiag.size(), CmdlineDef.size()));
2551        CmdlineDefsDiag += (CmdlineDef + "\n").str();
2552      }
2553    }
2554  
2555    // Create a buffer with fake command line content in order to display
2556    // parsing diagnostic with location information and point to the
2557    // global definition with invalid syntax.
2558    std::unique_ptr<MemoryBuffer> CmdLineDefsDiagBuffer =
2559        MemoryBuffer::getMemBufferCopy(CmdlineDefsDiag, "Global defines");
2560    StringRef CmdlineDefsDiagRef = CmdLineDefsDiagBuffer->getBuffer();
2561    SM.AddNewSourceBuffer(std::move(CmdLineDefsDiagBuffer), SMLoc());
2562  
2563    for (std::pair<size_t, size_t> CmdlineDefIndices : CmdlineDefsIndices) {
2564      StringRef CmdlineDef = CmdlineDefsDiagRef.substr(CmdlineDefIndices.first,
2565                                                       CmdlineDefIndices.second);
2566      if (CmdlineDef.empty()) {
2567        Errs = joinErrors(
2568            std::move(Errs),
2569            ErrorDiagnostic::get(SM, CmdlineDef,
2570                                 "missing equal sign in global definition"));
2571        continue;
2572      }
2573  
2574      // Numeric variable definition.
2575      if (CmdlineDef[0] == '#') {
2576        // Now parse the definition both to check that the syntax is correct and
2577        // to create the necessary class instance.
2578        StringRef CmdlineDefExpr = CmdlineDef.substr(1);
2579        std::optional<NumericVariable *> DefinedNumericVariable;
2580        Expected<std::unique_ptr<Expression>> ExpressionResult =
2581            Pattern::parseNumericSubstitutionBlock(CmdlineDefExpr,
2582                                                   DefinedNumericVariable, false,
2583                                                   std::nullopt, this, SM);
2584        if (!ExpressionResult) {
2585          Errs = joinErrors(std::move(Errs), ExpressionResult.takeError());
2586          continue;
2587        }
2588        std::unique_ptr<Expression> Expression = std::move(*ExpressionResult);
2589        // Now evaluate the expression whose value this variable should be set
2590        // to, since the expression of a command-line variable definition should
2591        // only use variables defined earlier on the command-line. If not, this
2592        // is an error and we report it.
2593        Expected<ExpressionValue> Value = Expression->getAST()->eval();
2594        if (!Value) {
2595          Errs = joinErrors(std::move(Errs), Value.takeError());
2596          continue;
2597        }
2598  
2599        assert(DefinedNumericVariable && "No variable defined");
2600        (*DefinedNumericVariable)->setValue(*Value);
2601  
2602        // Record this variable definition.
2603        GlobalNumericVariableTable[(*DefinedNumericVariable)->getName()] =
2604            *DefinedNumericVariable;
2605      } else {
2606        // String variable definition.
2607        std::pair<StringRef, StringRef> CmdlineNameVal = CmdlineDef.split('=');
2608        StringRef CmdlineName = CmdlineNameVal.first;
2609        StringRef OrigCmdlineName = CmdlineName;
2610        Expected<Pattern::VariableProperties> ParseVarResult =
2611            Pattern::parseVariable(CmdlineName, SM);
2612        if (!ParseVarResult) {
2613          Errs = joinErrors(std::move(Errs), ParseVarResult.takeError());
2614          continue;
2615        }
2616        // Check that CmdlineName does not denote a pseudo variable is only
2617        // composed of the parsed numeric variable. This catches cases like
2618        // "FOO+2" in a "FOO+2=10" definition.
2619        if (ParseVarResult->IsPseudo || !CmdlineName.empty()) {
2620          Errs = joinErrors(std::move(Errs),
2621                            ErrorDiagnostic::get(
2622                                SM, OrigCmdlineName,
2623                                "invalid name in string variable definition '" +
2624                                    OrigCmdlineName + "'"));
2625          continue;
2626        }
2627        StringRef Name = ParseVarResult->Name;
2628  
2629        // Detect collisions between string and numeric variables when the former
2630        // is created later than the latter.
2631        if (GlobalNumericVariableTable.contains(Name)) {
2632          Errs = joinErrors(std::move(Errs),
2633                            ErrorDiagnostic::get(SM, Name,
2634                                                 "numeric variable with name '" +
2635                                                     Name + "' already exists"));
2636          continue;
2637        }
2638        GlobalVariableTable.insert(CmdlineNameVal);
2639        // Mark the string variable as defined to detect collisions between
2640        // string and numeric variables in defineCmdlineVariables when the latter
2641        // is created later than the former. We cannot reuse GlobalVariableTable
2642        // for this by populating it with an empty string since we would then
2643        // lose the ability to detect the use of an undefined variable in
2644        // match().
2645        DefinedVariableTable[Name] = true;
2646      }
2647    }
2648  
2649    return Errs;
2650  }
2651  
2652  void FileCheckPatternContext::clearLocalVars() {
2653    SmallVector<StringRef, 16> LocalPatternVars, LocalNumericVars;
2654    for (const StringMapEntry<StringRef> &Var : GlobalVariableTable)
2655      if (Var.first()[0] != '$')
2656        LocalPatternVars.push_back(Var.first());
2657  
2658    // Numeric substitution reads the value of a variable directly, not via
2659    // GlobalNumericVariableTable. Therefore, we clear local variables by
2660    // clearing their value which will lead to a numeric substitution failure. We
2661    // also mark the variable for removal from GlobalNumericVariableTable since
2662    // this is what defineCmdlineVariables checks to decide that no global
2663    // variable has been defined.
2664    for (const auto &Var : GlobalNumericVariableTable)
2665      if (Var.first()[0] != '$') {
2666        Var.getValue()->clearValue();
2667        LocalNumericVars.push_back(Var.first());
2668      }
2669  
2670    for (const auto &Var : LocalPatternVars)
2671      GlobalVariableTable.erase(Var);
2672    for (const auto &Var : LocalNumericVars)
2673      GlobalNumericVariableTable.erase(Var);
2674  }
2675  
2676  bool FileCheck::checkInput(SourceMgr &SM, StringRef Buffer,
2677                             std::vector<FileCheckDiag> *Diags) {
2678    bool ChecksFailed = false;
2679  
2680    unsigned i = 0, j = 0, e = CheckStrings->size();
2681    while (true) {
2682      StringRef CheckRegion;
2683      if (j == e) {
2684        CheckRegion = Buffer;
2685      } else {
2686        const FileCheckString &CheckLabelStr = (*CheckStrings)[j];
2687        if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
2688          ++j;
2689          continue;
2690        }
2691  
2692        // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
2693        size_t MatchLabelLen = 0;
2694        size_t MatchLabelPos =
2695            CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, Req, Diags);
2696        if (MatchLabelPos == StringRef::npos)
2697          // Immediately bail if CHECK-LABEL fails, nothing else we can do.
2698          return false;
2699  
2700        CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
2701        Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
2702        ++j;
2703      }
2704  
2705      // Do not clear the first region as it's the one before the first
2706      // CHECK-LABEL and it would clear variables defined on the command-line
2707      // before they get used.
2708      if (i != 0 && Req.EnableVarScope)
2709        PatternContext->clearLocalVars();
2710  
2711      for (; i != j; ++i) {
2712        const FileCheckString &CheckStr = (*CheckStrings)[i];
2713  
2714        // Check each string within the scanned region, including a second check
2715        // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
2716        size_t MatchLen = 0;
2717        size_t MatchPos =
2718            CheckStr.Check(SM, CheckRegion, false, MatchLen, Req, Diags);
2719  
2720        if (MatchPos == StringRef::npos) {
2721          ChecksFailed = true;
2722          i = j;
2723          break;
2724        }
2725  
2726        CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
2727      }
2728  
2729      if (j == e)
2730        break;
2731    }
2732  
2733    // Success if no checks failed.
2734    return !ChecksFailed;
2735  }
2736