xref: /freebsd/contrib/llvm-project/llvm/lib/FileCheck/FileCheckImpl.h (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //===-- FileCheckImpl.h - Private FileCheck Interface ------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the private interfaces of FileCheck. Its purpose is to
10 // allow unit testing of FileCheck and to separate the interface from the
11 // implementation. It is only meant to be used by FileCheck.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_LIB_FILECHECK_FILECHECKIMPL_H
16 #define LLVM_LIB_FILECHECK_FILECHECKIMPL_H
17 
18 #include "llvm/ADT/APInt.h"
19 #include "llvm/ADT/StringMap.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/FileCheck/FileCheck.h"
22 #include "llvm/Support/Compiler.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/SourceMgr.h"
25 #include <map>
26 #include <optional>
27 #include <string>
28 #include <vector>
29 
30 namespace llvm {
31 
32 //===----------------------------------------------------------------------===//
33 // Numeric substitution handling code.
34 //===----------------------------------------------------------------------===//
35 
36 /// Type representing the format an expression value should be textualized into
37 /// for matching. Used to represent both explicit format specifiers as well as
38 /// implicit format from using numeric variables.
39 struct ExpressionFormat {
40   enum class Kind {
41     /// Denote absence of format. Used for implicit format of literals and
42     /// empty expressions.
43     NoFormat,
44     /// Value is an unsigned integer and should be printed as a decimal number.
45     Unsigned,
46     /// Value is a signed integer and should be printed as a decimal number.
47     Signed,
48     /// Value should be printed as an uppercase hex number.
49     HexUpper,
50     /// Value should be printed as a lowercase hex number.
51     HexLower
52   };
53 
54 private:
55   Kind Value = Kind::NoFormat;
56   unsigned Precision = 0;
57   /// printf-like "alternate form" selected.
58   bool AlternateForm = false;
59 
60 public:
61   /// Evaluates a format to true if it can be used in a match.
62   explicit operator bool() const { return Value != Kind::NoFormat; }
63 
64   /// Define format equality: formats are equal if neither is NoFormat and
65   /// their kinds and precision are the same.
66   bool operator==(const ExpressionFormat &Other) const {
67     return Value != Kind::NoFormat && Value == Other.Value &&
68            Precision == Other.Precision && AlternateForm == Other.AlternateForm;
69   }
70 
71   bool operator!=(const ExpressionFormat &Other) const {
72     return !(*this == Other);
73   }
74 
75   bool operator==(Kind OtherValue) const { return Value == OtherValue; }
76 
77   bool operator!=(Kind OtherValue) const { return !(*this == OtherValue); }
78 
79   /// \returns the format specifier corresponding to this format as a string.
80   StringRef toString() const;
81 
82   ExpressionFormat() = default;
83   explicit ExpressionFormat(Kind Value) : Value(Value), Precision(0){};
84   explicit ExpressionFormat(Kind Value, unsigned Precision)
85       : Value(Value), Precision(Precision){};
86   explicit ExpressionFormat(Kind Value, unsigned Precision, bool AlternateForm)
87       : Value(Value), Precision(Precision), AlternateForm(AlternateForm){};
88 
89   /// \returns a wildcard regular expression string that matches any value in
90   /// the format represented by this instance and no other value, or an error
91   /// if the format is NoFormat.
92   LLVM_ABI_FOR_TEST Expected<std::string> getWildcardRegex() const;
93 
94   /// \returns the string representation of \p Value in the format represented
95   /// by this instance, or an error if conversion to this format failed or the
96   /// format is NoFormat.
97   LLVM_ABI_FOR_TEST Expected<std::string> getMatchingString(APInt Value) const;
98 
99   /// \returns the value corresponding to string representation \p StrVal
100   /// according to the matching format represented by this instance.
101   LLVM_ABI_FOR_TEST APInt valueFromStringRepr(StringRef StrVal,
102                                               const SourceMgr &SM) const;
103 };
104 
105 /// Class to represent an overflow error that might result when manipulating a
106 /// value.
107 class OverflowError : public ErrorInfo<OverflowError> {
108 public:
109   LLVM_ABI_FOR_TEST static char ID;
110 
111   std::error_code convertToErrorCode() const override {
112     return std::make_error_code(std::errc::value_too_large);
113   }
114 
115   void log(raw_ostream &OS) const override { OS << "overflow error"; }
116 };
117 
/// Performs operation and \returns its result or an error in case of failure,
/// such as if an overflow occurs.
///
/// Each of the functions below takes the two operands \p Lhs and \p Rhs by
/// const reference plus a bool reference \p Overflow, which is the signature
/// described by binop_eval_t.
/// NOTE(review): \p Overflow is presumably written by the callee to report
/// whether the operation overflowed; the definitions are not in this header,
/// so confirm against the implementation file.
LLVM_ABI_FOR_TEST Expected<APInt> exprAdd(const APInt &Lhs, const APInt &Rhs,
                                          bool &Overflow);
LLVM_ABI_FOR_TEST Expected<APInt> exprSub(const APInt &Lhs, const APInt &Rhs,
                                          bool &Overflow);
LLVM_ABI_FOR_TEST Expected<APInt> exprMul(const APInt &Lhs, const APInt &Rhs,
                                          bool &Overflow);
LLVM_ABI_FOR_TEST Expected<APInt> exprDiv(const APInt &Lhs, const APInt &Rhs,
                                          bool &Overflow);
// Unlike the four functions above, these two are not marked
// LLVM_ABI_FOR_TEST.
Expected<APInt> exprMax(const APInt &Lhs, const APInt &Rhs, bool &Overflow);
Expected<APInt> exprMin(const APInt &Lhs, const APInt &Rhs, bool &Overflow);
130 
131 /// Base class representing the AST of a given expression.
132 class ExpressionAST {
133 private:
134   StringRef ExpressionStr;
135 
136 public:
137   ExpressionAST(StringRef ExpressionStr) : ExpressionStr(ExpressionStr) {}
138 
139   virtual ~ExpressionAST() = default;
140 
141   StringRef getExpressionStr() const { return ExpressionStr; }
142 
143   /// Evaluates and \returns the value of the expression represented by this
144   /// AST or an error if evaluation fails.
145   virtual Expected<APInt> eval() const = 0;
146 
147   /// \returns either the implicit format of this AST, a diagnostic against
148   /// \p SM if implicit formats of the AST's components conflict, or NoFormat
149   /// if the AST has no implicit format (e.g. AST is made up of a single
150   /// literal).
151   virtual Expected<ExpressionFormat>
152   getImplicitFormat(const SourceMgr &SM) const {
153     return ExpressionFormat();
154   }
155 };
156 
157 /// Class representing an unsigned literal in the AST of an expression.
158 class ExpressionLiteral : public ExpressionAST {
159 private:
160   /// Actual value of the literal.
161   APInt Value;
162 
163 public:
164   explicit ExpressionLiteral(StringRef ExpressionStr, APInt Val)
165       : ExpressionAST(ExpressionStr), Value(Val) {}
166 
167   /// \returns the literal's value.
168   Expected<APInt> eval() const override { return Value; }
169 };
170 
171 /// Class to represent an undefined variable error, which quotes that
172 /// variable's name when printed.
173 class UndefVarError : public ErrorInfo<UndefVarError> {
174 private:
175   StringRef VarName;
176 
177 public:
178   LLVM_ABI_FOR_TEST static char ID;
179 
180   UndefVarError(StringRef VarName) : VarName(VarName) {}
181 
182   StringRef getVarName() const { return VarName; }
183 
184   std::error_code convertToErrorCode() const override {
185     return inconvertibleErrorCode();
186   }
187 
188   /// Print name of variable associated with this error.
189   void log(raw_ostream &OS) const override {
190     OS << "undefined variable: " << VarName;
191   }
192 };
193 
194 /// Class representing an expression and its matching format.
195 class Expression {
196 private:
197   /// Pointer to AST of the expression.
198   std::unique_ptr<ExpressionAST> AST;
199 
200   /// Format to use (e.g. hex upper case letters) when matching the value.
201   ExpressionFormat Format;
202 
203 public:
204   /// Generic constructor for an expression represented by the given \p AST and
205   /// whose matching format is \p Format.
206   Expression(std::unique_ptr<ExpressionAST> AST, ExpressionFormat Format)
207       : AST(std::move(AST)), Format(Format) {}
208 
209   /// \returns pointer to AST of the expression. Pointer is guaranteed to be
210   /// valid as long as this object is.
211   ExpressionAST *getAST() const { return AST.get(); }
212 
213   ExpressionFormat getFormat() const { return Format; }
214 };
215 
216 /// Class representing a numeric variable and its associated current value.
217 class NumericVariable {
218 private:
219   /// Name of the numeric variable.
220   StringRef Name;
221 
222   /// Format to use for expressions using this variable without an explicit
223   /// format.
224   ExpressionFormat ImplicitFormat;
225 
226   /// Value of numeric variable, if defined, or std::nullopt otherwise.
227   std::optional<APInt> Value;
228 
229   /// The input buffer's string from which Value was parsed, or std::nullopt.
230   /// See comments on getStringValue for a discussion of the std::nullopt case.
231   std::optional<StringRef> StrValue;
232 
233   /// Line number where this variable is defined, or std::nullopt if defined
234   /// before input is parsed. Used to determine whether a variable is defined on
235   /// the same line as a given use.
236   std::optional<size_t> DefLineNumber;
237 
238 public:
239   /// Constructor for a variable \p Name with implicit format \p ImplicitFormat
240   /// defined at line \p DefLineNumber or defined before input is parsed if
241   /// \p DefLineNumber is std::nullopt.
242   explicit NumericVariable(StringRef Name, ExpressionFormat ImplicitFormat,
243                            std::optional<size_t> DefLineNumber = std::nullopt)
244       : Name(Name), ImplicitFormat(ImplicitFormat),
245         DefLineNumber(DefLineNumber) {}
246 
247   /// \returns name of this numeric variable.
248   StringRef getName() const { return Name; }
249 
250   /// \returns implicit format of this numeric variable.
251   ExpressionFormat getImplicitFormat() const { return ImplicitFormat; }
252 
253   /// \returns this variable's value.
254   std::optional<APInt> getValue() const { return Value; }
255 
256   /// \returns the input buffer's string from which this variable's value was
257   /// parsed, or std::nullopt if the value is not yet defined or was not parsed
258   /// from the input buffer.  For example, the value of @LINE is not parsed from
259   /// the input buffer, and some numeric variables are parsed from the command
260   /// line instead.
261   std::optional<StringRef> getStringValue() const { return StrValue; }
262 
263   /// Sets value of this numeric variable to \p NewValue, and sets the input
264   /// buffer string from which it was parsed to \p NewStrValue.  See comments on
265   /// getStringValue for a discussion of when the latter can be std::nullopt.
266   void setValue(APInt NewValue,
267                 std::optional<StringRef> NewStrValue = std::nullopt) {
268     Value = NewValue;
269     StrValue = NewStrValue;
270   }
271 
272   /// Clears value of this numeric variable, regardless of whether it is
273   /// currently defined or not.
274   void clearValue() {
275     Value = std::nullopt;
276     StrValue = std::nullopt;
277   }
278 
279   /// \returns the line number where this variable is defined, if any, or
280   /// std::nullopt if defined before input is parsed.
281   std::optional<size_t> getDefLineNumber() const { return DefLineNumber; }
282 };
283 
284 /// Class representing the use of a numeric variable in the AST of an
285 /// expression.
286 class LLVM_ABI_FOR_TEST NumericVariableUse : public ExpressionAST {
287 private:
288   /// Pointer to the class instance for the variable this use is about.
289   NumericVariable *Variable;
290 
291 public:
292   NumericVariableUse(StringRef Name, NumericVariable *Variable)
293       : ExpressionAST(Name), Variable(Variable) {}
294   /// \returns the value of the variable referenced by this instance.
295   Expected<APInt> eval() const override;
296 
297   /// \returns implicit format of this numeric variable.
298   Expected<ExpressionFormat>
299   getImplicitFormat(const SourceMgr &SM) const override {
300     return Variable->getImplicitFormat();
301   }
302 };
303 
/// Type of functions evaluating a given binary operation: a pointer to a
/// function that takes two operands by const APInt reference plus a bool
/// reference and returns Expected<APInt>. The exprAdd, exprSub, exprMul,
/// exprDiv, exprMax and exprMin functions declared in this header all have
/// this signature.
using binop_eval_t = Expected<APInt> (*)(const APInt &, const APInt &, bool &);
306 
307 /// Class representing a single binary operation in the AST of an expression.
308 class LLVM_ABI_FOR_TEST BinaryOperation : public ExpressionAST {
309 private:
310   /// Left operand.
311   std::unique_ptr<ExpressionAST> LeftOperand;
312 
313   /// Right operand.
314   std::unique_ptr<ExpressionAST> RightOperand;
315 
316   /// Pointer to function that can evaluate this binary operation.
317   binop_eval_t EvalBinop;
318 
319 public:
320   BinaryOperation(StringRef ExpressionStr, binop_eval_t EvalBinop,
321                   std::unique_ptr<ExpressionAST> LeftOp,
322                   std::unique_ptr<ExpressionAST> RightOp)
323       : ExpressionAST(ExpressionStr), EvalBinop(EvalBinop) {
324     LeftOperand = std::move(LeftOp);
325     RightOperand = std::move(RightOp);
326   }
327 
328   /// Evaluates the value of the binary operation represented by this AST,
329   /// using EvalBinop on the result of recursively evaluating the operands.
330   /// \returns the expression value or an error if an undefined numeric
331   /// variable is used in one of the operands.
332   Expected<APInt> eval() const override;
333 
334   /// \returns the implicit format of this AST, if any, a diagnostic against
335   /// \p SM if the implicit formats of the AST's components conflict, or no
336   /// format if the AST has no implicit format (e.g. AST is made of a single
337   /// literal).
338   Expected<ExpressionFormat>
339   getImplicitFormat(const SourceMgr &SM) const override;
340 };
341 
342 class FileCheckPatternContext;
343 
344 /// Class representing a substitution to perform in the RegExStr string.
345 class Substitution {
346 protected:
347   /// Pointer to a class instance holding, among other things, the table with
348   /// the values of live string variables at the start of any given CHECK line.
349   /// Used for substituting string variables with the text they were defined
350   /// as. Expressions are linked to the numeric variables they use at
351   /// parse time and directly access the value of the numeric variable to
352   /// evaluate their value.
353   FileCheckPatternContext *Context;
354 
355   /// The string that needs to be substituted for something else. For a
356   /// string variable this is its name, otherwise this is the whole expression.
357   StringRef FromStr;
358 
359   // Index in RegExStr of where to do the substitution.
360   size_t InsertIdx;
361 
362 public:
363   Substitution(FileCheckPatternContext *Context, StringRef VarName,
364                size_t InsertIdx)
365       : Context(Context), FromStr(VarName), InsertIdx(InsertIdx) {}
366 
367   virtual ~Substitution() = default;
368 
369   /// \returns the string to be substituted for something else.
370   StringRef getFromString() const { return FromStr; }
371 
372   /// \returns the index where the substitution is to be performed in RegExStr.
373   size_t getIndex() const { return InsertIdx; }
374 
375   /// \returns a regular expression string that matches the result of the
376   /// substitution represented by this class instance or an error if
377   /// substitution failed.
378   virtual Expected<std::string> getResultRegex() const = 0;
379 
380   /// \returns a string containing the result of the substitution represented
381   /// by this class instance in a form suitable for diagnostics, or an error if
382   /// substitution failed.
383   virtual Expected<std::string> getResultForDiagnostics() const = 0;
384 };
385 
386 class LLVM_ABI_FOR_TEST StringSubstitution : public Substitution {
387 public:
388   StringSubstitution(FileCheckPatternContext *Context, StringRef VarName,
389                      size_t InsertIdx)
390       : Substitution(Context, VarName, InsertIdx) {}
391 
392   /// \returns the text that the string variable in this substitution matched
393   /// when defined, or an error if the variable is undefined.
394   Expected<std::string> getResultRegex() const override;
395 
396   /// \returns the text that the string variable in this substitution matched
397   /// when defined, in a form suitable for diagnostics, or an error if the
398   /// variable is undefined.
399   Expected<std::string> getResultForDiagnostics() const override;
400 };
401 
402 class LLVM_ABI_FOR_TEST NumericSubstitution : public Substitution {
403 private:
404   /// Pointer to the class representing the expression whose value is to be
405   /// substituted.
406   std::unique_ptr<Expression> ExpressionPointer;
407 
408 public:
409   NumericSubstitution(FileCheckPatternContext *Context, StringRef ExpressionStr,
410                       std::unique_ptr<Expression> ExpressionPointer,
411                       size_t InsertIdx)
412       : Substitution(Context, ExpressionStr, InsertIdx),
413         ExpressionPointer(std::move(ExpressionPointer)) {}
414 
415   /// \returns a string containing the result of evaluating the expression in
416   /// this substitution, or an error if evaluation failed.
417   Expected<std::string> getResultRegex() const override;
418 
419   /// \returns a string containing the result of evaluating the expression in
420   /// this substitution, in a form suitable for diagnostics, or an error if
421   /// evaluation failed.
422   Expected<std::string> getResultForDiagnostics() const override;
423 };
424 
425 //===----------------------------------------------------------------------===//
426 // Pattern handling code.
427 //===----------------------------------------------------------------------===//
428 
429 /// Class holding the Pattern global state, shared by all patterns: tables
430 /// holding values of variables and whether they are defined or not at any
431 /// given time in the matching process.
class FileCheckPatternContext {
  // Pattern is granted access to the private tables and factory functions
  // declared below.
  friend class Pattern;

private:
  /// When matching a given pattern, this holds the value of all the string
  /// variables defined in previous patterns. In a pattern, only the last
  /// definition for a given variable is recorded in this table.
  /// Back-references are used for uses after any of the other definitions.
  StringMap<StringRef> GlobalVariableTable;

  /// Map of all string variables defined so far. Used at parse time to detect
  /// a name conflict between a numeric variable and a string variable when
  /// the former is defined on a later line than the latter.
  StringMap<bool> DefinedVariableTable;

  /// When matching a given pattern, this holds the pointers to the classes
  /// representing the numeric variables defined in previous patterns. When
  /// matching a pattern, all definitions for that pattern are recorded in the
  /// NumericVariableDefs table in the Pattern instance of that pattern.
  StringMap<NumericVariable *> GlobalNumericVariableTable;

  /// Pointer to the class instance representing the @LINE pseudo variable for
  /// easily updating its value. Null until set.
  NumericVariable *LineVariable = nullptr;

  /// Vector holding pointers to all parsed numeric variables. Used to
  /// automatically free them once they are guaranteed to no longer be used.
  std::vector<std::unique_ptr<NumericVariable>> NumericVariables;

  /// Vector holding pointers to all parsed expressions. Used to automatically
  /// free the expressions once they are guaranteed to no longer be used.
  std::vector<std::unique_ptr<Expression>> Expressions;

  /// Vector holding pointers to all substitutions. Used to automatically free
  /// them once they are guaranteed to no longer be used.
  std::vector<std::unique_ptr<Substitution>> Substitutions;

public:
  /// \returns the value of string variable \p VarName or an error if no such
  /// variable has been defined.
  LLVM_ABI_FOR_TEST Expected<StringRef> getPatternVarValue(StringRef VarName);

  /// Defines string and numeric variables from definitions given on the
  /// command line, passed as a vector of [#]VAR=VAL strings in
  /// \p CmdlineDefines. \returns an error list containing diagnostics against
  /// \p SM for all definition parsing failures, if any, or Success otherwise.
  LLVM_ABI_FOR_TEST Error
  defineCmdlineVariables(ArrayRef<StringRef> CmdlineDefines, SourceMgr &SM);

  /// Creates the @LINE pseudo variable. Its value is set when patterns are
  /// being matched.
  LLVM_ABI_FOR_TEST void createLineVariable();

  /// Undefines local variables (variables whose name does not start with a '$'
  /// sign), i.e. removes them from GlobalVariableTable and from
  /// GlobalNumericVariableTable and also clears the value of numeric
  /// variables.
  LLVM_ABI_FOR_TEST void clearLocalVars();

private:
  /// Makes a new numeric variable and registers it for destruction when the
  /// context is destroyed.
  template <class... Types> NumericVariable *makeNumericVariable(Types... args);

  /// Makes a new string substitution and registers it for destruction when the
  /// context is destroyed.
  Substitution *makeStringSubstitution(StringRef VarName, size_t InsertIdx);

  /// Makes a new numeric substitution and registers it for destruction when
  /// the context is destroyed.
  Substitution *makeNumericSubstitution(StringRef ExpressionStr,
                                        std::unique_ptr<Expression> Expression,
                                        size_t InsertIdx);
};
506 
507 /// Class to represent an error holding a diagnostic with location information
508 /// used when printing it.
509 class ErrorDiagnostic : public ErrorInfo<ErrorDiagnostic> {
510 private:
511   SMDiagnostic Diagnostic;
512   SMRange Range;
513 
514 public:
515   LLVM_ABI_FOR_TEST static char ID;
516 
517   ErrorDiagnostic(SMDiagnostic &&Diag, SMRange Range)
518       : Diagnostic(Diag), Range(Range) {}
519 
520   std::error_code convertToErrorCode() const override {
521     return inconvertibleErrorCode();
522   }
523 
524   /// Print diagnostic associated with this error when printing the error.
525   void log(raw_ostream &OS) const override { Diagnostic.print(nullptr, OS); }
526 
527   StringRef getMessage() const { return Diagnostic.getMessage(); }
528   SMRange getRange() const { return Range; }
529 
530   static Error get(const SourceMgr &SM, SMLoc Loc, const Twine &ErrMsg,
531                    SMRange Range = std::nullopt) {
532     return make_error<ErrorDiagnostic>(
533         SM.GetMessage(Loc, SourceMgr::DK_Error, ErrMsg), Range);
534   }
535 
536   static Error get(const SourceMgr &SM, StringRef Buffer, const Twine &ErrMsg) {
537     SMLoc Start = SMLoc::getFromPointer(Buffer.data());
538     SMLoc End = SMLoc::getFromPointer(Buffer.data() + Buffer.size());
539     return get(SM, Start, ErrMsg, SMRange(Start, End));
540   }
541 };
542 
543 class NotFoundError : public ErrorInfo<NotFoundError> {
544 public:
545   LLVM_ABI_FOR_TEST static char ID;
546 
547   std::error_code convertToErrorCode() const override {
548     return inconvertibleErrorCode();
549   }
550 
551   /// Print diagnostic associated with this error when printing the error.
552   void log(raw_ostream &OS) const override {
553     OS << "String not found in input";
554   }
555 };
556 
557 /// An error that has already been reported.
558 ///
559 /// This class is designed to support a function whose callers may need to know
560 /// whether the function encountered and reported an error but never need to
561 /// know the nature of that error.  For example, the function has a return type
562 /// of \c Error and always returns either \c ErrorReported or \c ErrorSuccess.
563 /// That interface is similar to that of a function returning bool to indicate
564 /// an error except, in the former case, (1) there is no confusion over polarity
565 /// and (2) the caller must either check the result or explicitly ignore it with
566 /// a call like \c consumeError.
567 class ErrorReported final : public ErrorInfo<ErrorReported> {
568 public:
569   static char ID;
570 
571   std::error_code convertToErrorCode() const override {
572     return inconvertibleErrorCode();
573   }
574 
575   /// Print diagnostic associated with this error when printing the error.
576   void log(raw_ostream &OS) const override {
577     OS << "error previously reported";
578   }
579 
580   static inline Error reportedOrSuccess(bool HasErrorReported) {
581     if (HasErrorReported)
582       return make_error<ErrorReported>();
583     return Error::success();
584   }
585 };
586 
587 class Pattern {
588   SMLoc PatternLoc;
589 
590   /// A fixed string to match as the pattern or empty if this pattern requires
591   /// a regex match.
592   StringRef FixedStr;
593 
594   /// A regex string to match as the pattern or empty if this pattern requires
595   /// a fixed string to match.
596   std::string RegExStr;
597 
598   /// Entries in this vector represent a substitution of a string variable or
599   /// an expression in the RegExStr regex at match time. For example, in the
600   /// case of a CHECK directive with the pattern "foo[[bar]]baz[[#N+1]]",
601   /// RegExStr will contain "foobaz" and we'll get two entries in this vector
602   /// that tells us to insert the value of string variable "bar" at offset 3
603   /// and the value of expression "N+1" at offset 6.
604   std::vector<Substitution *> Substitutions;
605 
606   /// Maps names of string variables defined in a pattern to the number of
607   /// their parenthesis group in RegExStr capturing their last definition.
608   ///
609   /// E.g. for the pattern "foo[[bar:.*]]baz([[bar]][[QUUX]][[bar:.*]])",
610   /// RegExStr will be "foo(.*)baz(\1<quux value>(.*))" where <quux value> is
611   /// the value captured for QUUX on the earlier line where it was defined, and
612   /// VariableDefs will map "bar" to the third parenthesis group which captures
613   /// the second definition of "bar".
614   ///
615   /// Note: uses std::map rather than StringMap to be able to get the key when
616   /// iterating over values.
617   std::map<StringRef, unsigned> VariableDefs;
618 
619   /// Structure representing the definition of a numeric variable in a pattern.
620   /// It holds the pointer to the class instance holding the value and matching
621   /// format of the numeric variable whose value is being defined and the
622   /// number of the parenthesis group in RegExStr to capture that value.
623   struct NumericVariableMatch {
624     /// Pointer to class instance holding the value and matching format of the
625     /// numeric variable being defined.
626     NumericVariable *DefinedNumericVariable;
627 
628     /// Number of the parenthesis group in RegExStr that captures the value of
629     /// this numeric variable definition.
630     unsigned CaptureParenGroup;
631   };
632 
633   /// Holds the number of the parenthesis group in RegExStr and pointer to the
634   /// corresponding NumericVariable class instance of all numeric variable
635   /// definitions. Used to set the matched value of all those variables.
636   StringMap<NumericVariableMatch> NumericVariableDefs;
637 
638   /// Pointer to a class instance holding the global state shared by all
639   /// patterns:
640   /// - separate tables with the values of live string and numeric variables
641   ///   respectively at the start of any given CHECK line;
642   /// - table holding whether a string variable has been defined at any given
643   ///   point during the parsing phase.
644   FileCheckPatternContext *Context;
645 
646   Check::FileCheckType CheckTy;
647 
648   /// Line number for this CHECK pattern or std::nullopt if it is an implicit
649   /// pattern. Used to determine whether a variable definition is made on an
650   /// earlier line to the one with this CHECK.
651   std::optional<size_t> LineNumber;
652 
653   /// Ignore case while matching if set to true.
654   bool IgnoreCase = false;
655 
656 public:
657   Pattern(Check::FileCheckType Ty, FileCheckPatternContext *Context,
658           std::optional<size_t> Line = std::nullopt)
659       : Context(Context), CheckTy(Ty), LineNumber(Line) {}
660 
661   /// \returns the location in source code.
662   SMLoc getLoc() const { return PatternLoc; }
663 
664   /// \returns the pointer to the global state for all patterns in this
665   /// FileCheck instance.
666   FileCheckPatternContext *getContext() const { return Context; }
667 
668   /// \returns whether \p C is a valid first character for a variable name.
669   LLVM_ABI_FOR_TEST static bool isValidVarNameStart(char C);
670 
671   /// Parsing information about a variable.
672   struct VariableProperties {
673     StringRef Name;
674     bool IsPseudo;
675   };
676 
677   /// Parses the string at the start of \p Str for a variable name. \returns
678   /// a VariableProperties structure holding the variable name and whether it
679   /// is the name of a pseudo variable, or an error holding a diagnostic
  /// against \p SM if parsing fails. If parsing was successful, also strips
681   /// \p Str from the variable name.
682   LLVM_ABI_FOR_TEST static Expected<VariableProperties>
683   parseVariable(StringRef &Str, const SourceMgr &SM);
684   /// Parses \p Expr for a numeric substitution block at line \p LineNumber,
685   /// or before input is parsed if \p LineNumber is None. Parameter
686   /// \p IsLegacyLineExpr indicates whether \p Expr should be a legacy @LINE
687   /// expression and \p Context points to the class instance holding the live
688   /// string and numeric variables. \returns a pointer to the class instance
  /// representing the expression whose value must be substituted, or an error
690   /// holding a diagnostic against \p SM if parsing fails. If substitution was
691   /// successful, sets \p DefinedNumericVariable to point to the class
692   /// representing the numeric variable defined in this numeric substitution
693   /// block, or std::nullopt if this block does not define any variable.
694   LLVM_ABI_FOR_TEST static Expected<std::unique_ptr<Expression>>
695   parseNumericSubstitutionBlock(
696       StringRef Expr, std::optional<NumericVariable *> &DefinedNumericVariable,
697       bool IsLegacyLineExpr, std::optional<size_t> LineNumber,
698       FileCheckPatternContext *Context, const SourceMgr &SM);
699   /// Parses the pattern in \p PatternStr and initializes this Pattern instance
700   /// accordingly.
701   ///
702   /// \p Prefix provides which prefix is being matched, \p Req describes the
703   /// global options that influence the parsing such as whitespace
704   /// canonicalization, \p SM provides the SourceMgr used for error reports.
705   /// \returns true in case of an error, false otherwise.
706   LLVM_ABI_FOR_TEST bool parsePattern(StringRef PatternStr, StringRef Prefix,
707                                       SourceMgr &SM,
708                                       const FileCheckRequest &Req);
709   struct Match {
710     size_t Pos;
711     size_t Len;
712   };
713   struct MatchResult {
714     std::optional<Match> TheMatch; ///< Left unset when there is no match.
715     Error TheError;                ///< Error, if any, for this result.
716     MatchResult(Error E) : TheError(std::move(E)) {}
717     MatchResult(Match M, Error E) : TheMatch(M), TheError(std::move(E)) {}
718     MatchResult(size_t MatchPos, size_t MatchLen, Error E)
719         : MatchResult(Match{MatchPos, MatchLen}, std::move(E)) {}
720   };
721   /// Matches the pattern string against the input buffer \p Buffer.
722   ///
723   /// \returns either (1) an error, in which case there is no match, or (2) a
724   /// match, possibly with an error encountered while processing that match.
725   ///
726   /// The GlobalVariableTable StringMap in the FileCheckPatternContext class
727   /// instance provides the current values of FileCheck string variables and is
728   /// updated if this match defines new values. Likewise, the
729   /// GlobalNumericVariableTable StringMap in the same class instance provides
730   /// the current values of FileCheck numeric variables and is updated if this
731   /// match defines new numeric values.
732   LLVM_ABI_FOR_TEST MatchResult match(StringRef Buffer,
733                                       const SourceMgr &SM) const;
734   /// Prints the value of each substitution that succeeded.
735   void printSubstitutions(const SourceMgr &SM, StringRef Buffer,
736                           SMRange MatchRange, FileCheckDiag::MatchType MatchTy,
737                           std::vector<FileCheckDiag> *Diags) const;
738   void printFuzzyMatch(const SourceMgr &SM, StringRef Buffer,
739                        std::vector<FileCheckDiag> *Diags) const;
740   /// \returns true if Substitutions or VariableDefs is non-empty.
741   bool hasVariable() const {
742     return !Substitutions.empty() || !VariableDefs.empty();
743   }
744   LLVM_ABI_FOR_TEST void
745   printVariableDefs(const SourceMgr &SM, FileCheckDiag::MatchType MatchTy,
746                     std::vector<FileCheckDiag> *Diags) const;
747   /// \returns this pattern's check type (the CheckTy member).
748   Check::FileCheckType getCheckTy() const { return CheckTy; }
749   /// \returns the count reported by this pattern's check type.
750   int getCount() const { return CheckTy.getCount(); }
751 
752 private:
753   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
754   void AddBackrefToRegEx(unsigned BackrefNum);
755   /// Computes an arbitrary estimate of how well this pattern matches at the
756   /// start of \p Buffer. \returns that estimate as a distance, where zero
757   /// should correspond to a perfect match.
758   unsigned computeMatchDistance(StringRef Buffer) const;
759   /// Finds the closing sequence of a regex variable usage or definition.
760   ///
761   /// \p Str has to point to the beginning of the definition (right after the
762   /// opening sequence). \p SM holds the SourceMgr used for error reporting.
763   /// \returns the offset of the closing sequence within \p Str, or npos if it
764   /// was not found.
765   static size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
766 
767   /// Parses \p Expr for the name of a numeric variable to be defined at line
768   /// \p LineNumber, or before input is parsed if \p LineNumber is std::nullopt.
769   /// \returns a pointer to the class instance representing that variable,
770   /// creating it if needed, or an error holding a diagnostic against \p SM
771   /// should defining such a variable be invalid.
772   static Expected<NumericVariable *> parseNumericVariableDefinition(
773       StringRef &Expr, FileCheckPatternContext *Context,
774       std::optional<size_t> LineNumber, ExpressionFormat ImplicitFormat,
775       const SourceMgr &SM);
776   /// Parses \p Name as a (pseudo if \p IsPseudo is true) numeric variable use
777   /// at line \p LineNumber, or before input is parsed if \p LineNumber is
778   /// std::nullopt. Parameter \p Context points to the class instance holding
779   /// the live string and numeric variables. \returns the pointer to the class
780   /// instance representing that variable use if successful, or an error
781   /// holding a diagnostic against \p SM otherwise.
782   static Expected<std::unique_ptr<NumericVariableUse>> parseNumericVariableUse(
783       StringRef Name, bool IsPseudo, std::optional<size_t> LineNumber,
784       FileCheckPatternContext *Context, const SourceMgr &SM);
785   enum class AllowedOperand { LineVar, LegacyLiteral, Any };
786   /// Parses \p Expr for use of a numeric operand at line \p LineNumber, or
787   /// before input is parsed if \p LineNumber is std::nullopt. Accepts literal
788   /// values, numeric variables and function calls, depending on the value of
789   /// \p AO. \p MaybeInvalidConstraint indicates whether the text being parsed
790   /// could be an invalid constraint. \p Context points to the class instance
791   /// holding the live string and numeric variables. \returns the class
792   /// representing that operand in the AST of the expression or an error
793   /// holding a diagnostic against \p SM otherwise. If \p Expr starts with a
794   /// "(" this function will attempt to parse a parenthesized expression.
795   static Expected<std::unique_ptr<ExpressionAST>> parseNumericOperand(
796       StringRef &Expr, AllowedOperand AO, bool MaybeInvalidConstraint,
797       std::optional<size_t> LineNumber, FileCheckPatternContext *Context,
798       const SourceMgr &SM);
799   /// Parses and updates \p RemainingExpr for a binary operation at line
800   /// \p LineNumber, or before input is parsed if \p LineNumber is std::nullopt.
801   /// The left operand of this binary operation is given in \p LeftOp and
802   /// \p Expr holds the string for the full expression, including the left
803   /// operand. Parameter \p IsLegacyLineExpr indicates whether we are parsing a
804   /// legacy @LINE expression. Parameter \p Context points to the class instance
805   /// holding the live string and numeric variables. \returns the class
806   /// representing the binary operation in the AST of the expression, or an
807   /// error holding a diagnostic against \p SM otherwise.
808   static Expected<std::unique_ptr<ExpressionAST>>
809   parseBinop(StringRef Expr, StringRef &RemainingExpr,
810              std::unique_ptr<ExpressionAST> LeftOp, bool IsLegacyLineExpr,
811              std::optional<size_t> LineNumber, FileCheckPatternContext *Context,
812              const SourceMgr &SM);
813 
814   /// Parses a parenthesized expression inside \p Expr at line \p LineNumber, or
815   /// before input is parsed if \p LineNumber is std::nullopt. \p Expr must
816   /// start with a '('. Accepts both literal values and numeric variables.
817   /// \p Context points to the class instance holding the live string and
818   /// numeric variables. \returns the class representing that operand in the
819   /// AST of the expression, or an error holding a diagnostic against \p SM.
820   static Expected<std::unique_ptr<ExpressionAST>>
821   parseParenExpr(StringRef &Expr, std::optional<size_t> LineNumber,
822                  FileCheckPatternContext *Context, const SourceMgr &SM);
823 
824   /// Parses \p Expr for an argument list belonging to a call to function
825   /// \p FuncName at line \p LineNumber, or before input is parsed if
826   /// \p LineNumber is std::nullopt. There is no separate location parameter;
827   /// presumably \p FuncName also locates diagnostics (TODO confirm).
828   /// \p Context points to the class instance holding the live string and
829   /// numeric variables. \returns the class representing that call in the AST
830   /// of the expression, or an error holding a diagnostic against \p SM.
831   static Expected<std::unique_ptr<ExpressionAST>>
832   parseCallExpr(StringRef &Expr, StringRef FuncName,
833                 std::optional<size_t> LineNumber,
834                 FileCheckPatternContext *Context, const SourceMgr &SM);
835 };
836 
837 //===----------------------------------------------------------------------===//
838 // Check Strings.
839 //===----------------------------------------------------------------------===//
840 
841 /// A check that we found in the input file.
842 struct FileCheckString {
843   /// The pattern to match.
844   Pattern Pat;
845 
846   /// Which prefix name this check matched.
847   StringRef Prefix;
848 
849   /// The location in the match file where the check string was specified.
850   SMLoc Loc;
851 
852   /// Holds the information about the DAG/NOT strings in the program, which are
853   /// not explicitly stored otherwise. This allows for better and more accurate
854   /// diagnostic messages.
855   struct DagNotPrefixInfo {
856     Pattern DagNotPat;      ///< The DAG/NOT pattern itself.
857     StringRef DagNotPrefix; ///< The prefix stored alongside that pattern.
858 
859     DagNotPrefixInfo(const Pattern &P, StringRef S)
860         : DagNotPat(P), DagNotPrefix(S) {}
861   };
862 
863   /// Holds the DAG/NOT strings occurring in the input file.
864   std::vector<DagNotPrefixInfo> DagNotStrings;
865   /// Builds a check string, moving from both \p P and \p D.
866   FileCheckString(Pattern &&P, StringRef S, SMLoc L,
867                   std::vector<DagNotPrefixInfo> &&D)
868       : Pat(std::move(P)), Prefix(S), Loc(L), DagNotStrings(std::move(D)) {}
869 
870   /// Matches this check string and its "not strings" and/or "dag strings".
871   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
872                size_t &MatchLen, FileCheckRequest &Req,
873                std::vector<FileCheckDiag> *Diags) const;
874 
875   /// Verifies that there is a single line in the given \p Buffer. Errors are
876   /// reported against \p SM.
877   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
878   /// Verifies that there is no newline in the given \p Buffer. Errors are
879   /// reported against \p SM.
880   bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
881   /// Verifies that none of the strings in \p NotStrings are found in the given
882   /// \p Buffer. Errors are reported against \p SM and diagnostics are recorded
883   /// in \p Diags according to the verbosity level set in \p Req.
884   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
885                 const std::vector<const DagNotPrefixInfo *> &NotStrings,
886                 const FileCheckRequest &Req,
887                 std::vector<FileCheckDiag> *Diags) const;
888   /// Matches "dag strings" and the "not strings" mixed in with them.
889   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
890                   std::vector<const DagNotPrefixInfo *> &NotStrings,
891                   const FileCheckRequest &Req,
892                   std::vector<FileCheckDiag> *Diags) const;
893 };
894 
895 } // namespace llvm
896 
897 #endif
898