1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // FileCheck does a line-by line check of a file that validates whether it 10 // contains the expected content. This is useful for regression tests etc. 11 // 12 // This file implements most of the API that will be used by the FileCheck utility 13 // as well as various unittests. 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/FileCheck/FileCheck.h" 17 #include "FileCheckImpl.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/ADT/StringSet.h" 21 #include "llvm/ADT/Twine.h" 22 #include "llvm/Support/CheckedArithmetic.h" 23 #include "llvm/Support/FormatVariadic.h" 24 #include <cstdint> 25 #include <list> 26 #include <set> 27 #include <tuple> 28 #include <utility> 29 30 using namespace llvm; 31 32 StringRef ExpressionFormat::toString() const { 33 switch (Value) { 34 case Kind::NoFormat: 35 return StringRef("<none>"); 36 case Kind::Unsigned: 37 return StringRef("%u"); 38 case Kind::Signed: 39 return StringRef("%d"); 40 case Kind::HexUpper: 41 return StringRef("%X"); 42 case Kind::HexLower: 43 return StringRef("%x"); 44 } 45 llvm_unreachable("unknown expression format"); 46 } 47 48 Expected<std::string> ExpressionFormat::getWildcardRegex() const { 49 StringRef AlternateFormPrefix = AlternateForm ? 
StringRef("0x") : StringRef(); 50 51 auto CreatePrecisionRegex = [&](StringRef S) { 52 return (Twine(AlternateFormPrefix) + S + Twine('{') + Twine(Precision) + 53 "}") 54 .str(); 55 }; 56 57 switch (Value) { 58 case Kind::Unsigned: 59 if (Precision) 60 return CreatePrecisionRegex("([1-9][0-9]*)?[0-9]"); 61 return std::string("[0-9]+"); 62 case Kind::Signed: 63 if (Precision) 64 return CreatePrecisionRegex("-?([1-9][0-9]*)?[0-9]"); 65 return std::string("-?[0-9]+"); 66 case Kind::HexUpper: 67 if (Precision) 68 return CreatePrecisionRegex("([1-9A-F][0-9A-F]*)?[0-9A-F]"); 69 return (Twine(AlternateFormPrefix) + Twine("[0-9A-F]+")).str(); 70 case Kind::HexLower: 71 if (Precision) 72 return CreatePrecisionRegex("([1-9a-f][0-9a-f]*)?[0-9a-f]"); 73 return (Twine(AlternateFormPrefix) + Twine("[0-9a-f]+")).str(); 74 default: 75 return createStringError(std::errc::invalid_argument, 76 "trying to match value with invalid format"); 77 } 78 } 79 80 Expected<std::string> 81 ExpressionFormat::getMatchingString(ExpressionValue IntegerValue) const { 82 APInt IntValue = IntegerValue.getAPIntValue(); 83 // Error out for values that cannot be represented by the appropriate 64-bit 84 // integer (e.g. int64_t for a signed format) to keep the getter of 85 // ExpressionValue as an APInt an NFC. 86 if (Value == Kind::Signed) { 87 if (!IntValue.isSignedIntN(64)) 88 return make_error<OverflowError>(); 89 } else { 90 if (!IntValue.isIntN(64)) 91 return make_error<OverflowError>(); 92 } 93 94 unsigned Radix; 95 bool UpperCase = false; 96 SmallString<8> AbsoluteValueStr; 97 StringRef SignPrefix = IntValue.isNegative() ? 
"-" : ""; 98 switch (Value) { 99 case Kind::Unsigned: 100 case Kind::Signed: 101 Radix = 10; 102 break; 103 case Kind::HexUpper: 104 UpperCase = true; 105 Radix = 16; 106 break; 107 case Kind::HexLower: 108 Radix = 16; 109 UpperCase = false; 110 break; 111 default: 112 return createStringError(std::errc::invalid_argument, 113 "trying to match value with invalid format"); 114 } 115 IntValue.abs().toString(AbsoluteValueStr, Radix, /*Signed=*/false, 116 /*formatAsCLiteral=*/false, 117 /*UpperCase=*/UpperCase); 118 119 StringRef AlternateFormPrefix = AlternateForm ? StringRef("0x") : StringRef(); 120 121 if (Precision > AbsoluteValueStr.size()) { 122 unsigned LeadingZeros = Precision - AbsoluteValueStr.size(); 123 return (Twine(SignPrefix) + Twine(AlternateFormPrefix) + 124 std::string(LeadingZeros, '0') + AbsoluteValueStr) 125 .str(); 126 } 127 128 return (Twine(SignPrefix) + Twine(AlternateFormPrefix) + AbsoluteValueStr) 129 .str(); 130 } 131 132 Expected<ExpressionValue> 133 ExpressionFormat::valueFromStringRepr(StringRef StrVal, 134 const SourceMgr &SM) const { 135 bool ValueIsSigned = Value == Kind::Signed; 136 // Both the FileCheck utility and library only call this method with a valid 137 // value in StrVal. This is guaranteed by the regex returned by 138 // getWildcardRegex() above. Only underflow and overflow errors can thus 139 // occur. However new uses of this method could be added in the future so 140 // the error message does not make assumptions about StrVal. 
141 StringRef IntegerParseErrorStr = "unable to represent numeric value"; 142 if (ValueIsSigned) { 143 int64_t SignedValue; 144 145 if (StrVal.getAsInteger(10, SignedValue)) 146 return ErrorDiagnostic::get(SM, StrVal, IntegerParseErrorStr); 147 148 return ExpressionValue(SignedValue); 149 } 150 151 bool Hex = Value == Kind::HexUpper || Value == Kind::HexLower; 152 uint64_t UnsignedValue; 153 bool MissingFormPrefix = AlternateForm && !StrVal.consume_front("0x"); 154 (void)MissingFormPrefix; 155 assert(!MissingFormPrefix && "missing alternate form prefix"); 156 if (StrVal.getAsInteger(Hex ? 16 : 10, UnsignedValue)) 157 return ErrorDiagnostic::get(SM, StrVal, IntegerParseErrorStr); 158 159 return ExpressionValue(UnsignedValue); 160 } 161 162 Expected<ExpressionValue> llvm::operator+(const ExpressionValue &LeftOperand, 163 const ExpressionValue &RightOperand) { 164 bool Overflow; 165 APInt Result = LeftOperand.getAPIntValue().sadd_ov( 166 RightOperand.getAPIntValue(), Overflow); 167 if (Overflow || 168 (Result.isNegative() && !Result.isSignedIntN(Result.getBitWidth() - 1))) 169 return make_error<OverflowError>(); 170 171 if (Result.isNegative()) 172 return ExpressionValue(Result.getSExtValue()); 173 else 174 return ExpressionValue(Result.getZExtValue()); 175 } 176 177 Expected<ExpressionValue> llvm::operator-(const ExpressionValue &LeftOperand, 178 const ExpressionValue &RightOperand) { 179 bool Overflow; 180 APInt Result = LeftOperand.getAPIntValue().ssub_ov( 181 RightOperand.getAPIntValue(), Overflow); 182 if (Overflow || 183 (Result.isNegative() && !Result.isSignedIntN(Result.getBitWidth() - 1))) 184 return make_error<OverflowError>(); 185 186 if (Result.isNegative()) 187 return ExpressionValue(Result.getSExtValue()); 188 else 189 return ExpressionValue(Result.getZExtValue()); 190 } 191 192 Expected<ExpressionValue> llvm::operator*(const ExpressionValue &LeftOperand, 193 const ExpressionValue &RightOperand) { 194 bool Overflow; 195 APInt Result = 
LeftOperand.getAPIntValue().smul_ov( 196 RightOperand.getAPIntValue(), Overflow); 197 if (Overflow || 198 (Result.isNegative() && !Result.isSignedIntN(Result.getBitWidth() - 1))) 199 return make_error<OverflowError>(); 200 201 if (Result.isNegative()) 202 return ExpressionValue(Result.getSExtValue()); 203 else 204 return ExpressionValue(Result.getZExtValue()); 205 } 206 207 Expected<ExpressionValue> llvm::operator/(const ExpressionValue &LeftOperand, 208 const ExpressionValue &RightOperand) { 209 // Check for division by zero. 210 if (RightOperand.getAPIntValue().isZero()) 211 return make_error<OverflowError>(); 212 213 bool Overflow; 214 APInt Result = LeftOperand.getAPIntValue().sdiv_ov( 215 RightOperand.getAPIntValue(), Overflow); 216 if (Overflow || 217 (Result.isNegative() && !Result.isSignedIntN(Result.getBitWidth() - 1))) 218 return make_error<OverflowError>(); 219 220 if (Result.isNegative()) 221 return ExpressionValue(Result.getSExtValue()); 222 else 223 return ExpressionValue(Result.getZExtValue()); 224 } 225 226 Expected<ExpressionValue> llvm::max(const ExpressionValue &LeftOperand, 227 const ExpressionValue &RightOperand) { 228 return LeftOperand.getAPIntValue().slt(RightOperand.getAPIntValue()) 229 ? 
RightOperand 230 : LeftOperand; 231 } 232 233 Expected<ExpressionValue> llvm::min(const ExpressionValue &LeftOperand, 234 const ExpressionValue &RightOperand) { 235 if (cantFail(max(LeftOperand, RightOperand)).getAPIntValue() == 236 LeftOperand.getAPIntValue()) 237 return RightOperand; 238 239 return LeftOperand; 240 } 241 242 Expected<ExpressionValue> NumericVariableUse::eval() const { 243 std::optional<ExpressionValue> Value = Variable->getValue(); 244 if (Value) 245 return *Value; 246 247 return make_error<UndefVarError>(getExpressionStr()); 248 } 249 250 Expected<ExpressionValue> BinaryOperation::eval() const { 251 Expected<ExpressionValue> LeftOp = LeftOperand->eval(); 252 Expected<ExpressionValue> RightOp = RightOperand->eval(); 253 254 // Bubble up any error (e.g. undefined variables) in the recursive 255 // evaluation. 256 if (!LeftOp || !RightOp) { 257 Error Err = Error::success(); 258 if (!LeftOp) 259 Err = joinErrors(std::move(Err), LeftOp.takeError()); 260 if (!RightOp) 261 Err = joinErrors(std::move(Err), RightOp.takeError()); 262 return std::move(Err); 263 } 264 265 return EvalBinop(*LeftOp, *RightOp); 266 } 267 268 Expected<ExpressionFormat> 269 BinaryOperation::getImplicitFormat(const SourceMgr &SM) const { 270 Expected<ExpressionFormat> LeftFormat = LeftOperand->getImplicitFormat(SM); 271 Expected<ExpressionFormat> RightFormat = RightOperand->getImplicitFormat(SM); 272 if (!LeftFormat || !RightFormat) { 273 Error Err = Error::success(); 274 if (!LeftFormat) 275 Err = joinErrors(std::move(Err), LeftFormat.takeError()); 276 if (!RightFormat) 277 Err = joinErrors(std::move(Err), RightFormat.takeError()); 278 return std::move(Err); 279 } 280 281 if (*LeftFormat != ExpressionFormat::Kind::NoFormat && 282 *RightFormat != ExpressionFormat::Kind::NoFormat && 283 *LeftFormat != *RightFormat) 284 return ErrorDiagnostic::get( 285 SM, getExpressionStr(), 286 "implicit format conflict between '" + LeftOperand->getExpressionStr() + 287 "' (" + 
LeftFormat->toString() + ") and '" + 288 RightOperand->getExpressionStr() + "' (" + RightFormat->toString() + 289 "), need an explicit format specifier"); 290 291 return *LeftFormat != ExpressionFormat::Kind::NoFormat ? *LeftFormat 292 : *RightFormat; 293 } 294 295 Expected<std::string> NumericSubstitution::getResult() const { 296 assert(ExpressionPointer->getAST() != nullptr && 297 "Substituting empty expression"); 298 Expected<ExpressionValue> EvaluatedValue = 299 ExpressionPointer->getAST()->eval(); 300 if (!EvaluatedValue) 301 return EvaluatedValue.takeError(); 302 ExpressionFormat Format = ExpressionPointer->getFormat(); 303 return Format.getMatchingString(*EvaluatedValue); 304 } 305 306 Expected<std::string> StringSubstitution::getResult() const { 307 // Look up the value and escape it so that we can put it into the regex. 308 Expected<StringRef> VarVal = Context->getPatternVarValue(FromStr); 309 if (!VarVal) 310 return VarVal.takeError(); 311 return Regex::escape(*VarVal); 312 } 313 314 bool Pattern::isValidVarNameStart(char C) { return C == '_' || isAlpha(C); } 315 316 Expected<Pattern::VariableProperties> 317 Pattern::parseVariable(StringRef &Str, const SourceMgr &SM) { 318 if (Str.empty()) 319 return ErrorDiagnostic::get(SM, Str, "empty variable name"); 320 321 size_t I = 0; 322 bool IsPseudo = Str[0] == '@'; 323 324 // Global vars start with '$'. 325 if (Str[0] == '$' || IsPseudo) 326 ++I; 327 328 if (!isValidVarNameStart(Str[I++])) 329 return ErrorDiagnostic::get(SM, Str, "invalid variable name"); 330 331 for (size_t E = Str.size(); I != E; ++I) 332 // Variable names are composed of alphanumeric characters and underscores. 333 if (Str[I] != '_' && !isAlnum(Str[I])) 334 break; 335 336 StringRef Name = Str.take_front(I); 337 Str = Str.substr(I); 338 return VariableProperties {Name, IsPseudo}; 339 } 340 341 // StringRef holding all characters considered as horizontal whitespaces by 342 // FileCheck input canonicalization. 
constexpr StringLiteral SpaceChars = " \t";

// Parsing helper function that strips the first character in S and returns it.
// The callers visible in this file check that S is non-empty before calling.
static char popFront(StringRef &S) {
  char C = S.front();
  S = S.drop_front();
  return C;
}

// Definitions of the static ID members of the error classes.
char OverflowError::ID = 0;
char UndefVarError::ID = 0;
char ErrorDiagnostic::ID = 0;
char NotFoundError::ID = 0;
char ErrorReported::ID = 0;

// Parses a numeric variable definition from Expr, which must contain only the
// variable name followed by optional whitespace. Returns the variable already
// registered under that name in Context->GlobalNumericVariableTable if there
// is one (provided its implicit format equals ImplicitFormat), otherwise a
// variable newly created through Context->makeNumericVariable(). Returns an
// ErrorDiagnostic for a pseudo variable, a name already used by a string
// variable, trailing characters, or a format mismatch.
Expected<NumericVariable *> Pattern::parseNumericVariableDefinition(
    StringRef &Expr, FileCheckPatternContext *Context,
    std::optional<size_t> LineNumber, ExpressionFormat ImplicitFormat,
    const SourceMgr &SM) {
  Expected<VariableProperties> ParseVarResult = parseVariable(Expr, SM);
  if (!ParseVarResult)
    return ParseVarResult.takeError();
  StringRef Name = ParseVarResult->Name;

  if (ParseVarResult->IsPseudo)
    return ErrorDiagnostic::get(
        SM, Name, "definition of pseudo numeric variable unsupported");

  // Detect collisions between string and numeric variables when the latter
  // is created later than the former.
  if (Context->DefinedVariableTable.contains(Name))
    return ErrorDiagnostic::get(
        SM, Name, "string variable with name '" + Name + "' already exists");

  Expr = Expr.ltrim(SpaceChars);
  if (!Expr.empty())
    return ErrorDiagnostic::get(
        SM, Expr, "unexpected characters after numeric variable name");

  NumericVariable *DefinedNumericVariable;
  auto VarTableIter = Context->GlobalNumericVariableTable.find(Name);
  if (VarTableIter != Context->GlobalNumericVariableTable.end()) {
    DefinedNumericVariable = VarTableIter->second;
    if (DefinedNumericVariable->getImplicitFormat() != ImplicitFormat)
      return ErrorDiagnostic::get(
          SM, Expr, "format different from previous variable definition");
  } else
    DefinedNumericVariable =
        Context->makeNumericVariable(Name, ImplicitFormat, LineNumber);

  return DefinedNumericVariable;
}

// Builds the AST node for a use of the numeric variable Name. The only pseudo
// variable accepted is @LINE. A use of a variable defined on the same line
// (same LineNumber) is diagnosed as an error.
Expected<std::unique_ptr<NumericVariableUse>> Pattern::parseNumericVariableUse(
    StringRef Name, bool IsPseudo, std::optional<size_t> LineNumber,
    FileCheckPatternContext *Context, const SourceMgr &SM) {
  if (IsPseudo && !Name.equals("@LINE"))
    return ErrorDiagnostic::get(
        SM, Name, "invalid pseudo numeric variable '" + Name + "'");

  // Numeric variable definitions and uses are parsed in the order in which
  // they appear in the CHECK patterns. For each definition, the pointer to the
  // class instance of the corresponding numeric variable definition is stored
  // in GlobalNumericVariableTable in parsePattern. Therefore, if the pointer
  // we get below is null, it means no such variable was defined before. When
  // that happens, we create a dummy variable so that parsing can continue. All
  // uses of undefined variables, whether string or numeric, are then diagnosed
  // in printNoMatch() after failing to match.
  auto VarTableIter = Context->GlobalNumericVariableTable.find(Name);
  NumericVariable *NumericVariable;
  if (VarTableIter != Context->GlobalNumericVariableTable.end())
    NumericVariable = VarTableIter->second;
  else {
    NumericVariable = Context->makeNumericVariable(
        Name, ExpressionFormat(ExpressionFormat::Kind::Unsigned));
    Context->GlobalNumericVariableTable[Name] = NumericVariable;
  }

  std::optional<size_t> DefLineNumber = NumericVariable->getDefLineNumber();
  if (DefLineNumber && LineNumber && *DefLineNumber == *LineNumber)
    return ErrorDiagnostic::get(
        SM, Name,
        "numeric variable '" + Name +
            "' defined earlier in the same CHECK directive");

  return std::make_unique<NumericVariableUse>(Name, NumericVariable);
}

// Parses one operand at the start of Expr and advances Expr past it. Depending
// on AO, the operand may be a parenthesized expression, a function call, a
// numeric variable use or an integer literal. MaybeInvalidConstraint only
// affects the wording of the final error message.
Expected<std::unique_ptr<ExpressionAST>> Pattern::parseNumericOperand(
    StringRef &Expr, AllowedOperand AO, bool MaybeInvalidConstraint,
    std::optional<size_t> LineNumber, FileCheckPatternContext *Context,
    const SourceMgr &SM) {
  if (Expr.startswith("(")) {
    if (AO != AllowedOperand::Any)
      return ErrorDiagnostic::get(
          SM, Expr, "parenthesized expression not permitted here");
    return parseParenExpr(Expr, LineNumber, Context, SM);
  }

  if (AO == AllowedOperand::LineVar || AO == AllowedOperand::Any) {
    // Try to parse as a numeric variable use.
    Expected<Pattern::VariableProperties> ParseVarResult =
        parseVariable(Expr, SM);
    if (ParseVarResult) {
      // Try to parse a function call.
      if (Expr.ltrim(SpaceChars).startswith("(")) {
        if (AO != AllowedOperand::Any)
          return ErrorDiagnostic::get(SM, ParseVarResult->Name,
                                      "unexpected function call");

        return parseCallExpr(Expr, ParseVarResult->Name, LineNumber, Context,
                             SM);
      }

      return parseNumericVariableUse(ParseVarResult->Name,
                                     ParseVarResult->IsPseudo, LineNumber,
                                     Context, SM);
    }

    if (AO == AllowedOperand::LineVar)
      return ParseVarResult.takeError();
    // Ignore the error and retry parsing as a literal.
    consumeError(ParseVarResult.takeError());
  }

  // Otherwise, parse it as a literal.
  int64_t SignedLiteralValue;
  uint64_t UnsignedLiteralValue;
  StringRef SaveExpr = Expr;
  // Accept both unsigned and signed literals: try an unsigned literal first
  // and fall back to a signed one only when any operand kind is allowed.
  // Legacy literals are parsed in base 10, others with automatic radix
  // detection (radix 0).
  if (!Expr.consumeInteger((AO == AllowedOperand::LegacyLiteral) ? 10 : 0,
                           UnsignedLiteralValue))
    return std::make_unique<ExpressionLiteral>(SaveExpr.drop_back(Expr.size()),
                                               UnsignedLiteralValue);
  // Restore Expr before retrying as a signed literal.
  Expr = SaveExpr;
  if (AO == AllowedOperand::Any && !Expr.consumeInteger(0, SignedLiteralValue))
    return std::make_unique<ExpressionLiteral>(SaveExpr.drop_back(Expr.size()),
                                               SignedLiteralValue);

  return ErrorDiagnostic::get(
      SM, Expr,
      Twine("invalid ") +
          (MaybeInvalidConstraint ? "matching constraint or " : "") +
          "operand format");
}

// Parses a parenthesized expression starting at the '(' in Expr and advances
// Expr past the matching ')'. Returns the AST of the enclosed expression.
Expected<std::unique_ptr<ExpressionAST>>
Pattern::parseParenExpr(StringRef &Expr, std::optional<size_t> LineNumber,
                        FileCheckPatternContext *Context, const SourceMgr &SM) {
  Expr = Expr.ltrim(SpaceChars);
  assert(Expr.startswith("("));

  // Skip the opening parenthesis and any whitespace following it.
  Expr.consume_front("(");
  Expr = Expr.ltrim(SpaceChars);
  if (Expr.empty())
    return ErrorDiagnostic::get(SM, Expr, "missing operand in expression");

  // Note: parseNumericOperand handles nested opening parentheses.
  Expected<std::unique_ptr<ExpressionAST>> SubExprResult = parseNumericOperand(
      Expr, AllowedOperand::Any, /*MaybeInvalidConstraint=*/false, LineNumber,
      Context, SM);
  Expr = Expr.ltrim(SpaceChars);
  // Fold in "<op> <operand>" pairs until the closing parenthesis, the end of
  // the input or an error is reached.
  while (SubExprResult && !Expr.empty() && !Expr.startswith(")")) {
    StringRef OrigExpr = Expr;
    SubExprResult = parseBinop(OrigExpr, Expr, std::move(*SubExprResult), false,
                               LineNumber, Context, SM);
    Expr = Expr.ltrim(SpaceChars);
  }
  if (!SubExprResult)
    return SubExprResult;

  if (!Expr.consume_front(")")) {
    return ErrorDiagnostic::get(SM, Expr,
                                "missing ')' at end of nested expression");
  }
  return SubExprResult;
}

// Parses "<op> <operand>" from RemainingExpr, with LeftOp as the already
// parsed left operand, and returns the resulting binary operation. Returns
// LeftOp unchanged if RemainingExpr holds nothing but whitespace. Only '+' and
// '-' are accepted as operators. Expr is the string starting at the beginning
// of the whole expression; it is trimmed to end where parsing stopped and
// stored as the expression string of the new node.
Expected<std::unique_ptr<ExpressionAST>>
Pattern::parseBinop(StringRef Expr, StringRef &RemainingExpr,
                    std::unique_ptr<ExpressionAST> LeftOp,
                    bool IsLegacyLineExpr, std::optional<size_t> LineNumber,
                    FileCheckPatternContext *Context, const SourceMgr &SM) {
  RemainingExpr = RemainingExpr.ltrim(SpaceChars);
  if (RemainingExpr.empty())
    return std::move(LeftOp);

  // Check if this is a supported operation and select a function to perform
  // it.
  SMLoc OpLoc = SMLoc::getFromPointer(RemainingExpr.data());
  char Operator = popFront(RemainingExpr);
  binop_eval_t EvalBinop;
  switch (Operator) {
  case '+':
    EvalBinop = operator+;
    break;
  case '-':
    EvalBinop = operator-;
    break;
  default:
    return ErrorDiagnostic::get(
        SM, OpLoc, Twine("unsupported operation '") + Twine(Operator) + "'");
  }

  // Parse right operand.
  RemainingExpr = RemainingExpr.ltrim(SpaceChars);
  if (RemainingExpr.empty())
    return ErrorDiagnostic::get(SM, RemainingExpr,
                                "missing operand in expression");
  // The second operand in a legacy @LINE expression is always a literal.
  AllowedOperand AO =
      IsLegacyLineExpr ? AllowedOperand::LegacyLiteral : AllowedOperand::Any;
  Expected<std::unique_ptr<ExpressionAST>> RightOpResult =
      parseNumericOperand(RemainingExpr, AO, /*MaybeInvalidConstraint=*/false,
                          LineNumber, Context, SM);
  if (!RightOpResult)
    return RightOpResult;

  Expr = Expr.drop_back(RemainingExpr.size());
  return std::make_unique<BinaryOperation>(Expr, EvalBinop, std::move(LeftOp),
                                           std::move(*RightOpResult));
}

// Parses the argument list of a call to FuncName, starting at the '(' in Expr,
// and advances Expr past the closing ')'. The known functions are add, div,
// max, min, mul and sub, each of which takes exactly two arguments.
Expected<std::unique_ptr<ExpressionAST>>
Pattern::parseCallExpr(StringRef &Expr, StringRef FuncName,
                       std::optional<size_t> LineNumber,
                       FileCheckPatternContext *Context, const SourceMgr &SM) {
  Expr = Expr.ltrim(SpaceChars);
  assert(Expr.startswith("("));

  auto OptFunc = StringSwitch<binop_eval_t>(FuncName)
                     .Case("add", operator+)
                     .Case("div", operator/)
                     .Case("max", max)
                     .Case("min", min)
                     .Case("mul", operator*)
                     .Case("sub", operator-)
                     .Default(nullptr);

  if (!OptFunc)
    return ErrorDiagnostic::get(
        SM, FuncName, Twine("call to undefined function '") + FuncName + "'");

  Expr.consume_front("(");
  Expr = Expr.ltrim(SpaceChars);

  // Parse call arguments, which are comma separated.
  SmallVector<std::unique_ptr<ExpressionAST>, 4> Args;
  while (!Expr.empty() && !Expr.startswith(")")) {
    if (Expr.startswith(","))
      return ErrorDiagnostic::get(SM, Expr, "missing argument");

    // Parse the argument, which is an arbitrary expression.
    StringRef OuterBinOpExpr = Expr;
    Expected<std::unique_ptr<ExpressionAST>> Arg = parseNumericOperand(
        Expr, AllowedOperand::Any, /*MaybeInvalidConstraint=*/false, LineNumber,
        Context, SM);
    while (Arg && !Expr.empty()) {
      Expr = Expr.ltrim(SpaceChars);
      // Have we reached an argument terminator?
      if (Expr.startswith(",") || Expr.startswith(")"))
        break;

      // Arg = Arg <op> <expr>
      Arg = parseBinop(OuterBinOpExpr, Expr, std::move(*Arg), false, LineNumber,
                       Context, SM);
    }

    // Prefer an expression error over a generic invalid argument message.
    if (!Arg)
      return Arg.takeError();
    Args.push_back(std::move(*Arg));

    // Have we parsed all available arguments?
    Expr = Expr.ltrim(SpaceChars);
    if (!Expr.consume_front(","))
      break;

    // A comma must be followed by another argument.
    Expr = Expr.ltrim(SpaceChars);
    if (Expr.startswith(")"))
      return ErrorDiagnostic::get(SM, Expr, "missing argument");
  }

  if (!Expr.consume_front(")"))
    return ErrorDiagnostic::get(SM, Expr,
                                "missing ')' at end of call expression");

  const unsigned NumArgs = Args.size();
  // NOTE(review): Expr at this point is the text remaining after the call, and
  // that is what is passed as the expression string of the node -- confirm
  // this is intended.
  if (NumArgs == 2)
    return std::make_unique<BinaryOperation>(Expr, *OptFunc, std::move(Args[0]),
                                             std::move(Args[1]));

  // TODO: Support more than binop_eval_t.
  return ErrorDiagnostic::get(SM, FuncName,
                              Twine("function '") + FuncName +
                                  Twine("' takes 2 arguments but ") +
                                  Twine(NumArgs) + " given");
}

// Parses the content of a numeric substitution block. Expr may hold, in
// order: a format specifier terminated by ',', a variable definition
// terminated by ':', an "==" matching constraint and an expression. Returns
// the parsed Expression with its selected format, and sets
// DefinedNumericVariable to the variable being defined, or to std::nullopt if
// the block defines none.
Expected<std::unique_ptr<Expression>> Pattern::parseNumericSubstitutionBlock(
    StringRef Expr, std::optional<NumericVariable *> &DefinedNumericVariable,
    bool IsLegacyLineExpr, std::optional<size_t> LineNumber,
    FileCheckPatternContext *Context, const SourceMgr &SM) {
  std::unique_ptr<ExpressionAST> ExpressionASTPointer = nullptr;
  StringRef DefExpr = StringRef();
  DefinedNumericVariable = std::nullopt;
  ExpressionFormat ExplicitFormat = ExpressionFormat();
  unsigned Precision = 0;

  // Parse format specifier (NOTE: ',' is also an argument separator). A ','
  // only ends a format specifier if it appears before the first '('.
  size_t FormatSpecEnd = Expr.find(',');
  size_t FunctionStart = Expr.find('(');
  if (FormatSpecEnd != StringRef::npos && FormatSpecEnd < FunctionStart) {
    StringRef FormatExpr = Expr.take_front(FormatSpecEnd);
    Expr = Expr.drop_front(FormatSpecEnd + 1);
    FormatExpr = FormatExpr.trim(SpaceChars);
    if (!FormatExpr.consume_front("%"))
      return ErrorDiagnostic::get(
          SM, FormatExpr,
          "invalid matching format specification in expression");

    // Parse alternate form flag.
    SMLoc AlternateFormFlagLoc = SMLoc::getFromPointer(FormatExpr.data());
    bool AlternateForm = FormatExpr.consume_front("#");

    // Parse precision.
    if (FormatExpr.consume_front(".")) {
      if (FormatExpr.consumeInteger(10, Precision))
        return ErrorDiagnostic::get(SM, FormatExpr,
                                    "invalid precision in format specifier");
    }

    if (!FormatExpr.empty()) {
      // Check for unknown matching format specifier and set matching format in
      // class instance representing this expression.
      SMLoc FmtLoc = SMLoc::getFromPointer(FormatExpr.data());
      switch (popFront(FormatExpr)) {
      case 'u':
        ExplicitFormat =
            ExpressionFormat(ExpressionFormat::Kind::Unsigned, Precision);
        break;
      case 'd':
        ExplicitFormat =
            ExpressionFormat(ExpressionFormat::Kind::Signed, Precision);
        break;
      case 'x':
        ExplicitFormat = ExpressionFormat(ExpressionFormat::Kind::HexLower,
                                          Precision, AlternateForm);
        break;
      case 'X':
        ExplicitFormat = ExpressionFormat(ExpressionFormat::Kind::HexUpper,
                                          Precision, AlternateForm);
        break;
      default:
        return ErrorDiagnostic::get(SM, FmtLoc,
                                    "invalid format specifier in expression");
      }
    }

    if (AlternateForm && ExplicitFormat != ExpressionFormat::Kind::HexLower &&
        ExplicitFormat != ExpressionFormat::Kind::HexUpper)
      return ErrorDiagnostic::get(
          SM, AlternateFormFlagLoc,
          "alternate form only supported for hex values");

    FormatExpr = FormatExpr.ltrim(SpaceChars);
    if (!FormatExpr.empty())
      return ErrorDiagnostic::get(
          SM, FormatExpr,
          "invalid matching format specification in expression");
  }

  // Save variable definition expression if any.
  size_t DefEnd = Expr.find(':');
  if (DefEnd != StringRef::npos) {
    DefExpr = Expr.substr(0, DefEnd);
    Expr = Expr.substr(DefEnd + 1);
  }

  // Parse matching constraint.
  Expr = Expr.ltrim(SpaceChars);
  bool HasParsedValidConstraint = false;
  if (Expr.consume_front("=="))
    HasParsedValidConstraint = true;

  // Parse the expression itself.
  Expr = Expr.ltrim(SpaceChars);
  if (Expr.empty()) {
    if (HasParsedValidConstraint)
      return ErrorDiagnostic::get(
          SM, Expr, "empty numeric expression should not have a constraint");
  } else {
    Expr = Expr.rtrim(SpaceChars);
    StringRef OuterBinOpExpr = Expr;
    // The first operand in a legacy @LINE expression is always the @LINE
    // pseudo variable.
    AllowedOperand AO =
        IsLegacyLineExpr ? AllowedOperand::LineVar : AllowedOperand::Any;
    Expected<std::unique_ptr<ExpressionAST>> ParseResult = parseNumericOperand(
        Expr, AO, !HasParsedValidConstraint, LineNumber, Context, SM);
    while (ParseResult && !Expr.empty()) {
      ParseResult = parseBinop(OuterBinOpExpr, Expr, std::move(*ParseResult),
                               IsLegacyLineExpr, LineNumber, Context, SM);
      // Legacy @LINE expressions only allow 2 operands.
      if (ParseResult && IsLegacyLineExpr && !Expr.empty())
        return ErrorDiagnostic::get(
            SM, Expr,
            "unexpected characters at end of expression '" + Expr + "'");
    }
    if (!ParseResult)
      return ParseResult.takeError();
    ExpressionASTPointer = std::move(*ParseResult);
  }

  // Select format of the expression, i.e. (i) its explicit format, if any,
  // otherwise (ii) its implicit format, if any, otherwise (iii) the default
  // format (unsigned). Error out in case of conflicting implicit format
  // without explicit format.
  ExpressionFormat Format;
  if (ExplicitFormat)
    Format = ExplicitFormat;
  else if (ExpressionASTPointer) {
    Expected<ExpressionFormat> ImplicitFormat =
        ExpressionASTPointer->getImplicitFormat(SM);
    if (!ImplicitFormat)
      return ImplicitFormat.takeError();
    Format = *ImplicitFormat;
  }
  if (!Format)
    Format = ExpressionFormat(ExpressionFormat::Kind::Unsigned, Precision);

  std::unique_ptr<Expression> ExpressionPointer =
      std::make_unique<Expression>(std::move(ExpressionASTPointer), Format);

  // Parse the numeric variable definition.
  if (DefEnd != StringRef::npos) {
    DefExpr = DefExpr.ltrim(SpaceChars);
    Expected<NumericVariable *> ParseResult = parseNumericVariableDefinition(
        DefExpr, Context, LineNumber, ExpressionPointer->getFormat(), SM);

    if (!ParseResult)
      return ParseResult.takeError();
    DefinedNumericVariable = *ParseResult;
  }

  return std::move(ExpressionPointer);
}

// Parses the check pattern PatternStr found after the directive with prefix
// Prefix. Each error path below prints a diagnostic through SM and returns
// true; the early exits for empty, literal and fixed-string patterns return
// false.
bool Pattern::parsePattern(StringRef PatternStr, StringRef Prefix,
                           SourceMgr &SM, const FileCheckRequest &Req) {
  bool MatchFullLinesHere = Req.MatchFullLines && CheckTy != Check::CheckNot;
  IgnoreCase = Req.IgnoreCase;

  PatternLoc = SMLoc::getFromPointer(PatternStr.data());

  if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines))
    // Ignore trailing whitespace.
    while (!PatternStr.empty() &&
           (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
      PatternStr = PatternStr.substr(0, PatternStr.size() - 1);

  // Check that there is something on the line.
  if (PatternStr.empty() && CheckTy != Check::CheckEmpty) {
    SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
                    "found empty check string with prefix '" + Prefix + ":'");
    return true;
  }

  if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) {
    SM.PrintMessage(
        PatternLoc, SourceMgr::DK_Error,
        "found non-empty check string for empty check with prefix '" + Prefix +
            ":'");
    return true;
  }

  if (CheckTy == Check::CheckEmpty) {
    RegExStr = "(\n$)";
    return false;
  }

  // If literal check, set fixed string.
  if (CheckTy.isLiteralMatch()) {
    FixedStr = PatternStr;
    return false;
  }

  // Check to see if this is a fixed string, or if it has regex pieces.
  if (!MatchFullLinesHere &&
      (PatternStr.size() < 2 ||
       (!PatternStr.contains("{{") && !PatternStr.contains("[[")))) {
    FixedStr = PatternStr;
    return false;
  }

  if (MatchFullLinesHere) {
    RegExStr += '^';
    if (!Req.NoCanonicalizeWhiteSpace)
      RegExStr += " *";
  }

  // Paren value #0 is for the fully matched string.  Any new parenthesized
  // values add from there.
  unsigned CurParen = 1;

  // Otherwise, there is at least one regex piece.  Build up the regex pattern
  // by escaping scary characters in fixed strings, building up one big regex.
  while (!PatternStr.empty()) {
    // RegEx matches.
    if (PatternStr.startswith("{{")) {
      // This is the start of a regex match.  Scan for the }}.
      size_t End = PatternStr.find("}}");
      if (End == StringRef::npos) {
        SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
                        SourceMgr::DK_Error,
                        "found start of regex string with no end '}}'");
        return true;
      }

      // Enclose {{}} patterns in parens just like [[]] even though we're not
      // capturing the result for any purpose.  This is required in case the
      // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
      // want this to turn into: "abc(x|z)def" not "abcx|zdef".
      RegExStr += '(';
      ++CurParen;

      if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
        return true;
      RegExStr += ')';

      PatternStr = PatternStr.substr(End + 2);
      continue;
    }

    // String and numeric substitution blocks. Pattern substitution blocks come
    // in two forms: [[foo:.*]] and [[foo]]. The former matches .* (or some
    // other regex) and assigns it to the string variable 'foo'. The latter
    // substitutes foo's value. Numeric substitution blocks recognize the same
    // form as string ones, but start with a '#' sign after the double
    // brackets. They also accept a combined form which sets a numeric variable
    // to the evaluation of an expression. Both string and numeric variable
    // names must satisfy the regular expression "[a-zA-Z_][0-9a-zA-Z_]*" to be
    // valid, as this helps catch some common errors. If there are extra '['s
    // before the "[[", treat them literally.
    if (PatternStr.startswith("[[") && !PatternStr.startswith("[[[")) {
      StringRef UnparsedPatternStr = PatternStr.substr(2);
      // Find the closing bracket pair ending the match.  End is going to be an
      // offset relative to the beginning of the match string.
      size_t End = FindRegexVarEnd(UnparsedPatternStr, SM);
      StringRef MatchStr = UnparsedPatternStr.substr(0, End);
      bool IsNumBlock = MatchStr.consume_front("#");

      if (End == StringRef::npos) {
        SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
                        SourceMgr::DK_Error,
                        "Invalid substitution block, no ]] found");
        return true;
      }
      // Strip the substitution block we are parsing. End points to the start
      // of the "]]" closing the expression so account for it in computing the
      // index of the first unparsed character.
      PatternStr = UnparsedPatternStr.substr(End + 2);

      bool IsDefinition = false;
      bool SubstNeeded = false;
      // Whether the substitution block is a legacy use of @LINE with string
      // substitution block syntax.
      bool IsLegacyLineExpr = false;
      StringRef DefName;
      StringRef SubstStr;
      StringRef MatchRegexp;
      std::string WildcardRegexp;
      size_t SubstInsertIdx = RegExStr.size();

      // Parse string variable or legacy @LINE expression.
      if (!IsNumBlock) {
        size_t VarEndIdx = MatchStr.find(':');
        // Whitespace is not allowed in the part before the ':' (or anywhere
        // in the block if there is no ':').
        size_t SpacePos = MatchStr.substr(0, VarEndIdx).find_first_of(" \t");
        if (SpacePos != StringRef::npos) {
          SM.PrintMessage(SMLoc::getFromPointer(MatchStr.data() + SpacePos),
                          SourceMgr::DK_Error, "unexpected whitespace");
          return true;
        }

        // Get the name (e.g. "foo") and verify it is well formed.
        StringRef OrigMatchStr = MatchStr;
        Expected<Pattern::VariableProperties> ParseVarResult =
            parseVariable(MatchStr, SM);
        if (!ParseVarResult) {
          logAllUnhandledErrors(ParseVarResult.takeError(), errs());
          return true;
        }
        StringRef Name = ParseVarResult->Name;
        bool IsPseudo = ParseVarResult->IsPseudo;

        // A ':' in the block marks a definition; otherwise it is a use.
        IsDefinition = (VarEndIdx != StringRef::npos);
        SubstNeeded = !IsDefinition;
        if (IsDefinition) {
          if ((IsPseudo || !MatchStr.consume_front(":"))) {
            SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
                            SourceMgr::DK_Error,
                            "invalid name in string variable definition");
            return true;
          }

          // Detect collisions between string and numeric variables when the
          // former is created later than the latter.
          if (Context->GlobalNumericVariableTable.contains(Name)) {
            SM.PrintMessage(
                SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
                "numeric variable with name '" + Name + "' already exists");
            return true;
          }
          DefName = Name;
          MatchRegexp = MatchStr;
        } else {
          if (IsPseudo) {
            // A pseudo variable used with string substitution syntax is
            // re-parsed from the start as a legacy numeric @LINE expression.
            MatchStr = OrigMatchStr;
            IsLegacyLineExpr = IsNumBlock = true;
          } else {
            if (!MatchStr.empty()) {
              SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
                              SourceMgr::DK_Error,
                              "invalid name in string variable use");
              return true;
            }
            SubstStr = Name;
          }
        }
      }

      // Parse numeric substitution block.
976 std::unique_ptr<Expression> ExpressionPointer; 977 std::optional<NumericVariable *> DefinedNumericVariable; 978 if (IsNumBlock) { 979 Expected<std::unique_ptr<Expression>> ParseResult = 980 parseNumericSubstitutionBlock(MatchStr, DefinedNumericVariable, 981 IsLegacyLineExpr, LineNumber, Context, 982 SM); 983 if (!ParseResult) { 984 logAllUnhandledErrors(ParseResult.takeError(), errs()); 985 return true; 986 } 987 ExpressionPointer = std::move(*ParseResult); 988 SubstNeeded = ExpressionPointer->getAST() != nullptr; 989 if (DefinedNumericVariable) { 990 IsDefinition = true; 991 DefName = (*DefinedNumericVariable)->getName(); 992 } 993 if (SubstNeeded) 994 SubstStr = MatchStr; 995 else { 996 ExpressionFormat Format = ExpressionPointer->getFormat(); 997 WildcardRegexp = cantFail(Format.getWildcardRegex()); 998 MatchRegexp = WildcardRegexp; 999 } 1000 } 1001 1002 // Handle variable definition: [[<def>:(...)]] and [[#(...)<def>:(...)]]. 1003 if (IsDefinition) { 1004 RegExStr += '('; 1005 ++SubstInsertIdx; 1006 1007 if (IsNumBlock) { 1008 NumericVariableMatch NumericVariableDefinition = { 1009 *DefinedNumericVariable, CurParen}; 1010 NumericVariableDefs[DefName] = NumericVariableDefinition; 1011 // This store is done here rather than in match() to allow 1012 // parseNumericVariableUse() to get the pointer to the class instance 1013 // of the right variable definition corresponding to a given numeric 1014 // variable use. 1015 Context->GlobalNumericVariableTable[DefName] = 1016 *DefinedNumericVariable; 1017 } else { 1018 VariableDefs[DefName] = CurParen; 1019 // Mark string variable as defined to detect collisions between 1020 // string and numeric variables in parseNumericVariableUse() and 1021 // defineCmdlineVariables() when the latter is created later than the 1022 // former. We cannot reuse GlobalVariableTable for this by populating 1023 // it with an empty string since we would then lose the ability to 1024 // detect the use of an undefined variable in match(). 
1025 Context->DefinedVariableTable[DefName] = true; 1026 } 1027 1028 ++CurParen; 1029 } 1030 1031 if (!MatchRegexp.empty() && AddRegExToRegEx(MatchRegexp, CurParen, SM)) 1032 return true; 1033 1034 if (IsDefinition) 1035 RegExStr += ')'; 1036 1037 // Handle substitutions: [[foo]] and [[#<foo expr>]]. 1038 if (SubstNeeded) { 1039 // Handle substitution of string variables that were defined earlier on 1040 // the same line by emitting a backreference. Expressions do not 1041 // support substituting a numeric variable defined on the same line. 1042 if (!IsNumBlock && VariableDefs.find(SubstStr) != VariableDefs.end()) { 1043 unsigned CaptureParenGroup = VariableDefs[SubstStr]; 1044 if (CaptureParenGroup < 1 || CaptureParenGroup > 9) { 1045 SM.PrintMessage(SMLoc::getFromPointer(SubstStr.data()), 1046 SourceMgr::DK_Error, 1047 "Can't back-reference more than 9 variables"); 1048 return true; 1049 } 1050 AddBackrefToRegEx(CaptureParenGroup); 1051 } else { 1052 // Handle substitution of string variables ([[<var>]]) defined in 1053 // previous CHECK patterns, and substitution of expressions. 1054 Substitution *Substitution = 1055 IsNumBlock 1056 ? Context->makeNumericSubstitution( 1057 SubstStr, std::move(ExpressionPointer), SubstInsertIdx) 1058 : Context->makeStringSubstitution(SubstStr, SubstInsertIdx); 1059 Substitutions.push_back(Substitution); 1060 } 1061 } 1062 1063 continue; 1064 } 1065 1066 // Handle fixed string matches. 1067 // Find the end, which is the start of the next regex. 
1068 size_t FixedMatchEnd = 1069 std::min(PatternStr.find("{{", 1), PatternStr.find("[[", 1)); 1070 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 1071 PatternStr = PatternStr.substr(FixedMatchEnd); 1072 } 1073 1074 if (MatchFullLinesHere) { 1075 if (!Req.NoCanonicalizeWhiteSpace) 1076 RegExStr += " *"; 1077 RegExStr += '$'; 1078 } 1079 1080 return false; 1081 } 1082 1083 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) { 1084 Regex R(RS); 1085 std::string Error; 1086 if (!R.isValid(Error)) { 1087 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 1088 "invalid regex: " + Error); 1089 return true; 1090 } 1091 1092 RegExStr += RS.str(); 1093 CurParen += R.getNumMatches(); 1094 return false; 1095 } 1096 1097 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 1098 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 1099 std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum); 1100 RegExStr += Backref; 1101 } 1102 1103 Pattern::MatchResult Pattern::match(StringRef Buffer, 1104 const SourceMgr &SM) const { 1105 // If this is the EOF pattern, match it immediately. 1106 if (CheckTy == Check::CheckEOF) 1107 return MatchResult(Buffer.size(), 0, Error::success()); 1108 1109 // If this is a fixed string pattern, just match it now. 1110 if (!FixedStr.empty()) { 1111 size_t Pos = 1112 IgnoreCase ? Buffer.find_insensitive(FixedStr) : Buffer.find(FixedStr); 1113 if (Pos == StringRef::npos) 1114 return make_error<NotFoundError>(); 1115 return MatchResult(Pos, /*MatchLen=*/FixedStr.size(), Error::success()); 1116 } 1117 1118 // Regex match. 1119 1120 // If there are substitutions, we need to create a temporary string with the 1121 // actual value. 
1122 StringRef RegExToMatch = RegExStr; 1123 std::string TmpStr; 1124 if (!Substitutions.empty()) { 1125 TmpStr = RegExStr; 1126 if (LineNumber) 1127 Context->LineVariable->setValue(ExpressionValue(*LineNumber)); 1128 1129 size_t InsertOffset = 0; 1130 // Substitute all string variables and expressions whose values are only 1131 // now known. Use of string variables defined on the same line are handled 1132 // by back-references. 1133 Error Errs = Error::success(); 1134 for (const auto &Substitution : Substitutions) { 1135 // Substitute and check for failure (e.g. use of undefined variable). 1136 Expected<std::string> Value = Substitution->getResult(); 1137 if (!Value) { 1138 // Convert to an ErrorDiagnostic to get location information. This is 1139 // done here rather than printMatch/printNoMatch since now we know which 1140 // substitution block caused the overflow. 1141 Errs = joinErrors(std::move(Errs), 1142 handleErrors( 1143 Value.takeError(), 1144 [&](const OverflowError &E) { 1145 return ErrorDiagnostic::get( 1146 SM, Substitution->getFromString(), 1147 "unable to substitute variable or " 1148 "numeric expression: overflow error"); 1149 }, 1150 [&SM](const UndefVarError &E) { 1151 return ErrorDiagnostic::get(SM, E.getVarName(), 1152 E.message()); 1153 })); 1154 continue; 1155 } 1156 1157 // Plop it into the regex at the adjusted offset. 1158 TmpStr.insert(TmpStr.begin() + Substitution->getIndex() + InsertOffset, 1159 Value->begin(), Value->end()); 1160 InsertOffset += Value->size(); 1161 } 1162 if (Errs) 1163 return std::move(Errs); 1164 1165 // Match the newly constructed regex. 1166 RegExToMatch = TmpStr; 1167 } 1168 1169 SmallVector<StringRef, 4> MatchInfo; 1170 unsigned int Flags = Regex::Newline; 1171 if (IgnoreCase) 1172 Flags |= Regex::IgnoreCase; 1173 if (!Regex(RegExToMatch, Flags).match(Buffer, &MatchInfo)) 1174 return make_error<NotFoundError>(); 1175 1176 // Successful regex match. 
1177 assert(!MatchInfo.empty() && "Didn't get any match"); 1178 StringRef FullMatch = MatchInfo[0]; 1179 1180 // If this defines any string variables, remember their values. 1181 for (const auto &VariableDef : VariableDefs) { 1182 assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); 1183 Context->GlobalVariableTable[VariableDef.first] = 1184 MatchInfo[VariableDef.second]; 1185 } 1186 1187 // Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after 1188 // the required preceding newline, which is consumed by the pattern in the 1189 // case of CHECK-EMPTY but not CHECK-NEXT. 1190 size_t MatchStartSkip = CheckTy == Check::CheckEmpty; 1191 Match TheMatch; 1192 TheMatch.Pos = FullMatch.data() - Buffer.data() + MatchStartSkip; 1193 TheMatch.Len = FullMatch.size() - MatchStartSkip; 1194 1195 // If this defines any numeric variables, remember their values. 1196 for (const auto &NumericVariableDef : NumericVariableDefs) { 1197 const NumericVariableMatch &NumericVariableMatch = 1198 NumericVariableDef.getValue(); 1199 unsigned CaptureParenGroup = NumericVariableMatch.CaptureParenGroup; 1200 assert(CaptureParenGroup < MatchInfo.size() && "Internal paren error"); 1201 NumericVariable *DefinedNumericVariable = 1202 NumericVariableMatch.DefinedNumericVariable; 1203 1204 StringRef MatchedValue = MatchInfo[CaptureParenGroup]; 1205 ExpressionFormat Format = DefinedNumericVariable->getImplicitFormat(); 1206 Expected<ExpressionValue> Value = 1207 Format.valueFromStringRepr(MatchedValue, SM); 1208 if (!Value) 1209 return MatchResult(TheMatch, Value.takeError()); 1210 DefinedNumericVariable->setValue(*Value, MatchedValue); 1211 } 1212 1213 return MatchResult(TheMatch, Error::success()); 1214 } 1215 1216 unsigned Pattern::computeMatchDistance(StringRef Buffer) const { 1217 // Just compute the number of matching characters. For regular expressions, we 1218 // just compare against the regex itself and hope for the best. 
1219 // 1220 // FIXME: One easy improvement here is have the regex lib generate a single 1221 // example regular expression which matches, and use that as the example 1222 // string. 1223 StringRef ExampleString(FixedStr); 1224 if (ExampleString.empty()) 1225 ExampleString = RegExStr; 1226 1227 // Only compare up to the first line in the buffer, or the string size. 1228 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 1229 BufferPrefix = BufferPrefix.split('\n').first; 1230 return BufferPrefix.edit_distance(ExampleString); 1231 } 1232 1233 void Pattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer, 1234 SMRange Range, 1235 FileCheckDiag::MatchType MatchTy, 1236 std::vector<FileCheckDiag> *Diags) const { 1237 // Print what we know about substitutions. 1238 if (!Substitutions.empty()) { 1239 for (const auto &Substitution : Substitutions) { 1240 SmallString<256> Msg; 1241 raw_svector_ostream OS(Msg); 1242 1243 Expected<std::string> MatchedValue = Substitution->getResult(); 1244 // Substitution failures are handled in printNoMatch(). 1245 if (!MatchedValue) { 1246 consumeError(MatchedValue.takeError()); 1247 continue; 1248 } 1249 1250 OS << "with \""; 1251 OS.write_escaped(Substitution->getFromString()) << "\" equal to \""; 1252 OS.write_escaped(*MatchedValue) << "\""; 1253 1254 // We report only the start of the match/search range to suggest we are 1255 // reporting the substitutions as set at the start of the match/search. 1256 // Indicating a non-zero-length range might instead seem to imply that the 1257 // substitution matches or was captured from exactly that range. 
1258 if (Diags) 1259 Diags->emplace_back(SM, CheckTy, getLoc(), MatchTy, 1260 SMRange(Range.Start, Range.Start), OS.str()); 1261 else 1262 SM.PrintMessage(Range.Start, SourceMgr::DK_Note, OS.str()); 1263 } 1264 } 1265 } 1266 1267 void Pattern::printVariableDefs(const SourceMgr &SM, 1268 FileCheckDiag::MatchType MatchTy, 1269 std::vector<FileCheckDiag> *Diags) const { 1270 if (VariableDefs.empty() && NumericVariableDefs.empty()) 1271 return; 1272 // Build list of variable captures. 1273 struct VarCapture { 1274 StringRef Name; 1275 SMRange Range; 1276 }; 1277 SmallVector<VarCapture, 2> VarCaptures; 1278 for (const auto &VariableDef : VariableDefs) { 1279 VarCapture VC; 1280 VC.Name = VariableDef.first; 1281 StringRef Value = Context->GlobalVariableTable[VC.Name]; 1282 SMLoc Start = SMLoc::getFromPointer(Value.data()); 1283 SMLoc End = SMLoc::getFromPointer(Value.data() + Value.size()); 1284 VC.Range = SMRange(Start, End); 1285 VarCaptures.push_back(VC); 1286 } 1287 for (const auto &VariableDef : NumericVariableDefs) { 1288 VarCapture VC; 1289 VC.Name = VariableDef.getKey(); 1290 std::optional<StringRef> StrValue = 1291 VariableDef.getValue().DefinedNumericVariable->getStringValue(); 1292 if (!StrValue) 1293 continue; 1294 SMLoc Start = SMLoc::getFromPointer(StrValue->data()); 1295 SMLoc End = SMLoc::getFromPointer(StrValue->data() + StrValue->size()); 1296 VC.Range = SMRange(Start, End); 1297 VarCaptures.push_back(VC); 1298 } 1299 // Sort variable captures by the order in which they matched the input. 1300 // Ranges shouldn't be overlapping, so we can just compare the start. 1301 llvm::sort(VarCaptures, [](const VarCapture &A, const VarCapture &B) { 1302 if (&A == &B) 1303 return false; 1304 assert(A.Range.Start != B.Range.Start && 1305 "unexpected overlapping variable captures"); 1306 return A.Range.Start.getPointer() < B.Range.Start.getPointer(); 1307 }); 1308 // Create notes for the sorted captures. 
1309 for (const VarCapture &VC : VarCaptures) { 1310 SmallString<256> Msg; 1311 raw_svector_ostream OS(Msg); 1312 OS << "captured var \"" << VC.Name << "\""; 1313 if (Diags) 1314 Diags->emplace_back(SM, CheckTy, getLoc(), MatchTy, VC.Range, OS.str()); 1315 else 1316 SM.PrintMessage(VC.Range.Start, SourceMgr::DK_Note, OS.str(), VC.Range); 1317 } 1318 } 1319 1320 static SMRange ProcessMatchResult(FileCheckDiag::MatchType MatchTy, 1321 const SourceMgr &SM, SMLoc Loc, 1322 Check::FileCheckType CheckTy, 1323 StringRef Buffer, size_t Pos, size_t Len, 1324 std::vector<FileCheckDiag> *Diags, 1325 bool AdjustPrevDiags = false) { 1326 SMLoc Start = SMLoc::getFromPointer(Buffer.data() + Pos); 1327 SMLoc End = SMLoc::getFromPointer(Buffer.data() + Pos + Len); 1328 SMRange Range(Start, End); 1329 if (Diags) { 1330 if (AdjustPrevDiags) { 1331 SMLoc CheckLoc = Diags->rbegin()->CheckLoc; 1332 for (auto I = Diags->rbegin(), E = Diags->rend(); 1333 I != E && I->CheckLoc == CheckLoc; ++I) 1334 I->MatchTy = MatchTy; 1335 } else 1336 Diags->emplace_back(SM, CheckTy, Loc, MatchTy, Range); 1337 } 1338 return Range; 1339 } 1340 1341 void Pattern::printFuzzyMatch(const SourceMgr &SM, StringRef Buffer, 1342 std::vector<FileCheckDiag> *Diags) const { 1343 // Attempt to find the closest/best fuzzy match. Usually an error happens 1344 // because some string in the output didn't exactly match. In these cases, we 1345 // would like to show the user a best guess at what "should have" matched, to 1346 // save them having to actually check the input manually. 1347 size_t NumLinesForward = 0; 1348 size_t Best = StringRef::npos; 1349 double BestQuality = 0; 1350 1351 // Use an arbitrary 4k limit on how far we will search. 1352 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 1353 if (Buffer[i] == '\n') 1354 ++NumLinesForward; 1355 1356 // Patterns have leading whitespace stripped, so skip whitespace when 1357 // looking for something which looks like a pattern. 
1358 if (Buffer[i] == ' ' || Buffer[i] == '\t') 1359 continue; 1360 1361 // Compute the "quality" of this match as an arbitrary combination of the 1362 // match distance and the number of lines skipped to get to this match. 1363 unsigned Distance = computeMatchDistance(Buffer.substr(i)); 1364 double Quality = Distance + (NumLinesForward / 100.); 1365 1366 if (Quality < BestQuality || Best == StringRef::npos) { 1367 Best = i; 1368 BestQuality = Quality; 1369 } 1370 } 1371 1372 // Print the "possible intended match here" line if we found something 1373 // reasonable and not equal to what we showed in the "scanning from here" 1374 // line. 1375 if (Best && Best != StringRef::npos && BestQuality < 50) { 1376 SMRange MatchRange = 1377 ProcessMatchResult(FileCheckDiag::MatchFuzzy, SM, getLoc(), 1378 getCheckTy(), Buffer, Best, 0, Diags); 1379 SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, 1380 "possible intended match here"); 1381 1382 // FIXME: If we wanted to be really friendly we would show why the match 1383 // failed, as it can be hard to spot simple one character differences. 1384 } 1385 } 1386 1387 Expected<StringRef> 1388 FileCheckPatternContext::getPatternVarValue(StringRef VarName) { 1389 auto VarIter = GlobalVariableTable.find(VarName); 1390 if (VarIter == GlobalVariableTable.end()) 1391 return make_error<UndefVarError>(VarName); 1392 1393 return VarIter->second; 1394 } 1395 1396 template <class... Types> 1397 NumericVariable *FileCheckPatternContext::makeNumericVariable(Types... 
args) { 1398 NumericVariables.push_back(std::make_unique<NumericVariable>(args...)); 1399 return NumericVariables.back().get(); 1400 } 1401 1402 Substitution * 1403 FileCheckPatternContext::makeStringSubstitution(StringRef VarName, 1404 size_t InsertIdx) { 1405 Substitutions.push_back( 1406 std::make_unique<StringSubstitution>(this, VarName, InsertIdx)); 1407 return Substitutions.back().get(); 1408 } 1409 1410 Substitution *FileCheckPatternContext::makeNumericSubstitution( 1411 StringRef ExpressionStr, std::unique_ptr<Expression> Expression, 1412 size_t InsertIdx) { 1413 Substitutions.push_back(std::make_unique<NumericSubstitution>( 1414 this, ExpressionStr, std::move(Expression), InsertIdx)); 1415 return Substitutions.back().get(); 1416 } 1417 1418 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 1419 // Offset keeps track of the current offset within the input Str 1420 size_t Offset = 0; 1421 // [...] Nesting depth 1422 size_t BracketDepth = 0; 1423 1424 while (!Str.empty()) { 1425 if (Str.startswith("]]") && BracketDepth == 0) 1426 return Offset; 1427 if (Str[0] == '\\') { 1428 // Backslash escapes the next char within regexes, so skip them both. 1429 Str = Str.substr(2); 1430 Offset += 2; 1431 } else { 1432 switch (Str[0]) { 1433 default: 1434 break; 1435 case '[': 1436 BracketDepth++; 1437 break; 1438 case ']': 1439 if (BracketDepth == 0) { 1440 SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 1441 SourceMgr::DK_Error, 1442 "missing closing \"]\" for regex variable"); 1443 exit(1); 1444 } 1445 BracketDepth--; 1446 break; 1447 } 1448 Str = Str.substr(1); 1449 Offset++; 1450 } 1451 } 1452 1453 return StringRef::npos; 1454 } 1455 1456 StringRef FileCheck::CanonicalizeFile(MemoryBuffer &MB, 1457 SmallVectorImpl<char> &OutputBuffer) { 1458 OutputBuffer.reserve(MB.getBufferSize()); 1459 1460 for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd(); 1461 Ptr != End; ++Ptr) { 1462 // Eliminate trailing dosish \r. 
1463 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 1464 continue; 1465 } 1466 1467 // If current char is not a horizontal whitespace or if horizontal 1468 // whitespace canonicalization is disabled, dump it to output as is. 1469 if (Req.NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) { 1470 OutputBuffer.push_back(*Ptr); 1471 continue; 1472 } 1473 1474 // Otherwise, add one space and advance over neighboring space. 1475 OutputBuffer.push_back(' '); 1476 while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t')) 1477 ++Ptr; 1478 } 1479 1480 // Add a null byte and then return all but that byte. 1481 OutputBuffer.push_back('\0'); 1482 return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1); 1483 } 1484 1485 FileCheckDiag::FileCheckDiag(const SourceMgr &SM, 1486 const Check::FileCheckType &CheckTy, 1487 SMLoc CheckLoc, MatchType MatchTy, 1488 SMRange InputRange, StringRef Note) 1489 : CheckTy(CheckTy), CheckLoc(CheckLoc), MatchTy(MatchTy), Note(Note) { 1490 auto Start = SM.getLineAndColumn(InputRange.Start); 1491 auto End = SM.getLineAndColumn(InputRange.End); 1492 InputStartLine = Start.first; 1493 InputStartCol = Start.second; 1494 InputEndLine = End.first; 1495 InputEndCol = End.second; 1496 } 1497 1498 static bool IsPartOfWord(char c) { 1499 return (isAlnum(c) || c == '-' || c == '_'); 1500 } 1501 1502 Check::FileCheckType &Check::FileCheckType::setCount(int C) { 1503 assert(Count > 0 && "zero and negative counts are not supported"); 1504 assert((C == 1 || Kind == CheckPlain) && 1505 "count supported only for plain CHECK directives"); 1506 Count = C; 1507 return *this; 1508 } 1509 1510 std::string Check::FileCheckType::getModifiersDescription() const { 1511 if (Modifiers.none()) 1512 return ""; 1513 std::string Ret; 1514 raw_string_ostream OS(Ret); 1515 OS << '{'; 1516 if (isLiteralMatch()) 1517 OS << "LITERAL"; 1518 OS << '}'; 1519 return OS.str(); 1520 } 1521 1522 std::string Check::FileCheckType::getDescription(StringRef Prefix) 
const { 1523 // Append directive modifiers. 1524 auto WithModifiers = [this, Prefix](StringRef Str) -> std::string { 1525 return (Prefix + Str + getModifiersDescription()).str(); 1526 }; 1527 1528 switch (Kind) { 1529 case Check::CheckNone: 1530 return "invalid"; 1531 case Check::CheckMisspelled: 1532 return "misspelled"; 1533 case Check::CheckPlain: 1534 if (Count > 1) 1535 return WithModifiers("-COUNT"); 1536 return WithModifiers(""); 1537 case Check::CheckNext: 1538 return WithModifiers("-NEXT"); 1539 case Check::CheckSame: 1540 return WithModifiers("-SAME"); 1541 case Check::CheckNot: 1542 return WithModifiers("-NOT"); 1543 case Check::CheckDAG: 1544 return WithModifiers("-DAG"); 1545 case Check::CheckLabel: 1546 return WithModifiers("-LABEL"); 1547 case Check::CheckEmpty: 1548 return WithModifiers("-EMPTY"); 1549 case Check::CheckComment: 1550 return std::string(Prefix); 1551 case Check::CheckEOF: 1552 return "implicit EOF"; 1553 case Check::CheckBadNot: 1554 return "bad NOT"; 1555 case Check::CheckBadCount: 1556 return "bad COUNT"; 1557 } 1558 llvm_unreachable("unknown FileCheckType"); 1559 } 1560 1561 static std::pair<Check::FileCheckType, StringRef> 1562 FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix, 1563 bool &Misspelled) { 1564 if (Buffer.size() <= Prefix.size()) 1565 return {Check::CheckNone, StringRef()}; 1566 1567 StringRef Rest = Buffer.drop_front(Prefix.size()); 1568 // Check for comment. 1569 if (llvm::is_contained(Req.CommentPrefixes, Prefix)) { 1570 if (Rest.consume_front(":")) 1571 return {Check::CheckComment, Rest}; 1572 // Ignore a comment prefix if it has a suffix like "-NOT". 
1573 return {Check::CheckNone, StringRef()}; 1574 } 1575 1576 auto ConsumeModifiers = [&](Check::FileCheckType Ret) 1577 -> std::pair<Check::FileCheckType, StringRef> { 1578 if (Rest.consume_front(":")) 1579 return {Ret, Rest}; 1580 if (!Rest.consume_front("{")) 1581 return {Check::CheckNone, StringRef()}; 1582 1583 // Parse the modifiers, speparated by commas. 1584 do { 1585 // Allow whitespace in modifiers list. 1586 Rest = Rest.ltrim(); 1587 if (Rest.consume_front("LITERAL")) 1588 Ret.setLiteralMatch(); 1589 else 1590 return {Check::CheckNone, Rest}; 1591 // Allow whitespace in modifiers list. 1592 Rest = Rest.ltrim(); 1593 } while (Rest.consume_front(",")); 1594 if (!Rest.consume_front("}:")) 1595 return {Check::CheckNone, Rest}; 1596 return {Ret, Rest}; 1597 }; 1598 1599 // Verify that the prefix is followed by directive modifiers or a colon. 1600 if (Rest.consume_front(":")) 1601 return {Check::CheckPlain, Rest}; 1602 if (Rest.front() == '{') 1603 return ConsumeModifiers(Check::CheckPlain); 1604 1605 if (Rest.consume_front("_")) 1606 Misspelled = true; 1607 else if (!Rest.consume_front("-")) 1608 return {Check::CheckNone, StringRef()}; 1609 1610 if (Rest.consume_front("COUNT-")) { 1611 int64_t Count; 1612 if (Rest.consumeInteger(10, Count)) 1613 // Error happened in parsing integer. 1614 return {Check::CheckBadCount, Rest}; 1615 if (Count <= 0 || Count > INT32_MAX) 1616 return {Check::CheckBadCount, Rest}; 1617 if (Rest.front() != ':' && Rest.front() != '{') 1618 return {Check::CheckBadCount, Rest}; 1619 return ConsumeModifiers( 1620 Check::FileCheckType(Check::CheckPlain).setCount(Count)); 1621 } 1622 1623 // You can't combine -NOT with another suffix. 
1624 if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") || 1625 Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") || 1626 Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") || 1627 Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:")) 1628 return {Check::CheckBadNot, Rest}; 1629 1630 if (Rest.consume_front("NEXT")) 1631 return ConsumeModifiers(Check::CheckNext); 1632 1633 if (Rest.consume_front("SAME")) 1634 return ConsumeModifiers(Check::CheckSame); 1635 1636 if (Rest.consume_front("NOT")) 1637 return ConsumeModifiers(Check::CheckNot); 1638 1639 if (Rest.consume_front("DAG")) 1640 return ConsumeModifiers(Check::CheckDAG); 1641 1642 if (Rest.consume_front("LABEL")) 1643 return ConsumeModifiers(Check::CheckLabel); 1644 1645 if (Rest.consume_front("EMPTY")) 1646 return ConsumeModifiers(Check::CheckEmpty); 1647 1648 return {Check::CheckNone, Rest}; 1649 } 1650 1651 static std::pair<Check::FileCheckType, StringRef> 1652 FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix) { 1653 bool Misspelled = false; 1654 auto Res = FindCheckType(Req, Buffer, Prefix, Misspelled); 1655 if (Res.first != Check::CheckNone && Misspelled) 1656 return {Check::CheckMisspelled, Res.second}; 1657 return Res; 1658 } 1659 1660 // From the given position, find the next character after the word. 1661 static size_t SkipWord(StringRef Str, size_t Loc) { 1662 while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 1663 ++Loc; 1664 return Loc; 1665 } 1666 1667 /// Searches the buffer for the first prefix in the prefix regular expression. 1668 /// 1669 /// This searches the buffer using the provided regular expression, however it 1670 /// enforces constraints beyond that: 1671 /// 1) The found prefix must not be a suffix of something that looks like 1672 /// a valid prefix. 1673 /// 2) The found prefix must be followed by a valid check type suffix using \c 1674 /// FindCheckType above. 
1675 /// 1676 /// \returns a pair of StringRefs into the Buffer, which combines: 1677 /// - the first match of the regular expression to satisfy these two is 1678 /// returned, 1679 /// otherwise an empty StringRef is returned to indicate failure. 1680 /// - buffer rewound to the location right after parsed suffix, for parsing 1681 /// to continue from 1682 /// 1683 /// If this routine returns a valid prefix, it will also shrink \p Buffer to 1684 /// start at the beginning of the returned prefix, increment \p LineNumber for 1685 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of 1686 /// check found by examining the suffix. 1687 /// 1688 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy 1689 /// is unspecified. 1690 static std::pair<StringRef, StringRef> 1691 FindFirstMatchingPrefix(const FileCheckRequest &Req, Regex &PrefixRE, 1692 StringRef &Buffer, unsigned &LineNumber, 1693 Check::FileCheckType &CheckTy) { 1694 SmallVector<StringRef, 2> Matches; 1695 1696 while (!Buffer.empty()) { 1697 // Find the first (longest) match using the RE. 1698 if (!PrefixRE.match(Buffer, &Matches)) 1699 // No match at all, bail. 1700 return {StringRef(), StringRef()}; 1701 1702 StringRef Prefix = Matches[0]; 1703 Matches.clear(); 1704 1705 assert(Prefix.data() >= Buffer.data() && 1706 Prefix.data() < Buffer.data() + Buffer.size() && 1707 "Prefix doesn't start inside of buffer!"); 1708 size_t Loc = Prefix.data() - Buffer.data(); 1709 StringRef Skipped = Buffer.substr(0, Loc); 1710 Buffer = Buffer.drop_front(Loc); 1711 LineNumber += Skipped.count('\n'); 1712 1713 // Check that the matched prefix isn't a suffix of some other check-like 1714 // word. 1715 // FIXME: This is a very ad-hoc check. it would be better handled in some 1716 // other way. Among other things it seems hard to distinguish between 1717 // intentional and unintentional uses of this feature. 
1718 if (Skipped.empty() || !IsPartOfWord(Skipped.back())) { 1719 // Now extract the type. 1720 StringRef AfterSuffix; 1721 std::tie(CheckTy, AfterSuffix) = FindCheckType(Req, Buffer, Prefix); 1722 1723 // If we've found a valid check type for this prefix, we're done. 1724 if (CheckTy != Check::CheckNone) 1725 return {Prefix, AfterSuffix}; 1726 } 1727 1728 // If we didn't successfully find a prefix, we need to skip this invalid 1729 // prefix and continue scanning. We directly skip the prefix that was 1730 // matched and any additional parts of that check-like word. 1731 Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size())); 1732 } 1733 1734 // We ran out of buffer while skipping partial matches so give up. 1735 return {StringRef(), StringRef()}; 1736 } 1737 1738 void FileCheckPatternContext::createLineVariable() { 1739 assert(!LineVariable && "@LINE pseudo numeric variable already created"); 1740 StringRef LineName = "@LINE"; 1741 LineVariable = makeNumericVariable( 1742 LineName, ExpressionFormat(ExpressionFormat::Kind::Unsigned)); 1743 GlobalNumericVariableTable[LineName] = LineVariable; 1744 } 1745 1746 FileCheck::FileCheck(FileCheckRequest Req) 1747 : Req(Req), PatternContext(std::make_unique<FileCheckPatternContext>()), 1748 CheckStrings(std::make_unique<std::vector<FileCheckString>>()) {} 1749 1750 FileCheck::~FileCheck() = default; 1751 1752 bool FileCheck::readCheckFile( 1753 SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, 1754 std::pair<unsigned, unsigned> *ImpPatBufferIDRange) { 1755 if (ImpPatBufferIDRange) 1756 ImpPatBufferIDRange->first = ImpPatBufferIDRange->second = 0; 1757 1758 Error DefineError = 1759 PatternContext->defineCmdlineVariables(Req.GlobalDefines, SM); 1760 if (DefineError) { 1761 logAllUnhandledErrors(std::move(DefineError), errs()); 1762 return true; 1763 } 1764 1765 PatternContext->createLineVariable(); 1766 1767 std::vector<Pattern> ImplicitNegativeChecks; 1768 for (StringRef PatternString : Req.ImplicitCheckNot) { 1769 // 
Create a buffer with fake command line content in order to display the 1770 // command line option responsible for the specific implicit CHECK-NOT. 1771 std::string Prefix = "-implicit-check-not='"; 1772 std::string Suffix = "'"; 1773 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( 1774 (Prefix + PatternString + Suffix).str(), "command line"); 1775 1776 StringRef PatternInBuffer = 1777 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 1778 unsigned BufferID = SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); 1779 if (ImpPatBufferIDRange) { 1780 if (ImpPatBufferIDRange->first == ImpPatBufferIDRange->second) { 1781 ImpPatBufferIDRange->first = BufferID; 1782 ImpPatBufferIDRange->second = BufferID + 1; 1783 } else { 1784 assert(BufferID == ImpPatBufferIDRange->second && 1785 "expected consecutive source buffer IDs"); 1786 ++ImpPatBufferIDRange->second; 1787 } 1788 } 1789 1790 ImplicitNegativeChecks.push_back( 1791 Pattern(Check::CheckNot, PatternContext.get())); 1792 ImplicitNegativeChecks.back().parsePattern(PatternInBuffer, 1793 "IMPLICIT-CHECK", SM, Req); 1794 } 1795 1796 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; 1797 1798 // LineNumber keeps track of the line on which CheckPrefix instances are 1799 // found. 1800 unsigned LineNumber = 1; 1801 1802 std::set<StringRef> PrefixesNotFound(Req.CheckPrefixes.begin(), 1803 Req.CheckPrefixes.end()); 1804 const size_t DistinctPrefixes = PrefixesNotFound.size(); 1805 while (true) { 1806 Check::FileCheckType CheckTy; 1807 1808 // See if a prefix occurs in the memory buffer. 
1809 StringRef UsedPrefix; 1810 StringRef AfterSuffix; 1811 std::tie(UsedPrefix, AfterSuffix) = 1812 FindFirstMatchingPrefix(Req, PrefixRE, Buffer, LineNumber, CheckTy); 1813 if (UsedPrefix.empty()) 1814 break; 1815 if (CheckTy != Check::CheckComment) 1816 PrefixesNotFound.erase(UsedPrefix); 1817 1818 assert(UsedPrefix.data() == Buffer.data() && 1819 "Failed to move Buffer's start forward, or pointed prefix outside " 1820 "of the buffer!"); 1821 assert(AfterSuffix.data() >= Buffer.data() && 1822 AfterSuffix.data() < Buffer.data() + Buffer.size() && 1823 "Parsing after suffix doesn't start inside of buffer!"); 1824 1825 // Location to use for error messages. 1826 const char *UsedPrefixStart = UsedPrefix.data(); 1827 1828 // Skip the buffer to the end of parsed suffix (or just prefix, if no good 1829 // suffix was processed). 1830 Buffer = AfterSuffix.empty() ? Buffer.drop_front(UsedPrefix.size()) 1831 : AfterSuffix; 1832 1833 // Complain about misspelled directives. 1834 if (CheckTy == Check::CheckMisspelled) { 1835 StringRef UsedDirective(UsedPrefix.data(), 1836 AfterSuffix.data() - UsedPrefix.data()); 1837 SM.PrintMessage(SMLoc::getFromPointer(UsedDirective.data()), 1838 SourceMgr::DK_Error, 1839 "misspelled directive '" + UsedDirective + "'"); 1840 return true; 1841 } 1842 1843 // Complain about useful-looking but unsupported suffixes. 1844 if (CheckTy == Check::CheckBadNot) { 1845 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, 1846 "unsupported -NOT combo on prefix '" + UsedPrefix + "'"); 1847 return true; 1848 } 1849 1850 // Complain about invalid count specification. 1851 if (CheckTy == Check::CheckBadCount) { 1852 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, 1853 "invalid count in -COUNT specification on prefix '" + 1854 UsedPrefix + "'"); 1855 return true; 1856 } 1857 1858 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 1859 // leading whitespace. 
1860 if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines)) 1861 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 1862 1863 // Scan ahead to the end of line. 1864 size_t EOL = Buffer.find_first_of("\n\r"); 1865 1866 // Remember the location of the start of the pattern, for diagnostics. 1867 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 1868 1869 // Extract the pattern from the buffer. 1870 StringRef PatternBuffer = Buffer.substr(0, EOL); 1871 Buffer = Buffer.substr(EOL); 1872 1873 // If this is a comment, we're done. 1874 if (CheckTy == Check::CheckComment) 1875 continue; 1876 1877 // Parse the pattern. 1878 Pattern P(CheckTy, PatternContext.get(), LineNumber); 1879 if (P.parsePattern(PatternBuffer, UsedPrefix, SM, Req)) 1880 return true; 1881 1882 // Verify that CHECK-LABEL lines do not define or use variables 1883 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 1884 SM.PrintMessage( 1885 SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error, 1886 "found '" + UsedPrefix + "-LABEL:'" 1887 " with variable definition or use"); 1888 return true; 1889 } 1890 1891 // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them. 1892 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame || 1893 CheckTy == Check::CheckEmpty) && 1894 CheckStrings->empty()) { 1895 StringRef Type = CheckTy == Check::CheckNext 1896 ? "NEXT" 1897 : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME"; 1898 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 1899 SourceMgr::DK_Error, 1900 "found '" + UsedPrefix + "-" + Type + 1901 "' without previous '" + UsedPrefix + ": line"); 1902 return true; 1903 } 1904 1905 // Handle CHECK-DAG/-NOT. 1906 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 1907 DagNotMatches.push_back(P); 1908 continue; 1909 } 1910 1911 // Okay, add the string we captured to the output vector and move on. 
1912 CheckStrings->emplace_back(P, UsedPrefix, PatternLoc); 1913 std::swap(DagNotMatches, CheckStrings->back().DagNotStrings); 1914 DagNotMatches = ImplicitNegativeChecks; 1915 } 1916 1917 // When there are no used prefixes we report an error except in the case that 1918 // no prefix is specified explicitly but -implicit-check-not is specified. 1919 const bool NoPrefixesFound = PrefixesNotFound.size() == DistinctPrefixes; 1920 const bool SomePrefixesUnexpectedlyNotUsed = 1921 !Req.AllowUnusedPrefixes && !PrefixesNotFound.empty(); 1922 if ((NoPrefixesFound || SomePrefixesUnexpectedlyNotUsed) && 1923 (ImplicitNegativeChecks.empty() || !Req.IsDefaultCheckPrefix)) { 1924 errs() << "error: no check strings found with prefix" 1925 << (PrefixesNotFound.size() > 1 ? "es " : " "); 1926 bool First = true; 1927 for (StringRef MissingPrefix : PrefixesNotFound) { 1928 if (!First) 1929 errs() << ", "; 1930 errs() << "\'" << MissingPrefix << ":'"; 1931 First = false; 1932 } 1933 errs() << '\n'; 1934 return true; 1935 } 1936 1937 // Add an EOF pattern for any trailing --implicit-check-not/CHECK-DAG/-NOTs, 1938 // and use the first prefix as a filler for the error message. 1939 if (!DagNotMatches.empty()) { 1940 CheckStrings->emplace_back( 1941 Pattern(Check::CheckEOF, PatternContext.get(), LineNumber + 1), 1942 *Req.CheckPrefixes.begin(), SMLoc::getFromPointer(Buffer.data())); 1943 std::swap(DagNotMatches, CheckStrings->back().DagNotStrings); 1944 } 1945 1946 return false; 1947 } 1948 1949 /// Returns either (1) \c ErrorSuccess if there was no error or (2) 1950 /// \c ErrorReported if an error was reported, such as an unexpected match. 1951 static Error printMatch(bool ExpectedMatch, const SourceMgr &SM, 1952 StringRef Prefix, SMLoc Loc, const Pattern &Pat, 1953 int MatchedCount, StringRef Buffer, 1954 Pattern::MatchResult MatchResult, 1955 const FileCheckRequest &Req, 1956 std::vector<FileCheckDiag> *Diags) { 1957 // Suppress some verbosity if there's no error. 
1958 bool HasError = !ExpectedMatch || MatchResult.TheError; 1959 bool PrintDiag = true; 1960 if (!HasError) { 1961 if (!Req.Verbose) 1962 return ErrorReported::reportedOrSuccess(HasError); 1963 if (!Req.VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF) 1964 return ErrorReported::reportedOrSuccess(HasError); 1965 // Due to their verbosity, we don't print verbose diagnostics here if we're 1966 // gathering them for Diags to be rendered elsewhere, but we always print 1967 // other diagnostics. 1968 PrintDiag = !Diags; 1969 } 1970 1971 // Add "found" diagnostic, substitutions, and variable definitions to Diags. 1972 FileCheckDiag::MatchType MatchTy = ExpectedMatch 1973 ? FileCheckDiag::MatchFoundAndExpected 1974 : FileCheckDiag::MatchFoundButExcluded; 1975 SMRange MatchRange = ProcessMatchResult(MatchTy, SM, Loc, Pat.getCheckTy(), 1976 Buffer, MatchResult.TheMatch->Pos, 1977 MatchResult.TheMatch->Len, Diags); 1978 if (Diags) { 1979 Pat.printSubstitutions(SM, Buffer, MatchRange, MatchTy, Diags); 1980 Pat.printVariableDefs(SM, MatchTy, Diags); 1981 } 1982 if (!PrintDiag) { 1983 assert(!HasError && "expected to report more diagnostics for error"); 1984 return ErrorReported::reportedOrSuccess(HasError); 1985 } 1986 1987 // Print the match. 1988 std::string Message = formatv("{0}: {1} string found in input", 1989 Pat.getCheckTy().getDescription(Prefix), 1990 (ExpectedMatch ? "expected" : "excluded")) 1991 .str(); 1992 if (Pat.getCount() > 1) 1993 Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str(); 1994 SM.PrintMessage( 1995 Loc, ExpectedMatch ? SourceMgr::DK_Remark : SourceMgr::DK_Error, Message); 1996 SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, "found here", 1997 {MatchRange}); 1998 1999 // Print additional information, which can be useful even if there are errors. 2000 Pat.printSubstitutions(SM, Buffer, MatchRange, MatchTy, nullptr); 2001 Pat.printVariableDefs(SM, MatchTy, nullptr); 2002 2003 // Print errors and add them to Diags. 
We report these errors after the match 2004 // itself because we found them after the match. If we had found them before 2005 // the match, we'd be in printNoMatch. 2006 handleAllErrors(std::move(MatchResult.TheError), 2007 [&](const ErrorDiagnostic &E) { 2008 E.log(errs()); 2009 if (Diags) { 2010 Diags->emplace_back(SM, Pat.getCheckTy(), Loc, 2011 FileCheckDiag::MatchFoundErrorNote, 2012 E.getRange(), E.getMessage().str()); 2013 } 2014 }); 2015 return ErrorReported::reportedOrSuccess(HasError); 2016 } 2017 2018 /// Returns either (1) \c ErrorSuccess if there was no error, or (2) 2019 /// \c ErrorReported if an error was reported, such as an expected match not 2020 /// found. 2021 static Error printNoMatch(bool ExpectedMatch, const SourceMgr &SM, 2022 StringRef Prefix, SMLoc Loc, const Pattern &Pat, 2023 int MatchedCount, StringRef Buffer, Error MatchError, 2024 bool VerboseVerbose, 2025 std::vector<FileCheckDiag> *Diags) { 2026 // Print any pattern errors, and record them to be added to Diags later. 2027 bool HasError = ExpectedMatch; 2028 bool HasPatternError = false; 2029 FileCheckDiag::MatchType MatchTy = ExpectedMatch 2030 ? FileCheckDiag::MatchNoneButExpected 2031 : FileCheckDiag::MatchNoneAndExcluded; 2032 SmallVector<std::string, 4> ErrorMsgs; 2033 handleAllErrors( 2034 std::move(MatchError), 2035 [&](const ErrorDiagnostic &E) { 2036 HasError = HasPatternError = true; 2037 MatchTy = FileCheckDiag::MatchNoneForInvalidPattern; 2038 E.log(errs()); 2039 if (Diags) 2040 ErrorMsgs.push_back(E.getMessage().str()); 2041 }, 2042 // NotFoundError is why printNoMatch was invoked. 2043 [](const NotFoundError &E) {}); 2044 2045 // Suppress some verbosity if there's no error. 
2046 bool PrintDiag = true; 2047 if (!HasError) { 2048 if (!VerboseVerbose) 2049 return ErrorReported::reportedOrSuccess(HasError); 2050 // Due to their verbosity, we don't print verbose diagnostics here if we're 2051 // gathering them for Diags to be rendered elsewhere, but we always print 2052 // other diagnostics. 2053 PrintDiag = !Diags; 2054 } 2055 2056 // Add "not found" diagnostic, substitutions, and pattern errors to Diags. 2057 // 2058 // We handle Diags a little differently than the errors we print directly: 2059 // we add the "not found" diagnostic to Diags even if there are pattern 2060 // errors. The reason is that we need to attach pattern errors as notes 2061 // somewhere in the input, and the input search range from the "not found" 2062 // diagnostic is all we have to anchor them. 2063 SMRange SearchRange = ProcessMatchResult(MatchTy, SM, Loc, Pat.getCheckTy(), 2064 Buffer, 0, Buffer.size(), Diags); 2065 if (Diags) { 2066 SMRange NoteRange = SMRange(SearchRange.Start, SearchRange.Start); 2067 for (StringRef ErrorMsg : ErrorMsgs) 2068 Diags->emplace_back(SM, Pat.getCheckTy(), Loc, MatchTy, NoteRange, 2069 ErrorMsg); 2070 Pat.printSubstitutions(SM, Buffer, SearchRange, MatchTy, Diags); 2071 } 2072 if (!PrintDiag) { 2073 assert(!HasError && "expected to report more diagnostics for error"); 2074 return ErrorReported::reportedOrSuccess(HasError); 2075 } 2076 2077 // Print "not found" diagnostic, except that's implied if we already printed a 2078 // pattern error. 2079 if (!HasPatternError) { 2080 std::string Message = formatv("{0}: {1} string not found in input", 2081 Pat.getCheckTy().getDescription(Prefix), 2082 (ExpectedMatch ? "expected" : "excluded")) 2083 .str(); 2084 if (Pat.getCount() > 1) 2085 Message += 2086 formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str(); 2087 SM.PrintMessage(Loc, 2088 ExpectedMatch ? 
SourceMgr::DK_Error : SourceMgr::DK_Remark, 2089 Message); 2090 SM.PrintMessage(SearchRange.Start, SourceMgr::DK_Note, 2091 "scanning from here"); 2092 } 2093 2094 // Print additional information, which can be useful even after a pattern 2095 // error. 2096 Pat.printSubstitutions(SM, Buffer, SearchRange, MatchTy, nullptr); 2097 if (ExpectedMatch) 2098 Pat.printFuzzyMatch(SM, Buffer, Diags); 2099 return ErrorReported::reportedOrSuccess(HasError); 2100 } 2101 2102 /// Returns either (1) \c ErrorSuccess if there was no error, or (2) 2103 /// \c ErrorReported if an error was reported. 2104 static Error reportMatchResult(bool ExpectedMatch, const SourceMgr &SM, 2105 StringRef Prefix, SMLoc Loc, const Pattern &Pat, 2106 int MatchedCount, StringRef Buffer, 2107 Pattern::MatchResult MatchResult, 2108 const FileCheckRequest &Req, 2109 std::vector<FileCheckDiag> *Diags) { 2110 if (MatchResult.TheMatch) 2111 return printMatch(ExpectedMatch, SM, Prefix, Loc, Pat, MatchedCount, Buffer, 2112 std::move(MatchResult), Req, Diags); 2113 return printNoMatch(ExpectedMatch, SM, Prefix, Loc, Pat, MatchedCount, Buffer, 2114 std::move(MatchResult.TheError), Req.VerboseVerbose, 2115 Diags); 2116 } 2117 2118 /// Counts the number of newlines in the specified range. 2119 static unsigned CountNumNewlinesBetween(StringRef Range, 2120 const char *&FirstNewLine) { 2121 unsigned NumNewLines = 0; 2122 while (true) { 2123 // Scan for newline. 2124 Range = Range.substr(Range.find_first_of("\n\r")); 2125 if (Range.empty()) 2126 return NumNewLines; 2127 2128 ++NumNewLines; 2129 2130 // Handle \n\r and \r\n as a single newline. 
2131 if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') && 2132 (Range[0] != Range[1])) 2133 Range = Range.substr(1); 2134 Range = Range.substr(1); 2135 2136 if (NumNewLines == 1) 2137 FirstNewLine = Range.begin(); 2138 } 2139 } 2140 2141 size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer, 2142 bool IsLabelScanMode, size_t &MatchLen, 2143 FileCheckRequest &Req, 2144 std::vector<FileCheckDiag> *Diags) const { 2145 size_t LastPos = 0; 2146 std::vector<const Pattern *> NotStrings; 2147 2148 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 2149 // bounds; we have not processed variable definitions within the bounded block 2150 // yet so cannot handle any final CHECK-DAG yet; this is handled when going 2151 // over the block again (including the last CHECK-LABEL) in normal mode. 2152 if (!IsLabelScanMode) { 2153 // Match "dag strings" (with mixed "not strings" if any). 2154 LastPos = CheckDag(SM, Buffer, NotStrings, Req, Diags); 2155 if (LastPos == StringRef::npos) 2156 return StringRef::npos; 2157 } 2158 2159 // Match itself from the last position after matching CHECK-DAG. 2160 size_t LastMatchEnd = LastPos; 2161 size_t FirstMatchPos = 0; 2162 // Go match the pattern Count times. Majority of patterns only match with 2163 // count 1 though. 
2164 assert(Pat.getCount() != 0 && "pattern count can not be zero"); 2165 for (int i = 1; i <= Pat.getCount(); i++) { 2166 StringRef MatchBuffer = Buffer.substr(LastMatchEnd); 2167 // get a match at current start point 2168 Pattern::MatchResult MatchResult = Pat.match(MatchBuffer, SM); 2169 2170 // report 2171 if (Error Err = reportMatchResult(/*ExpectedMatch=*/true, SM, Prefix, Loc, 2172 Pat, i, MatchBuffer, 2173 std::move(MatchResult), Req, Diags)) { 2174 cantFail(handleErrors(std::move(Err), [&](const ErrorReported &E) {})); 2175 return StringRef::npos; 2176 } 2177 2178 size_t MatchPos = MatchResult.TheMatch->Pos; 2179 if (i == 1) 2180 FirstMatchPos = LastPos + MatchPos; 2181 2182 // move start point after the match 2183 LastMatchEnd += MatchPos + MatchResult.TheMatch->Len; 2184 } 2185 // Full match len counts from first match pos. 2186 MatchLen = LastMatchEnd - FirstMatchPos; 2187 2188 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 2189 // or CHECK-NOT 2190 if (!IsLabelScanMode) { 2191 size_t MatchPos = FirstMatchPos - LastPos; 2192 StringRef MatchBuffer = Buffer.substr(LastPos); 2193 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 2194 2195 // If this check is a "CHECK-NEXT", verify that the previous match was on 2196 // the previous line (i.e. that there is one newline between them). 2197 if (CheckNext(SM, SkippedRegion)) { 2198 ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc, 2199 Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen, 2200 Diags, Req.Verbose); 2201 return StringRef::npos; 2202 } 2203 2204 // If this check is a "CHECK-SAME", verify that the previous match was on 2205 // the same line (i.e. that there is no newline between them). 
2206 if (CheckSame(SM, SkippedRegion)) { 2207 ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc, 2208 Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen, 2209 Diags, Req.Verbose); 2210 return StringRef::npos; 2211 } 2212 2213 // If this match had "not strings", verify that they don't exist in the 2214 // skipped region. 2215 if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags)) 2216 return StringRef::npos; 2217 } 2218 2219 return FirstMatchPos; 2220 } 2221 2222 bool FileCheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 2223 if (Pat.getCheckTy() != Check::CheckNext && 2224 Pat.getCheckTy() != Check::CheckEmpty) 2225 return false; 2226 2227 Twine CheckName = 2228 Prefix + 2229 Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT"); 2230 2231 // Count the number of newlines between the previous match and this one. 2232 const char *FirstNewLine = nullptr; 2233 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 2234 2235 if (NumNewLines == 0) { 2236 SM.PrintMessage(Loc, SourceMgr::DK_Error, 2237 CheckName + ": is on the same line as previous match"); 2238 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 2239 "'next' match was here"); 2240 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 2241 "previous match ended here"); 2242 return true; 2243 } 2244 2245 if (NumNewLines != 1) { 2246 SM.PrintMessage(Loc, SourceMgr::DK_Error, 2247 CheckName + 2248 ": is not on the line after the previous match"); 2249 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 2250 "'next' match was here"); 2251 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 2252 "previous match ended here"); 2253 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 2254 "non-matching line after previous match is here"); 2255 return true; 2256 } 2257 2258 return false; 2259 } 2260 2261 bool FileCheckString::CheckSame(const 
SourceMgr &SM, StringRef Buffer) const { 2262 if (Pat.getCheckTy() != Check::CheckSame) 2263 return false; 2264 2265 // Count the number of newlines between the previous match and this one. 2266 const char *FirstNewLine = nullptr; 2267 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 2268 2269 if (NumNewLines != 0) { 2270 SM.PrintMessage(Loc, SourceMgr::DK_Error, 2271 Prefix + 2272 "-SAME: is not on the same line as the previous match"); 2273 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 2274 "'next' match was here"); 2275 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 2276 "previous match ended here"); 2277 return true; 2278 } 2279 2280 return false; 2281 } 2282 2283 bool FileCheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, 2284 const std::vector<const Pattern *> &NotStrings, 2285 const FileCheckRequest &Req, 2286 std::vector<FileCheckDiag> *Diags) const { 2287 bool DirectiveFail = false; 2288 for (const Pattern *Pat : NotStrings) { 2289 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); 2290 Pattern::MatchResult MatchResult = Pat->match(Buffer, SM); 2291 if (Error Err = reportMatchResult(/*ExpectedMatch=*/false, SM, Prefix, 2292 Pat->getLoc(), *Pat, 1, Buffer, 2293 std::move(MatchResult), Req, Diags)) { 2294 cantFail(handleErrors(std::move(Err), [&](const ErrorReported &E) {})); 2295 DirectiveFail = true; 2296 continue; 2297 } 2298 } 2299 return DirectiveFail; 2300 } 2301 2302 size_t FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 2303 std::vector<const Pattern *> &NotStrings, 2304 const FileCheckRequest &Req, 2305 std::vector<FileCheckDiag> *Diags) const { 2306 if (DagNotStrings.empty()) 2307 return 0; 2308 2309 // The start of the search range. 2310 size_t StartPos = 0; 2311 2312 struct MatchRange { 2313 size_t Pos; 2314 size_t End; 2315 }; 2316 // A sorted list of ranges for non-overlapping CHECK-DAG matches. 
Match 2317 // ranges are erased from this list once they are no longer in the search 2318 // range. 2319 std::list<MatchRange> MatchRanges; 2320 2321 // We need PatItr and PatEnd later for detecting the end of a CHECK-DAG 2322 // group, so we don't use a range-based for loop here. 2323 for (auto PatItr = DagNotStrings.begin(), PatEnd = DagNotStrings.end(); 2324 PatItr != PatEnd; ++PatItr) { 2325 const Pattern &Pat = *PatItr; 2326 assert((Pat.getCheckTy() == Check::CheckDAG || 2327 Pat.getCheckTy() == Check::CheckNot) && 2328 "Invalid CHECK-DAG or CHECK-NOT!"); 2329 2330 if (Pat.getCheckTy() == Check::CheckNot) { 2331 NotStrings.push_back(&Pat); 2332 continue; 2333 } 2334 2335 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 2336 2337 // CHECK-DAG always matches from the start. 2338 size_t MatchLen = 0, MatchPos = StartPos; 2339 2340 // Search for a match that doesn't overlap a previous match in this 2341 // CHECK-DAG group. 2342 for (auto MI = MatchRanges.begin(), ME = MatchRanges.end(); true; ++MI) { 2343 StringRef MatchBuffer = Buffer.substr(MatchPos); 2344 Pattern::MatchResult MatchResult = Pat.match(MatchBuffer, SM); 2345 // With a group of CHECK-DAGs, a single mismatching means the match on 2346 // that group of CHECK-DAGs fails immediately. 2347 if (MatchResult.TheError || Req.VerboseVerbose) { 2348 if (Error Err = reportMatchResult(/*ExpectedMatch=*/true, SM, Prefix, 2349 Pat.getLoc(), Pat, 1, MatchBuffer, 2350 std::move(MatchResult), Req, Diags)) { 2351 cantFail( 2352 handleErrors(std::move(Err), [&](const ErrorReported &E) {})); 2353 return StringRef::npos; 2354 } 2355 } 2356 MatchLen = MatchResult.TheMatch->Len; 2357 // Re-calc it as the offset relative to the start of the original 2358 // string. 
2359 MatchPos += MatchResult.TheMatch->Pos; 2360 MatchRange M{MatchPos, MatchPos + MatchLen}; 2361 if (Req.AllowDeprecatedDagOverlap) { 2362 // We don't need to track all matches in this mode, so we just maintain 2363 // one match range that encompasses the current CHECK-DAG group's 2364 // matches. 2365 if (MatchRanges.empty()) 2366 MatchRanges.insert(MatchRanges.end(), M); 2367 else { 2368 auto Block = MatchRanges.begin(); 2369 Block->Pos = std::min(Block->Pos, M.Pos); 2370 Block->End = std::max(Block->End, M.End); 2371 } 2372 break; 2373 } 2374 // Iterate previous matches until overlapping match or insertion point. 2375 bool Overlap = false; 2376 for (; MI != ME; ++MI) { 2377 if (M.Pos < MI->End) { 2378 // !Overlap => New match has no overlap and is before this old match. 2379 // Overlap => New match overlaps this old match. 2380 Overlap = MI->Pos < M.End; 2381 break; 2382 } 2383 } 2384 if (!Overlap) { 2385 // Insert non-overlapping match into list. 2386 MatchRanges.insert(MI, M); 2387 break; 2388 } 2389 if (Req.VerboseVerbose) { 2390 // Due to their verbosity, we don't print verbose diagnostics here if 2391 // we're gathering them for a different rendering, but we always print 2392 // other diagnostics. 
2393 if (!Diags) { 2394 SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos); 2395 SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End); 2396 SMRange OldRange(OldStart, OldEnd); 2397 SM.PrintMessage(OldStart, SourceMgr::DK_Note, 2398 "match discarded, overlaps earlier DAG match here", 2399 {OldRange}); 2400 } else { 2401 SMLoc CheckLoc = Diags->rbegin()->CheckLoc; 2402 for (auto I = Diags->rbegin(), E = Diags->rend(); 2403 I != E && I->CheckLoc == CheckLoc; ++I) 2404 I->MatchTy = FileCheckDiag::MatchFoundButDiscarded; 2405 } 2406 } 2407 MatchPos = MI->End; 2408 } 2409 if (!Req.VerboseVerbose) 2410 cantFail(printMatch( 2411 /*ExpectedMatch=*/true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, 2412 Pattern::MatchResult(MatchPos, MatchLen, Error::success()), Req, 2413 Diags)); 2414 2415 // Handle the end of a CHECK-DAG group. 2416 if (std::next(PatItr) == PatEnd || 2417 std::next(PatItr)->getCheckTy() == Check::CheckNot) { 2418 if (!NotStrings.empty()) { 2419 // If there are CHECK-NOTs between two CHECK-DAGs or from CHECK to 2420 // CHECK-DAG, verify that there are no 'not' strings occurred in that 2421 // region. 2422 StringRef SkippedRegion = 2423 Buffer.slice(StartPos, MatchRanges.begin()->Pos); 2424 if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags)) 2425 return StringRef::npos; 2426 // Clear "not strings". 2427 NotStrings.clear(); 2428 } 2429 // All subsequent CHECK-DAGs and CHECK-NOTs should be matched from the 2430 // end of this CHECK-DAG group's match range. 2431 StartPos = MatchRanges.rbegin()->End; 2432 // Don't waste time checking for (impossible) overlaps before that. 
2433 MatchRanges.clear(); 2434 } 2435 } 2436 2437 return StartPos; 2438 } 2439 2440 static bool ValidatePrefixes(StringRef Kind, StringSet<> &UniquePrefixes, 2441 ArrayRef<StringRef> SuppliedPrefixes) { 2442 for (StringRef Prefix : SuppliedPrefixes) { 2443 if (Prefix.empty()) { 2444 errs() << "error: supplied " << Kind << " prefix must not be the empty " 2445 << "string\n"; 2446 return false; 2447 } 2448 static const Regex Validator("^[a-zA-Z0-9_-]*$"); 2449 if (!Validator.match(Prefix)) { 2450 errs() << "error: supplied " << Kind << " prefix must start with a " 2451 << "letter and contain only alphanumeric characters, hyphens, and " 2452 << "underscores: '" << Prefix << "'\n"; 2453 return false; 2454 } 2455 if (!UniquePrefixes.insert(Prefix).second) { 2456 errs() << "error: supplied " << Kind << " prefix must be unique among " 2457 << "check and comment prefixes: '" << Prefix << "'\n"; 2458 return false; 2459 } 2460 } 2461 return true; 2462 } 2463 2464 static const char *DefaultCheckPrefixes[] = {"CHECK"}; 2465 static const char *DefaultCommentPrefixes[] = {"COM", "RUN"}; 2466 2467 bool FileCheck::ValidateCheckPrefixes() { 2468 StringSet<> UniquePrefixes; 2469 // Add default prefixes to catch user-supplied duplicates of them below. 2470 if (Req.CheckPrefixes.empty()) { 2471 for (const char *Prefix : DefaultCheckPrefixes) 2472 UniquePrefixes.insert(Prefix); 2473 } 2474 if (Req.CommentPrefixes.empty()) { 2475 for (const char *Prefix : DefaultCommentPrefixes) 2476 UniquePrefixes.insert(Prefix); 2477 } 2478 // Do not validate the default prefixes, or diagnostics about duplicates might 2479 // incorrectly indicate that they were supplied by the user. 
2480 if (!ValidatePrefixes("check", UniquePrefixes, Req.CheckPrefixes)) 2481 return false; 2482 if (!ValidatePrefixes("comment", UniquePrefixes, Req.CommentPrefixes)) 2483 return false; 2484 return true; 2485 } 2486 2487 Regex FileCheck::buildCheckPrefixRegex() { 2488 if (Req.CheckPrefixes.empty()) { 2489 for (const char *Prefix : DefaultCheckPrefixes) 2490 Req.CheckPrefixes.push_back(Prefix); 2491 Req.IsDefaultCheckPrefix = true; 2492 } 2493 if (Req.CommentPrefixes.empty()) { 2494 for (const char *Prefix : DefaultCommentPrefixes) 2495 Req.CommentPrefixes.push_back(Prefix); 2496 } 2497 2498 // We already validated the contents of CheckPrefixes and CommentPrefixes so 2499 // just concatenate them as alternatives. 2500 SmallString<32> PrefixRegexStr; 2501 for (size_t I = 0, E = Req.CheckPrefixes.size(); I != E; ++I) { 2502 if (I != 0) 2503 PrefixRegexStr.push_back('|'); 2504 PrefixRegexStr.append(Req.CheckPrefixes[I]); 2505 } 2506 for (StringRef Prefix : Req.CommentPrefixes) { 2507 PrefixRegexStr.push_back('|'); 2508 PrefixRegexStr.append(Prefix); 2509 } 2510 2511 return Regex(PrefixRegexStr); 2512 } 2513 2514 Error FileCheckPatternContext::defineCmdlineVariables( 2515 ArrayRef<StringRef> CmdlineDefines, SourceMgr &SM) { 2516 assert(GlobalVariableTable.empty() && GlobalNumericVariableTable.empty() && 2517 "Overriding defined variable with command-line variable definitions"); 2518 2519 if (CmdlineDefines.empty()) 2520 return Error::success(); 2521 2522 // Create a string representing the vector of command-line definitions. Each 2523 // definition is on its own line and prefixed with a definition number to 2524 // clarify which definition a given diagnostic corresponds to. 
2525 unsigned I = 0; 2526 Error Errs = Error::success(); 2527 std::string CmdlineDefsDiag; 2528 SmallVector<std::pair<size_t, size_t>, 4> CmdlineDefsIndices; 2529 for (StringRef CmdlineDef : CmdlineDefines) { 2530 std::string DefPrefix = ("Global define #" + Twine(++I) + ": ").str(); 2531 size_t EqIdx = CmdlineDef.find('='); 2532 if (EqIdx == StringRef::npos) { 2533 CmdlineDefsIndices.push_back(std::make_pair(CmdlineDefsDiag.size(), 0)); 2534 continue; 2535 } 2536 // Numeric variable definition. 2537 if (CmdlineDef[0] == '#') { 2538 // Append a copy of the command-line definition adapted to use the same 2539 // format as in the input file to be able to reuse 2540 // parseNumericSubstitutionBlock. 2541 CmdlineDefsDiag += (DefPrefix + CmdlineDef + " (parsed as: [[").str(); 2542 std::string SubstitutionStr = std::string(CmdlineDef); 2543 SubstitutionStr[EqIdx] = ':'; 2544 CmdlineDefsIndices.push_back( 2545 std::make_pair(CmdlineDefsDiag.size(), SubstitutionStr.size())); 2546 CmdlineDefsDiag += (SubstitutionStr + Twine("]])\n")).str(); 2547 } else { 2548 CmdlineDefsDiag += DefPrefix; 2549 CmdlineDefsIndices.push_back( 2550 std::make_pair(CmdlineDefsDiag.size(), CmdlineDef.size())); 2551 CmdlineDefsDiag += (CmdlineDef + "\n").str(); 2552 } 2553 } 2554 2555 // Create a buffer with fake command line content in order to display 2556 // parsing diagnostic with location information and point to the 2557 // global definition with invalid syntax. 
2558 std::unique_ptr<MemoryBuffer> CmdLineDefsDiagBuffer = 2559 MemoryBuffer::getMemBufferCopy(CmdlineDefsDiag, "Global defines"); 2560 StringRef CmdlineDefsDiagRef = CmdLineDefsDiagBuffer->getBuffer(); 2561 SM.AddNewSourceBuffer(std::move(CmdLineDefsDiagBuffer), SMLoc()); 2562 2563 for (std::pair<size_t, size_t> CmdlineDefIndices : CmdlineDefsIndices) { 2564 StringRef CmdlineDef = CmdlineDefsDiagRef.substr(CmdlineDefIndices.first, 2565 CmdlineDefIndices.second); 2566 if (CmdlineDef.empty()) { 2567 Errs = joinErrors( 2568 std::move(Errs), 2569 ErrorDiagnostic::get(SM, CmdlineDef, 2570 "missing equal sign in global definition")); 2571 continue; 2572 } 2573 2574 // Numeric variable definition. 2575 if (CmdlineDef[0] == '#') { 2576 // Now parse the definition both to check that the syntax is correct and 2577 // to create the necessary class instance. 2578 StringRef CmdlineDefExpr = CmdlineDef.substr(1); 2579 std::optional<NumericVariable *> DefinedNumericVariable; 2580 Expected<std::unique_ptr<Expression>> ExpressionResult = 2581 Pattern::parseNumericSubstitutionBlock(CmdlineDefExpr, 2582 DefinedNumericVariable, false, 2583 std::nullopt, this, SM); 2584 if (!ExpressionResult) { 2585 Errs = joinErrors(std::move(Errs), ExpressionResult.takeError()); 2586 continue; 2587 } 2588 std::unique_ptr<Expression> Expression = std::move(*ExpressionResult); 2589 // Now evaluate the expression whose value this variable should be set 2590 // to, since the expression of a command-line variable definition should 2591 // only use variables defined earlier on the command-line. If not, this 2592 // is an error and we report it. 2593 Expected<ExpressionValue> Value = Expression->getAST()->eval(); 2594 if (!Value) { 2595 Errs = joinErrors(std::move(Errs), Value.takeError()); 2596 continue; 2597 } 2598 2599 assert(DefinedNumericVariable && "No variable defined"); 2600 (*DefinedNumericVariable)->setValue(*Value); 2601 2602 // Record this variable definition. 
2603 GlobalNumericVariableTable[(*DefinedNumericVariable)->getName()] = 2604 *DefinedNumericVariable; 2605 } else { 2606 // String variable definition. 2607 std::pair<StringRef, StringRef> CmdlineNameVal = CmdlineDef.split('='); 2608 StringRef CmdlineName = CmdlineNameVal.first; 2609 StringRef OrigCmdlineName = CmdlineName; 2610 Expected<Pattern::VariableProperties> ParseVarResult = 2611 Pattern::parseVariable(CmdlineName, SM); 2612 if (!ParseVarResult) { 2613 Errs = joinErrors(std::move(Errs), ParseVarResult.takeError()); 2614 continue; 2615 } 2616 // Check that CmdlineName does not denote a pseudo variable is only 2617 // composed of the parsed numeric variable. This catches cases like 2618 // "FOO+2" in a "FOO+2=10" definition. 2619 if (ParseVarResult->IsPseudo || !CmdlineName.empty()) { 2620 Errs = joinErrors(std::move(Errs), 2621 ErrorDiagnostic::get( 2622 SM, OrigCmdlineName, 2623 "invalid name in string variable definition '" + 2624 OrigCmdlineName + "'")); 2625 continue; 2626 } 2627 StringRef Name = ParseVarResult->Name; 2628 2629 // Detect collisions between string and numeric variables when the former 2630 // is created later than the latter. 2631 if (GlobalNumericVariableTable.contains(Name)) { 2632 Errs = joinErrors(std::move(Errs), 2633 ErrorDiagnostic::get(SM, Name, 2634 "numeric variable with name '" + 2635 Name + "' already exists")); 2636 continue; 2637 } 2638 GlobalVariableTable.insert(CmdlineNameVal); 2639 // Mark the string variable as defined to detect collisions between 2640 // string and numeric variables in defineCmdlineVariables when the latter 2641 // is created later than the former. We cannot reuse GlobalVariableTable 2642 // for this by populating it with an empty string since we would then 2643 // lose the ability to detect the use of an undefined variable in 2644 // match(). 
2645 DefinedVariableTable[Name] = true; 2646 } 2647 } 2648 2649 return Errs; 2650 } 2651 2652 void FileCheckPatternContext::clearLocalVars() { 2653 SmallVector<StringRef, 16> LocalPatternVars, LocalNumericVars; 2654 for (const StringMapEntry<StringRef> &Var : GlobalVariableTable) 2655 if (Var.first()[0] != '$') 2656 LocalPatternVars.push_back(Var.first()); 2657 2658 // Numeric substitution reads the value of a variable directly, not via 2659 // GlobalNumericVariableTable. Therefore, we clear local variables by 2660 // clearing their value which will lead to a numeric substitution failure. We 2661 // also mark the variable for removal from GlobalNumericVariableTable since 2662 // this is what defineCmdlineVariables checks to decide that no global 2663 // variable has been defined. 2664 for (const auto &Var : GlobalNumericVariableTable) 2665 if (Var.first()[0] != '$') { 2666 Var.getValue()->clearValue(); 2667 LocalNumericVars.push_back(Var.first()); 2668 } 2669 2670 for (const auto &Var : LocalPatternVars) 2671 GlobalVariableTable.erase(Var); 2672 for (const auto &Var : LocalNumericVars) 2673 GlobalNumericVariableTable.erase(Var); 2674 } 2675 2676 bool FileCheck::checkInput(SourceMgr &SM, StringRef Buffer, 2677 std::vector<FileCheckDiag> *Diags) { 2678 bool ChecksFailed = false; 2679 2680 unsigned i = 0, j = 0, e = CheckStrings->size(); 2681 while (true) { 2682 StringRef CheckRegion; 2683 if (j == e) { 2684 CheckRegion = Buffer; 2685 } else { 2686 const FileCheckString &CheckLabelStr = (*CheckStrings)[j]; 2687 if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { 2688 ++j; 2689 continue; 2690 } 2691 2692 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 2693 size_t MatchLabelLen = 0; 2694 size_t MatchLabelPos = 2695 CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, Req, Diags); 2696 if (MatchLabelPos == StringRef::npos) 2697 // Immediately bail if CHECK-LABEL fails, nothing else we can do. 
2698 return false; 2699 2700 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 2701 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 2702 ++j; 2703 } 2704 2705 // Do not clear the first region as it's the one before the first 2706 // CHECK-LABEL and it would clear variables defined on the command-line 2707 // before they get used. 2708 if (i != 0 && Req.EnableVarScope) 2709 PatternContext->clearLocalVars(); 2710 2711 for (; i != j; ++i) { 2712 const FileCheckString &CheckStr = (*CheckStrings)[i]; 2713 2714 // Check each string within the scanned region, including a second check 2715 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 2716 size_t MatchLen = 0; 2717 size_t MatchPos = 2718 CheckStr.Check(SM, CheckRegion, false, MatchLen, Req, Diags); 2719 2720 if (MatchPos == StringRef::npos) { 2721 ChecksFailed = true; 2722 i = j; 2723 break; 2724 } 2725 2726 CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 2727 } 2728 2729 if (j == e) 2730 break; 2731 } 2732 2733 // Success if no checks failed. 2734 return !ChecksFailed; 2735 } 2736