1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // FileCheck does a line-by line check of a file that validates whether it 10 // contains the expected content. This is useful for regression tests etc. 11 // 12 // This file implements most of the API that will be used by the FileCheck utility 13 // as well as various unittests. 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/FileCheck/FileCheck.h" 17 #include "FileCheckImpl.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/ADT/StringSet.h" 21 #include "llvm/ADT/Twine.h" 22 #include "llvm/Support/FormatVariadic.h" 23 #include <cstdint> 24 #include <list> 25 #include <set> 26 #include <tuple> 27 #include <utility> 28 29 using namespace llvm; 30 31 StringRef ExpressionFormat::toString() const { 32 switch (Value) { 33 case Kind::NoFormat: 34 return StringRef("<none>"); 35 case Kind::Unsigned: 36 return StringRef("%u"); 37 case Kind::Signed: 38 return StringRef("%d"); 39 case Kind::HexUpper: 40 return StringRef("%X"); 41 case Kind::HexLower: 42 return StringRef("%x"); 43 } 44 llvm_unreachable("unknown expression format"); 45 } 46 47 Expected<std::string> ExpressionFormat::getWildcardRegex() const { 48 StringRef AlternateFormPrefix = AlternateForm ? 
StringRef("0x") : StringRef(); 49 50 auto CreatePrecisionRegex = [&](StringRef S) { 51 return (Twine(AlternateFormPrefix) + S + Twine('{') + Twine(Precision) + 52 "}") 53 .str(); 54 }; 55 56 switch (Value) { 57 case Kind::Unsigned: 58 if (Precision) 59 return CreatePrecisionRegex("([1-9][0-9]*)?[0-9]"); 60 return std::string("[0-9]+"); 61 case Kind::Signed: 62 if (Precision) 63 return CreatePrecisionRegex("-?([1-9][0-9]*)?[0-9]"); 64 return std::string("-?[0-9]+"); 65 case Kind::HexUpper: 66 if (Precision) 67 return CreatePrecisionRegex("([1-9A-F][0-9A-F]*)?[0-9A-F]"); 68 return (Twine(AlternateFormPrefix) + Twine("[0-9A-F]+")).str(); 69 case Kind::HexLower: 70 if (Precision) 71 return CreatePrecisionRegex("([1-9a-f][0-9a-f]*)?[0-9a-f]"); 72 return (Twine(AlternateFormPrefix) + Twine("[0-9a-f]+")).str(); 73 default: 74 return createStringError(std::errc::invalid_argument, 75 "trying to match value with invalid format"); 76 } 77 } 78 79 Expected<std::string> 80 ExpressionFormat::getMatchingString(APInt IntValue) const { 81 if (Value != Kind::Signed && IntValue.isNegative()) 82 return make_error<OverflowError>(); 83 84 unsigned Radix; 85 bool UpperCase = false; 86 SmallString<8> AbsoluteValueStr; 87 StringRef SignPrefix = IntValue.isNegative() ? "-" : ""; 88 switch (Value) { 89 case Kind::Unsigned: 90 case Kind::Signed: 91 Radix = 10; 92 break; 93 case Kind::HexUpper: 94 UpperCase = true; 95 Radix = 16; 96 break; 97 case Kind::HexLower: 98 Radix = 16; 99 UpperCase = false; 100 break; 101 default: 102 return createStringError(std::errc::invalid_argument, 103 "trying to match value with invalid format"); 104 } 105 IntValue.abs().toString(AbsoluteValueStr, Radix, /*Signed=*/false, 106 /*formatAsCLiteral=*/false, 107 /*UpperCase=*/UpperCase); 108 109 StringRef AlternateFormPrefix = AlternateForm ? 
StringRef("0x") : StringRef(); 110 111 if (Precision > AbsoluteValueStr.size()) { 112 unsigned LeadingZeros = Precision - AbsoluteValueStr.size(); 113 return (Twine(SignPrefix) + Twine(AlternateFormPrefix) + 114 std::string(LeadingZeros, '0') + AbsoluteValueStr) 115 .str(); 116 } 117 118 return (Twine(SignPrefix) + Twine(AlternateFormPrefix) + AbsoluteValueStr) 119 .str(); 120 } 121 122 static unsigned nextAPIntBitWidth(unsigned BitWidth) { 123 return (BitWidth < APInt::APINT_BITS_PER_WORD) ? APInt::APINT_BITS_PER_WORD 124 : BitWidth * 2; 125 } 126 127 static APInt toSigned(APInt AbsVal, bool Negative) { 128 if (AbsVal.isSignBitSet()) 129 AbsVal = AbsVal.zext(nextAPIntBitWidth(AbsVal.getBitWidth())); 130 APInt Result = AbsVal; 131 if (Negative) 132 Result.negate(); 133 return Result; 134 } 135 136 APInt ExpressionFormat::valueFromStringRepr(StringRef StrVal, 137 const SourceMgr &SM) const { 138 bool ValueIsSigned = Value == Kind::Signed; 139 bool Negative = StrVal.consume_front("-"); 140 bool Hex = Value == Kind::HexUpper || Value == Kind::HexLower; 141 bool MissingFormPrefix = 142 !ValueIsSigned && AlternateForm && !StrVal.consume_front("0x"); 143 (void)MissingFormPrefix; 144 assert(!MissingFormPrefix && "missing alternate form prefix"); 145 APInt ResultValue; 146 [[maybe_unused]] bool ParseFailure = 147 StrVal.getAsInteger(Hex ? 16 : 10, ResultValue); 148 // Both the FileCheck utility and library only call this method with a valid 149 // value in StrVal. This is guaranteed by the regex returned by 150 // getWildcardRegex() above. 
151 assert(!ParseFailure && "unable to represent numeric value"); 152 return toSigned(ResultValue, Negative); 153 } 154 155 Expected<APInt> llvm::exprAdd(const APInt &LeftOperand, 156 const APInt &RightOperand, bool &Overflow) { 157 return LeftOperand.sadd_ov(RightOperand, Overflow); 158 } 159 160 Expected<APInt> llvm::exprSub(const APInt &LeftOperand, 161 const APInt &RightOperand, bool &Overflow) { 162 return LeftOperand.ssub_ov(RightOperand, Overflow); 163 } 164 165 Expected<APInt> llvm::exprMul(const APInt &LeftOperand, 166 const APInt &RightOperand, bool &Overflow) { 167 return LeftOperand.smul_ov(RightOperand, Overflow); 168 } 169 170 Expected<APInt> llvm::exprDiv(const APInt &LeftOperand, 171 const APInt &RightOperand, bool &Overflow) { 172 // Check for division by zero. 173 if (RightOperand.isZero()) 174 return make_error<OverflowError>(); 175 176 return LeftOperand.sdiv_ov(RightOperand, Overflow); 177 } 178 179 Expected<APInt> llvm::exprMax(const APInt &LeftOperand, 180 const APInt &RightOperand, bool &Overflow) { 181 Overflow = false; 182 return LeftOperand.slt(RightOperand) ? RightOperand : LeftOperand; 183 } 184 185 Expected<APInt> llvm::exprMin(const APInt &LeftOperand, 186 const APInt &RightOperand, bool &Overflow) { 187 Overflow = false; 188 if (cantFail(exprMax(LeftOperand, RightOperand, Overflow)) == LeftOperand) 189 return RightOperand; 190 191 return LeftOperand; 192 } 193 194 Expected<APInt> NumericVariableUse::eval() const { 195 std::optional<APInt> Value = Variable->getValue(); 196 if (Value) 197 return *Value; 198 199 return make_error<UndefVarError>(getExpressionStr()); 200 } 201 202 Expected<APInt> BinaryOperation::eval() const { 203 Expected<APInt> MaybeLeftOp = LeftOperand->eval(); 204 Expected<APInt> MaybeRightOp = RightOperand->eval(); 205 206 // Bubble up any error (e.g. undefined variables) in the recursive 207 // evaluation. 
208 if (!MaybeLeftOp || !MaybeRightOp) { 209 Error Err = Error::success(); 210 if (!MaybeLeftOp) 211 Err = joinErrors(std::move(Err), MaybeLeftOp.takeError()); 212 if (!MaybeRightOp) 213 Err = joinErrors(std::move(Err), MaybeRightOp.takeError()); 214 return std::move(Err); 215 } 216 217 APInt LeftOp = *MaybeLeftOp; 218 APInt RightOp = *MaybeRightOp; 219 bool Overflow; 220 // Ensure both operands have the same bitwidth. 221 unsigned LeftBitWidth = LeftOp.getBitWidth(); 222 unsigned RightBitWidth = RightOp.getBitWidth(); 223 unsigned NewBitWidth = std::max(LeftBitWidth, RightBitWidth); 224 LeftOp = LeftOp.sext(NewBitWidth); 225 RightOp = RightOp.sext(NewBitWidth); 226 do { 227 Expected<APInt> MaybeResult = EvalBinop(LeftOp, RightOp, Overflow); 228 if (!MaybeResult) 229 return MaybeResult.takeError(); 230 231 if (!Overflow) 232 return MaybeResult; 233 234 NewBitWidth = nextAPIntBitWidth(NewBitWidth); 235 LeftOp = LeftOp.sext(NewBitWidth); 236 RightOp = RightOp.sext(NewBitWidth); 237 } while (true); 238 } 239 240 Expected<ExpressionFormat> 241 BinaryOperation::getImplicitFormat(const SourceMgr &SM) const { 242 Expected<ExpressionFormat> LeftFormat = LeftOperand->getImplicitFormat(SM); 243 Expected<ExpressionFormat> RightFormat = RightOperand->getImplicitFormat(SM); 244 if (!LeftFormat || !RightFormat) { 245 Error Err = Error::success(); 246 if (!LeftFormat) 247 Err = joinErrors(std::move(Err), LeftFormat.takeError()); 248 if (!RightFormat) 249 Err = joinErrors(std::move(Err), RightFormat.takeError()); 250 return std::move(Err); 251 } 252 253 if (*LeftFormat != ExpressionFormat::Kind::NoFormat && 254 *RightFormat != ExpressionFormat::Kind::NoFormat && 255 *LeftFormat != *RightFormat) 256 return ErrorDiagnostic::get( 257 SM, getExpressionStr(), 258 "implicit format conflict between '" + LeftOperand->getExpressionStr() + 259 "' (" + LeftFormat->toString() + ") and '" + 260 RightOperand->getExpressionStr() + "' (" + RightFormat->toString() + 261 "), need an explicit format 
specifier"); 262 263 return *LeftFormat != ExpressionFormat::Kind::NoFormat ? *LeftFormat 264 : *RightFormat; 265 } 266 267 Expected<std::string> NumericSubstitution::getResultRegex() const { 268 assert(ExpressionPointer->getAST() != nullptr && 269 "Substituting empty expression"); 270 Expected<APInt> EvaluatedValue = ExpressionPointer->getAST()->eval(); 271 if (!EvaluatedValue) 272 return EvaluatedValue.takeError(); 273 ExpressionFormat Format = ExpressionPointer->getFormat(); 274 return Format.getMatchingString(*EvaluatedValue); 275 } 276 277 Expected<std::string> NumericSubstitution::getResultForDiagnostics() const { 278 // The "regex" returned by getResultRegex() is just a numeric value 279 // like '42', '0x2A', '-17', 'DEADBEEF' etc. This is already suitable for use 280 // in diagnostics. 281 Expected<std::string> Literal = getResultRegex(); 282 if (!Literal) 283 return Literal; 284 285 return "\"" + std::move(*Literal) + "\""; 286 } 287 288 Expected<std::string> StringSubstitution::getResultRegex() const { 289 // Look up the value and escape it so that we can put it into the regex. 290 Expected<StringRef> VarVal = Context->getPatternVarValue(FromStr); 291 if (!VarVal) 292 return VarVal.takeError(); 293 return Regex::escape(*VarVal); 294 } 295 296 Expected<std::string> StringSubstitution::getResultForDiagnostics() const { 297 Expected<StringRef> VarVal = Context->getPatternVarValue(FromStr); 298 if (!VarVal) 299 return VarVal.takeError(); 300 301 std::string Result; 302 Result.reserve(VarVal->size() + 2); 303 raw_string_ostream OS(Result); 304 305 OS << '"'; 306 // Escape the string if it contains any characters that 307 // make it hard to read, such as non-printable characters (including all 308 // whitespace except space) and double quotes. These are the characters that 309 // are escaped by write_escaped(), except we do not include backslashes, 310 // because they are common in Windows paths and escaping them would make the 311 // output harder to read. 
However, when we do escape, backslashes are escaped 312 // as well, otherwise the output would be ambiguous. 313 const bool NeedsEscaping = 314 llvm::any_of(*VarVal, [](char C) { return !isPrint(C) || C == '"'; }); 315 if (NeedsEscaping) 316 OS.write_escaped(*VarVal); 317 else 318 OS << *VarVal; 319 OS << '"'; 320 if (NeedsEscaping) 321 OS << " (escaped value)"; 322 323 return Result; 324 } 325 326 bool Pattern::isValidVarNameStart(char C) { return C == '_' || isAlpha(C); } 327 328 Expected<Pattern::VariableProperties> 329 Pattern::parseVariable(StringRef &Str, const SourceMgr &SM) { 330 if (Str.empty()) 331 return ErrorDiagnostic::get(SM, Str, "empty variable name"); 332 333 size_t I = 0; 334 bool IsPseudo = Str[0] == '@'; 335 336 // Global vars start with '$'. 337 if (Str[0] == '$' || IsPseudo) 338 ++I; 339 340 if (I == Str.size()) 341 return ErrorDiagnostic::get(SM, Str.substr(I), 342 StringRef("empty ") + 343 (IsPseudo ? "pseudo " : "global ") + 344 "variable name"); 345 346 if (!isValidVarNameStart(Str[I++])) 347 return ErrorDiagnostic::get(SM, Str, "invalid variable name"); 348 349 for (size_t E = Str.size(); I != E; ++I) 350 // Variable names are composed of alphanumeric characters and underscores. 351 if (Str[I] != '_' && !isAlnum(Str[I])) 352 break; 353 354 StringRef Name = Str.take_front(I); 355 Str = Str.substr(I); 356 return VariableProperties {Name, IsPseudo}; 357 } 358 359 // StringRef holding all characters considered as horizontal whitespaces by 360 // FileCheck input canonicalization. 361 constexpr StringLiteral SpaceChars = " \t"; 362 363 // Parsing helper function that strips the first character in S and returns it. 
static char popFront(StringRef &S) {
  // Precondition: S is not empty (front() is read unconditionally).
  char C = S.front();
  S = S.drop_front();
  return C;
}

// Out-of-line definitions of the static ID member of each error class.
char OverflowError::ID = 0;
char UndefVarError::ID = 0;
char ErrorDiagnostic::ID = 0;
char NotFoundError::ID = 0;
char ErrorReported::ID = 0;

/// Parses the definition of a numeric variable at the start of \p Expr.
///
/// Fails with a diagnostic if the name is invalid, names a pseudo variable,
/// collides with an existing string variable, is followed by anything other
/// than whitespace, or was previously defined with a different implicit
/// format. Otherwise returns the existing variable of that name from
/// \p Context, or a newly created one with \p ImplicitFormat and
/// \p LineNumber.
Expected<NumericVariable *> Pattern::parseNumericVariableDefinition(
    StringRef &Expr, FileCheckPatternContext *Context,
    std::optional<size_t> LineNumber, ExpressionFormat ImplicitFormat,
    const SourceMgr &SM) {
  Expected<VariableProperties> ParseVarResult = parseVariable(Expr, SM);
  if (!ParseVarResult)
    return ParseVarResult.takeError();
  StringRef Name = ParseVarResult->Name;

  if (ParseVarResult->IsPseudo)
    return ErrorDiagnostic::get(
        SM, Name, "definition of pseudo numeric variable unsupported");

  // Detect collisions between string and numeric variables when the latter
  // is created later than the former.
  if (Context->DefinedVariableTable.contains(Name))
    return ErrorDiagnostic::get(
        SM, Name, "string variable with name '" + Name + "' already exists");

  // Only trailing whitespace may follow the variable name.
  Expr = Expr.ltrim(SpaceChars);
  if (!Expr.empty())
    return ErrorDiagnostic::get(
        SM, Expr, "unexpected characters after numeric variable name");

  // Reuse the variable if it is already known, provided the formats agree.
  NumericVariable *DefinedNumericVariable;
  auto VarTableIter = Context->GlobalNumericVariableTable.find(Name);
  if (VarTableIter != Context->GlobalNumericVariableTable.end()) {
    DefinedNumericVariable = VarTableIter->second;
    if (DefinedNumericVariable->getImplicitFormat() != ImplicitFormat)
      return ErrorDiagnostic::get(
          SM, Expr, "format different from previous variable definition");
  } else
    DefinedNumericVariable =
        Context->makeNumericVariable(Name, ImplicitFormat, LineNumber);

  return DefinedNumericVariable;
}

/// Creates the AST node for a use of the numeric variable \p Name.
///
/// The only pseudo variable accepted is "@LINE". A variable that is not yet
/// known to \p Context is registered as a dummy unsigned variable so that
/// parsing can continue. Using a variable on the line that defines it is an
/// error.
Expected<std::unique_ptr<NumericVariableUse>> Pattern::parseNumericVariableUse(
    StringRef Name, bool IsPseudo, std::optional<size_t> LineNumber,
    FileCheckPatternContext *Context, const SourceMgr &SM) {
  if (IsPseudo && Name != "@LINE")
    return ErrorDiagnostic::get(
        SM, Name, "invalid pseudo numeric variable '" + Name + "'");

  // Numeric variable definitions and uses are parsed in the order in which
  // they appear in the CHECK patterns. For each definition, the pointer to the
  // class instance of the corresponding numeric variable definition is stored
  // in GlobalNumericVariableTable in parsePattern. Therefore, if the pointer
  // we get below is null, it means no such variable was defined before. When
  // that happens, we create a dummy variable so that parsing can continue. All
  // uses of undefined variables, whether string or numeric, are then diagnosed
  // in printNoMatch() after failing to match.
  auto [VarTableIter, Inserted] =
      Context->GlobalNumericVariableTable.try_emplace(Name);
  if (Inserted)
    VarTableIter->second = Context->makeNumericVariable(
        Name, ExpressionFormat(ExpressionFormat::Kind::Unsigned));
  NumericVariable *NumericVariable = VarTableIter->second;

  // A variable cannot be used in the directive that defines it.
  std::optional<size_t> DefLineNumber = NumericVariable->getDefLineNumber();
  if (DefLineNumber && LineNumber && *DefLineNumber == *LineNumber)
    return ErrorDiagnostic::get(
        SM, Name,
        "numeric variable '" + Name +
            "' defined earlier in the same CHECK directive");

  return std::make_unique<NumericVariableUse>(Name, NumericVariable);
}

/// Parses one operand at the start of \p Expr and advances \p Expr past it.
///
/// Depending on \p AO the operand may be a parenthesized expression, a
/// numeric variable use, a function call or an integer literal.
/// \p MaybeInvalidConstraint only affects the wording of the diagnostic
/// emitted when the operand cannot be parsed as a literal.
Expected<std::unique_ptr<ExpressionAST>> Pattern::parseNumericOperand(
    StringRef &Expr, AllowedOperand AO, bool MaybeInvalidConstraint,
    std::optional<size_t> LineNumber, FileCheckPatternContext *Context,
    const SourceMgr &SM) {
  if (Expr.starts_with("(")) {
    if (AO != AllowedOperand::Any)
      return ErrorDiagnostic::get(
          SM, Expr, "parenthesized expression not permitted here");
    return parseParenExpr(Expr, LineNumber, Context, SM);
  }

  if (AO == AllowedOperand::LineVar || AO == AllowedOperand::Any) {
    // Try to parse as a numeric variable use.
    Expected<Pattern::VariableProperties> ParseVarResult =
        parseVariable(Expr, SM);
    if (ParseVarResult) {
      // Try to parse a function call.
      if (Expr.ltrim(SpaceChars).starts_with("(")) {
        if (AO != AllowedOperand::Any)
          return ErrorDiagnostic::get(SM, ParseVarResult->Name,
                                      "unexpected function call");

        return parseCallExpr(Expr, ParseVarResult->Name, LineNumber, Context,
                             SM);
      }

      return parseNumericVariableUse(ParseVarResult->Name,
                                     ParseVarResult->IsPseudo, LineNumber,
                                     Context, SM);
    }

    if (AO == AllowedOperand::LineVar)
      return ParseVarResult.takeError();
    // Ignore the error and retry parsing as a literal.
    consumeError(ParseVarResult.takeError());
  }

  // Otherwise, parse it as a literal.
  APInt LiteralValue;
  StringRef SaveExpr = Expr;
  bool Negative = Expr.consume_front("-");
  // Legacy literals are parsed as decimal; otherwise radix 0 is passed to
  // consumeInteger(), which returns false on success.
  if (!Expr.consumeInteger((AO == AllowedOperand::LegacyLiteral) ? 10 : 0,
                           LiteralValue)) {
    LiteralValue = toSigned(LiteralValue, Negative);
    // The literal's text is what was consumed from SaveExpr.
    return std::make_unique<ExpressionLiteral>(SaveExpr.drop_back(Expr.size()),
                                               LiteralValue);
  }
  return ErrorDiagnostic::get(
      SM, SaveExpr,
      Twine("invalid ") +
          (MaybeInvalidConstraint ? "matching constraint or " : "") +
          "operand format");
}

/// Parses a parenthesized expression starting at the '(' in \p Expr and
/// advances \p Expr past the matching ')'. Returns a diagnostic if the
/// parentheses are empty, unterminated, or their content fails to parse.
Expected<std::unique_ptr<ExpressionAST>>
Pattern::parseParenExpr(StringRef &Expr, std::optional<size_t> LineNumber,
                        FileCheckPatternContext *Context, const SourceMgr &SM) {
  Expr = Expr.ltrim(SpaceChars);
  assert(Expr.starts_with("("));

  // Parse right operand.
  Expr.consume_front("(");
  Expr = Expr.ltrim(SpaceChars);
  if (Expr.empty())
    return ErrorDiagnostic::get(SM, Expr, "missing operand in expression");

  // Note: parseNumericOperand handles nested opening parentheses.
  Expected<std::unique_ptr<ExpressionAST>> SubExprResult = parseNumericOperand(
      Expr, AllowedOperand::Any, /*MaybeInvalidConstraint=*/false, LineNumber,
      Context, SM);
  Expr = Expr.ltrim(SpaceChars);
  // Fold any following "<op> <operand>" pairs into the sub-expression until
  // the closing parenthesis, the end of input, or an error is reached.
  while (SubExprResult && !Expr.empty() && !Expr.starts_with(")")) {
    StringRef OrigExpr = Expr;
    SubExprResult = parseBinop(OrigExpr, Expr, std::move(*SubExprResult), false,
                               LineNumber, Context, SM);
    Expr = Expr.ltrim(SpaceChars);
  }
  if (!SubExprResult)
    return SubExprResult;

  if (!Expr.consume_front(")")) {
    return ErrorDiagnostic::get(SM, Expr,
                                "missing ')' at end of nested expression");
  }
  return SubExprResult;
}

/// Parses "<op> <operand>" at the start of \p RemainingExpr and combines it
/// with \p LeftOp into a binary operation.
///
/// \p Expr is the text the resulting operation is recorded with, truncated
/// to end where \p RemainingExpr ends up. Only '+' and '-' are accepted as
/// operators. If \p RemainingExpr is empty after skipping whitespace,
/// \p LeftOp is returned unchanged. For legacy @LINE expressions the right
/// operand must be a literal.
Expected<std::unique_ptr<ExpressionAST>>
Pattern::parseBinop(StringRef Expr, StringRef &RemainingExpr,
                    std::unique_ptr<ExpressionAST> LeftOp,
                    bool IsLegacyLineExpr, std::optional<size_t> LineNumber,
                    FileCheckPatternContext *Context, const SourceMgr &SM) {
  RemainingExpr = RemainingExpr.ltrim(SpaceChars);
  if (RemainingExpr.empty())
    return std::move(LeftOp);

  // Check if this is a supported operation and select a function to perform
  // it.
  SMLoc OpLoc = SMLoc::getFromPointer(RemainingExpr.data());
  char Operator = popFront(RemainingExpr);
  binop_eval_t EvalBinop;
  switch (Operator) {
  case '+':
    EvalBinop = exprAdd;
    break;
  case '-':
    EvalBinop = exprSub;
    break;
  default:
    return ErrorDiagnostic::get(
        SM, OpLoc, Twine("unsupported operation '") + Twine(Operator) + "'");
  }

  // Parse right operand.
  RemainingExpr = RemainingExpr.ltrim(SpaceChars);
  if (RemainingExpr.empty())
    return ErrorDiagnostic::get(SM, RemainingExpr,
                                "missing operand in expression");
  // The second operand in a legacy @LINE expression is always a literal.
  AllowedOperand AO =
      IsLegacyLineExpr ? AllowedOperand::LegacyLiteral : AllowedOperand::Any;
  Expected<std::unique_ptr<ExpressionAST>> RightOpResult =
      parseNumericOperand(RemainingExpr, AO, /*MaybeInvalidConstraint=*/false,
                          LineNumber, Context, SM);
  if (!RightOpResult)
    return RightOpResult;

  // Trim Expr so that it ends where the right operand ends.
  Expr = Expr.drop_back(RemainingExpr.size());
  return std::make_unique<BinaryOperation>(Expr, EvalBinop, std::move(LeftOp),
                                           std::move(*RightOpResult));
}

/// Parses the argument list of a call to \p FuncName starting at the '(' in
/// \p Expr, and advances \p Expr past the closing ')'.
///
/// The recognized functions are add, div, max, min, mul and sub, and each
/// takes exactly two arguments. Every argument is itself an expression.
Expected<std::unique_ptr<ExpressionAST>>
Pattern::parseCallExpr(StringRef &Expr, StringRef FuncName,
                       std::optional<size_t> LineNumber,
                       FileCheckPatternContext *Context, const SourceMgr &SM) {
  Expr = Expr.ltrim(SpaceChars);
  assert(Expr.starts_with("("));

  auto OptFunc = StringSwitch<binop_eval_t>(FuncName)
                     .Case("add", exprAdd)
                     .Case("div", exprDiv)
                     .Case("max", exprMax)
                     .Case("min", exprMin)
                     .Case("mul", exprMul)
                     .Case("sub", exprSub)
                     .Default(nullptr);

  if (!OptFunc)
    return ErrorDiagnostic::get(
        SM, FuncName, Twine("call to undefined function '") + FuncName + "'");

  Expr.consume_front("(");
  Expr = Expr.ltrim(SpaceChars);

  // Parse call arguments, which are comma separated.
  SmallVector<std::unique_ptr<ExpressionAST>, 4> Args;
  while (!Expr.empty() && !Expr.starts_with(")")) {
    if (Expr.starts_with(","))
      return ErrorDiagnostic::get(SM, Expr, "missing argument");

    // Parse the argument, which is an arbitrary expression.
    StringRef OuterBinOpExpr = Expr;
    Expected<std::unique_ptr<ExpressionAST>> Arg = parseNumericOperand(
        Expr, AllowedOperand::Any, /*MaybeInvalidConstraint=*/false, LineNumber,
        Context, SM);
    while (Arg && !Expr.empty()) {
      Expr = Expr.ltrim(SpaceChars);
      // Have we reached an argument terminator?
      if (Expr.starts_with(",") || Expr.starts_with(")"))
        break;

      // Arg = Arg <op> <expr>
      Arg = parseBinop(OuterBinOpExpr, Expr, std::move(*Arg), false, LineNumber,
                       Context, SM);
    }

    // Prefer an expression error over a generic invalid argument message.
    if (!Arg)
      return Arg.takeError();
    Args.push_back(std::move(*Arg));

    // Have we parsed all available arguments?
    Expr = Expr.ltrim(SpaceChars);
    if (!Expr.consume_front(","))
      break;

    // A comma must be followed by another argument.
    Expr = Expr.ltrim(SpaceChars);
    if (Expr.starts_with(")"))
      return ErrorDiagnostic::get(SM, Expr, "missing argument");
  }

  if (!Expr.consume_front(")"))
    return ErrorDiagnostic::get(SM, Expr,
                                "missing ')' at end of call expression");

  const unsigned NumArgs = Args.size();
  // NOTE(review): Expr now refers to the text following the call, so the
  // operation is recorded with that text rather than with the call itself --
  // confirm this is intended.
  if (NumArgs == 2)
    return std::make_unique<BinaryOperation>(Expr, *OptFunc, std::move(Args[0]),
                                             std::move(Args[1]));

  // TODO: Support more than binop_eval_t.
  return ErrorDiagnostic::get(SM, FuncName,
                              Twine("function '") + FuncName +
                                  Twine("' takes 2 arguments but ") +
                                  Twine(NumArgs) + " given");
}

/// Parses the content of a numeric substitution block.
///
/// \p Expr has the shape "[<format>,] [<variable>:] [== ] [<expression>]".
/// On success returns the parsed expression (whose AST is null when no
/// expression was given) together with its format, and sets
/// \p DefinedNumericVariable to the variable being defined, or to
/// std::nullopt when the block defines none. \p IsLegacyLineExpr restricts
/// the expression to "@LINE", optionally followed by a single operator and
/// decimal literal.
Expected<std::unique_ptr<Expression>> Pattern::parseNumericSubstitutionBlock(
    StringRef Expr, std::optional<NumericVariable *> &DefinedNumericVariable,
    bool IsLegacyLineExpr, std::optional<size_t> LineNumber,
    FileCheckPatternContext *Context, const SourceMgr &SM) {
  std::unique_ptr<ExpressionAST> ExpressionASTPointer = nullptr;
  StringRef DefExpr = StringRef();
  DefinedNumericVariable = std::nullopt;
  ExpressionFormat ExplicitFormat = ExpressionFormat();
  unsigned Precision = 0;

  // Parse format specifier (NOTE: ',' is also an argument separator).
  // A ',' only introduces a format specifier if it comes before the first
  // '('; otherwise it separates call arguments.
  size_t FormatSpecEnd = Expr.find(',');
  size_t FunctionStart = Expr.find('(');
  if (FormatSpecEnd != StringRef::npos && FormatSpecEnd < FunctionStart) {
    StringRef FormatExpr = Expr.take_front(FormatSpecEnd);
    Expr = Expr.drop_front(FormatSpecEnd + 1);
    FormatExpr = FormatExpr.trim(SpaceChars);
    if (!FormatExpr.consume_front("%"))
      return ErrorDiagnostic::get(
          SM, FormatExpr,
          "invalid matching format specification in expression");

    // Parse alternate form flag.
    SMLoc AlternateFormFlagLoc = SMLoc::getFromPointer(FormatExpr.data());
    bool AlternateForm = FormatExpr.consume_front("#");

    // Parse precision.
    if (FormatExpr.consume_front(".")) {
      if (FormatExpr.consumeInteger(10, Precision))
        return ErrorDiagnostic::get(SM, FormatExpr,
                                    "invalid precision in format specifier");
    }

    if (!FormatExpr.empty()) {
      // Check for unknown matching format specifier and set matching format in
      // class instance representing this expression.
      SMLoc FmtLoc = SMLoc::getFromPointer(FormatExpr.data());
      switch (popFront(FormatExpr)) {
      case 'u':
        ExplicitFormat =
            ExpressionFormat(ExpressionFormat::Kind::Unsigned, Precision);
        break;
      case 'd':
        ExplicitFormat =
            ExpressionFormat(ExpressionFormat::Kind::Signed, Precision);
        break;
      case 'x':
        ExplicitFormat = ExpressionFormat(ExpressionFormat::Kind::HexLower,
                                          Precision, AlternateForm);
        break;
      case 'X':
        ExplicitFormat = ExpressionFormat(ExpressionFormat::Kind::HexUpper,
                                          Precision, AlternateForm);
        break;
      default:
        return ErrorDiagnostic::get(SM, FmtLoc,
                                    "invalid format specifier in expression");
      }
    }

    if (AlternateForm && ExplicitFormat != ExpressionFormat::Kind::HexLower &&
        ExplicitFormat != ExpressionFormat::Kind::HexUpper)
      return ErrorDiagnostic::get(
          SM, AlternateFormFlagLoc,
          "alternate form only supported for hex values");

    // Nothing but whitespace may follow the conversion character.
    FormatExpr = FormatExpr.ltrim(SpaceChars);
    if (!FormatExpr.empty())
      return ErrorDiagnostic::get(
          SM, FormatExpr,
          "invalid matching format specification in expression");
  }

  // Save variable definition expression if any.
  size_t DefEnd = Expr.find(':');
  if (DefEnd != StringRef::npos) {
    DefExpr = Expr.substr(0, DefEnd);
    Expr = Expr.substr(DefEnd + 1);
  }

  // Parse matching constraint.
  Expr = Expr.ltrim(SpaceChars);
  bool HasParsedValidConstraint = Expr.consume_front("==");

  // Parse the expression itself.
  Expr = Expr.ltrim(SpaceChars);
  if (Expr.empty()) {
    if (HasParsedValidConstraint)
      return ErrorDiagnostic::get(
          SM, Expr, "empty numeric expression should not have a constraint");
  } else {
    Expr = Expr.rtrim(SpaceChars);
    StringRef OuterBinOpExpr = Expr;
    // The first operand in a legacy @LINE expression is always the @LINE
    // pseudo variable.
    AllowedOperand AO =
        IsLegacyLineExpr ? AllowedOperand::LineVar : AllowedOperand::Any;
    Expected<std::unique_ptr<ExpressionAST>> ParseResult = parseNumericOperand(
        Expr, AO, !HasParsedValidConstraint, LineNumber, Context, SM);
    while (ParseResult && !Expr.empty()) {
      ParseResult = parseBinop(OuterBinOpExpr, Expr, std::move(*ParseResult),
                               IsLegacyLineExpr, LineNumber, Context, SM);
      // Legacy @LINE expressions only allow 2 operands.
      if (ParseResult && IsLegacyLineExpr && !Expr.empty())
        return ErrorDiagnostic::get(
            SM, Expr,
            "unexpected characters at end of expression '" + Expr + "'");
    }
    if (!ParseResult)
      return ParseResult.takeError();
    ExpressionASTPointer = std::move(*ParseResult);
  }

  // Select format of the expression, i.e. (i) its explicit format, if any,
  // otherwise (ii) its implicit format, if any, otherwise (iii) the default
  // format (unsigned). Error out in case of conflicting implicit format
  // without explicit format.
  ExpressionFormat Format;
  if (ExplicitFormat)
    Format = ExplicitFormat;
  else if (ExpressionASTPointer) {
    Expected<ExpressionFormat> ImplicitFormat =
        ExpressionASTPointer->getImplicitFormat(SM);
    if (!ImplicitFormat)
      return ImplicitFormat.takeError();
    Format = *ImplicitFormat;
  }
  if (!Format)
    Format = ExpressionFormat(ExpressionFormat::Kind::Unsigned, Precision);

  std::unique_ptr<Expression> ExpressionPointer =
      std::make_unique<Expression>(std::move(ExpressionASTPointer), Format);

  // Parse the numeric variable definition.
  if (DefEnd != StringRef::npos) {
    DefExpr = DefExpr.ltrim(SpaceChars);
    Expected<NumericVariable *> ParseResult = parseNumericVariableDefinition(
        DefExpr, Context, LineNumber, ExpressionPointer->getFormat(), SM);

    if (!ParseResult)
      return ParseResult.takeError();
    DefinedNumericVariable = *ParseResult;
  }

  return std::move(ExpressionPointer);
}

/// Parses the check pattern \p PatternStr found after the directive with
/// prefix \p Prefix.
///
/// Each failure path below prints a diagnostic (through \p SM or to errs())
/// and returns true. The fast paths (empty check, literal match, plain fixed
/// string) record the pattern and return false.
bool Pattern::parsePattern(StringRef PatternStr, StringRef Prefix,
                           SourceMgr &SM, const FileCheckRequest &Req) {
  bool MatchFullLinesHere = Req.MatchFullLines && CheckTy != Check::CheckNot;
  IgnoreCase = Req.IgnoreCase;

  PatternLoc = SMLoc::getFromPointer(PatternStr.data());

  if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines))
    // Ignore trailing whitespace.
    PatternStr = PatternStr.rtrim(" \t");

  // Check that there is something on the line.
  if (PatternStr.empty() && CheckTy != Check::CheckEmpty) {
    SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
                    "found empty check string with prefix '" + Prefix + ":'");
    return true;
  }

  if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) {
    SM.PrintMessage(
        PatternLoc, SourceMgr::DK_Error,
        "found non-empty check string for empty check with prefix '" + Prefix +
            ":'");
    return true;
  }

  if (CheckTy == Check::CheckEmpty) {
    RegExStr = "(\n$)";
    return false;
  }

  // If literal check, set fixed string.
  if (CheckTy.isLiteralMatch()) {
    FixedStr = PatternStr;
    return false;
  }

  // Check to see if this is a fixed string, or if it has regex pieces.
  if (!MatchFullLinesHere &&
      (PatternStr.size() < 2 ||
       (!PatternStr.contains("{{") && !PatternStr.contains("[[")))) {
    FixedStr = PatternStr;
    return false;
  }

  if (MatchFullLinesHere) {
    RegExStr += '^';
    if (!Req.NoCanonicalizeWhiteSpace)
      RegExStr += " *";
  }

  // Paren value #0 is for the fully matched string. Any new parenthesized
  // values add from there.
  unsigned CurParen = 1;

  // Otherwise, there is at least one regex piece. Build up the regex pattern
  // by escaping scary characters in fixed strings, building up one big regex.
  while (!PatternStr.empty()) {
    // RegEx matches.
    if (PatternStr.starts_with("{{")) {
      // This is the start of a regex match. Scan for the }}.
      size_t End = PatternStr.find("}}");
      if (End == StringRef::npos) {
        SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
                        SourceMgr::DK_Error,
                        "found start of regex string with no end '}}'");
        return true;
      }

      // Enclose {{}} patterns in parens just like [[]] even though we're not
      // capturing the result for any purpose. This is required in case the
      // expression contains an alternation like: CHECK: abc{{x|z}}def. We
      // want this to turn into: "abc(x|z)def" not "abcx|zdef".
      // Note: the whole remaining pattern is searched for '|', not only the
      // text between the braces.
      bool HasAlternation = PatternStr.contains('|');
      if (HasAlternation) {
        RegExStr += '(';
        ++CurParen;
      }

      if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
        return true;
      if (HasAlternation)
        RegExStr += ')';

      PatternStr = PatternStr.substr(End + 2);
      continue;
    }

    // String and numeric substitution blocks. Pattern substitution blocks come
    // in two forms: [[foo:.*]] and [[foo]]. The former matches .* (or some
    // other regex) and assigns it to the string variable 'foo'. The latter
    // substitutes foo's value. Numeric substitution blocks recognize the same
    // form as string ones, but start with a '#' sign after the double
    // brackets. They also accept a combined form which sets a numeric variable
    // to the evaluation of an expression. Both string and numeric variable
    // names must satisfy the regular expression "[a-zA-Z_][0-9a-zA-Z_]*" to be
    // valid, as this helps catch some common errors. If there are extra '['s
    // before the "[[", treat them literally.
    if (PatternStr.starts_with("[[") && !PatternStr.starts_with("[[[")) {
      StringRef UnparsedPatternStr = PatternStr.substr(2);
      // Find the closing bracket pair ending the match. End is going to be an
      // offset relative to the beginning of the match string.
      size_t End = FindRegexVarEnd(UnparsedPatternStr, SM);
      StringRef MatchStr = UnparsedPatternStr.substr(0, End);
      bool IsNumBlock = MatchStr.consume_front("#");

      if (End == StringRef::npos) {
        SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
                        SourceMgr::DK_Error,
                        "Invalid substitution block, no ]] found");
        return true;
      }
      // Strip the substitution block we are parsing. End points to the start
      // of the "]]" closing the expression so account for it in computing the
      // index of the first unparsed character.
      PatternStr = UnparsedPatternStr.substr(End + 2);

      bool IsDefinition = false;
      bool SubstNeeded = false;
      // Whether the substitution block is a legacy use of @LINE with string
      // substitution block syntax.
      bool IsLegacyLineExpr = false;
      StringRef DefName;
      StringRef SubstStr;
      StringRef MatchRegexp;
      std::string WildcardRegexp;
      size_t SubstInsertIdx = RegExStr.size();

      // Parse string variable or legacy @LINE expression.
      if (!IsNumBlock) {
        size_t VarEndIdx = MatchStr.find(':');
        size_t SpacePos = MatchStr.substr(0, VarEndIdx).find_first_of(" \t");
        if (SpacePos != StringRef::npos) {
          SM.PrintMessage(SMLoc::getFromPointer(MatchStr.data() + SpacePos),
                          SourceMgr::DK_Error, "unexpected whitespace");
          return true;
        }

        // Get the name (e.g. "foo") and verify it is well formed.
        StringRef OrigMatchStr = MatchStr;
        Expected<Pattern::VariableProperties> ParseVarResult =
            parseVariable(MatchStr, SM);
        if (!ParseVarResult) {
          logAllUnhandledErrors(ParseVarResult.takeError(), errs());
          return true;
        }
        StringRef Name = ParseVarResult->Name;
        bool IsPseudo = ParseVarResult->IsPseudo;

        IsDefinition = (VarEndIdx != StringRef::npos);
        SubstNeeded = !IsDefinition;
        if (IsDefinition) {
          if ((IsPseudo || !MatchStr.consume_front(":"))) {
            SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
                            SourceMgr::DK_Error,
                            "invalid name in string variable definition");
            return true;
          }

          // Detect collisions between string and numeric variables when the
          // former is created later than the latter.
          if (Context->GlobalNumericVariableTable.contains(Name)) {
            SM.PrintMessage(
                SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
                "numeric variable with name '" + Name + "' already exists");
            return true;
          }
          DefName = Name;
          MatchRegexp = MatchStr;
        } else {
          if (IsPseudo) {
            // Legacy @LINE use: hand the whole block to the numeric parser.
            MatchStr = OrigMatchStr;
            IsLegacyLineExpr = IsNumBlock = true;
          } else {
            if (!MatchStr.empty()) {
              SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
                              SourceMgr::DK_Error,
                              "invalid name in string variable use");
              return true;
            }
            SubstStr = Name;
          }
        }
      }

      // Parse numeric substitution block.
987 std::unique_ptr<Expression> ExpressionPointer; 988 std::optional<NumericVariable *> DefinedNumericVariable; 989 if (IsNumBlock) { 990 Expected<std::unique_ptr<Expression>> ParseResult = 991 parseNumericSubstitutionBlock(MatchStr, DefinedNumericVariable, 992 IsLegacyLineExpr, LineNumber, Context, 993 SM); 994 if (!ParseResult) { 995 logAllUnhandledErrors(ParseResult.takeError(), errs()); 996 return true; 997 } 998 ExpressionPointer = std::move(*ParseResult); 999 SubstNeeded = ExpressionPointer->getAST() != nullptr; 1000 if (DefinedNumericVariable) { 1001 IsDefinition = true; 1002 DefName = (*DefinedNumericVariable)->getName(); 1003 } 1004 if (SubstNeeded) 1005 SubstStr = MatchStr; 1006 else { 1007 ExpressionFormat Format = ExpressionPointer->getFormat(); 1008 WildcardRegexp = cantFail(Format.getWildcardRegex()); 1009 MatchRegexp = WildcardRegexp; 1010 } 1011 } 1012 1013 // Handle variable definition: [[<def>:(...)]] and [[#(...)<def>:(...)]]. 1014 if (IsDefinition) { 1015 RegExStr += '('; 1016 ++SubstInsertIdx; 1017 1018 if (IsNumBlock) { 1019 NumericVariableMatch NumericVariableDefinition = { 1020 *DefinedNumericVariable, CurParen}; 1021 NumericVariableDefs[DefName] = NumericVariableDefinition; 1022 // This store is done here rather than in match() to allow 1023 // parseNumericVariableUse() to get the pointer to the class instance 1024 // of the right variable definition corresponding to a given numeric 1025 // variable use. 1026 Context->GlobalNumericVariableTable[DefName] = 1027 *DefinedNumericVariable; 1028 } else { 1029 VariableDefs[DefName] = CurParen; 1030 // Mark string variable as defined to detect collisions between 1031 // string and numeric variables in parseNumericVariableUse() and 1032 // defineCmdlineVariables() when the latter is created later than the 1033 // former. 
We cannot reuse GlobalVariableTable for this by populating 1034 // it with an empty string since we would then lose the ability to 1035 // detect the use of an undefined variable in match(). 1036 Context->DefinedVariableTable[DefName] = true; 1037 } 1038 1039 ++CurParen; 1040 } 1041 1042 if (!MatchRegexp.empty() && AddRegExToRegEx(MatchRegexp, CurParen, SM)) 1043 return true; 1044 1045 if (IsDefinition) 1046 RegExStr += ')'; 1047 1048 // Handle substitutions: [[foo]] and [[#<foo expr>]]. 1049 if (SubstNeeded) { 1050 // Handle substitution of string variables that were defined earlier on 1051 // the same line by emitting a backreference. Expressions do not 1052 // support substituting a numeric variable defined on the same line. 1053 decltype(VariableDefs)::iterator It; 1054 if (!IsNumBlock && 1055 (It = VariableDefs.find(SubstStr)) != VariableDefs.end()) { 1056 unsigned CaptureParenGroup = It->second; 1057 if (CaptureParenGroup < 1 || CaptureParenGroup > 9) { 1058 SM.PrintMessage(SMLoc::getFromPointer(SubstStr.data()), 1059 SourceMgr::DK_Error, 1060 "Can't back-reference more than 9 variables"); 1061 return true; 1062 } 1063 AddBackrefToRegEx(CaptureParenGroup); 1064 } else { 1065 // Handle substitution of string variables ([[<var>]]) defined in 1066 // previous CHECK patterns, and substitution of expressions. 1067 Substitution *Substitution = 1068 IsNumBlock 1069 ? Context->makeNumericSubstitution( 1070 SubstStr, std::move(ExpressionPointer), SubstInsertIdx) 1071 : Context->makeStringSubstitution(SubstStr, SubstInsertIdx); 1072 Substitutions.push_back(Substitution); 1073 } 1074 } 1075 1076 continue; 1077 } 1078 1079 // Handle fixed string matches. 1080 // Find the end, which is the start of the next regex. 
1081 size_t FixedMatchEnd = 1082 std::min(PatternStr.find("{{", 1), PatternStr.find("[[", 1)); 1083 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 1084 PatternStr = PatternStr.substr(FixedMatchEnd); 1085 } 1086 1087 if (MatchFullLinesHere) { 1088 if (!Req.NoCanonicalizeWhiteSpace) 1089 RegExStr += " *"; 1090 RegExStr += '$'; 1091 } 1092 1093 return false; 1094 } 1095 1096 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) { 1097 Regex R(RS); 1098 std::string Error; 1099 if (!R.isValid(Error)) { 1100 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 1101 "invalid regex: " + Error); 1102 return true; 1103 } 1104 1105 RegExStr += RS.str(); 1106 CurParen += R.getNumMatches(); 1107 return false; 1108 } 1109 1110 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 1111 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 1112 std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum); 1113 RegExStr += Backref; 1114 } 1115 1116 Pattern::MatchResult Pattern::match(StringRef Buffer, 1117 const SourceMgr &SM) const { 1118 // If this is the EOF pattern, match it immediately. 1119 if (CheckTy == Check::CheckEOF) 1120 return MatchResult(Buffer.size(), 0, Error::success()); 1121 1122 // If this is a fixed string pattern, just match it now. 1123 if (!FixedStr.empty()) { 1124 size_t Pos = 1125 IgnoreCase ? Buffer.find_insensitive(FixedStr) : Buffer.find(FixedStr); 1126 if (Pos == StringRef::npos) 1127 return make_error<NotFoundError>(); 1128 return MatchResult(Pos, /*MatchLen=*/FixedStr.size(), Error::success()); 1129 } 1130 1131 // Regex match. 1132 1133 // If there are substitutions, we need to create a temporary string with the 1134 // actual value. 
1135 StringRef RegExToMatch = RegExStr; 1136 std::string TmpStr; 1137 if (!Substitutions.empty()) { 1138 TmpStr = RegExStr; 1139 if (LineNumber) 1140 Context->LineVariable->setValue( 1141 APInt(sizeof(*LineNumber) * 8, *LineNumber)); 1142 1143 size_t InsertOffset = 0; 1144 // Substitute all string variables and expressions whose values are only 1145 // now known. Use of string variables defined on the same line are handled 1146 // by back-references. 1147 Error Errs = Error::success(); 1148 for (const auto &Substitution : Substitutions) { 1149 // Substitute and check for failure (e.g. use of undefined variable). 1150 Expected<std::string> Value = Substitution->getResultRegex(); 1151 if (!Value) { 1152 // Convert to an ErrorDiagnostic to get location information. This is 1153 // done here rather than printMatch/printNoMatch since now we know which 1154 // substitution block caused the overflow. 1155 Errs = joinErrors(std::move(Errs), 1156 handleErrors( 1157 Value.takeError(), 1158 [&](const OverflowError &E) { 1159 return ErrorDiagnostic::get( 1160 SM, Substitution->getFromString(), 1161 "unable to substitute variable or " 1162 "numeric expression: overflow error"); 1163 }, 1164 [&SM](const UndefVarError &E) { 1165 return ErrorDiagnostic::get(SM, E.getVarName(), 1166 E.message()); 1167 })); 1168 continue; 1169 } 1170 1171 // Plop it into the regex at the adjusted offset. 1172 TmpStr.insert(TmpStr.begin() + Substitution->getIndex() + InsertOffset, 1173 Value->begin(), Value->end()); 1174 InsertOffset += Value->size(); 1175 } 1176 if (Errs) 1177 return std::move(Errs); 1178 1179 // Match the newly constructed regex. 1180 RegExToMatch = TmpStr; 1181 } 1182 1183 SmallVector<StringRef, 4> MatchInfo; 1184 unsigned int Flags = Regex::Newline; 1185 if (IgnoreCase) 1186 Flags |= Regex::IgnoreCase; 1187 if (!Regex(RegExToMatch, Flags).match(Buffer, &MatchInfo)) 1188 return make_error<NotFoundError>(); 1189 1190 // Successful regex match. 
1191 assert(!MatchInfo.empty() && "Didn't get any match"); 1192 StringRef FullMatch = MatchInfo[0]; 1193 1194 // If this defines any string variables, remember their values. 1195 for (const auto &VariableDef : VariableDefs) { 1196 assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); 1197 Context->GlobalVariableTable[VariableDef.first] = 1198 MatchInfo[VariableDef.second]; 1199 } 1200 1201 // Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after 1202 // the required preceding newline, which is consumed by the pattern in the 1203 // case of CHECK-EMPTY but not CHECK-NEXT. 1204 size_t MatchStartSkip = CheckTy == Check::CheckEmpty; 1205 Match TheMatch; 1206 TheMatch.Pos = FullMatch.data() - Buffer.data() + MatchStartSkip; 1207 TheMatch.Len = FullMatch.size() - MatchStartSkip; 1208 1209 // If this defines any numeric variables, remember their values. 1210 for (const auto &NumericVariableDef : NumericVariableDefs) { 1211 const NumericVariableMatch &NumericVariableMatch = 1212 NumericVariableDef.getValue(); 1213 unsigned CaptureParenGroup = NumericVariableMatch.CaptureParenGroup; 1214 assert(CaptureParenGroup < MatchInfo.size() && "Internal paren error"); 1215 NumericVariable *DefinedNumericVariable = 1216 NumericVariableMatch.DefinedNumericVariable; 1217 1218 StringRef MatchedValue = MatchInfo[CaptureParenGroup]; 1219 ExpressionFormat Format = DefinedNumericVariable->getImplicitFormat(); 1220 APInt Value = Format.valueFromStringRepr(MatchedValue, SM); 1221 DefinedNumericVariable->setValue(Value, MatchedValue); 1222 } 1223 1224 return MatchResult(TheMatch, Error::success()); 1225 } 1226 1227 unsigned Pattern::computeMatchDistance(StringRef Buffer) const { 1228 // Just compute the number of matching characters. For regular expressions, we 1229 // just compare against the regex itself and hope for the best. 
1230 // 1231 // FIXME: One easy improvement here is have the regex lib generate a single 1232 // example regular expression which matches, and use that as the example 1233 // string. 1234 StringRef ExampleString(FixedStr); 1235 if (ExampleString.empty()) 1236 ExampleString = RegExStr; 1237 1238 // Only compare up to the first line in the buffer, or the string size. 1239 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 1240 BufferPrefix = BufferPrefix.split('\n').first; 1241 return BufferPrefix.edit_distance(ExampleString); 1242 } 1243 1244 void Pattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer, 1245 SMRange Range, 1246 FileCheckDiag::MatchType MatchTy, 1247 std::vector<FileCheckDiag> *Diags) const { 1248 // Print what we know about substitutions. 1249 if (!Substitutions.empty()) { 1250 for (const auto &Substitution : Substitutions) { 1251 SmallString<256> Msg; 1252 raw_svector_ostream OS(Msg); 1253 1254 Expected<std::string> MatchedValue = 1255 Substitution->getResultForDiagnostics(); 1256 // Substitution failures are handled in printNoMatch(). 1257 if (!MatchedValue) { 1258 consumeError(MatchedValue.takeError()); 1259 continue; 1260 } 1261 1262 OS << "with \""; 1263 OS.write_escaped(Substitution->getFromString()) << "\" equal to "; 1264 OS << *MatchedValue; 1265 1266 // We report only the start of the match/search range to suggest we are 1267 // reporting the substitutions as set at the start of the match/search. 1268 // Indicating a non-zero-length range might instead seem to imply that the 1269 // substitution matches or was captured from exactly that range. 
1270 if (Diags) 1271 Diags->emplace_back(SM, CheckTy, getLoc(), MatchTy, 1272 SMRange(Range.Start, Range.Start), OS.str()); 1273 else 1274 SM.PrintMessage(Range.Start, SourceMgr::DK_Note, OS.str()); 1275 } 1276 } 1277 } 1278 1279 void Pattern::printVariableDefs(const SourceMgr &SM, 1280 FileCheckDiag::MatchType MatchTy, 1281 std::vector<FileCheckDiag> *Diags) const { 1282 if (VariableDefs.empty() && NumericVariableDefs.empty()) 1283 return; 1284 // Build list of variable captures. 1285 struct VarCapture { 1286 StringRef Name; 1287 SMRange Range; 1288 }; 1289 SmallVector<VarCapture, 2> VarCaptures; 1290 for (const auto &VariableDef : VariableDefs) { 1291 VarCapture VC; 1292 VC.Name = VariableDef.first; 1293 StringRef Value = Context->GlobalVariableTable[VC.Name]; 1294 SMLoc Start = SMLoc::getFromPointer(Value.data()); 1295 SMLoc End = SMLoc::getFromPointer(Value.data() + Value.size()); 1296 VC.Range = SMRange(Start, End); 1297 VarCaptures.push_back(VC); 1298 } 1299 for (const auto &VariableDef : NumericVariableDefs) { 1300 VarCapture VC; 1301 VC.Name = VariableDef.getKey(); 1302 std::optional<StringRef> StrValue = 1303 VariableDef.getValue().DefinedNumericVariable->getStringValue(); 1304 if (!StrValue) 1305 continue; 1306 SMLoc Start = SMLoc::getFromPointer(StrValue->data()); 1307 SMLoc End = SMLoc::getFromPointer(StrValue->data() + StrValue->size()); 1308 VC.Range = SMRange(Start, End); 1309 VarCaptures.push_back(VC); 1310 } 1311 // Sort variable captures by the order in which they matched the input. 1312 // Ranges shouldn't be overlapping, so we can just compare the start. 1313 llvm::sort(VarCaptures, [](const VarCapture &A, const VarCapture &B) { 1314 if (&A == &B) 1315 return false; 1316 assert(A.Range.Start != B.Range.Start && 1317 "unexpected overlapping variable captures"); 1318 return A.Range.Start.getPointer() < B.Range.Start.getPointer(); 1319 }); 1320 // Create notes for the sorted captures. 
1321 for (const VarCapture &VC : VarCaptures) { 1322 SmallString<256> Msg; 1323 raw_svector_ostream OS(Msg); 1324 OS << "captured var \"" << VC.Name << "\""; 1325 if (Diags) 1326 Diags->emplace_back(SM, CheckTy, getLoc(), MatchTy, VC.Range, OS.str()); 1327 else 1328 SM.PrintMessage(VC.Range.Start, SourceMgr::DK_Note, OS.str(), VC.Range); 1329 } 1330 } 1331 1332 static SMRange ProcessMatchResult(FileCheckDiag::MatchType MatchTy, 1333 const SourceMgr &SM, SMLoc Loc, 1334 Check::FileCheckType CheckTy, 1335 StringRef Buffer, size_t Pos, size_t Len, 1336 std::vector<FileCheckDiag> *Diags, 1337 bool AdjustPrevDiags = false) { 1338 SMLoc Start = SMLoc::getFromPointer(Buffer.data() + Pos); 1339 SMLoc End = SMLoc::getFromPointer(Buffer.data() + Pos + Len); 1340 SMRange Range(Start, End); 1341 if (Diags) { 1342 if (AdjustPrevDiags) { 1343 SMLoc CheckLoc = Diags->rbegin()->CheckLoc; 1344 for (auto I = Diags->rbegin(), E = Diags->rend(); 1345 I != E && I->CheckLoc == CheckLoc; ++I) 1346 I->MatchTy = MatchTy; 1347 } else 1348 Diags->emplace_back(SM, CheckTy, Loc, MatchTy, Range); 1349 } 1350 return Range; 1351 } 1352 1353 void Pattern::printFuzzyMatch(const SourceMgr &SM, StringRef Buffer, 1354 std::vector<FileCheckDiag> *Diags) const { 1355 // Attempt to find the closest/best fuzzy match. Usually an error happens 1356 // because some string in the output didn't exactly match. In these cases, we 1357 // would like to show the user a best guess at what "should have" matched, to 1358 // save them having to actually check the input manually. 1359 size_t NumLinesForward = 0; 1360 size_t Best = StringRef::npos; 1361 double BestQuality = 0; 1362 1363 // Use an arbitrary 4k limit on how far we will search. 1364 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 1365 if (Buffer[i] == '\n') 1366 ++NumLinesForward; 1367 1368 // Patterns have leading whitespace stripped, so skip whitespace when 1369 // looking for something which looks like a pattern. 
1370 if (Buffer[i] == ' ' || Buffer[i] == '\t') 1371 continue; 1372 1373 // Compute the "quality" of this match as an arbitrary combination of the 1374 // match distance and the number of lines skipped to get to this match. 1375 unsigned Distance = computeMatchDistance(Buffer.substr(i)); 1376 double Quality = Distance + (NumLinesForward / 100.); 1377 1378 if (Quality < BestQuality || Best == StringRef::npos) { 1379 Best = i; 1380 BestQuality = Quality; 1381 } 1382 } 1383 1384 // Print the "possible intended match here" line if we found something 1385 // reasonable and not equal to what we showed in the "scanning from here" 1386 // line. 1387 if (Best && Best != StringRef::npos && BestQuality < 50) { 1388 SMRange MatchRange = 1389 ProcessMatchResult(FileCheckDiag::MatchFuzzy, SM, getLoc(), 1390 getCheckTy(), Buffer, Best, 0, Diags); 1391 SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, 1392 "possible intended match here"); 1393 1394 // FIXME: If we wanted to be really friendly we would show why the match 1395 // failed, as it can be hard to spot simple one character differences. 1396 } 1397 } 1398 1399 Expected<StringRef> 1400 FileCheckPatternContext::getPatternVarValue(StringRef VarName) { 1401 auto VarIter = GlobalVariableTable.find(VarName); 1402 if (VarIter == GlobalVariableTable.end()) 1403 return make_error<UndefVarError>(VarName); 1404 1405 return VarIter->second; 1406 } 1407 1408 template <class... Types> 1409 NumericVariable *FileCheckPatternContext::makeNumericVariable(Types... 
args) { 1410 NumericVariables.push_back(std::make_unique<NumericVariable>(args...)); 1411 return NumericVariables.back().get(); 1412 } 1413 1414 Substitution * 1415 FileCheckPatternContext::makeStringSubstitution(StringRef VarName, 1416 size_t InsertIdx) { 1417 Substitutions.push_back( 1418 std::make_unique<StringSubstitution>(this, VarName, InsertIdx)); 1419 return Substitutions.back().get(); 1420 } 1421 1422 Substitution *FileCheckPatternContext::makeNumericSubstitution( 1423 StringRef ExpressionStr, std::unique_ptr<Expression> Expression, 1424 size_t InsertIdx) { 1425 Substitutions.push_back(std::make_unique<NumericSubstitution>( 1426 this, ExpressionStr, std::move(Expression), InsertIdx)); 1427 return Substitutions.back().get(); 1428 } 1429 1430 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 1431 // Offset keeps track of the current offset within the input Str 1432 size_t Offset = 0; 1433 // [...] Nesting depth 1434 size_t BracketDepth = 0; 1435 1436 while (!Str.empty()) { 1437 if (Str.starts_with("]]") && BracketDepth == 0) 1438 return Offset; 1439 if (Str[0] == '\\') { 1440 // Backslash escapes the next char within regexes, so skip them both. 1441 Str = Str.substr(2); 1442 Offset += 2; 1443 } else { 1444 switch (Str[0]) { 1445 default: 1446 break; 1447 case '[': 1448 BracketDepth++; 1449 break; 1450 case ']': 1451 if (BracketDepth == 0) { 1452 SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 1453 SourceMgr::DK_Error, 1454 "missing closing \"]\" for regex variable"); 1455 exit(1); 1456 } 1457 BracketDepth--; 1458 break; 1459 } 1460 Str = Str.substr(1); 1461 Offset++; 1462 } 1463 } 1464 1465 return StringRef::npos; 1466 } 1467 1468 StringRef FileCheck::CanonicalizeFile(MemoryBuffer &MB, 1469 SmallVectorImpl<char> &OutputBuffer) { 1470 OutputBuffer.reserve(MB.getBufferSize()); 1471 1472 for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd(); 1473 Ptr != End; ++Ptr) { 1474 // Eliminate trailing dosish \r. 
1475 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 1476 continue; 1477 } 1478 1479 // If current char is not a horizontal whitespace or if horizontal 1480 // whitespace canonicalization is disabled, dump it to output as is. 1481 if (Req.NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) { 1482 OutputBuffer.push_back(*Ptr); 1483 continue; 1484 } 1485 1486 // Otherwise, add one space and advance over neighboring space. 1487 OutputBuffer.push_back(' '); 1488 while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t')) 1489 ++Ptr; 1490 } 1491 1492 // Add a null byte and then return all but that byte. 1493 OutputBuffer.push_back('\0'); 1494 return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1); 1495 } 1496 1497 FileCheckDiag::FileCheckDiag(const SourceMgr &SM, 1498 const Check::FileCheckType &CheckTy, 1499 SMLoc CheckLoc, MatchType MatchTy, 1500 SMRange InputRange, StringRef Note) 1501 : CheckTy(CheckTy), CheckLoc(CheckLoc), MatchTy(MatchTy), Note(Note) { 1502 auto Start = SM.getLineAndColumn(InputRange.Start); 1503 auto End = SM.getLineAndColumn(InputRange.End); 1504 InputStartLine = Start.first; 1505 InputStartCol = Start.second; 1506 InputEndLine = End.first; 1507 InputEndCol = End.second; 1508 } 1509 1510 static bool IsPartOfWord(char c) { 1511 return (isAlnum(c) || c == '-' || c == '_'); 1512 } 1513 1514 Check::FileCheckType &Check::FileCheckType::setCount(int C) { 1515 assert(Count > 0 && "zero and negative counts are not supported"); 1516 assert((C == 1 || Kind == CheckPlain) && 1517 "count supported only for plain CHECK directives"); 1518 Count = C; 1519 return *this; 1520 } 1521 1522 std::string Check::FileCheckType::getModifiersDescription() const { 1523 if (Modifiers.none()) 1524 return ""; 1525 std::string Ret; 1526 raw_string_ostream OS(Ret); 1527 OS << '{'; 1528 if (isLiteralMatch()) 1529 OS << "LITERAL"; 1530 OS << '}'; 1531 return Ret; 1532 } 1533 1534 std::string Check::FileCheckType::getDescription(StringRef Prefix) const { 
1535 // Append directive modifiers. 1536 auto WithModifiers = [this, Prefix](StringRef Str) -> std::string { 1537 return (Prefix + Str + getModifiersDescription()).str(); 1538 }; 1539 1540 switch (Kind) { 1541 case Check::CheckNone: 1542 return "invalid"; 1543 case Check::CheckMisspelled: 1544 return "misspelled"; 1545 case Check::CheckPlain: 1546 if (Count > 1) 1547 return WithModifiers("-COUNT"); 1548 return WithModifiers(""); 1549 case Check::CheckNext: 1550 return WithModifiers("-NEXT"); 1551 case Check::CheckSame: 1552 return WithModifiers("-SAME"); 1553 case Check::CheckNot: 1554 return WithModifiers("-NOT"); 1555 case Check::CheckDAG: 1556 return WithModifiers("-DAG"); 1557 case Check::CheckLabel: 1558 return WithModifiers("-LABEL"); 1559 case Check::CheckEmpty: 1560 return WithModifiers("-EMPTY"); 1561 case Check::CheckComment: 1562 return std::string(Prefix); 1563 case Check::CheckEOF: 1564 return "implicit EOF"; 1565 case Check::CheckBadNot: 1566 return "bad NOT"; 1567 case Check::CheckBadCount: 1568 return "bad COUNT"; 1569 } 1570 llvm_unreachable("unknown FileCheckType"); 1571 } 1572 1573 static std::pair<Check::FileCheckType, StringRef> 1574 FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix, 1575 bool &Misspelled) { 1576 if (Buffer.size() <= Prefix.size()) 1577 return {Check::CheckNone, StringRef()}; 1578 1579 StringRef Rest = Buffer.drop_front(Prefix.size()); 1580 // Check for comment. 1581 if (llvm::is_contained(Req.CommentPrefixes, Prefix)) { 1582 if (Rest.consume_front(":")) 1583 return {Check::CheckComment, Rest}; 1584 // Ignore a comment prefix if it has a suffix like "-NOT". 
1585 return {Check::CheckNone, StringRef()}; 1586 } 1587 1588 auto ConsumeModifiers = [&](Check::FileCheckType Ret) 1589 -> std::pair<Check::FileCheckType, StringRef> { 1590 if (Rest.consume_front(":")) 1591 return {Ret, Rest}; 1592 if (!Rest.consume_front("{")) 1593 return {Check::CheckNone, StringRef()}; 1594 1595 // Parse the modifiers, speparated by commas. 1596 do { 1597 // Allow whitespace in modifiers list. 1598 Rest = Rest.ltrim(); 1599 if (Rest.consume_front("LITERAL")) 1600 Ret.setLiteralMatch(); 1601 else 1602 return {Check::CheckNone, Rest}; 1603 // Allow whitespace in modifiers list. 1604 Rest = Rest.ltrim(); 1605 } while (Rest.consume_front(",")); 1606 if (!Rest.consume_front("}:")) 1607 return {Check::CheckNone, Rest}; 1608 return {Ret, Rest}; 1609 }; 1610 1611 // Verify that the prefix is followed by directive modifiers or a colon. 1612 if (Rest.consume_front(":")) 1613 return {Check::CheckPlain, Rest}; 1614 if (Rest.front() == '{') 1615 return ConsumeModifiers(Check::CheckPlain); 1616 1617 if (Rest.consume_front("_")) 1618 Misspelled = true; 1619 else if (!Rest.consume_front("-")) 1620 return {Check::CheckNone, StringRef()}; 1621 1622 if (Rest.consume_front("COUNT-")) { 1623 int64_t Count; 1624 if (Rest.consumeInteger(10, Count)) 1625 // Error happened in parsing integer. 1626 return {Check::CheckBadCount, Rest}; 1627 if (Count <= 0 || Count > INT32_MAX) 1628 return {Check::CheckBadCount, Rest}; 1629 if (Rest.front() != ':' && Rest.front() != '{') 1630 return {Check::CheckBadCount, Rest}; 1631 return ConsumeModifiers( 1632 Check::FileCheckType(Check::CheckPlain).setCount(Count)); 1633 } 1634 1635 // You can't combine -NOT with another suffix. 
1636 if (Rest.starts_with("DAG-NOT:") || Rest.starts_with("NOT-DAG:") || 1637 Rest.starts_with("NEXT-NOT:") || Rest.starts_with("NOT-NEXT:") || 1638 Rest.starts_with("SAME-NOT:") || Rest.starts_with("NOT-SAME:") || 1639 Rest.starts_with("EMPTY-NOT:") || Rest.starts_with("NOT-EMPTY:")) 1640 return {Check::CheckBadNot, Rest}; 1641 1642 if (Rest.consume_front("NEXT")) 1643 return ConsumeModifiers(Check::CheckNext); 1644 1645 if (Rest.consume_front("SAME")) 1646 return ConsumeModifiers(Check::CheckSame); 1647 1648 if (Rest.consume_front("NOT")) 1649 return ConsumeModifiers(Check::CheckNot); 1650 1651 if (Rest.consume_front("DAG")) 1652 return ConsumeModifiers(Check::CheckDAG); 1653 1654 if (Rest.consume_front("LABEL")) 1655 return ConsumeModifiers(Check::CheckLabel); 1656 1657 if (Rest.consume_front("EMPTY")) 1658 return ConsumeModifiers(Check::CheckEmpty); 1659 1660 return {Check::CheckNone, Rest}; 1661 } 1662 1663 static std::pair<Check::FileCheckType, StringRef> 1664 FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix) { 1665 bool Misspelled = false; 1666 auto Res = FindCheckType(Req, Buffer, Prefix, Misspelled); 1667 if (Res.first != Check::CheckNone && Misspelled) 1668 return {Check::CheckMisspelled, Res.second}; 1669 return Res; 1670 } 1671 1672 // From the given position, find the next character after the word. 
1673 static size_t SkipWord(StringRef Str, size_t Loc) { 1674 while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 1675 ++Loc; 1676 return Loc; 1677 } 1678 1679 static const char *DefaultCheckPrefixes[] = {"CHECK"}; 1680 static const char *DefaultCommentPrefixes[] = {"COM", "RUN"}; 1681 1682 static void addDefaultPrefixes(FileCheckRequest &Req) { 1683 if (Req.CheckPrefixes.empty()) { 1684 llvm::append_range(Req.CheckPrefixes, DefaultCheckPrefixes); 1685 Req.IsDefaultCheckPrefix = true; 1686 } 1687 if (Req.CommentPrefixes.empty()) 1688 llvm::append_range(Req.CommentPrefixes, DefaultCommentPrefixes); 1689 } 1690 1691 struct PrefixMatcher { 1692 /// Prefixes and their first occurrence past the current position. 1693 SmallVector<std::pair<StringRef, size_t>> Prefixes; 1694 StringRef Input; 1695 1696 PrefixMatcher(ArrayRef<StringRef> CheckPrefixes, 1697 ArrayRef<StringRef> CommentPrefixes, StringRef Input) 1698 : Input(Input) { 1699 for (StringRef Prefix : CheckPrefixes) 1700 Prefixes.push_back({Prefix, Input.find(Prefix)}); 1701 for (StringRef Prefix : CommentPrefixes) 1702 Prefixes.push_back({Prefix, Input.find(Prefix)}); 1703 1704 // Sort by descending length. 1705 llvm::sort(Prefixes, 1706 [](auto A, auto B) { return A.first.size() > B.first.size(); }); 1707 } 1708 1709 /// Find the next match of a prefix in Buffer. 1710 /// Returns empty StringRef if not found. 1711 StringRef match(StringRef Buffer) { 1712 assert(Buffer.data() >= Input.data() && 1713 Buffer.data() + Buffer.size() == Input.data() + Input.size() && 1714 "Buffer must be suffix of Input"); 1715 1716 size_t From = Buffer.data() - Input.data(); 1717 StringRef Match; 1718 for (auto &[Prefix, Pos] : Prefixes) { 1719 // If the last occurrence was before From, find the next one after From. 1720 if (Pos < From) 1721 Pos = Input.find(Prefix, From); 1722 // Find the first prefix with the lowest position. 
1723 if (Pos != StringRef::npos && 1724 (Match.empty() || size_t(Match.data() - Input.data()) > Pos)) 1725 Match = StringRef(Input.substr(Pos, Prefix.size())); 1726 } 1727 return Match; 1728 } 1729 }; 1730 1731 /// Searches the buffer for the first prefix in the prefix regular expression. 1732 /// 1733 /// This searches the buffer using the provided regular expression, however it 1734 /// enforces constraints beyond that: 1735 /// 1) The found prefix must not be a suffix of something that looks like 1736 /// a valid prefix. 1737 /// 2) The found prefix must be followed by a valid check type suffix using \c 1738 /// FindCheckType above. 1739 /// 1740 /// \returns a pair of StringRefs into the Buffer, which combines: 1741 /// - the first match of the regular expression to satisfy these two is 1742 /// returned, 1743 /// otherwise an empty StringRef is returned to indicate failure. 1744 /// - buffer rewound to the location right after parsed suffix, for parsing 1745 /// to continue from 1746 /// 1747 /// If this routine returns a valid prefix, it will also shrink \p Buffer to 1748 /// start at the beginning of the returned prefix, increment \p LineNumber for 1749 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of 1750 /// check found by examining the suffix. 1751 /// 1752 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy 1753 /// is unspecified. 1754 static std::pair<StringRef, StringRef> 1755 FindFirstMatchingPrefix(const FileCheckRequest &Req, PrefixMatcher &Matcher, 1756 StringRef &Buffer, unsigned &LineNumber, 1757 Check::FileCheckType &CheckTy) { 1758 while (!Buffer.empty()) { 1759 // Find the first (longest) prefix match. 1760 StringRef Prefix = Matcher.match(Buffer); 1761 if (Prefix.empty()) 1762 // No match at all, bail. 
1763 return {StringRef(), StringRef()}; 1764 1765 assert(Prefix.data() >= Buffer.data() && 1766 Prefix.data() < Buffer.data() + Buffer.size() && 1767 "Prefix doesn't start inside of buffer!"); 1768 size_t Loc = Prefix.data() - Buffer.data(); 1769 StringRef Skipped = Buffer.substr(0, Loc); 1770 Buffer = Buffer.drop_front(Loc); 1771 LineNumber += Skipped.count('\n'); 1772 1773 // Check that the matched prefix isn't a suffix of some other check-like 1774 // word. 1775 // FIXME: This is a very ad-hoc check. it would be better handled in some 1776 // other way. Among other things it seems hard to distinguish between 1777 // intentional and unintentional uses of this feature. 1778 if (Skipped.empty() || !IsPartOfWord(Skipped.back())) { 1779 // Now extract the type. 1780 StringRef AfterSuffix; 1781 std::tie(CheckTy, AfterSuffix) = FindCheckType(Req, Buffer, Prefix); 1782 1783 // If we've found a valid check type for this prefix, we're done. 1784 if (CheckTy != Check::CheckNone) 1785 return {Prefix, AfterSuffix}; 1786 } 1787 1788 // If we didn't successfully find a prefix, we need to skip this invalid 1789 // prefix and continue scanning. We directly skip the prefix that was 1790 // matched and any additional parts of that check-like word. 1791 Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size())); 1792 } 1793 1794 // We ran out of buffer while skipping partial matches so give up. 
1795 return {StringRef(), StringRef()}; 1796 } 1797 1798 void FileCheckPatternContext::createLineVariable() { 1799 assert(!LineVariable && "@LINE pseudo numeric variable already created"); 1800 StringRef LineName = "@LINE"; 1801 LineVariable = makeNumericVariable( 1802 LineName, ExpressionFormat(ExpressionFormat::Kind::Unsigned)); 1803 GlobalNumericVariableTable[LineName] = LineVariable; 1804 } 1805 1806 FileCheck::FileCheck(FileCheckRequest Req) 1807 : Req(Req), PatternContext(std::make_unique<FileCheckPatternContext>()) {} 1808 1809 FileCheck::~FileCheck() = default; 1810 1811 bool FileCheck::readCheckFile( 1812 SourceMgr &SM, StringRef Buffer, 1813 std::pair<unsigned, unsigned> *ImpPatBufferIDRange) { 1814 if (ImpPatBufferIDRange) 1815 ImpPatBufferIDRange->first = ImpPatBufferIDRange->second = 0; 1816 1817 Error DefineError = 1818 PatternContext->defineCmdlineVariables(Req.GlobalDefines, SM); 1819 if (DefineError) { 1820 logAllUnhandledErrors(std::move(DefineError), errs()); 1821 return true; 1822 } 1823 1824 PatternContext->createLineVariable(); 1825 1826 std::vector<FileCheckString::DagNotPrefixInfo> ImplicitNegativeChecks; 1827 for (StringRef PatternString : Req.ImplicitCheckNot) { 1828 // Create a buffer with fake command line content in order to display the 1829 // command line option responsible for the specific implicit CHECK-NOT. 
1830 std::string Prefix = "-implicit-check-not='"; 1831 std::string Suffix = "'"; 1832 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( 1833 (Prefix + PatternString + Suffix).str(), "command line"); 1834 1835 StringRef PatternInBuffer = 1836 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 1837 unsigned BufferID = SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); 1838 if (ImpPatBufferIDRange) { 1839 if (ImpPatBufferIDRange->first == ImpPatBufferIDRange->second) { 1840 ImpPatBufferIDRange->first = BufferID; 1841 ImpPatBufferIDRange->second = BufferID + 1; 1842 } else { 1843 assert(BufferID == ImpPatBufferIDRange->second && 1844 "expected consecutive source buffer IDs"); 1845 ++ImpPatBufferIDRange->second; 1846 } 1847 } 1848 1849 ImplicitNegativeChecks.emplace_back( 1850 Pattern(Check::CheckNot, PatternContext.get()), 1851 StringRef("IMPLICIT-CHECK")); 1852 ImplicitNegativeChecks.back().DagNotPat.parsePattern( 1853 PatternInBuffer, "IMPLICIT-CHECK", SM, Req); 1854 } 1855 1856 std::vector<FileCheckString::DagNotPrefixInfo> DagNotMatches = 1857 ImplicitNegativeChecks; 1858 // LineNumber keeps track of the line on which CheckPrefix instances are 1859 // found. 1860 unsigned LineNumber = 1; 1861 1862 addDefaultPrefixes(Req); 1863 PrefixMatcher Matcher(Req.CheckPrefixes, Req.CommentPrefixes, Buffer); 1864 std::set<StringRef> PrefixesNotFound(Req.CheckPrefixes.begin(), 1865 Req.CheckPrefixes.end()); 1866 const size_t DistinctPrefixes = PrefixesNotFound.size(); 1867 while (true) { 1868 Check::FileCheckType CheckTy; 1869 1870 // See if a prefix occurs in the memory buffer. 
1871 StringRef UsedPrefix; 1872 StringRef AfterSuffix; 1873 std::tie(UsedPrefix, AfterSuffix) = 1874 FindFirstMatchingPrefix(Req, Matcher, Buffer, LineNumber, CheckTy); 1875 if (UsedPrefix.empty()) 1876 break; 1877 if (CheckTy != Check::CheckComment) 1878 PrefixesNotFound.erase(UsedPrefix); 1879 1880 assert(UsedPrefix.data() == Buffer.data() && 1881 "Failed to move Buffer's start forward, or pointed prefix outside " 1882 "of the buffer!"); 1883 assert(AfterSuffix.data() >= Buffer.data() && 1884 AfterSuffix.data() < Buffer.data() + Buffer.size() && 1885 "Parsing after suffix doesn't start inside of buffer!"); 1886 1887 // Location to use for error messages. 1888 const char *UsedPrefixStart = UsedPrefix.data(); 1889 1890 // Skip the buffer to the end of parsed suffix (or just prefix, if no good 1891 // suffix was processed). 1892 Buffer = AfterSuffix.empty() ? Buffer.drop_front(UsedPrefix.size()) 1893 : AfterSuffix; 1894 1895 // Complain about misspelled directives. 1896 if (CheckTy == Check::CheckMisspelled) { 1897 StringRef UsedDirective(UsedPrefix.data(), 1898 AfterSuffix.data() - UsedPrefix.data()); 1899 SM.PrintMessage(SMLoc::getFromPointer(UsedDirective.data()), 1900 SourceMgr::DK_Error, 1901 "misspelled directive '" + UsedDirective + "'"); 1902 return true; 1903 } 1904 1905 // Complain about useful-looking but unsupported suffixes. 1906 if (CheckTy == Check::CheckBadNot) { 1907 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, 1908 "unsupported -NOT combo on prefix '" + UsedPrefix + "'"); 1909 return true; 1910 } 1911 1912 // Complain about invalid count specification. 1913 if (CheckTy == Check::CheckBadCount) { 1914 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, 1915 "invalid count in -COUNT specification on prefix '" + 1916 UsedPrefix + "'"); 1917 return true; 1918 } 1919 1920 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 1921 // leading whitespace. 
1922 if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines)) 1923 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 1924 1925 // Scan ahead to the end of line. 1926 size_t EOL = Buffer.find_first_of("\n\r"); 1927 1928 // Remember the location of the start of the pattern, for diagnostics. 1929 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 1930 1931 // Extract the pattern from the buffer. 1932 StringRef PatternBuffer = Buffer.substr(0, EOL); 1933 Buffer = Buffer.substr(EOL); 1934 1935 // If this is a comment, we're done. 1936 if (CheckTy == Check::CheckComment) 1937 continue; 1938 1939 // Parse the pattern. 1940 Pattern P(CheckTy, PatternContext.get(), LineNumber); 1941 if (P.parsePattern(PatternBuffer, UsedPrefix, SM, Req)) 1942 return true; 1943 1944 // Verify that CHECK-LABEL lines do not define or use variables 1945 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 1946 SM.PrintMessage( 1947 SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error, 1948 "found '" + UsedPrefix + "-LABEL:'" 1949 " with variable definition or use"); 1950 return true; 1951 } 1952 1953 // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them. 1954 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame || 1955 CheckTy == Check::CheckEmpty) && 1956 CheckStrings.empty()) { 1957 StringRef Type = CheckTy == Check::CheckNext 1958 ? "NEXT" 1959 : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME"; 1960 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 1961 SourceMgr::DK_Error, 1962 "found '" + UsedPrefix + "-" + Type + 1963 "' without previous '" + UsedPrefix + ": line"); 1964 return true; 1965 } 1966 1967 // Handle CHECK-DAG/-NOT. 1968 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 1969 DagNotMatches.emplace_back(P, UsedPrefix); 1970 continue; 1971 } 1972 1973 // Okay, add the string we captured to the output vector and move on. 
1974 CheckStrings.emplace_back(std::move(P), UsedPrefix, PatternLoc, 1975 std::move(DagNotMatches)); 1976 DagNotMatches = ImplicitNegativeChecks; 1977 } 1978 1979 // When there are no used prefixes we report an error except in the case that 1980 // no prefix is specified explicitly but -implicit-check-not is specified. 1981 const bool NoPrefixesFound = PrefixesNotFound.size() == DistinctPrefixes; 1982 const bool SomePrefixesUnexpectedlyNotUsed = 1983 !Req.AllowUnusedPrefixes && !PrefixesNotFound.empty(); 1984 if ((NoPrefixesFound || SomePrefixesUnexpectedlyNotUsed) && 1985 (ImplicitNegativeChecks.empty() || !Req.IsDefaultCheckPrefix)) { 1986 errs() << "error: no check strings found with prefix" 1987 << (PrefixesNotFound.size() > 1 ? "es " : " "); 1988 bool First = true; 1989 for (StringRef MissingPrefix : PrefixesNotFound) { 1990 if (!First) 1991 errs() << ", "; 1992 errs() << "\'" << MissingPrefix << ":'"; 1993 First = false; 1994 } 1995 errs() << '\n'; 1996 return true; 1997 } 1998 1999 // Add an EOF pattern for any trailing --implicit-check-not/CHECK-DAG/-NOTs, 2000 // and use the first prefix as a filler for the error message. 2001 if (!DagNotMatches.empty()) { 2002 CheckStrings.emplace_back( 2003 Pattern(Check::CheckEOF, PatternContext.get(), LineNumber + 1), 2004 *Req.CheckPrefixes.begin(), SMLoc::getFromPointer(Buffer.data()), 2005 std::move(DagNotMatches)); 2006 } 2007 2008 return false; 2009 } 2010 2011 /// Returns either (1) \c ErrorSuccess if there was no error or (2) 2012 /// \c ErrorReported if an error was reported, such as an unexpected match. 2013 static Error printMatch(bool ExpectedMatch, const SourceMgr &SM, 2014 StringRef Prefix, SMLoc Loc, const Pattern &Pat, 2015 int MatchedCount, StringRef Buffer, 2016 Pattern::MatchResult MatchResult, 2017 const FileCheckRequest &Req, 2018 std::vector<FileCheckDiag> *Diags) { 2019 // Suppress some verbosity if there's no error. 
2020 bool HasError = !ExpectedMatch || MatchResult.TheError; 2021 bool PrintDiag = true; 2022 if (!HasError) { 2023 if (!Req.Verbose) 2024 return ErrorReported::reportedOrSuccess(HasError); 2025 if (!Req.VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF) 2026 return ErrorReported::reportedOrSuccess(HasError); 2027 // Due to their verbosity, we don't print verbose diagnostics here if we're 2028 // gathering them for Diags to be rendered elsewhere, but we always print 2029 // other diagnostics. 2030 PrintDiag = !Diags; 2031 } 2032 2033 // Add "found" diagnostic, substitutions, and variable definitions to Diags. 2034 FileCheckDiag::MatchType MatchTy = ExpectedMatch 2035 ? FileCheckDiag::MatchFoundAndExpected 2036 : FileCheckDiag::MatchFoundButExcluded; 2037 SMRange MatchRange = ProcessMatchResult(MatchTy, SM, Loc, Pat.getCheckTy(), 2038 Buffer, MatchResult.TheMatch->Pos, 2039 MatchResult.TheMatch->Len, Diags); 2040 if (Diags) { 2041 Pat.printSubstitutions(SM, Buffer, MatchRange, MatchTy, Diags); 2042 Pat.printVariableDefs(SM, MatchTy, Diags); 2043 } 2044 if (!PrintDiag) { 2045 assert(!HasError && "expected to report more diagnostics for error"); 2046 return ErrorReported::reportedOrSuccess(HasError); 2047 } 2048 2049 // Print the match. 2050 std::string Message = formatv("{0}: {1} string found in input", 2051 Pat.getCheckTy().getDescription(Prefix), 2052 (ExpectedMatch ? "expected" : "excluded")) 2053 .str(); 2054 if (Pat.getCount() > 1) 2055 Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str(); 2056 SM.PrintMessage( 2057 Loc, ExpectedMatch ? SourceMgr::DK_Remark : SourceMgr::DK_Error, Message); 2058 SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, "found here", 2059 {MatchRange}); 2060 2061 // Print additional information, which can be useful even if there are errors. 2062 Pat.printSubstitutions(SM, Buffer, MatchRange, MatchTy, nullptr); 2063 Pat.printVariableDefs(SM, MatchTy, nullptr); 2064 2065 // Print errors and add them to Diags. 
We report these errors after the match 2066 // itself because we found them after the match. If we had found them before 2067 // the match, we'd be in printNoMatch. 2068 handleAllErrors(std::move(MatchResult.TheError), 2069 [&](const ErrorDiagnostic &E) { 2070 E.log(errs()); 2071 if (Diags) { 2072 Diags->emplace_back(SM, Pat.getCheckTy(), Loc, 2073 FileCheckDiag::MatchFoundErrorNote, 2074 E.getRange(), E.getMessage().str()); 2075 } 2076 }); 2077 return ErrorReported::reportedOrSuccess(HasError); 2078 } 2079 2080 /// Returns either (1) \c ErrorSuccess if there was no error, or (2) 2081 /// \c ErrorReported if an error was reported, such as an expected match not 2082 /// found. 2083 static Error printNoMatch(bool ExpectedMatch, const SourceMgr &SM, 2084 StringRef Prefix, SMLoc Loc, const Pattern &Pat, 2085 int MatchedCount, StringRef Buffer, Error MatchError, 2086 bool VerboseVerbose, 2087 std::vector<FileCheckDiag> *Diags) { 2088 // Print any pattern errors, and record them to be added to Diags later. 2089 bool HasError = ExpectedMatch; 2090 bool HasPatternError = false; 2091 FileCheckDiag::MatchType MatchTy = ExpectedMatch 2092 ? FileCheckDiag::MatchNoneButExpected 2093 : FileCheckDiag::MatchNoneAndExcluded; 2094 SmallVector<std::string, 4> ErrorMsgs; 2095 handleAllErrors( 2096 std::move(MatchError), 2097 [&](const ErrorDiagnostic &E) { 2098 HasError = HasPatternError = true; 2099 MatchTy = FileCheckDiag::MatchNoneForInvalidPattern; 2100 E.log(errs()); 2101 if (Diags) 2102 ErrorMsgs.push_back(E.getMessage().str()); 2103 }, 2104 // NotFoundError is why printNoMatch was invoked. 2105 [](const NotFoundError &E) {}); 2106 2107 // Suppress some verbosity if there's no error. 
2108 bool PrintDiag = true; 2109 if (!HasError) { 2110 if (!VerboseVerbose) 2111 return ErrorReported::reportedOrSuccess(HasError); 2112 // Due to their verbosity, we don't print verbose diagnostics here if we're 2113 // gathering them for Diags to be rendered elsewhere, but we always print 2114 // other diagnostics. 2115 PrintDiag = !Diags; 2116 } 2117 2118 // Add "not found" diagnostic, substitutions, and pattern errors to Diags. 2119 // 2120 // We handle Diags a little differently than the errors we print directly: 2121 // we add the "not found" diagnostic to Diags even if there are pattern 2122 // errors. The reason is that we need to attach pattern errors as notes 2123 // somewhere in the input, and the input search range from the "not found" 2124 // diagnostic is all we have to anchor them. 2125 SMRange SearchRange = ProcessMatchResult(MatchTy, SM, Loc, Pat.getCheckTy(), 2126 Buffer, 0, Buffer.size(), Diags); 2127 if (Diags) { 2128 SMRange NoteRange = SMRange(SearchRange.Start, SearchRange.Start); 2129 for (StringRef ErrorMsg : ErrorMsgs) 2130 Diags->emplace_back(SM, Pat.getCheckTy(), Loc, MatchTy, NoteRange, 2131 ErrorMsg); 2132 Pat.printSubstitutions(SM, Buffer, SearchRange, MatchTy, Diags); 2133 } 2134 if (!PrintDiag) { 2135 assert(!HasError && "expected to report more diagnostics for error"); 2136 return ErrorReported::reportedOrSuccess(HasError); 2137 } 2138 2139 // Print "not found" diagnostic, except that's implied if we already printed a 2140 // pattern error. 2141 if (!HasPatternError) { 2142 std::string Message = formatv("{0}: {1} string not found in input", 2143 Pat.getCheckTy().getDescription(Prefix), 2144 (ExpectedMatch ? "expected" : "excluded")) 2145 .str(); 2146 if (Pat.getCount() > 1) 2147 Message += 2148 formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str(); 2149 SM.PrintMessage(Loc, 2150 ExpectedMatch ? 
SourceMgr::DK_Error : SourceMgr::DK_Remark, 2151 Message); 2152 SM.PrintMessage(SearchRange.Start, SourceMgr::DK_Note, 2153 "scanning from here"); 2154 } 2155 2156 // Print additional information, which can be useful even after a pattern 2157 // error. 2158 Pat.printSubstitutions(SM, Buffer, SearchRange, MatchTy, nullptr); 2159 if (ExpectedMatch) 2160 Pat.printFuzzyMatch(SM, Buffer, Diags); 2161 return ErrorReported::reportedOrSuccess(HasError); 2162 } 2163 2164 /// Returns either (1) \c ErrorSuccess if there was no error, or (2) 2165 /// \c ErrorReported if an error was reported. 2166 static Error reportMatchResult(bool ExpectedMatch, const SourceMgr &SM, 2167 StringRef Prefix, SMLoc Loc, const Pattern &Pat, 2168 int MatchedCount, StringRef Buffer, 2169 Pattern::MatchResult MatchResult, 2170 const FileCheckRequest &Req, 2171 std::vector<FileCheckDiag> *Diags) { 2172 if (MatchResult.TheMatch) 2173 return printMatch(ExpectedMatch, SM, Prefix, Loc, Pat, MatchedCount, Buffer, 2174 std::move(MatchResult), Req, Diags); 2175 return printNoMatch(ExpectedMatch, SM, Prefix, Loc, Pat, MatchedCount, Buffer, 2176 std::move(MatchResult.TheError), Req.VerboseVerbose, 2177 Diags); 2178 } 2179 2180 /// Counts the number of newlines in the specified range. 2181 static unsigned CountNumNewlinesBetween(StringRef Range, 2182 const char *&FirstNewLine) { 2183 unsigned NumNewLines = 0; 2184 while (true) { 2185 // Scan for newline. 2186 Range = Range.substr(Range.find_first_of("\n\r")); 2187 if (Range.empty()) 2188 return NumNewLines; 2189 2190 ++NumNewLines; 2191 2192 // Handle \n\r and \r\n as a single newline. 
2193 if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') && 2194 (Range[0] != Range[1])) 2195 Range = Range.substr(1); 2196 Range = Range.substr(1); 2197 2198 if (NumNewLines == 1) 2199 FirstNewLine = Range.begin(); 2200 } 2201 } 2202 2203 size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer, 2204 bool IsLabelScanMode, size_t &MatchLen, 2205 FileCheckRequest &Req, 2206 std::vector<FileCheckDiag> *Diags) const { 2207 size_t LastPos = 0; 2208 std::vector<const DagNotPrefixInfo *> NotStrings; 2209 2210 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 2211 // bounds; we have not processed variable definitions within the bounded block 2212 // yet so cannot handle any final CHECK-DAG yet; this is handled when going 2213 // over the block again (including the last CHECK-LABEL) in normal mode. 2214 if (!IsLabelScanMode) { 2215 // Match "dag strings" (with mixed "not strings" if any). 2216 LastPos = CheckDag(SM, Buffer, NotStrings, Req, Diags); 2217 if (LastPos == StringRef::npos) 2218 return StringRef::npos; 2219 } 2220 2221 // Match itself from the last position after matching CHECK-DAG. 2222 size_t LastMatchEnd = LastPos; 2223 size_t FirstMatchPos = 0; 2224 // Go match the pattern Count times. Majority of patterns only match with 2225 // count 1 though. 
2226 assert(Pat.getCount() != 0 && "pattern count can not be zero"); 2227 for (int i = 1; i <= Pat.getCount(); i++) { 2228 StringRef MatchBuffer = Buffer.substr(LastMatchEnd); 2229 // get a match at current start point 2230 Pattern::MatchResult MatchResult = Pat.match(MatchBuffer, SM); 2231 2232 // report 2233 if (Error Err = reportMatchResult(/*ExpectedMatch=*/true, SM, Prefix, Loc, 2234 Pat, i, MatchBuffer, 2235 std::move(MatchResult), Req, Diags)) { 2236 cantFail(handleErrors(std::move(Err), [&](const ErrorReported &E) {})); 2237 return StringRef::npos; 2238 } 2239 2240 size_t MatchPos = MatchResult.TheMatch->Pos; 2241 if (i == 1) 2242 FirstMatchPos = LastPos + MatchPos; 2243 2244 // move start point after the match 2245 LastMatchEnd += MatchPos + MatchResult.TheMatch->Len; 2246 } 2247 // Full match len counts from first match pos. 2248 MatchLen = LastMatchEnd - FirstMatchPos; 2249 2250 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 2251 // or CHECK-NOT 2252 if (!IsLabelScanMode) { 2253 size_t MatchPos = FirstMatchPos - LastPos; 2254 StringRef MatchBuffer = Buffer.substr(LastPos); 2255 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 2256 2257 // If this check is a "CHECK-NEXT", verify that the previous match was on 2258 // the previous line (i.e. that there is one newline between them). 2259 if (CheckNext(SM, SkippedRegion)) { 2260 ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc, 2261 Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen, 2262 Diags, Req.Verbose); 2263 return StringRef::npos; 2264 } 2265 2266 // If this check is a "CHECK-SAME", verify that the previous match was on 2267 // the same line (i.e. that there is no newline between them). 
2268 if (CheckSame(SM, SkippedRegion)) { 2269 ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc, 2270 Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen, 2271 Diags, Req.Verbose); 2272 return StringRef::npos; 2273 } 2274 2275 // If this match had "not strings", verify that they don't exist in the 2276 // skipped region. 2277 if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags)) 2278 return StringRef::npos; 2279 } 2280 2281 return FirstMatchPos; 2282 } 2283 2284 bool FileCheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 2285 if (Pat.getCheckTy() != Check::CheckNext && 2286 Pat.getCheckTy() != Check::CheckEmpty) 2287 return false; 2288 2289 Twine CheckName = 2290 Prefix + 2291 Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT"); 2292 2293 // Count the number of newlines between the previous match and this one. 2294 const char *FirstNewLine = nullptr; 2295 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 2296 2297 if (NumNewLines == 0) { 2298 SM.PrintMessage(Loc, SourceMgr::DK_Error, 2299 CheckName + ": is on the same line as previous match"); 2300 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 2301 "'next' match was here"); 2302 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 2303 "previous match ended here"); 2304 return true; 2305 } 2306 2307 if (NumNewLines != 1) { 2308 SM.PrintMessage(Loc, SourceMgr::DK_Error, 2309 CheckName + 2310 ": is not on the line after the previous match"); 2311 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 2312 "'next' match was here"); 2313 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 2314 "previous match ended here"); 2315 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 2316 "non-matching line after previous match is here"); 2317 return true; 2318 } 2319 2320 return false; 2321 } 2322 2323 bool FileCheckString::CheckSame(const 
SourceMgr &SM, StringRef Buffer) const { 2324 if (Pat.getCheckTy() != Check::CheckSame) 2325 return false; 2326 2327 // Count the number of newlines between the previous match and this one. 2328 const char *FirstNewLine = nullptr; 2329 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 2330 2331 if (NumNewLines != 0) { 2332 SM.PrintMessage(Loc, SourceMgr::DK_Error, 2333 Prefix + 2334 "-SAME: is not on the same line as the previous match"); 2335 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 2336 "'next' match was here"); 2337 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 2338 "previous match ended here"); 2339 return true; 2340 } 2341 2342 return false; 2343 } 2344 2345 bool FileCheckString::CheckNot( 2346 const SourceMgr &SM, StringRef Buffer, 2347 const std::vector<const DagNotPrefixInfo *> &NotStrings, 2348 const FileCheckRequest &Req, std::vector<FileCheckDiag> *Diags) const { 2349 bool DirectiveFail = false; 2350 for (auto NotInfo : NotStrings) { 2351 assert((NotInfo->DagNotPat.getCheckTy() == Check::CheckNot) && 2352 "Expect CHECK-NOT!"); 2353 Pattern::MatchResult MatchResult = NotInfo->DagNotPat.match(Buffer, SM); 2354 if (Error Err = reportMatchResult( 2355 /*ExpectedMatch=*/false, SM, NotInfo->DagNotPrefix, 2356 NotInfo->DagNotPat.getLoc(), NotInfo->DagNotPat, 1, Buffer, 2357 std::move(MatchResult), Req, Diags)) { 2358 cantFail(handleErrors(std::move(Err), [&](const ErrorReported &E) {})); 2359 DirectiveFail = true; 2360 continue; 2361 } 2362 } 2363 return DirectiveFail; 2364 } 2365 2366 size_t 2367 FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 2368 std::vector<const DagNotPrefixInfo *> &NotStrings, 2369 const FileCheckRequest &Req, 2370 std::vector<FileCheckDiag> *Diags) const { 2371 if (DagNotStrings.empty()) 2372 return 0; 2373 2374 // The start of the search range. 
2375 size_t StartPos = 0; 2376 2377 struct MatchRange { 2378 size_t Pos; 2379 size_t End; 2380 }; 2381 // A sorted list of ranges for non-overlapping CHECK-DAG matches. Match 2382 // ranges are erased from this list once they are no longer in the search 2383 // range. 2384 std::list<MatchRange> MatchRanges; 2385 2386 // We need PatItr and PatEnd later for detecting the end of a CHECK-DAG 2387 // group, so we don't use a range-based for loop here. 2388 for (auto PatItr = DagNotStrings.begin(), PatEnd = DagNotStrings.end(); 2389 PatItr != PatEnd; ++PatItr) { 2390 const Pattern &Pat = PatItr->DagNotPat; 2391 const StringRef DNPrefix = PatItr->DagNotPrefix; 2392 assert((Pat.getCheckTy() == Check::CheckDAG || 2393 Pat.getCheckTy() == Check::CheckNot) && 2394 "Invalid CHECK-DAG or CHECK-NOT!"); 2395 2396 if (Pat.getCheckTy() == Check::CheckNot) { 2397 NotStrings.push_back(&*PatItr); 2398 continue; 2399 } 2400 2401 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 2402 2403 // CHECK-DAG always matches from the start. 2404 size_t MatchLen = 0, MatchPos = StartPos; 2405 2406 // Search for a match that doesn't overlap a previous match in this 2407 // CHECK-DAG group. 2408 for (auto MI = MatchRanges.begin(), ME = MatchRanges.end(); true; ++MI) { 2409 StringRef MatchBuffer = Buffer.substr(MatchPos); 2410 Pattern::MatchResult MatchResult = Pat.match(MatchBuffer, SM); 2411 // With a group of CHECK-DAGs, a single mismatching means the match on 2412 // that group of CHECK-DAGs fails immediately. 2413 if (MatchResult.TheError || Req.VerboseVerbose) { 2414 if (Error Err = reportMatchResult(/*ExpectedMatch=*/true, SM, DNPrefix, 2415 Pat.getLoc(), Pat, 1, MatchBuffer, 2416 std::move(MatchResult), Req, Diags)) { 2417 cantFail( 2418 handleErrors(std::move(Err), [&](const ErrorReported &E) {})); 2419 return StringRef::npos; 2420 } 2421 } 2422 MatchLen = MatchResult.TheMatch->Len; 2423 // Re-calc it as the offset relative to the start of the original 2424 // string. 
2425 MatchPos += MatchResult.TheMatch->Pos; 2426 MatchRange M{MatchPos, MatchPos + MatchLen}; 2427 if (Req.AllowDeprecatedDagOverlap) { 2428 // We don't need to track all matches in this mode, so we just maintain 2429 // one match range that encompasses the current CHECK-DAG group's 2430 // matches. 2431 if (MatchRanges.empty()) 2432 MatchRanges.insert(MatchRanges.end(), M); 2433 else { 2434 auto Block = MatchRanges.begin(); 2435 Block->Pos = std::min(Block->Pos, M.Pos); 2436 Block->End = std::max(Block->End, M.End); 2437 } 2438 break; 2439 } 2440 // Iterate previous matches until overlapping match or insertion point. 2441 bool Overlap = false; 2442 for (; MI != ME; ++MI) { 2443 if (M.Pos < MI->End) { 2444 // !Overlap => New match has no overlap and is before this old match. 2445 // Overlap => New match overlaps this old match. 2446 Overlap = MI->Pos < M.End; 2447 break; 2448 } 2449 } 2450 if (!Overlap) { 2451 // Insert non-overlapping match into list. 2452 MatchRanges.insert(MI, M); 2453 break; 2454 } 2455 if (Req.VerboseVerbose) { 2456 // Due to their verbosity, we don't print verbose diagnostics here if 2457 // we're gathering them for a different rendering, but we always print 2458 // other diagnostics. 
2459 if (!Diags) { 2460 SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos); 2461 SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End); 2462 SMRange OldRange(OldStart, OldEnd); 2463 SM.PrintMessage(OldStart, SourceMgr::DK_Note, 2464 "match discarded, overlaps earlier DAG match here", 2465 {OldRange}); 2466 } else { 2467 SMLoc CheckLoc = Diags->rbegin()->CheckLoc; 2468 for (auto I = Diags->rbegin(), E = Diags->rend(); 2469 I != E && I->CheckLoc == CheckLoc; ++I) 2470 I->MatchTy = FileCheckDiag::MatchFoundButDiscarded; 2471 } 2472 } 2473 MatchPos = MI->End; 2474 } 2475 if (!Req.VerboseVerbose) 2476 cantFail(printMatch( 2477 /*ExpectedMatch=*/true, SM, DNPrefix, Pat.getLoc(), Pat, 1, Buffer, 2478 Pattern::MatchResult(MatchPos, MatchLen, Error::success()), Req, 2479 Diags)); 2480 2481 // Handle the end of a CHECK-DAG group. 2482 if (std::next(PatItr) == PatEnd || 2483 std::next(PatItr)->DagNotPat.getCheckTy() == Check::CheckNot) { 2484 if (!NotStrings.empty()) { 2485 // If there are CHECK-NOTs between two CHECK-DAGs or from CHECK to 2486 // CHECK-DAG, verify that there are no 'not' strings occurred in that 2487 // region. 2488 StringRef SkippedRegion = 2489 Buffer.slice(StartPos, MatchRanges.begin()->Pos); 2490 if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags)) 2491 return StringRef::npos; 2492 // Clear "not strings". 2493 NotStrings.clear(); 2494 } 2495 // All subsequent CHECK-DAGs and CHECK-NOTs should be matched from the 2496 // end of this CHECK-DAG group's match range. 2497 StartPos = MatchRanges.rbegin()->End; 2498 // Don't waste time checking for (impossible) overlaps before that. 
2499 MatchRanges.clear(); 2500 } 2501 } 2502 2503 return StartPos; 2504 } 2505 2506 static bool ValidatePrefixes(StringRef Kind, StringSet<> &UniquePrefixes, 2507 ArrayRef<StringRef> SuppliedPrefixes) { 2508 for (StringRef Prefix : SuppliedPrefixes) { 2509 if (Prefix.empty()) { 2510 errs() << "error: supplied " << Kind << " prefix must not be the empty " 2511 << "string\n"; 2512 return false; 2513 } 2514 static const Regex Validator("^[a-zA-Z0-9_-]*$"); 2515 if (!Validator.match(Prefix)) { 2516 errs() << "error: supplied " << Kind << " prefix must start with a " 2517 << "letter and contain only alphanumeric characters, hyphens, and " 2518 << "underscores: '" << Prefix << "'\n"; 2519 return false; 2520 } 2521 if (!UniquePrefixes.insert(Prefix).second) { 2522 errs() << "error: supplied " << Kind << " prefix must be unique among " 2523 << "check and comment prefixes: '" << Prefix << "'\n"; 2524 return false; 2525 } 2526 } 2527 return true; 2528 } 2529 2530 bool FileCheck::ValidateCheckPrefixes() { 2531 StringSet<> UniquePrefixes; 2532 // Add default prefixes to catch user-supplied duplicates of them below. 2533 if (Req.CheckPrefixes.empty()) 2534 UniquePrefixes.insert_range(DefaultCheckPrefixes); 2535 if (Req.CommentPrefixes.empty()) 2536 UniquePrefixes.insert_range(DefaultCommentPrefixes); 2537 // Do not validate the default prefixes, or diagnostics about duplicates might 2538 // incorrectly indicate that they were supplied by the user. 
2539 if (!ValidatePrefixes("check", UniquePrefixes, Req.CheckPrefixes)) 2540 return false; 2541 if (!ValidatePrefixes("comment", UniquePrefixes, Req.CommentPrefixes)) 2542 return false; 2543 return true; 2544 } 2545 2546 Error FileCheckPatternContext::defineCmdlineVariables( 2547 ArrayRef<StringRef> CmdlineDefines, SourceMgr &SM) { 2548 assert(GlobalVariableTable.empty() && GlobalNumericVariableTable.empty() && 2549 "Overriding defined variable with command-line variable definitions"); 2550 2551 if (CmdlineDefines.empty()) 2552 return Error::success(); 2553 2554 // Create a string representing the vector of command-line definitions. Each 2555 // definition is on its own line and prefixed with a definition number to 2556 // clarify which definition a given diagnostic corresponds to. 2557 unsigned I = 0; 2558 Error Errs = Error::success(); 2559 std::string CmdlineDefsDiag; 2560 SmallVector<std::pair<size_t, size_t>, 4> CmdlineDefsIndices; 2561 for (StringRef CmdlineDef : CmdlineDefines) { 2562 std::string DefPrefix = ("Global define #" + Twine(++I) + ": ").str(); 2563 size_t EqIdx = CmdlineDef.find('='); 2564 if (EqIdx == StringRef::npos) { 2565 CmdlineDefsIndices.push_back(std::make_pair(CmdlineDefsDiag.size(), 0)); 2566 continue; 2567 } 2568 // Numeric variable definition. 2569 if (CmdlineDef[0] == '#') { 2570 // Append a copy of the command-line definition adapted to use the same 2571 // format as in the input file to be able to reuse 2572 // parseNumericSubstitutionBlock. 
2573 CmdlineDefsDiag += (DefPrefix + CmdlineDef + " (parsed as: [[").str(); 2574 std::string SubstitutionStr = std::string(CmdlineDef); 2575 SubstitutionStr[EqIdx] = ':'; 2576 CmdlineDefsIndices.push_back( 2577 std::make_pair(CmdlineDefsDiag.size(), SubstitutionStr.size())); 2578 CmdlineDefsDiag += (SubstitutionStr + Twine("]])\n")).str(); 2579 } else { 2580 CmdlineDefsDiag += DefPrefix; 2581 CmdlineDefsIndices.push_back( 2582 std::make_pair(CmdlineDefsDiag.size(), CmdlineDef.size())); 2583 CmdlineDefsDiag += (CmdlineDef + "\n").str(); 2584 } 2585 } 2586 2587 // Create a buffer with fake command line content in order to display 2588 // parsing diagnostic with location information and point to the 2589 // global definition with invalid syntax. 2590 std::unique_ptr<MemoryBuffer> CmdLineDefsDiagBuffer = 2591 MemoryBuffer::getMemBufferCopy(CmdlineDefsDiag, "Global defines"); 2592 StringRef CmdlineDefsDiagRef = CmdLineDefsDiagBuffer->getBuffer(); 2593 SM.AddNewSourceBuffer(std::move(CmdLineDefsDiagBuffer), SMLoc()); 2594 2595 for (std::pair<size_t, size_t> CmdlineDefIndices : CmdlineDefsIndices) { 2596 StringRef CmdlineDef = CmdlineDefsDiagRef.substr(CmdlineDefIndices.first, 2597 CmdlineDefIndices.second); 2598 if (CmdlineDef.empty()) { 2599 Errs = joinErrors( 2600 std::move(Errs), 2601 ErrorDiagnostic::get(SM, CmdlineDef, 2602 "missing equal sign in global definition")); 2603 continue; 2604 } 2605 2606 // Numeric variable definition. 2607 if (CmdlineDef[0] == '#') { 2608 // Now parse the definition both to check that the syntax is correct and 2609 // to create the necessary class instance. 
2610 StringRef CmdlineDefExpr = CmdlineDef.substr(1); 2611 std::optional<NumericVariable *> DefinedNumericVariable; 2612 Expected<std::unique_ptr<Expression>> ExpressionResult = 2613 Pattern::parseNumericSubstitutionBlock(CmdlineDefExpr, 2614 DefinedNumericVariable, false, 2615 std::nullopt, this, SM); 2616 if (!ExpressionResult) { 2617 Errs = joinErrors(std::move(Errs), ExpressionResult.takeError()); 2618 continue; 2619 } 2620 std::unique_ptr<Expression> Expression = std::move(*ExpressionResult); 2621 // Now evaluate the expression whose value this variable should be set 2622 // to, since the expression of a command-line variable definition should 2623 // only use variables defined earlier on the command-line. If not, this 2624 // is an error and we report it. 2625 Expected<APInt> Value = Expression->getAST()->eval(); 2626 if (!Value) { 2627 Errs = joinErrors(std::move(Errs), Value.takeError()); 2628 continue; 2629 } 2630 2631 assert(DefinedNumericVariable && "No variable defined"); 2632 (*DefinedNumericVariable)->setValue(*Value); 2633 2634 // Record this variable definition. 2635 GlobalNumericVariableTable[(*DefinedNumericVariable)->getName()] = 2636 *DefinedNumericVariable; 2637 } else { 2638 // String variable definition. 2639 std::pair<StringRef, StringRef> CmdlineNameVal = CmdlineDef.split('='); 2640 StringRef CmdlineName = CmdlineNameVal.first; 2641 StringRef OrigCmdlineName = CmdlineName; 2642 Expected<Pattern::VariableProperties> ParseVarResult = 2643 Pattern::parseVariable(CmdlineName, SM); 2644 if (!ParseVarResult) { 2645 Errs = joinErrors(std::move(Errs), ParseVarResult.takeError()); 2646 continue; 2647 } 2648 // Check that CmdlineName does not denote a pseudo variable is only 2649 // composed of the parsed numeric variable. This catches cases like 2650 // "FOO+2" in a "FOO+2=10" definition. 
2651 if (ParseVarResult->IsPseudo || !CmdlineName.empty()) { 2652 Errs = joinErrors(std::move(Errs), 2653 ErrorDiagnostic::get( 2654 SM, OrigCmdlineName, 2655 "invalid name in string variable definition '" + 2656 OrigCmdlineName + "'")); 2657 continue; 2658 } 2659 StringRef Name = ParseVarResult->Name; 2660 2661 // Detect collisions between string and numeric variables when the former 2662 // is created later than the latter. 2663 if (GlobalNumericVariableTable.contains(Name)) { 2664 Errs = joinErrors(std::move(Errs), 2665 ErrorDiagnostic::get(SM, Name, 2666 "numeric variable with name '" + 2667 Name + "' already exists")); 2668 continue; 2669 } 2670 GlobalVariableTable.insert(CmdlineNameVal); 2671 // Mark the string variable as defined to detect collisions between 2672 // string and numeric variables in defineCmdlineVariables when the latter 2673 // is created later than the former. We cannot reuse GlobalVariableTable 2674 // for this by populating it with an empty string since we would then 2675 // lose the ability to detect the use of an undefined variable in 2676 // match(). 2677 DefinedVariableTable[Name] = true; 2678 } 2679 } 2680 2681 return Errs; 2682 } 2683 2684 void FileCheckPatternContext::clearLocalVars() { 2685 SmallVector<StringRef, 16> LocalPatternVars, LocalNumericVars; 2686 for (const StringMapEntry<StringRef> &Var : GlobalVariableTable) 2687 if (Var.first()[0] != '$') 2688 LocalPatternVars.push_back(Var.first()); 2689 2690 // Numeric substitution reads the value of a variable directly, not via 2691 // GlobalNumericVariableTable. Therefore, we clear local variables by 2692 // clearing their value which will lead to a numeric substitution failure. We 2693 // also mark the variable for removal from GlobalNumericVariableTable since 2694 // this is what defineCmdlineVariables checks to decide that no global 2695 // variable has been defined. 
2696 for (const auto &Var : GlobalNumericVariableTable) 2697 if (Var.first()[0] != '$') { 2698 Var.getValue()->clearValue(); 2699 LocalNumericVars.push_back(Var.first()); 2700 } 2701 2702 for (const auto &Var : LocalPatternVars) 2703 GlobalVariableTable.erase(Var); 2704 for (const auto &Var : LocalNumericVars) 2705 GlobalNumericVariableTable.erase(Var); 2706 } 2707 2708 bool FileCheck::checkInput(SourceMgr &SM, StringRef Buffer, 2709 std::vector<FileCheckDiag> *Diags) { 2710 bool ChecksFailed = false; 2711 2712 unsigned i = 0, j = 0, e = CheckStrings.size(); 2713 while (true) { 2714 StringRef CheckRegion; 2715 if (j == e) { 2716 CheckRegion = Buffer; 2717 } else { 2718 const FileCheckString &CheckLabelStr = CheckStrings[j]; 2719 if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { 2720 ++j; 2721 continue; 2722 } 2723 2724 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 2725 size_t MatchLabelLen = 0; 2726 size_t MatchLabelPos = 2727 CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, Req, Diags); 2728 if (MatchLabelPos == StringRef::npos) 2729 // Immediately bail if CHECK-LABEL fails, nothing else we can do. 2730 return false; 2731 2732 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 2733 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 2734 ++j; 2735 } 2736 2737 // Do not clear the first region as it's the one before the first 2738 // CHECK-LABEL and it would clear variables defined on the command-line 2739 // before they get used. 
2740 if (i != 0 && Req.EnableVarScope) 2741 PatternContext->clearLocalVars(); 2742 2743 for (; i != j; ++i) { 2744 const FileCheckString &CheckStr = CheckStrings[i]; 2745 2746 // Check each string within the scanned region, including a second check 2747 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 2748 size_t MatchLen = 0; 2749 size_t MatchPos = 2750 CheckStr.Check(SM, CheckRegion, false, MatchLen, Req, Diags); 2751 2752 if (MatchPos == StringRef::npos) { 2753 ChecksFailed = true; 2754 i = j; 2755 break; 2756 } 2757 2758 CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 2759 } 2760 2761 if (j == e) 2762 break; 2763 } 2764 2765 // Success if no checks failed. 2766 return !ChecksFailed; 2767 } 2768