1 //===- Parser.cpp - Matcher expression parser -----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// Recursive parser implementation for the matcher expression grammar. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/ASTMatchers/Dynamic/Parser.h" 15 #include "clang/ASTMatchers/ASTMatchersInternal.h" 16 #include "clang/ASTMatchers/Dynamic/Diagnostics.h" 17 #include "clang/ASTMatchers/Dynamic/Registry.h" 18 #include "clang/Basic/CharInfo.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/Support/ErrorHandling.h" 21 #include "llvm/Support/ManagedStatic.h" 22 #include <cassert> 23 #include <cerrno> 24 #include <cstddef> 25 #include <cstdlib> 26 #include <optional> 27 #include <string> 28 #include <utility> 29 #include <vector> 30 31 namespace clang { 32 namespace ast_matchers { 33 namespace dynamic { 34 35 /// Simple structure to hold information for one token from the parser. 36 struct Parser::TokenInfo { 37 /// Different possible tokens. 38 enum TokenKind { 39 TK_Eof, 40 TK_NewLine, 41 TK_OpenParen, 42 TK_CloseParen, 43 TK_Comma, 44 TK_Period, 45 TK_Literal, 46 TK_Ident, 47 TK_InvalidChar, 48 TK_Error, 49 TK_CodeCompletion 50 }; 51 52 /// Some known identifiers. 53 static const char* const ID_Bind; 54 static const char *const ID_With; 55 56 TokenInfo() = default; 57 58 StringRef Text; 59 TokenKind Kind = TK_Eof; 60 SourceRange Range; 61 VariantValue Value; 62 }; 63 64 const char* const Parser::TokenInfo::ID_Bind = "bind"; 65 const char *const Parser::TokenInfo::ID_With = "with"; 66 67 /// Simple tokenizer for the parser. 68 class Parser::CodeTokenizer { 69 public: 70 explicit CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error) 71 : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) { 72 NextToken = getNextToken(); 73 } 74 75 CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error, 76 unsigned CodeCompletionOffset) 77 : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error), 78 CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) { 79 NextToken = getNextToken(); 80 } 81 82 /// Returns but doesn't consume the next token. 83 const TokenInfo &peekNextToken() const { return NextToken; } 84 85 /// Consumes and returns the next token. 86 TokenInfo consumeNextToken() { 87 TokenInfo ThisToken = NextToken; 88 NextToken = getNextToken(); 89 return ThisToken; 90 } 91 92 TokenInfo SkipNewlines() { 93 while (NextToken.Kind == TokenInfo::TK_NewLine) 94 NextToken = getNextToken(); 95 return NextToken; 96 } 97 98 TokenInfo consumeNextTokenIgnoreNewlines() { 99 SkipNewlines(); 100 if (NextToken.Kind == TokenInfo::TK_Eof) 101 return NextToken; 102 return consumeNextToken(); 103 } 104 105 TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; } 106 107 private: 108 TokenInfo getNextToken() { 109 consumeWhitespace(); 110 TokenInfo Result; 111 Result.Range.Start = currentLocation(); 112 113 if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) { 114 Result.Kind = TokenInfo::TK_CodeCompletion; 115 Result.Text = StringRef(CodeCompletionLocation, 0); 116 CodeCompletionLocation = nullptr; 117 return Result; 118 } 119 120 if (Code.empty()) { 121 Result.Kind = TokenInfo::TK_Eof; 122 Result.Text = ""; 123 return Result; 124 } 125 126 switch (Code[0]) { 127 case '#': 128 Code = Code.drop_until([](char c) { return c == '\n'; }); 129 return getNextToken(); 130 case ',': 131 Result.Kind = TokenInfo::TK_Comma; 132 Result.Text = Code.substr(0, 1); 133 Code = Code.drop_front(); 134 break; 135 case '.': 136 Result.Kind = TokenInfo::TK_Period; 137 Result.Text = Code.substr(0, 1); 138 Code = Code.drop_front(); 139 break; 140 case '\n': 141 ++Line; 142 StartOfLine = Code.drop_front(); 143 Result.Kind = TokenInfo::TK_NewLine; 144 Result.Text = Code.substr(0, 1); 145 Code = Code.drop_front(); 146 break; 147 case '(': 148 Result.Kind = TokenInfo::TK_OpenParen; 149 Result.Text = Code.substr(0, 1); 150 Code = Code.drop_front(); 151 break; 152 case ')': 153 Result.Kind = TokenInfo::TK_CloseParen; 154 Result.Text = Code.substr(0, 1); 155 Code = Code.drop_front(); 156 break; 157 158 case '"': 159 case '\'': 160 // Parse a string literal. 161 consumeStringLiteral(&Result); 162 break; 163 164 case '0': case '1': case '2': case '3': case '4': 165 case '5': case '6': case '7': case '8': case '9': 166 // Parse an unsigned and float literal. 167 consumeNumberLiteral(&Result); 168 break; 169 170 default: 171 if (isAlphanumeric(Code[0])) { 172 // Parse an identifier 173 size_t TokenLength = 1; 174 while (true) { 175 // A code completion location in/immediately after an identifier will 176 // cause the portion of the identifier before the code completion 177 // location to become a code completion token. 178 if (CodeCompletionLocation == Code.data() + TokenLength) { 179 CodeCompletionLocation = nullptr; 180 Result.Kind = TokenInfo::TK_CodeCompletion; 181 Result.Text = Code.substr(0, TokenLength); 182 Code = Code.drop_front(TokenLength); 183 return Result; 184 } 185 if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength])) 186 break; 187 ++TokenLength; 188 } 189 if (TokenLength == 4 && Code.starts_with("true")) { 190 Result.Kind = TokenInfo::TK_Literal; 191 Result.Value = true; 192 } else if (TokenLength == 5 && Code.starts_with("false")) { 193 Result.Kind = TokenInfo::TK_Literal; 194 Result.Value = false; 195 } else { 196 Result.Kind = TokenInfo::TK_Ident; 197 Result.Text = Code.substr(0, TokenLength); 198 } 199 Code = Code.drop_front(TokenLength); 200 } else { 201 Result.Kind = TokenInfo::TK_InvalidChar; 202 Result.Text = Code.substr(0, 1); 203 Code = Code.drop_front(1); 204 } 205 break; 206 } 207 208 Result.Range.End = currentLocation(); 209 return Result; 210 } 211 212 /// Consume an unsigned and float literal. 213 void consumeNumberLiteral(TokenInfo *Result) { 214 bool isFloatingLiteral = false; 215 unsigned Length = 1; 216 if (Code.size() > 1) { 217 // Consume the 'x' or 'b' radix modifier, if present. 218 switch (toLowercase(Code[1])) { 219 case 'x': case 'b': Length = 2; 220 } 221 } 222 while (Length < Code.size() && isHexDigit(Code[Length])) 223 ++Length; 224 225 // Try to recognize a floating point literal. 226 while (Length < Code.size()) { 227 char c = Code[Length]; 228 if (c == '-' || c == '+' || c == '.' || isHexDigit(c)) { 229 isFloatingLiteral = true; 230 Length++; 231 } else { 232 break; 233 } 234 } 235 236 Result->Text = Code.substr(0, Length); 237 Code = Code.drop_front(Length); 238 239 if (isFloatingLiteral) { 240 char *end; 241 errno = 0; 242 std::string Text = Result->Text.str(); 243 double doubleValue = strtod(Text.c_str(), &end); 244 if (*end == 0 && errno == 0) { 245 Result->Kind = TokenInfo::TK_Literal; 246 Result->Value = doubleValue; 247 return; 248 } 249 } else { 250 unsigned Value; 251 if (!Result->Text.getAsInteger(0, Value)) { 252 Result->Kind = TokenInfo::TK_Literal; 253 Result->Value = Value; 254 return; 255 } 256 } 257 258 SourceRange Range; 259 Range.Start = Result->Range.Start; 260 Range.End = currentLocation(); 261 Error->addError(Range, Error->ET_ParserNumberError) << Result->Text; 262 Result->Kind = TokenInfo::TK_Error; 263 } 264 265 /// Consume a string literal. 266 /// 267 /// \c Code must be positioned at the start of the literal (the opening 268 /// quote). Consumed until it finds the same closing quote character. 269 void consumeStringLiteral(TokenInfo *Result) { 270 bool InEscape = false; 271 const char Marker = Code[0]; 272 for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) { 273 if (InEscape) { 274 InEscape = false; 275 continue; 276 } 277 if (Code[Length] == '\\') { 278 InEscape = true; 279 continue; 280 } 281 if (Code[Length] == Marker) { 282 Result->Kind = TokenInfo::TK_Literal; 283 Result->Text = Code.substr(0, Length + 1); 284 Result->Value = Code.substr(1, Length - 1); 285 Code = Code.drop_front(Length + 1); 286 return; 287 } 288 } 289 290 StringRef ErrorText = Code; 291 Code = Code.drop_front(Code.size()); 292 SourceRange Range; 293 Range.Start = Result->Range.Start; 294 Range.End = currentLocation(); 295 Error->addError(Range, Error->ET_ParserStringError) << ErrorText; 296 Result->Kind = TokenInfo::TK_Error; 297 } 298 299 /// Consume all leading whitespace from \c Code. 300 void consumeWhitespace() { 301 // Don't trim newlines. 302 Code = Code.ltrim(" \t\v\f\r"); 303 } 304 305 SourceLocation currentLocation() { 306 SourceLocation Location; 307 Location.Line = Line; 308 Location.Column = Code.data() - StartOfLine.data() + 1; 309 return Location; 310 } 311 312 StringRef &Code; 313 StringRef StartOfLine; 314 unsigned Line = 1; 315 Diagnostics *Error; 316 TokenInfo NextToken; 317 const char *CodeCompletionLocation = nullptr; 318 }; 319 320 Parser::Sema::~Sema() = default; 321 322 std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes( 323 llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) { 324 return {}; 325 } 326 327 std::vector<MatcherCompletion> 328 Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) { 329 return {}; 330 } 331 332 struct Parser::ScopedContextEntry { 333 Parser *P; 334 335 ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) { 336 P->ContextStack.push_back(std::make_pair(C, 0u)); 337 } 338 339 ~ScopedContextEntry() { 340 P->ContextStack.pop_back(); 341 } 342 343 void nextArg() { 344 ++P->ContextStack.back().second; 345 } 346 }; 347 348 /// Parse expressions that start with an identifier. 349 /// 350 /// This function can parse named values and matchers. 351 /// In case of failure it will try to determine the user's intent to give 352 /// an appropriate error message. 353 bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) { 354 const TokenInfo NameToken = Tokenizer->consumeNextToken(); 355 356 if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) { 357 // Parse as a named value. 358 if (const VariantValue NamedValue = 359 NamedValues ? NamedValues->lookup(NameToken.Text) 360 : VariantValue()) { 361 362 if (Tokenizer->nextTokenKind() != TokenInfo::TK_Period) { 363 *Value = NamedValue; 364 return true; 365 } 366 367 std::string BindID; 368 Tokenizer->consumeNextToken(); 369 TokenInfo ChainCallToken = Tokenizer->consumeNextToken(); 370 if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) { 371 addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1)); 372 return false; 373 } 374 375 if (ChainCallToken.Kind != TokenInfo::TK_Ident || 376 (ChainCallToken.Text != TokenInfo::ID_Bind && 377 ChainCallToken.Text != TokenInfo::ID_With)) { 378 Error->addError(ChainCallToken.Range, 379 Error->ET_ParserMalformedChainedExpr); 380 return false; 381 } 382 if (ChainCallToken.Text == TokenInfo::ID_With) { 383 384 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, 385 NameToken.Text, NameToken.Range); 386 387 Error->addError(ChainCallToken.Range, 388 Error->ET_RegistryMatcherNoWithSupport); 389 return false; 390 } 391 if (!parseBindID(BindID)) 392 return false; 393 394 assert(NamedValue.isMatcher()); 395 std::optional<DynTypedMatcher> Result = 396 NamedValue.getMatcher().getSingleMatcher(); 397 if (Result) { 398 std::optional<DynTypedMatcher> Bound = Result->tryBind(BindID); 399 if (Bound) { 400 *Value = VariantMatcher::SingleMatcher(*Bound); 401 return true; 402 } 403 } 404 return false; 405 } 406 407 if (Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine) { 408 Error->addError(Tokenizer->peekNextToken().Range, 409 Error->ET_ParserNoOpenParen) 410 << "NewLine"; 411 return false; 412 } 413 414 // If the syntax is correct and the name is not a matcher either, report 415 // unknown named value. 416 if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma || 417 Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen || 418 Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine || 419 Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) && 420 !S->lookupMatcherCtor(NameToken.Text)) { 421 Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound) 422 << NameToken.Text; 423 return false; 424 } 425 // Otherwise, fallback to the matcher parser. 426 } 427 428 Tokenizer->SkipNewlines(); 429 430 assert(NameToken.Kind == TokenInfo::TK_Ident); 431 TokenInfo OpenToken = Tokenizer->consumeNextToken(); 432 if (OpenToken.Kind != TokenInfo::TK_OpenParen) { 433 Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen) 434 << OpenToken.Text; 435 return false; 436 } 437 438 std::optional<MatcherCtor> Ctor = S->lookupMatcherCtor(NameToken.Text); 439 440 // Parse as a matcher expression. 441 return parseMatcherExpressionImpl(NameToken, OpenToken, Ctor, Value); 442 } 443 444 bool Parser::parseBindID(std::string &BindID) { 445 // Parse the parenthesized argument to .bind("foo") 446 const TokenInfo OpenToken = Tokenizer->consumeNextToken(); 447 const TokenInfo IDToken = Tokenizer->consumeNextTokenIgnoreNewlines(); 448 const TokenInfo CloseToken = Tokenizer->consumeNextTokenIgnoreNewlines(); 449 450 // TODO: We could use different error codes for each/some to be more 451 // explicit about the syntax error. 452 if (OpenToken.Kind != TokenInfo::TK_OpenParen) { 453 Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr); 454 return false; 455 } 456 if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) { 457 Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr); 458 return false; 459 } 460 if (CloseToken.Kind != TokenInfo::TK_CloseParen) { 461 Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr); 462 return false; 463 } 464 BindID = IDToken.Value.getString(); 465 return true; 466 } 467 468 bool Parser::parseMatcherBuilder(MatcherCtor Ctor, const TokenInfo &NameToken, 469 const TokenInfo &OpenToken, 470 VariantValue *Value) { 471 std::vector<ParserValue> Args; 472 TokenInfo EndToken; 473 474 Tokenizer->SkipNewlines(); 475 476 { 477 ScopedContextEntry SCE(this, Ctor); 478 479 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) { 480 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) { 481 // End of args. 482 EndToken = Tokenizer->consumeNextToken(); 483 break; 484 } 485 if (!Args.empty()) { 486 // We must find a , token to continue. 487 TokenInfo CommaToken = Tokenizer->consumeNextToken(); 488 if (CommaToken.Kind != TokenInfo::TK_Comma) { 489 Error->addError(CommaToken.Range, Error->ET_ParserNoComma) 490 << CommaToken.Text; 491 return false; 492 } 493 } 494 495 Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error, 496 NameToken.Text, NameToken.Range, 497 Args.size() + 1); 498 ParserValue ArgValue; 499 Tokenizer->SkipNewlines(); 500 501 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_CodeCompletion) { 502 addExpressionCompletions(); 503 return false; 504 } 505 506 TokenInfo NodeMatcherToken = Tokenizer->consumeNextToken(); 507 508 if (NodeMatcherToken.Kind != TokenInfo::TK_Ident) { 509 Error->addError(NameToken.Range, Error->ET_ParserFailedToBuildMatcher) 510 << NameToken.Text; 511 return false; 512 } 513 514 ArgValue.Text = NodeMatcherToken.Text; 515 ArgValue.Range = NodeMatcherToken.Range; 516 517 std::optional<MatcherCtor> MappedMatcher = 518 S->lookupMatcherCtor(ArgValue.Text); 519 520 if (!MappedMatcher) { 521 Error->addError(NodeMatcherToken.Range, 522 Error->ET_RegistryMatcherNotFound) 523 << NodeMatcherToken.Text; 524 return false; 525 } 526 527 ASTNodeKind NK = S->nodeMatcherType(*MappedMatcher); 528 529 if (NK.isNone()) { 530 Error->addError(NodeMatcherToken.Range, 531 Error->ET_RegistryNonNodeMatcher) 532 << NodeMatcherToken.Text; 533 return false; 534 } 535 536 ArgValue.Value = NK; 537 538 Tokenizer->SkipNewlines(); 539 Args.push_back(ArgValue); 540 541 SCE.nextArg(); 542 } 543 } 544 545 if (EndToken.Kind == TokenInfo::TK_Eof) { 546 Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen); 547 return false; 548 } 549 550 internal::MatcherDescriptorPtr BuiltCtor = 551 S->buildMatcherCtor(Ctor, NameToken.Range, Args, Error); 552 553 if (!BuiltCtor.get()) { 554 Error->addError(NameToken.Range, Error->ET_ParserFailedToBuildMatcher) 555 << NameToken.Text; 556 return false; 557 } 558 559 std::string BindID; 560 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) { 561 Tokenizer->consumeNextToken(); 562 TokenInfo ChainCallToken = Tokenizer->consumeNextToken(); 563 if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) { 564 addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1)); 565 addCompletion(ChainCallToken, MatcherCompletion("with(", "with", 1)); 566 return false; 567 } 568 if (ChainCallToken.Kind != TokenInfo::TK_Ident || 569 (ChainCallToken.Text != TokenInfo::ID_Bind && 570 ChainCallToken.Text != TokenInfo::ID_With)) { 571 Error->addError(ChainCallToken.Range, 572 Error->ET_ParserMalformedChainedExpr); 573 return false; 574 } 575 if (ChainCallToken.Text == TokenInfo::ID_Bind) { 576 if (!parseBindID(BindID)) 577 return false; 578 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, 579 NameToken.Text, NameToken.Range); 580 SourceRange MatcherRange = NameToken.Range; 581 MatcherRange.End = ChainCallToken.Range.End; 582 VariantMatcher Result = S->actOnMatcherExpression( 583 BuiltCtor.get(), MatcherRange, BindID, {}, Error); 584 if (Result.isNull()) 585 return false; 586 587 *Value = Result; 588 return true; 589 } else if (ChainCallToken.Text == TokenInfo::ID_With) { 590 Tokenizer->SkipNewlines(); 591 592 if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) { 593 StringRef ErrTxt = Tokenizer->nextTokenKind() == TokenInfo::TK_Eof 594 ? StringRef("EOF") 595 : Tokenizer->peekNextToken().Text; 596 Error->addError(Tokenizer->peekNextToken().Range, 597 Error->ET_ParserNoOpenParen) 598 << ErrTxt; 599 return false; 600 } 601 602 TokenInfo WithOpenToken = Tokenizer->consumeNextToken(); 603 604 return parseMatcherExpressionImpl(NameToken, WithOpenToken, 605 BuiltCtor.get(), Value); 606 } 607 } 608 609 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, 610 NameToken.Text, NameToken.Range); 611 SourceRange MatcherRange = NameToken.Range; 612 MatcherRange.End = EndToken.Range.End; 613 VariantMatcher Result = S->actOnMatcherExpression( 614 BuiltCtor.get(), MatcherRange, BindID, {}, Error); 615 if (Result.isNull()) 616 return false; 617 618 *Value = Result; 619 return true; 620 } 621 622 /// Parse and validate a matcher expression. 623 /// \return \c true on success, in which case \c Value has the matcher parsed. 624 /// If the input is malformed, or some argument has an error, it 625 /// returns \c false. 626 bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken, 627 const TokenInfo &OpenToken, 628 std::optional<MatcherCtor> Ctor, 629 VariantValue *Value) { 630 if (!Ctor) { 631 Error->addError(NameToken.Range, Error->ET_RegistryMatcherNotFound) 632 << NameToken.Text; 633 // Do not return here. We need to continue to give completion suggestions. 634 } 635 636 if (Ctor && *Ctor && S->isBuilderMatcher(*Ctor)) 637 return parseMatcherBuilder(*Ctor, NameToken, OpenToken, Value); 638 639 std::vector<ParserValue> Args; 640 TokenInfo EndToken; 641 642 Tokenizer->SkipNewlines(); 643 644 { 645 ScopedContextEntry SCE(this, Ctor.value_or(nullptr)); 646 647 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) { 648 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) { 649 // End of args. 650 EndToken = Tokenizer->consumeNextToken(); 651 break; 652 } 653 if (!Args.empty()) { 654 // We must find a , token to continue. 655 const TokenInfo CommaToken = Tokenizer->consumeNextToken(); 656 if (CommaToken.Kind != TokenInfo::TK_Comma) { 657 Error->addError(CommaToken.Range, Error->ET_ParserNoComma) 658 << CommaToken.Text; 659 return false; 660 } 661 } 662 663 Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error, 664 NameToken.Text, NameToken.Range, 665 Args.size() + 1); 666 ParserValue ArgValue; 667 Tokenizer->SkipNewlines(); 668 ArgValue.Text = Tokenizer->peekNextToken().Text; 669 ArgValue.Range = Tokenizer->peekNextToken().Range; 670 if (!parseExpressionImpl(&ArgValue.Value)) { 671 return false; 672 } 673 674 Tokenizer->SkipNewlines(); 675 Args.push_back(ArgValue); 676 SCE.nextArg(); 677 } 678 } 679 680 if (EndToken.Kind == TokenInfo::TK_Eof) { 681 Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen); 682 return false; 683 } 684 685 std::string BindID; 686 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) { 687 Tokenizer->consumeNextToken(); 688 TokenInfo ChainCallToken = Tokenizer->consumeNextToken(); 689 if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) { 690 addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1)); 691 return false; 692 } 693 694 if (ChainCallToken.Kind != TokenInfo::TK_Ident) { 695 Error->addError(ChainCallToken.Range, 696 Error->ET_ParserMalformedChainedExpr); 697 return false; 698 } 699 if (ChainCallToken.Text == TokenInfo::ID_With) { 700 701 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, 702 NameToken.Text, NameToken.Range); 703 704 Error->addError(ChainCallToken.Range, 705 Error->ET_RegistryMatcherNoWithSupport); 706 return false; 707 } 708 if (ChainCallToken.Text != TokenInfo::ID_Bind) { 709 Error->addError(ChainCallToken.Range, 710 Error->ET_ParserMalformedChainedExpr); 711 return false; 712 } 713 if (!parseBindID(BindID)) 714 return false; 715 } 716 717 if (!Ctor) 718 return false; 719 720 // Merge the start and end infos. 721 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, 722 NameToken.Text, NameToken.Range); 723 SourceRange MatcherRange = NameToken.Range; 724 MatcherRange.End = EndToken.Range.End; 725 VariantMatcher Result = S->actOnMatcherExpression( 726 *Ctor, MatcherRange, BindID, Args, Error); 727 if (Result.isNull()) return false; 728 729 *Value = Result; 730 return true; 731 } 732 733 // If the prefix of this completion matches the completion token, add it to 734 // Completions minus the prefix. 735 void Parser::addCompletion(const TokenInfo &CompToken, 736 const MatcherCompletion& Completion) { 737 if (StringRef(Completion.TypedText).starts_with(CompToken.Text) && 738 Completion.Specificity > 0) { 739 Completions.emplace_back(Completion.TypedText.substr(CompToken.Text.size()), 740 Completion.MatcherDecl, Completion.Specificity); 741 } 742 } 743 744 std::vector<MatcherCompletion> Parser::getNamedValueCompletions( 745 ArrayRef<ArgKind> AcceptedTypes) { 746 if (!NamedValues) return std::vector<MatcherCompletion>(); 747 std::vector<MatcherCompletion> Result; 748 for (const auto &Entry : *NamedValues) { 749 unsigned Specificity; 750 if (Entry.getValue().isConvertibleTo(AcceptedTypes, &Specificity)) { 751 std::string Decl = 752 (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str(); 753 Result.emplace_back(Entry.getKey(), Decl, Specificity); 754 } 755 } 756 return Result; 757 } 758 759 void Parser::addExpressionCompletions() { 760 const TokenInfo CompToken = Tokenizer->consumeNextTokenIgnoreNewlines(); 761 assert(CompToken.Kind == TokenInfo::TK_CodeCompletion); 762 763 // We cannot complete code if there is an invalid element on the context 764 // stack. 765 for (ContextStackTy::iterator I = ContextStack.begin(), 766 E = ContextStack.end(); 767 I != E; ++I) { 768 if (!I->first) 769 return; 770 } 771 772 auto AcceptedTypes = S->getAcceptedCompletionTypes(ContextStack); 773 for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) { 774 addCompletion(CompToken, Completion); 775 } 776 777 for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) { 778 addCompletion(CompToken, Completion); 779 } 780 } 781 782 /// Parse an <Expression> 783 bool Parser::parseExpressionImpl(VariantValue *Value) { 784 switch (Tokenizer->nextTokenKind()) { 785 case TokenInfo::TK_Literal: 786 *Value = Tokenizer->consumeNextToken().Value; 787 return true; 788 789 case TokenInfo::TK_Ident: 790 return parseIdentifierPrefixImpl(Value); 791 792 case TokenInfo::TK_CodeCompletion: 793 addExpressionCompletions(); 794 return false; 795 796 case TokenInfo::TK_Eof: 797 Error->addError(Tokenizer->consumeNextToken().Range, 798 Error->ET_ParserNoCode); 799 return false; 800 801 case TokenInfo::TK_Error: 802 // This error was already reported by the tokenizer. 803 return false; 804 case TokenInfo::TK_NewLine: 805 case TokenInfo::TK_OpenParen: 806 case TokenInfo::TK_CloseParen: 807 case TokenInfo::TK_Comma: 808 case TokenInfo::TK_Period: 809 case TokenInfo::TK_InvalidChar: 810 const TokenInfo Token = Tokenizer->consumeNextToken(); 811 Error->addError(Token.Range, Error->ET_ParserInvalidToken) 812 << (Token.Kind == TokenInfo::TK_NewLine ? "NewLine" : Token.Text); 813 return false; 814 } 815 816 llvm_unreachable("Unknown token kind."); 817 } 818 819 static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema; 820 821 Parser::Parser(CodeTokenizer *Tokenizer, Sema *S, 822 const NamedValueMap *NamedValues, Diagnostics *Error) 823 : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema), 824 NamedValues(NamedValues), Error(Error) {} 825 826 Parser::RegistrySema::~RegistrySema() = default; 827 828 std::optional<MatcherCtor> 829 Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) { 830 return Registry::lookupMatcherCtor(MatcherName); 831 } 832 833 VariantMatcher Parser::RegistrySema::actOnMatcherExpression( 834 MatcherCtor Ctor, SourceRange NameRange, StringRef BindID, 835 ArrayRef<ParserValue> Args, Diagnostics *Error) { 836 if (BindID.empty()) { 837 return Registry::constructMatcher(Ctor, NameRange, Args, Error); 838 } else { 839 return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args, 840 Error); 841 } 842 } 843 844 std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes( 845 ArrayRef<std::pair<MatcherCtor, unsigned>> Context) { 846 return Registry::getAcceptedCompletionTypes(Context); 847 } 848 849 std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions( 850 ArrayRef<ArgKind> AcceptedTypes) { 851 return Registry::getMatcherCompletions(AcceptedTypes); 852 } 853 854 bool Parser::RegistrySema::isBuilderMatcher(MatcherCtor Ctor) const { 855 return Registry::isBuilderMatcher(Ctor); 856 } 857 858 ASTNodeKind Parser::RegistrySema::nodeMatcherType(MatcherCtor Ctor) const { 859 return Registry::nodeMatcherType(Ctor); 860 } 861 862 internal::MatcherDescriptorPtr 863 Parser::RegistrySema::buildMatcherCtor(MatcherCtor Ctor, SourceRange NameRange, 864 ArrayRef<ParserValue> Args, 865 Diagnostics *Error) const { 866 return Registry::buildMatcherCtor(Ctor, NameRange, Args, Error); 867 } 868 869 bool Parser::parseExpression(StringRef &Code, Sema *S, 870 const NamedValueMap *NamedValues, 871 VariantValue *Value, Diagnostics *Error) { 872 CodeTokenizer Tokenizer(Code, Error); 873 if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value)) 874 return false; 875 auto NT = Tokenizer.peekNextToken(); 876 if (NT.Kind != TokenInfo::TK_Eof && NT.Kind != TokenInfo::TK_NewLine) { 877 Error->addError(Tokenizer.peekNextToken().Range, 878 Error->ET_ParserTrailingCode); 879 return false; 880 } 881 return true; 882 } 883 884 std::vector<MatcherCompletion> 885 Parser::completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S, 886 const NamedValueMap *NamedValues) { 887 Diagnostics Error; 888 CodeTokenizer Tokenizer(Code, &Error, CompletionOffset); 889 Parser P(&Tokenizer, S, NamedValues, &Error); 890 VariantValue Dummy; 891 P.parseExpressionImpl(&Dummy); 892 893 // Sort by specificity, then by name. 894 llvm::sort(P.Completions, 895 [](const MatcherCompletion &A, const MatcherCompletion &B) { 896 if (A.Specificity != B.Specificity) 897 return A.Specificity > B.Specificity; 898 return A.TypedText < B.TypedText; 899 }); 900 901 return P.Completions; 902 } 903 904 std::optional<DynTypedMatcher> 905 Parser::parseMatcherExpression(StringRef &Code, Sema *S, 906 const NamedValueMap *NamedValues, 907 Diagnostics *Error) { 908 VariantValue Value; 909 if (!parseExpression(Code, S, NamedValues, &Value, Error)) 910 return std::nullopt; 911 if (!Value.isMatcher()) { 912 Error->addError(SourceRange(), Error->ET_ParserNotAMatcher); 913 return std::nullopt; 914 } 915 std::optional<DynTypedMatcher> Result = Value.getMatcher().getSingleMatcher(); 916 if (!Result) { 917 Error->addError(SourceRange(), Error->ET_ParserOverloadedType) 918 << Value.getTypeAsString(); 919 } 920 return Result; 921 } 922 923 } // namespace dynamic 924 } // namespace ast_matchers 925 } // namespace clang 926