//===- Parser.cpp - Matcher expression parser -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Recursive parser implementation for the matcher expression grammar.
///
//===----------------------------------------------------------------------===//

#include "clang/ASTMatchers/Dynamic/Parser.h"
#include "clang/ASTMatchers/ASTMatchersInternal.h"
#include "clang/ASTMatchers/Dynamic/Diagnostics.h"
#include "clang/ASTMatchers/Dynamic/Registry.h"
#include "clang/Basic/CharInfo.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include <algorithm>
#include <cassert>
#include <cerrno>
#include <cstddef>
#include <cstdlib>
#include <optional>
#include <string>
#include <utility>
#include <vector>

namespace clang {
namespace ast_matchers {
namespace dynamic {

/// Simple structure to hold information for one token from the parser.
///
/// A default-constructed token has kind \c TK_Eof.
struct Parser::TokenInfo {
  /// Different possible tokens.
  enum TokenKind {
    /// The input is exhausted.
    TK_Eof,
    /// A '\n' character.
    TK_NewLine,
    /// A '(' character.
    TK_OpenParen,
    /// A ')' character.
    TK_CloseParen,
    /// A ',' character.
    TK_Comma,
    /// A '.' character.
    TK_Period,
    /// A string, number, or \c true / \c false literal; the parsed value is
    /// stored in \c Value.
    TK_Literal,
    /// A run of alphanumeric characters other than \c true / \c false.
    TK_Ident,
    /// A single character that does not start any other token.
    TK_InvalidChar,
    /// A malformed literal; the tokenizer has already reported the error.
    TK_Error,
    /// The position at which code completion was requested.
    TK_CodeCompletion
  };

  /// Some known identifiers.
  ///
  /// These are the names accepted after a '.' in a chained call.
  static const char* const ID_Bind;
  static const char *const ID_With;

  TokenInfo() = default;

  /// The slice of the input the token was lexed from. The tokenizer leaves
  /// this empty for the \c true / \c false literals.
  StringRef Text;
  /// The kind of the token.
  TokenKind Kind = TK_Eof;
  /// Source range of the token. The tokenizer always fills in \c Start; \c End
  /// is not filled in for EOF and code completion tokens.
  SourceRange Range;
  /// The parsed value; assigned by the tokenizer for \c TK_Literal tokens.
  VariantValue Value;
};

const char* const Parser::TokenInfo::ID_Bind = "bind";
const char *const Parser::TokenInfo::ID_With = "with";

/// Simple tokenizer for the parser.
class Parser::CodeTokenizer {
public:
  /// Creates a tokenizer over \p MatcherCode with code completion disabled.
  ///
  /// \p MatcherCode is held by reference and is advanced as tokens are lexed.
  /// The first token is lexed immediately so that it can be peeked.
  explicit CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error)
      : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) {
    NextToken = getNextToken();
  }

  /// Creates a tokenizer that emits a \c TK_CodeCompletion token at
  /// \p CodeCompletionOffset characters from the start of \p MatcherCode.
  CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error,
                unsigned CodeCompletionOffset)
      : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error),
        CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) {
    NextToken = getNextToken();
  }

  /// Returns but doesn't consume the next token.
  const TokenInfo &peekNextToken() const { return NextToken; }

  /// Consumes and returns the next token.
  TokenInfo consumeNextToken() {
    TokenInfo ThisToken = NextToken;
    NextToken = getNextToken();
    return ThisToken;
  }

  /// Discards pending newline tokens.
  ///
  /// \return a copy of the first token that is not a newline; that token is
  /// not consumed.
  TokenInfo SkipNewlines() {
    while (NextToken.Kind == TokenInfo::TK_NewLine)
      NextToken = getNextToken();
    return NextToken;
  }

  /// Skips newline tokens, then consumes and returns the next token.
  ///
  /// At end of input the EOF token is returned without lexing any further.
  TokenInfo consumeNextTokenIgnoreNewlines() {
    SkipNewlines();
    if (NextToken.Kind == TokenInfo::TK_Eof)
      return NextToken;
    return consumeNextToken();
  }

  /// Returns the kind of the next token without consuming it.
  TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }

private:
  /// Lexes one token from the front of \c Code and advances \c Code past it.
  ///
  /// Leading whitespace other than newlines is skipped, and a '#' starts a
  /// comment that runs to the end of the line.
  TokenInfo getNextToken() {
    consumeWhitespace();
    TokenInfo Result;
    Result.Range.Start = currentLocation();

    // The completion point has been reached or passed: emit an empty
    // completion token once, then disable completion.
    if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) {
      Result.Kind = TokenInfo::TK_CodeCompletion;
      Result.Text = StringRef(CodeCompletionLocation, 0);
      CodeCompletionLocation = nullptr;
      return Result;
    }

    if (Code.empty()) {
      Result.Kind = TokenInfo::TK_Eof;
      Result.Text = "";
      return Result;
    }

    switch (Code[0]) {
    case '#':
      // Comment: drop everything before the next newline and lex again. The
      // newline itself is kept, so it is still reported as a token.
      Code = Code.drop_until([](char c) { return c == '\n'; });
      return getNextToken();
    case ',':
      Result.Kind = TokenInfo::TK_Comma;
      Result.Text = Code.substr(0, 1);
      Code = Code.drop_front();
      break;
    case '.':
      Result.Kind = TokenInfo::TK_Period;
      Result.Text = Code.substr(0, 1);
      Code = Code.drop_front();
      break;
    case '\n':
      // Track the line number and where the new line starts so that
      // currentLocation() can compute columns.
      ++Line;
      StartOfLine = Code.drop_front();
      Result.Kind = TokenInfo::TK_NewLine;
      Result.Text = Code.substr(0, 1);
      Code = Code.drop_front();
      break;
    case '(':
      Result.Kind = TokenInfo::TK_OpenParen;
      Result.Text = Code.substr(0, 1);
      Code = Code.drop_front();
      break;
    case ')':
      Result.Kind = TokenInfo::TK_CloseParen;
      Result.Text = Code.substr(0, 1);
      Code = Code.drop_front();
      break;

    case '"':
    case '\'':
      // Parse a string literal.
      consumeStringLiteral(&Result);
      break;

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      // Parse an unsigned and float literal.
      consumeNumberLiteral(&Result);
      break;

    default:
      if (isAlphanumeric(Code[0])) {
        // Parse an identifier
        size_t TokenLength = 1;
        while (true) {
          // A code completion location in/immediately after an identifier will
          // cause the portion of the identifier before the code completion
          // location to become a code completion token.
          if (CodeCompletionLocation == Code.data() + TokenLength) {
            CodeCompletionLocation = nullptr;
            Result.Kind = TokenInfo::TK_CodeCompletion;
            Result.Text = Code.substr(0, TokenLength);
            Code = Code.drop_front(TokenLength);
            return Result;
          }
          if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength]))
            break;
          ++TokenLength;
        }
        // "true" and "false" are boolean literals rather than identifiers.
        // Note that Result.Text is not assigned for them.
        if (TokenLength == 4 && Code.startswith("true")) {
          Result.Kind = TokenInfo::TK_Literal;
          Result.Value = true;
        } else if (TokenLength == 5 && Code.startswith("false")) {
          Result.Kind = TokenInfo::TK_Literal;
          Result.Value = false;
        } else {
          Result.Kind = TokenInfo::TK_Ident;
          Result.Text = Code.substr(0, TokenLength);
        }
        Code = Code.drop_front(TokenLength);
      } else {
        // Anything else is reported as a one-character invalid token.
        Result.Kind = TokenInfo::TK_InvalidChar;
        Result.Text = Code.substr(0, 1);
        Code = Code.drop_front(1);
      }
      break;
    }

    Result.Range.End = currentLocation();
    return Result;
  }

  /// Consume an unsigned and float literal.
  ///
  /// \c Code must be positioned at the first character of the literal. On
  /// success \p Result becomes a \c TK_Literal holding an \c unsigned or a
  /// \c double; otherwise a number error is reported and \p Result becomes
  /// \c TK_Error. The literal's text is consumed in both cases.
  void consumeNumberLiteral(TokenInfo *Result) {
    bool isFloatingLiteral = false;
    unsigned Length = 1;
    if (Code.size() > 1) {
      // Consume the 'x' or 'b' radix modifier, if present.
      switch (toLowercase(Code[1])) {
      case 'x': case 'b': Length = 2;
      }
    }
    while (Length < Code.size() && isHexDigit(Code[Length]))
      ++Length;

    // Try to recognize a floating point literal.
    // Any further run of '-', '+', '.' or hex digit characters makes the token
    // a floating point candidate; strtod() below validates the whole text.
    while (Length < Code.size()) {
      char c = Code[Length];
      if (c == '-' || c == '+' || c == '.' || isHexDigit(c)) {
        isFloatingLiteral = true;
        Length++;
      } else {
        break;
      }
    }

    Result->Text = Code.substr(0, Length);
    Code = Code.drop_front(Length);

    if (isFloatingLiteral) {
      char *end;
      errno = 0;
      // strtod() needs a null-terminated string, so copy the text.
      std::string Text = Result->Text.str();
      double doubleValue = strtod(Text.c_str(), &end);
      // Accept only if the entire text was consumed and errno was not set.
      if (*end == 0 && errno == 0) {
        Result->Kind = TokenInfo::TK_Literal;
        Result->Value = doubleValue;
        return;
      }
    } else {
      unsigned Value;
      // Radix 0 lets getAsInteger() pick the radix from the prefix; it
      // returns true on failure.
      if (!Result->Text.getAsInteger(0, Value)) {
        Result->Kind = TokenInfo::TK_Literal;
        Result->Value = Value;
        return;
      }
    }

    // Neither form parsed: report the malformed number.
    SourceRange Range;
    Range.Start = Result->Range.Start;
    Range.End = currentLocation();
    Error->addError(Range, Error->ET_ParserNumberError) << Result->Text;
    Result->Kind = TokenInfo::TK_Error;
  }

  /// Consume a string literal.
  ///
  /// \c Code must be positioned at the start of the literal (the opening
  /// quote). Consumed until it finds the same closing quote character.
  ///
  /// A backslash causes the following character to be skipped while looking
  /// for the closing quote. The stored value is the raw text between the
  /// quotes. If no closing quote is found, the rest of the input is consumed,
  /// a string error is reported and \p Result becomes \c TK_Error.
  void consumeStringLiteral(TokenInfo *Result) {
    bool InEscape = false;
    const char Marker = Code[0];
    for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
      if (InEscape) {
        InEscape = false;
        continue;
      }
      if (Code[Length] == '\\') {
        InEscape = true;
        continue;
      }
      if (Code[Length] == Marker) {
        Result->Kind = TokenInfo::TK_Literal;
        // Text includes both quotes, Value excludes them.
        Result->Text = Code.substr(0, Length + 1);
        Result->Value = Code.substr(1, Length - 1);
        Code = Code.drop_front(Length + 1);
        return;
      }
    }

    // Unterminated literal: consume everything that is left.
    StringRef ErrorText = Code;
    Code = Code.drop_front(Code.size());
    SourceRange Range;
    Range.Start = Result->Range.Start;
    Range.End = currentLocation();
    Error->addError(Range, Error->ET_ParserStringError) << ErrorText;
    Result->Kind = TokenInfo::TK_Error;
  }

  /// Consume all leading whitespace from \c Code.
  void consumeWhitespace() {
    Code = Code.drop_while([](char c) {
      // Don't trim newlines.
      return StringRef(" \t\v\f\r").contains(c);
    });
  }

  /// Returns the location of the front of \c Code: the current line number
  /// and the 1-based offset from the start of that line.
  SourceLocation currentLocation() {
    SourceLocation Location;
    Location.Line = Line;
    Location.Column = Code.data() - StartOfLine.data() + 1;
    return Location;
  }

  /// The remaining input. This is a reference to the caller's StringRef, so
  /// the caller observes how much input was consumed.
  StringRef &Code;
  /// The input starting at the beginning of the current line.
  StringRef StartOfLine;
  /// Current line number, starting at 1.
  unsigned Line = 1;
  /// Sink for tokenizer errors.
  Diagnostics *Error;
  /// One token of lookahead.
  TokenInfo NextToken;
  /// Where to emit the code completion token; null when completion is
  /// disabled or the token has already been emitted.
  const char *CodeCompletionLocation = nullptr;
};

Parser::Sema::~Sema() = default;

/// Default implementation: no accepted types.
std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes(
    llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
  return {};
}

/// Default implementation: no completions.
std::vector<MatcherCompletion>
Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) {
  return {};
}

/// Pushes a (matcher constructor, argument index) entry onto the parser's
/// context stack on construction and pops it on destruction.
struct Parser::ScopedContextEntry {
  Parser *P;

  /// Pushes an entry for \p C with the argument index starting at 0.
  ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) {
    P->ContextStack.push_back(std::make_pair(C, 0u));
  }

  ~ScopedContextEntry() {
    P->ContextStack.pop_back();
  }

  /// Advances the argument index of the entry on top of the stack.
  void nextArg() {
    ++P->ContextStack.back().second;
  }
};

/// Parse expressions that start with an identifier.
///
/// This function can parse named values and matchers.
/// In case of failure it will try to determine the user's intent to give
/// an appropriate error message.
bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) {
  const TokenInfo NameToken = Tokenizer->consumeNextToken();

  if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
    // Parse as a named value.
    // The branch is taken only if the looked-up value converts to true; with
    // no NamedValues map the default-constructed value is tested instead.
    if (const VariantValue NamedValue =
            NamedValues ? NamedValues->lookup(NameToken.Text)
                        : VariantValue()) {

      // A named value not followed by '.' is the whole expression.
      if (Tokenizer->nextTokenKind() != TokenInfo::TK_Period) {
        *Value = NamedValue;
        return true;
      }

      std::string BindID;
      // Consume the '.' and then the name of the chained call.
      Tokenizer->consumeNextToken();
      TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
      if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
        addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1));
        return false;
      }

      if (ChainCallToken.Kind != TokenInfo::TK_Ident ||
          (ChainCallToken.Text != TokenInfo::ID_Bind &&
           ChainCallToken.Text != TokenInfo::ID_With)) {
        Error->addError(ChainCallToken.Range,
                        Error->ET_ParserMalformedChainedExpr);
        return false;
      }
      // ".with" is recognized here only to report that it is not supported
      // on a named value.
      if (ChainCallToken.Text == TokenInfo::ID_With) {

        Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
                                 NameToken.Text, NameToken.Range);

        Error->addError(ChainCallToken.Range,
                        Error->ET_RegistryMatcherNoWithSupport);
        return false;
      }
      if (!parseBindID(BindID))
        return false;

      assert(NamedValue.isMatcher());
      std::optional<DynTypedMatcher> Result =
          NamedValue.getMatcher().getSingleMatcher();
      if (Result) {
        std::optional<DynTypedMatcher> Bound = Result->tryBind(BindID);
        if (Bound) {
          *Value = VariantMatcher::SingleMatcher(*Bound);
          return true;
        }
      }
      // No single matcher, or it could not be bound. No error is added here.
      return false;
    }

    if (Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine) {
      Error->addError(Tokenizer->peekNextToken().Range,
                      Error->ET_ParserNoOpenParen)
          << "NewLine";
      return false;
    }

    // If the syntax is correct and the name is not a matcher either, report
    // unknown named value.
    // (A newline was already rejected just above, so the TK_NewLine test below
    // cannot be the one that matches.)
    if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma ||
         Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen ||
         Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine ||
         Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) &&
        !S->lookupMatcherCtor(NameToken.Text)) {
      Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound)
          << NameToken.Text;
      return false;
    }
    // Otherwise, fallback to the matcher parser.
  }

  Tokenizer->SkipNewlines();

  assert(NameToken.Kind == TokenInfo::TK_Ident);
  TokenInfo OpenToken = Tokenizer->consumeNextToken();
  if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
    Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen)
        << OpenToken.Text;
    return false;
  }

  // The lookup may fail; parseMatcherExpressionImpl() reports that.
  std::optional<MatcherCtor> Ctor = S->lookupMatcherCtor(NameToken.Text);

  // Parse as a matcher expression.
  return parseMatcherExpressionImpl(NameToken, OpenToken, Ctor, Value);
}

/// Parses the parenthesized string argument of a bind call.
///
/// The tokenizer must be positioned just after the \c bind identifier.
/// Newlines are skipped before the ID and before the closing parenthesis, but
/// not before the opening parenthesis.
///
/// \param BindID receives the string on success.
/// \return \c true on success; otherwise an error is reported on the first
/// offending token and \c false is returned.
bool Parser::parseBindID(std::string &BindID) {
  // Parse the parenthesized argument to .bind("foo")
  // All three tokens are consumed before any of them is validated.
  const TokenInfo OpenToken = Tokenizer->consumeNextToken();
  const TokenInfo IDToken = Tokenizer->consumeNextTokenIgnoreNewlines();
  const TokenInfo CloseToken = Tokenizer->consumeNextTokenIgnoreNewlines();

  // TODO: We could use different error codes for each/some to be more
  //       explicit about the syntax error.
  if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
    Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr);
    return false;
  }
  if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) {
    Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr);
    return false;
  }
  if (CloseToken.Kind != TokenInfo::TK_CloseParen) {
    Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr);
    return false;
  }
  BindID = IDToken.Value.getString();
  return true;
}

/// Parses the argument list of a builder matcher and the chained call that
/// may follow it.
///
/// Each argument must be an identifier naming a registered matcher for which
/// \c Sema::nodeMatcherType() yields a node kind. The collected kinds are
/// passed to \c Sema::buildMatcherCtor(). The result is then either bound
/// with a chained \c bind call, applied to a second argument list introduced
/// by a chained \c with call, or constructed as is.
///
/// \param Ctor the builder matcher constructor.
/// \param NameToken the token naming the matcher.
/// \param OpenToken the already consumed opening parenthesis.
/// \param Value receives the resulting matcher on success.
/// \return \c true on success, \c false otherwise.
bool Parser::parseMatcherBuilder(MatcherCtor Ctor, const TokenInfo &NameToken,
                                 const TokenInfo &OpenToken,
                                 VariantValue *Value) {
  std::vector<ParserValue> Args;
  // Stays at the default kind (TK_Eof) unless a closing parenthesis is seen.
  TokenInfo EndToken;

  Tokenizer->SkipNewlines();

  {
    ScopedContextEntry SCE(this, Ctor);

    while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
      if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
        // End of args.
        EndToken = Tokenizer->consumeNextToken();
        break;
      }
      if (!Args.empty()) {
        // We must find a , token to continue.
        TokenInfo CommaToken = Tokenizer->consumeNextToken();
        if (CommaToken.Kind != TokenInfo::TK_Comma) {
          Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
              << CommaToken.Text;
          return false;
        }
      }

      Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
                               NameToken.Text, NameToken.Range,
                               Args.size() + 1);
      ParserValue ArgValue;
      Tokenizer->SkipNewlines();

      if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_CodeCompletion) {
        addExpressionCompletions();
        return false;
      }

      TokenInfo NodeMatcherToken = Tokenizer->consumeNextToken();

      // Only bare identifiers are accepted as arguments here. The error is
      // attached to the builder's name rather than to the argument token.
      if (NodeMatcherToken.Kind != TokenInfo::TK_Ident) {
        Error->addError(NameToken.Range, Error->ET_ParserFailedToBuildMatcher)
            << NameToken.Text;
        return false;
      }

      ArgValue.Text = NodeMatcherToken.Text;
      ArgValue.Range = NodeMatcherToken.Range;

      std::optional<MatcherCtor> MappedMatcher =
          S->lookupMatcherCtor(ArgValue.Text);

      if (!MappedMatcher) {
        Error->addError(NodeMatcherToken.Range,
                        Error->ET_RegistryMatcherNotFound)
            << NodeMatcherToken.Text;
        return false;
      }

      ASTNodeKind NK = S->nodeMatcherType(*MappedMatcher);

      if (NK.isNone()) {
        Error->addError(NodeMatcherToken.Range,
                        Error->ET_RegistryNonNodeMatcher)
            << NodeMatcherToken.Text;
        return false;
      }

      // The argument value is the node kind, not a matcher.
      ArgValue.Value = NK;

      Tokenizer->SkipNewlines();
      Args.push_back(ArgValue);

      SCE.nextArg();
    }
  }

  // The loop ended at end of input without seeing a closing parenthesis.
  if (EndToken.Kind == TokenInfo::TK_Eof) {
    Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
    return false;
  }

  internal::MatcherDescriptorPtr BuiltCtor =
      S->buildMatcherCtor(Ctor, NameToken.Range, Args, Error);

  if (!BuiltCtor.get()) {
    Error->addError(NameToken.Range, Error->ET_ParserFailedToBuildMatcher)
        << NameToken.Text;
    return false;
  }

  std::string BindID;
  if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
    // Consume the '.' and then the name of the chained call.
    Tokenizer->consumeNextToken();
    TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
    if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
      addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1));
      addCompletion(ChainCallToken, MatcherCompletion("with(", "with", 1));
      return false;
    }
    if (ChainCallToken.Kind != TokenInfo::TK_Ident ||
        (ChainCallToken.Text != TokenInfo::ID_Bind &&
         ChainCallToken.Text != TokenInfo::ID_With)) {
      Error->addError(ChainCallToken.Range,
                      Error->ET_ParserMalformedChainedExpr);
      return false;
    }
    if (ChainCallToken.Text == TokenInfo::ID_Bind) {
      if (!parseBindID(BindID))
        return false;
      Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
                               NameToken.Text, NameToken.Range);
      SourceRange MatcherRange = NameToken.Range;
      MatcherRange.End = ChainCallToken.Range.End;
      // The built constructor is invoked with an empty argument list.
      VariantMatcher Result = S->actOnMatcherExpression(
          BuiltCtor.get(), MatcherRange, BindID, {}, Error);
      if (Result.isNull())
        return false;

      *Value = Result;
      return true;
    } else if (ChainCallToken.Text == TokenInfo::ID_With) {
      Tokenizer->SkipNewlines();

      if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
        // Spell out "EOF" instead of using the EOF token's text.
        StringRef ErrTxt = Tokenizer->nextTokenKind() == TokenInfo::TK_Eof
                               ? StringRef("EOF")
                               : Tokenizer->peekNextToken().Text;
        Error->addError(Tokenizer->peekNextToken().Range,
                        Error->ET_ParserNoOpenParen)
            << ErrTxt;
        return false;
      }

      TokenInfo WithOpenToken = Tokenizer->consumeNextToken();

      // Parse the argument list of the with call using the built constructor.
      return parseMatcherExpressionImpl(NameToken, WithOpenToken,
                                        BuiltCtor.get(), Value);
    }
  }

  // No chained call: construct the matcher with an empty bind ID.
  Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
                           NameToken.Text, NameToken.Range);
  SourceRange MatcherRange = NameToken.Range;
  MatcherRange.End = EndToken.Range.End;
  VariantMatcher Result = S->actOnMatcherExpression(
      BuiltCtor.get(), MatcherRange, BindID, {}, Error);
  if (Result.isNull())
    return false;

  *Value = Result;
  return true;
}

/// Parse and validate a matcher expression.
/// \return \c true on success, in which case \c Value has the matcher parsed.
///   If the input is malformed, or some argument has an error, it
///   returns \c false.
///
/// The tokenizer must be positioned just after \p OpenToken. If \p Ctor has
/// no value an error is reported, but the arguments are still parsed.
bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken,
                                        const TokenInfo &OpenToken,
                                        std::optional<MatcherCtor> Ctor,
                                        VariantValue *Value) {
  if (!Ctor) {
    Error->addError(NameToken.Range, Error->ET_RegistryMatcherNotFound)
        << NameToken.Text;
    // Do not return here. We need to continue to give completion suggestions.
  }

  // Builder matchers have their own argument syntax.
  if (Ctor && *Ctor && S->isBuilderMatcher(*Ctor))
    return parseMatcherBuilder(*Ctor, NameToken, OpenToken, Value);

  std::vector<ParserValue> Args;
  // Stays at the default kind (TK_Eof) unless a closing parenthesis is seen.
  TokenInfo EndToken;

  Tokenizer->SkipNewlines();

  {
    // A null entry is pushed when the matcher is unknown;
    // addExpressionCompletions() gives up when it finds one.
    ScopedContextEntry SCE(this, Ctor.value_or(nullptr));

    while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
      if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
        // End of args.
        EndToken = Tokenizer->consumeNextToken();
        break;
      }
      if (!Args.empty()) {
        // We must find a , token to continue.
        const TokenInfo CommaToken = Tokenizer->consumeNextToken();
        if (CommaToken.Kind != TokenInfo::TK_Comma) {
          Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
              << CommaToken.Text;
          return false;
        }
      }

      Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
                               NameToken.Text, NameToken.Range,
                               Args.size() + 1);
      ParserValue ArgValue;
      Tokenizer->SkipNewlines();
      // Text and range are taken from the first token of the argument only.
      ArgValue.Text = Tokenizer->peekNextToken().Text;
      ArgValue.Range = Tokenizer->peekNextToken().Range;
      if (!parseExpressionImpl(&ArgValue.Value)) {
        return false;
      }

      Tokenizer->SkipNewlines();
      Args.push_back(ArgValue);
      SCE.nextArg();
    }
  }

  // The loop ended at end of input without seeing a closing parenthesis.
  if (EndToken.Kind == TokenInfo::TK_Eof) {
    Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
    return false;
  }

  std::string BindID;
  if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
    // Consume the '.' and then the name of the chained call.
    Tokenizer->consumeNextToken();
    TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
    if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
      addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1));
      return false;
    }

    if (ChainCallToken.Kind != TokenInfo::TK_Ident) {
      Error->addError(ChainCallToken.Range,
                      Error->ET_ParserMalformedChainedExpr);
      return false;
    }
    // ".with" is recognized here only to report that this matcher does not
    // support it.
    if (ChainCallToken.Text == TokenInfo::ID_With) {

      Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
                               NameToken.Text, NameToken.Range);

      Error->addError(ChainCallToken.Range,
                      Error->ET_RegistryMatcherNoWithSupport);
      return false;
    }
    if (ChainCallToken.Text != TokenInfo::ID_Bind) {
      Error->addError(ChainCallToken.Range,
                      Error->ET_ParserMalformedChainedExpr);
      return false;
    }
    if (!parseBindID(BindID))
      return false;
  }

  // The unknown-matcher error was reported at the top of the function.
  if (!Ctor)
    return false;

  // Merge the start and end infos.
  Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
                           NameToken.Text, NameToken.Range);
  SourceRange MatcherRange = NameToken.Range;
  MatcherRange.End = EndToken.Range.End;
  VariantMatcher Result = S->actOnMatcherExpression(
      *Ctor, MatcherRange, BindID, Args, Error);
  if (Result.isNull()) return false;

  *Value = Result;
  return true;
}

// If the prefix of this completion matches the completion token, add it to
// Completions minus the prefix.
// Completions whose specificity is 0 are never added.
void Parser::addCompletion(const TokenInfo &CompToken,
                           const MatcherCompletion& Completion) {
  if (StringRef(Completion.TypedText).startswith(CompToken.Text) &&
      Completion.Specificity > 0) {
    Completions.emplace_back(Completion.TypedText.substr(CompToken.Text.size()),
                             Completion.MatcherDecl, Completion.Specificity);
  }
}

/// Returns a completion for every named value that is convertible to one of
/// \p AcceptedTypes.
///
/// The declaration text of each completion is the value's type followed by
/// its name. Returns an empty vector when there is no named value map.
std::vector<MatcherCompletion> Parser::getNamedValueCompletions(
    ArrayRef<ArgKind> AcceptedTypes) {
  if (!NamedValues) return std::vector<MatcherCompletion>();
  std::vector<MatcherCompletion> Result;
  for (const auto &Entry : *NamedValues) {
    unsigned Specificity;
    if (Entry.getValue().isConvertibleTo(AcceptedTypes, &Specificity)) {
      std::string Decl =
          (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str();
      Result.emplace_back(Entry.getKey(), Decl, Specificity);
    }
  }
  return Result;
}

/// Consumes the pending code completion token and collects the matcher and
/// named value completions that are acceptable in the current context.
void Parser::addExpressionCompletions() {
  const TokenInfo CompToken = Tokenizer->consumeNextTokenIgnoreNewlines();
  assert(CompToken.Kind == TokenInfo::TK_CodeCompletion);

  // We cannot complete code if there is an invalid element on the context
  // stack.
  for (ContextStackTy::iterator I = ContextStack.begin(),
                                E = ContextStack.end();
       I != E; ++I) {
    if (!I->first)
      return;
  }

  auto AcceptedTypes = S->getAcceptedCompletionTypes(ContextStack);
  for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) {
    addCompletion(CompToken, Completion);
  }

  for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) {
    addCompletion(CompToken, Completion);
  }
}

/// Parse an <Expression>
///
/// Dispatches on the kind of the next token. Literals are returned directly
/// and identifiers are handed to parseIdentifierPrefixImpl(). A code
/// completion token collects completions and makes the parse fail. Any other
/// token is an error.
bool Parser::parseExpressionImpl(VariantValue *Value) {
  switch (Tokenizer->nextTokenKind()) {
  case TokenInfo::TK_Literal:
    *Value = Tokenizer->consumeNextToken().Value;
    return true;

  case TokenInfo::TK_Ident:
    return parseIdentifierPrefixImpl(Value);

  case TokenInfo::TK_CodeCompletion:
    addExpressionCompletions();
    return false;

  case TokenInfo::TK_Eof:
    Error->addError(Tokenizer->consumeNextToken().Range,
                    Error->ET_ParserNoCode);
    return false;

  case TokenInfo::TK_Error:
    // This error was already reported by the tokenizer.
    return false;
  case TokenInfo::TK_NewLine:
  case TokenInfo::TK_OpenParen:
  case TokenInfo::TK_CloseParen:
  case TokenInfo::TK_Comma:
  case TokenInfo::TK_Period:
  case TokenInfo::TK_InvalidChar:
    // None of these can start an expression. A newline is reported by name
    // instead of by its text.
    const TokenInfo Token = Tokenizer->consumeNextToken();
    Error->addError(Token.Range, Error->ET_ParserInvalidToken)
        << (Token.Kind == TokenInfo::TK_NewLine ? "NewLine" : Token.Text);
    return false;
  }

  llvm_unreachable("Unknown token kind.");
}

/// The Sema used when the caller passes a null one.
static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema;

/// Creates a parser reading from \p Tokenizer. A null \p S selects the
/// default registry-backed Sema.
Parser::Parser(CodeTokenizer *Tokenizer, Sema *S,
               const NamedValueMap *NamedValues, Diagnostics *Error)
    : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema),
      NamedValues(NamedValues), Error(Error) {}

Parser::RegistrySema::~RegistrySema() = default;

/// Forwards to \c Registry::lookupMatcherCtor().
std::optional<MatcherCtor>
Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) {
  return Registry::lookupMatcherCtor(MatcherName);
}

/// Constructs the matcher through the registry, using the bound variant when
/// \p BindID is not empty.
VariantMatcher Parser::RegistrySema::actOnMatcherExpression(
    MatcherCtor Ctor, SourceRange NameRange, StringRef BindID,
    ArrayRef<ParserValue> Args, Diagnostics *Error) {
  if (BindID.empty()) {
    return Registry::constructMatcher(Ctor, NameRange, Args, Error);
  } else {
    return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args,
                                           Error);
  }
}

/// Forwards to \c Registry::getAcceptedCompletionTypes().
std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes(
    ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
  return Registry::getAcceptedCompletionTypes(Context);
}

/// Forwards to \c Registry::getMatcherCompletions().
std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions(
    ArrayRef<ArgKind> AcceptedTypes) {
  return Registry::getMatcherCompletions(AcceptedTypes);
}

/// Forwards to \c Registry::isBuilderMatcher().
bool Parser::RegistrySema::isBuilderMatcher(MatcherCtor Ctor) const {
  return Registry::isBuilderMatcher(Ctor);
}

/// Forwards to \c Registry::nodeMatcherType().
ASTNodeKind Parser::RegistrySema::nodeMatcherType(MatcherCtor Ctor) const {
  return Registry::nodeMatcherType(Ctor);
}

/// Forwards to \c Registry::buildMatcherCtor().
internal::MatcherDescriptorPtr
Parser::RegistrySema::buildMatcherCtor(MatcherCtor Ctor, SourceRange NameRange,
                                       ArrayRef<ParserValue> Args,
                                       Diagnostics *Error) const {
  return Registry::buildMatcherCtor(Ctor, NameRange, Args, Error);
}

/// Parses one expression from the front of \p Code.
///
/// \p Code is advanced past the consumed input. After the expression only
/// end of input or a newline may follow; anything else is reported as
/// trailing code.
///
/// \return \c true on success, in which case \p Value holds the result.
bool Parser::parseExpression(StringRef &Code, Sema *S,
                             const NamedValueMap *NamedValues,
                             VariantValue *Value, Diagnostics *Error) {
  CodeTokenizer Tokenizer(Code, Error);
  if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value))
    return false;
  auto NT = Tokenizer.peekNextToken();
  if (NT.Kind != TokenInfo::TK_Eof && NT.Kind != TokenInfo::TK_NewLine) {
    Error->addError(Tokenizer.peekNextToken().Range,
                    Error->ET_ParserTrailingCode);
    return false;
  }
  return true;
}

/// Returns the completions available at \p CompletionOffset in \p Code.
///
/// The expression is parsed with a local diagnostics object and the result of
/// the parse is ignored; only the completions collected on the way are
/// returned.
std::vector<MatcherCompletion>
Parser::completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S,
                           const NamedValueMap *NamedValues) {
  Diagnostics Error;
  CodeTokenizer Tokenizer(Code, &Error, CompletionOffset);
  Parser P(&Tokenizer, S, NamedValues, &Error);
  VariantValue Dummy;
  P.parseExpressionImpl(&Dummy);

  // Sort by specificity, then by name.
  // Higher specificity comes first.
  llvm::sort(P.Completions,
             [](const MatcherCompletion &A, const MatcherCompletion &B) {
               if (A.Specificity != B.Specificity)
                 return A.Specificity > B.Specificity;
               return A.TypedText < B.TypedText;
             });

  return P.Completions;
}

/// Parses an expression and requires it to be a single matcher.
///
/// \return the matcher, or \c std::nullopt if parsing failed, the value is
/// not a matcher, or the matcher cannot be reduced to a single matcher. An
/// error is reported in each of these cases.
std::optional<DynTypedMatcher>
Parser::parseMatcherExpression(StringRef &Code, Sema *S,
                               const NamedValueMap *NamedValues,
                               Diagnostics *Error) {
  VariantValue Value;
  if (!parseExpression(Code, S, NamedValues, &Value, Error))
    return std::nullopt;
  if (!Value.isMatcher()) {
    Error->addError(SourceRange(), Error->ET_ParserNotAMatcher);
    return std::nullopt;
  }
  std::optional<DynTypedMatcher> Result = Value.getMatcher().getSingleMatcher();
  if (!Result) {
    Error->addError(SourceRange(), Error->ET_ParserOverloadedType)
        << Value.getTypeAsString();
  }
  return Result;
}

} // namespace dynamic
} // namespace ast_matchers
} // namespace clang