1 //===- Parser.cpp - Matcher expression parser -----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// Recursive parser implementation for the matcher expression grammar. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/ASTMatchers/Dynamic/Parser.h" 15 #include "clang/ASTMatchers/ASTMatchersInternal.h" 16 #include "clang/ASTMatchers/Dynamic/Diagnostics.h" 17 #include "clang/ASTMatchers/Dynamic/Registry.h" 18 #include "clang/Basic/CharInfo.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/Support/ErrorHandling.h" 21 #include "llvm/Support/ManagedStatic.h" 22 #include <algorithm> 23 #include <cassert> 24 #include <cerrno> 25 #include <cstddef> 26 #include <cstdlib> 27 #include <optional> 28 #include <string> 29 #include <utility> 30 #include <vector> 31 32 namespace clang { 33 namespace ast_matchers { 34 namespace dynamic { 35 36 /// Simple structure to hold information for one token from the parser. 37 struct Parser::TokenInfo { 38 /// Different possible tokens. 39 enum TokenKind { 40 TK_Eof, 41 TK_NewLine, 42 TK_OpenParen, 43 TK_CloseParen, 44 TK_Comma, 45 TK_Period, 46 TK_Literal, 47 TK_Ident, 48 TK_InvalidChar, 49 TK_Error, 50 TK_CodeCompletion 51 }; 52 53 /// Some known identifiers. 54 static const char* const ID_Bind; 55 static const char *const ID_With; 56 57 TokenInfo() = default; 58 59 StringRef Text; 60 TokenKind Kind = TK_Eof; 61 SourceRange Range; 62 VariantValue Value; 63 }; 64 65 const char* const Parser::TokenInfo::ID_Bind = "bind"; 66 const char *const Parser::TokenInfo::ID_With = "with"; 67 68 /// Simple tokenizer for the parser. 69 class Parser::CodeTokenizer { 70 public: 71 explicit CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error) 72 : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) { 73 NextToken = getNextToken(); 74 } 75 76 CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error, 77 unsigned CodeCompletionOffset) 78 : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error), 79 CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) { 80 NextToken = getNextToken(); 81 } 82 83 /// Returns but doesn't consume the next token. 84 const TokenInfo &peekNextToken() const { return NextToken; } 85 86 /// Consumes and returns the next token. 87 TokenInfo consumeNextToken() { 88 TokenInfo ThisToken = NextToken; 89 NextToken = getNextToken(); 90 return ThisToken; 91 } 92 93 TokenInfo SkipNewlines() { 94 while (NextToken.Kind == TokenInfo::TK_NewLine) 95 NextToken = getNextToken(); 96 return NextToken; 97 } 98 99 TokenInfo consumeNextTokenIgnoreNewlines() { 100 SkipNewlines(); 101 if (NextToken.Kind == TokenInfo::TK_Eof) 102 return NextToken; 103 return consumeNextToken(); 104 } 105 106 TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; } 107 108 private: 109 TokenInfo getNextToken() { 110 consumeWhitespace(); 111 TokenInfo Result; 112 Result.Range.Start = currentLocation(); 113 114 if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) { 115 Result.Kind = TokenInfo::TK_CodeCompletion; 116 Result.Text = StringRef(CodeCompletionLocation, 0); 117 CodeCompletionLocation = nullptr; 118 return Result; 119 } 120 121 if (Code.empty()) { 122 Result.Kind = TokenInfo::TK_Eof; 123 Result.Text = ""; 124 return Result; 125 } 126 127 switch (Code[0]) { 128 case '#': 129 Code = Code.drop_until([](char c) { return c == '\n'; }); 130 return getNextToken(); 131 case ',': 132 Result.Kind = TokenInfo::TK_Comma; 133 Result.Text = Code.substr(0, 1); 134 Code = Code.drop_front(); 135 break; 136 case '.': 137 Result.Kind = TokenInfo::TK_Period; 138 Result.Text = Code.substr(0, 1); 139 Code = Code.drop_front(); 140 break; 141 case '\n': 142 ++Line; 143 StartOfLine = Code.drop_front(); 144 Result.Kind = TokenInfo::TK_NewLine; 145 Result.Text = Code.substr(0, 1); 146 Code = Code.drop_front(); 147 break; 148 case '(': 149 Result.Kind = TokenInfo::TK_OpenParen; 150 Result.Text = Code.substr(0, 1); 151 Code = Code.drop_front(); 152 break; 153 case ')': 154 Result.Kind = TokenInfo::TK_CloseParen; 155 Result.Text = Code.substr(0, 1); 156 Code = Code.drop_front(); 157 break; 158 159 case '"': 160 case '\'': 161 // Parse a string literal. 162 consumeStringLiteral(&Result); 163 break; 164 165 case '0': case '1': case '2': case '3': case '4': 166 case '5': case '6': case '7': case '8': case '9': 167 // Parse an unsigned and float literal. 168 consumeNumberLiteral(&Result); 169 break; 170 171 default: 172 if (isAlphanumeric(Code[0])) { 173 // Parse an identifier 174 size_t TokenLength = 1; 175 while (true) { 176 // A code completion location in/immediately after an identifier will 177 // cause the portion of the identifier before the code completion 178 // location to become a code completion token. 179 if (CodeCompletionLocation == Code.data() + TokenLength) { 180 CodeCompletionLocation = nullptr; 181 Result.Kind = TokenInfo::TK_CodeCompletion; 182 Result.Text = Code.substr(0, TokenLength); 183 Code = Code.drop_front(TokenLength); 184 return Result; 185 } 186 if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength])) 187 break; 188 ++TokenLength; 189 } 190 if (TokenLength == 4 && Code.starts_with("true")) { 191 Result.Kind = TokenInfo::TK_Literal; 192 Result.Value = true; 193 } else if (TokenLength == 5 && Code.starts_with("false")) { 194 Result.Kind = TokenInfo::TK_Literal; 195 Result.Value = false; 196 } else { 197 Result.Kind = TokenInfo::TK_Ident; 198 Result.Text = Code.substr(0, TokenLength); 199 } 200 Code = Code.drop_front(TokenLength); 201 } else { 202 Result.Kind = TokenInfo::TK_InvalidChar; 203 Result.Text = Code.substr(0, 1); 204 Code = Code.drop_front(1); 205 } 206 break; 207 } 208 209 Result.Range.End = currentLocation(); 210 return Result; 211 } 212 213 /// Consume an unsigned and float literal. 214 void consumeNumberLiteral(TokenInfo *Result) { 215 bool isFloatingLiteral = false; 216 unsigned Length = 1; 217 if (Code.size() > 1) { 218 // Consume the 'x' or 'b' radix modifier, if present. 219 switch (toLowercase(Code[1])) { 220 case 'x': case 'b': Length = 2; 221 } 222 } 223 while (Length < Code.size() && isHexDigit(Code[Length])) 224 ++Length; 225 226 // Try to recognize a floating point literal. 227 while (Length < Code.size()) { 228 char c = Code[Length]; 229 if (c == '-' || c == '+' || c == '.' || isHexDigit(c)) { 230 isFloatingLiteral = true; 231 Length++; 232 } else { 233 break; 234 } 235 } 236 237 Result->Text = Code.substr(0, Length); 238 Code = Code.drop_front(Length); 239 240 if (isFloatingLiteral) { 241 char *end; 242 errno = 0; 243 std::string Text = Result->Text.str(); 244 double doubleValue = strtod(Text.c_str(), &end); 245 if (*end == 0 && errno == 0) { 246 Result->Kind = TokenInfo::TK_Literal; 247 Result->Value = doubleValue; 248 return; 249 } 250 } else { 251 unsigned Value; 252 if (!Result->Text.getAsInteger(0, Value)) { 253 Result->Kind = TokenInfo::TK_Literal; 254 Result->Value = Value; 255 return; 256 } 257 } 258 259 SourceRange Range; 260 Range.Start = Result->Range.Start; 261 Range.End = currentLocation(); 262 Error->addError(Range, Error->ET_ParserNumberError) << Result->Text; 263 Result->Kind = TokenInfo::TK_Error; 264 } 265 266 /// Consume a string literal. 267 /// 268 /// \c Code must be positioned at the start of the literal (the opening 269 /// quote). Consumed until it finds the same closing quote character. 270 void consumeStringLiteral(TokenInfo *Result) { 271 bool InEscape = false; 272 const char Marker = Code[0]; 273 for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) { 274 if (InEscape) { 275 InEscape = false; 276 continue; 277 } 278 if (Code[Length] == '\\') { 279 InEscape = true; 280 continue; 281 } 282 if (Code[Length] == Marker) { 283 Result->Kind = TokenInfo::TK_Literal; 284 Result->Text = Code.substr(0, Length + 1); 285 Result->Value = Code.substr(1, Length - 1); 286 Code = Code.drop_front(Length + 1); 287 return; 288 } 289 } 290 291 StringRef ErrorText = Code; 292 Code = Code.drop_front(Code.size()); 293 SourceRange Range; 294 Range.Start = Result->Range.Start; 295 Range.End = currentLocation(); 296 Error->addError(Range, Error->ET_ParserStringError) << ErrorText; 297 Result->Kind = TokenInfo::TK_Error; 298 } 299 300 /// Consume all leading whitespace from \c Code. 301 void consumeWhitespace() { 302 // Don't trim newlines. 303 Code = Code.ltrim(" \t\v\f\r"); 304 } 305 306 SourceLocation currentLocation() { 307 SourceLocation Location; 308 Location.Line = Line; 309 Location.Column = Code.data() - StartOfLine.data() + 1; 310 return Location; 311 } 312 313 StringRef &Code; 314 StringRef StartOfLine; 315 unsigned Line = 1; 316 Diagnostics *Error; 317 TokenInfo NextToken; 318 const char *CodeCompletionLocation = nullptr; 319 }; 320 321 Parser::Sema::~Sema() = default; 322 323 std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes( 324 llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) { 325 return {}; 326 } 327 328 std::vector<MatcherCompletion> 329 Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) { 330 return {}; 331 } 332 333 struct Parser::ScopedContextEntry { 334 Parser *P; 335 336 ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) { 337 P->ContextStack.push_back(std::make_pair(C, 0u)); 338 } 339 340 ~ScopedContextEntry() { 341 P->ContextStack.pop_back(); 342 } 343 344 void nextArg() { 345 ++P->ContextStack.back().second; 346 } 347 }; 348 349 /// Parse expressions that start with an identifier. 350 /// 351 /// This function can parse named values and matchers. 352 /// In case of failure it will try to determine the user's intent to give 353 /// an appropriate error message. 354 bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) { 355 const TokenInfo NameToken = Tokenizer->consumeNextToken(); 356 357 if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) { 358 // Parse as a named value. 359 if (const VariantValue NamedValue = 360 NamedValues ? NamedValues->lookup(NameToken.Text) 361 : VariantValue()) { 362 363 if (Tokenizer->nextTokenKind() != TokenInfo::TK_Period) { 364 *Value = NamedValue; 365 return true; 366 } 367 368 std::string BindID; 369 Tokenizer->consumeNextToken(); 370 TokenInfo ChainCallToken = Tokenizer->consumeNextToken(); 371 if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) { 372 addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1)); 373 return false; 374 } 375 376 if (ChainCallToken.Kind != TokenInfo::TK_Ident || 377 (ChainCallToken.Text != TokenInfo::ID_Bind && 378 ChainCallToken.Text != TokenInfo::ID_With)) { 379 Error->addError(ChainCallToken.Range, 380 Error->ET_ParserMalformedChainedExpr); 381 return false; 382 } 383 if (ChainCallToken.Text == TokenInfo::ID_With) { 384 385 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, 386 NameToken.Text, NameToken.Range); 387 388 Error->addError(ChainCallToken.Range, 389 Error->ET_RegistryMatcherNoWithSupport); 390 return false; 391 } 392 if (!parseBindID(BindID)) 393 return false; 394 395 assert(NamedValue.isMatcher()); 396 std::optional<DynTypedMatcher> Result = 397 NamedValue.getMatcher().getSingleMatcher(); 398 if (Result) { 399 std::optional<DynTypedMatcher> Bound = Result->tryBind(BindID); 400 if (Bound) { 401 *Value = VariantMatcher::SingleMatcher(*Bound); 402 return true; 403 } 404 } 405 return false; 406 } 407 408 if (Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine) { 409 Error->addError(Tokenizer->peekNextToken().Range, 410 Error->ET_ParserNoOpenParen) 411 << "NewLine"; 412 return false; 413 } 414 415 // If the syntax is correct and the name is not a matcher either, report 416 // unknown named value. 417 if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma || 418 Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen || 419 Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine || 420 Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) && 421 !S->lookupMatcherCtor(NameToken.Text)) { 422 Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound) 423 << NameToken.Text; 424 return false; 425 } 426 // Otherwise, fallback to the matcher parser. 427 } 428 429 Tokenizer->SkipNewlines(); 430 431 assert(NameToken.Kind == TokenInfo::TK_Ident); 432 TokenInfo OpenToken = Tokenizer->consumeNextToken(); 433 if (OpenToken.Kind != TokenInfo::TK_OpenParen) { 434 Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen) 435 << OpenToken.Text; 436 return false; 437 } 438 439 std::optional<MatcherCtor> Ctor = S->lookupMatcherCtor(NameToken.Text); 440 441 // Parse as a matcher expression. 442 return parseMatcherExpressionImpl(NameToken, OpenToken, Ctor, Value); 443 } 444 445 bool Parser::parseBindID(std::string &BindID) { 446 // Parse the parenthesized argument to .bind("foo") 447 const TokenInfo OpenToken = Tokenizer->consumeNextToken(); 448 const TokenInfo IDToken = Tokenizer->consumeNextTokenIgnoreNewlines(); 449 const TokenInfo CloseToken = Tokenizer->consumeNextTokenIgnoreNewlines(); 450 451 // TODO: We could use different error codes for each/some to be more 452 // explicit about the syntax error. 453 if (OpenToken.Kind != TokenInfo::TK_OpenParen) { 454 Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr); 455 return false; 456 } 457 if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) { 458 Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr); 459 return false; 460 } 461 if (CloseToken.Kind != TokenInfo::TK_CloseParen) { 462 Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr); 463 return false; 464 } 465 BindID = IDToken.Value.getString(); 466 return true; 467 } 468 469 bool Parser::parseMatcherBuilder(MatcherCtor Ctor, const TokenInfo &NameToken, 470 const TokenInfo &OpenToken, 471 VariantValue *Value) { 472 std::vector<ParserValue> Args; 473 TokenInfo EndToken; 474 475 Tokenizer->SkipNewlines(); 476 477 { 478 ScopedContextEntry SCE(this, Ctor); 479 480 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) { 481 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) { 482 // End of args. 483 EndToken = Tokenizer->consumeNextToken(); 484 break; 485 } 486 if (!Args.empty()) { 487 // We must find a , token to continue. 488 TokenInfo CommaToken = Tokenizer->consumeNextToken(); 489 if (CommaToken.Kind != TokenInfo::TK_Comma) { 490 Error->addError(CommaToken.Range, Error->ET_ParserNoComma) 491 << CommaToken.Text; 492 return false; 493 } 494 } 495 496 Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error, 497 NameToken.Text, NameToken.Range, 498 Args.size() + 1); 499 ParserValue ArgValue; 500 Tokenizer->SkipNewlines(); 501 502 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_CodeCompletion) { 503 addExpressionCompletions(); 504 return false; 505 } 506 507 TokenInfo NodeMatcherToken = Tokenizer->consumeNextToken(); 508 509 if (NodeMatcherToken.Kind != TokenInfo::TK_Ident) { 510 Error->addError(NameToken.Range, Error->ET_ParserFailedToBuildMatcher) 511 << NameToken.Text; 512 return false; 513 } 514 515 ArgValue.Text = NodeMatcherToken.Text; 516 ArgValue.Range = NodeMatcherToken.Range; 517 518 std::optional<MatcherCtor> MappedMatcher = 519 S->lookupMatcherCtor(ArgValue.Text); 520 521 if (!MappedMatcher) { 522 Error->addError(NodeMatcherToken.Range, 523 Error->ET_RegistryMatcherNotFound) 524 << NodeMatcherToken.Text; 525 return false; 526 } 527 528 ASTNodeKind NK = S->nodeMatcherType(*MappedMatcher); 529 530 if (NK.isNone()) { 531 Error->addError(NodeMatcherToken.Range, 532 Error->ET_RegistryNonNodeMatcher) 533 << NodeMatcherToken.Text; 534 return false; 535 } 536 537 ArgValue.Value = NK; 538 539 Tokenizer->SkipNewlines(); 540 Args.push_back(ArgValue); 541 542 SCE.nextArg(); 543 } 544 } 545 546 if (EndToken.Kind == TokenInfo::TK_Eof) { 547 Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen); 548 return false; 549 } 550 551 internal::MatcherDescriptorPtr BuiltCtor = 552 S->buildMatcherCtor(Ctor, NameToken.Range, Args, Error); 553 554 if (!BuiltCtor.get()) { 555 Error->addError(NameToken.Range, Error->ET_ParserFailedToBuildMatcher) 556 << NameToken.Text; 557 return false; 558 } 559 560 std::string BindID; 561 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) { 562 Tokenizer->consumeNextToken(); 563 TokenInfo ChainCallToken = Tokenizer->consumeNextToken(); 564 if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) { 565 addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1)); 566 addCompletion(ChainCallToken, MatcherCompletion("with(", "with", 1)); 567 return false; 568 } 569 if (ChainCallToken.Kind != TokenInfo::TK_Ident || 570 (ChainCallToken.Text != TokenInfo::ID_Bind && 571 ChainCallToken.Text != TokenInfo::ID_With)) { 572 Error->addError(ChainCallToken.Range, 573 Error->ET_ParserMalformedChainedExpr); 574 return false; 575 } 576 if (ChainCallToken.Text == TokenInfo::ID_Bind) { 577 if (!parseBindID(BindID)) 578 return false; 579 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, 580 NameToken.Text, NameToken.Range); 581 SourceRange MatcherRange = NameToken.Range; 582 MatcherRange.End = ChainCallToken.Range.End; 583 VariantMatcher Result = S->actOnMatcherExpression( 584 BuiltCtor.get(), MatcherRange, BindID, {}, Error); 585 if (Result.isNull()) 586 return false; 587 588 *Value = Result; 589 return true; 590 } else if (ChainCallToken.Text == TokenInfo::ID_With) { 591 Tokenizer->SkipNewlines(); 592 593 if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) { 594 StringRef ErrTxt = Tokenizer->nextTokenKind() == TokenInfo::TK_Eof 595 ? StringRef("EOF") 596 : Tokenizer->peekNextToken().Text; 597 Error->addError(Tokenizer->peekNextToken().Range, 598 Error->ET_ParserNoOpenParen) 599 << ErrTxt; 600 return false; 601 } 602 603 TokenInfo WithOpenToken = Tokenizer->consumeNextToken(); 604 605 return parseMatcherExpressionImpl(NameToken, WithOpenToken, 606 BuiltCtor.get(), Value); 607 } 608 } 609 610 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, 611 NameToken.Text, NameToken.Range); 612 SourceRange MatcherRange = NameToken.Range; 613 MatcherRange.End = EndToken.Range.End; 614 VariantMatcher Result = S->actOnMatcherExpression( 615 BuiltCtor.get(), MatcherRange, BindID, {}, Error); 616 if (Result.isNull()) 617 return false; 618 619 *Value = Result; 620 return true; 621 } 622 623 /// Parse and validate a matcher expression. 624 /// \return \c true on success, in which case \c Value has the matcher parsed. 625 /// If the input is malformed, or some argument has an error, it 626 /// returns \c false. 627 bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken, 628 const TokenInfo &OpenToken, 629 std::optional<MatcherCtor> Ctor, 630 VariantValue *Value) { 631 if (!Ctor) { 632 Error->addError(NameToken.Range, Error->ET_RegistryMatcherNotFound) 633 << NameToken.Text; 634 // Do not return here. We need to continue to give completion suggestions. 635 } 636 637 if (Ctor && *Ctor && S->isBuilderMatcher(*Ctor)) 638 return parseMatcherBuilder(*Ctor, NameToken, OpenToken, Value); 639 640 std::vector<ParserValue> Args; 641 TokenInfo EndToken; 642 643 Tokenizer->SkipNewlines(); 644 645 { 646 ScopedContextEntry SCE(this, Ctor.value_or(nullptr)); 647 648 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) { 649 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) { 650 // End of args. 651 EndToken = Tokenizer->consumeNextToken(); 652 break; 653 } 654 if (!Args.empty()) { 655 // We must find a , token to continue. 656 const TokenInfo CommaToken = Tokenizer->consumeNextToken(); 657 if (CommaToken.Kind != TokenInfo::TK_Comma) { 658 Error->addError(CommaToken.Range, Error->ET_ParserNoComma) 659 << CommaToken.Text; 660 return false; 661 } 662 } 663 664 Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error, 665 NameToken.Text, NameToken.Range, 666 Args.size() + 1); 667 ParserValue ArgValue; 668 Tokenizer->SkipNewlines(); 669 ArgValue.Text = Tokenizer->peekNextToken().Text; 670 ArgValue.Range = Tokenizer->peekNextToken().Range; 671 if (!parseExpressionImpl(&ArgValue.Value)) { 672 return false; 673 } 674 675 Tokenizer->SkipNewlines(); 676 Args.push_back(ArgValue); 677 SCE.nextArg(); 678 } 679 } 680 681 if (EndToken.Kind == TokenInfo::TK_Eof) { 682 Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen); 683 return false; 684 } 685 686 std::string BindID; 687 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) { 688 Tokenizer->consumeNextToken(); 689 TokenInfo ChainCallToken = Tokenizer->consumeNextToken(); 690 if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) { 691 addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1)); 692 return false; 693 } 694 695 if (ChainCallToken.Kind != TokenInfo::TK_Ident) { 696 Error->addError(ChainCallToken.Range, 697 Error->ET_ParserMalformedChainedExpr); 698 return false; 699 } 700 if (ChainCallToken.Text == TokenInfo::ID_With) { 701 702 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, 703 NameToken.Text, NameToken.Range); 704 705 Error->addError(ChainCallToken.Range, 706 Error->ET_RegistryMatcherNoWithSupport); 707 return false; 708 } 709 if (ChainCallToken.Text != TokenInfo::ID_Bind) { 710 Error->addError(ChainCallToken.Range, 711 Error->ET_ParserMalformedChainedExpr); 712 return false; 713 } 714 if (!parseBindID(BindID)) 715 return false; 716 } 717 718 if (!Ctor) 719 return false; 720 721 // Merge the start and end infos. 722 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, 723 NameToken.Text, NameToken.Range); 724 SourceRange MatcherRange = NameToken.Range; 725 MatcherRange.End = EndToken.Range.End; 726 VariantMatcher Result = S->actOnMatcherExpression( 727 *Ctor, MatcherRange, BindID, Args, Error); 728 if (Result.isNull()) return false; 729 730 *Value = Result; 731 return true; 732 } 733 734 // If the prefix of this completion matches the completion token, add it to 735 // Completions minus the prefix. 736 void Parser::addCompletion(const TokenInfo &CompToken, 737 const MatcherCompletion& Completion) { 738 if (StringRef(Completion.TypedText).starts_with(CompToken.Text) && 739 Completion.Specificity > 0) { 740 Completions.emplace_back(Completion.TypedText.substr(CompToken.Text.size()), 741 Completion.MatcherDecl, Completion.Specificity); 742 } 743 } 744 745 std::vector<MatcherCompletion> Parser::getNamedValueCompletions( 746 ArrayRef<ArgKind> AcceptedTypes) { 747 if (!NamedValues) return std::vector<MatcherCompletion>(); 748 std::vector<MatcherCompletion> Result; 749 for (const auto &Entry : *NamedValues) { 750 unsigned Specificity; 751 if (Entry.getValue().isConvertibleTo(AcceptedTypes, &Specificity)) { 752 std::string Decl = 753 (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str(); 754 Result.emplace_back(Entry.getKey(), Decl, Specificity); 755 } 756 } 757 return Result; 758 } 759 760 void Parser::addExpressionCompletions() { 761 const TokenInfo CompToken = Tokenizer->consumeNextTokenIgnoreNewlines(); 762 assert(CompToken.Kind == TokenInfo::TK_CodeCompletion); 763 764 // We cannot complete code if there is an invalid element on the context 765 // stack. 766 for (ContextStackTy::iterator I = ContextStack.begin(), 767 E = ContextStack.end(); 768 I != E; ++I) { 769 if (!I->first) 770 return; 771 } 772 773 auto AcceptedTypes = S->getAcceptedCompletionTypes(ContextStack); 774 for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) { 775 addCompletion(CompToken, Completion); 776 } 777 778 for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) { 779 addCompletion(CompToken, Completion); 780 } 781 } 782 783 /// Parse an <Expression> 784 bool Parser::parseExpressionImpl(VariantValue *Value) { 785 switch (Tokenizer->nextTokenKind()) { 786 case TokenInfo::TK_Literal: 787 *Value = Tokenizer->consumeNextToken().Value; 788 return true; 789 790 case TokenInfo::TK_Ident: 791 return parseIdentifierPrefixImpl(Value); 792 793 case TokenInfo::TK_CodeCompletion: 794 addExpressionCompletions(); 795 return false; 796 797 case TokenInfo::TK_Eof: 798 Error->addError(Tokenizer->consumeNextToken().Range, 799 Error->ET_ParserNoCode); 800 return false; 801 802 case TokenInfo::TK_Error: 803 // This error was already reported by the tokenizer. 804 return false; 805 case TokenInfo::TK_NewLine: 806 case TokenInfo::TK_OpenParen: 807 case TokenInfo::TK_CloseParen: 808 case TokenInfo::TK_Comma: 809 case TokenInfo::TK_Period: 810 case TokenInfo::TK_InvalidChar: 811 const TokenInfo Token = Tokenizer->consumeNextToken(); 812 Error->addError(Token.Range, Error->ET_ParserInvalidToken) 813 << (Token.Kind == TokenInfo::TK_NewLine ? "NewLine" : Token.Text); 814 return false; 815 } 816 817 llvm_unreachable("Unknown token kind."); 818 } 819 820 static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema; 821 822 Parser::Parser(CodeTokenizer *Tokenizer, Sema *S, 823 const NamedValueMap *NamedValues, Diagnostics *Error) 824 : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema), 825 NamedValues(NamedValues), Error(Error) {} 826 827 Parser::RegistrySema::~RegistrySema() = default; 828 829 std::optional<MatcherCtor> 830 Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) { 831 return Registry::lookupMatcherCtor(MatcherName); 832 } 833 834 VariantMatcher Parser::RegistrySema::actOnMatcherExpression( 835 MatcherCtor Ctor, SourceRange NameRange, StringRef BindID, 836 ArrayRef<ParserValue> Args, Diagnostics *Error) { 837 if (BindID.empty()) { 838 return Registry::constructMatcher(Ctor, NameRange, Args, Error); 839 } else { 840 return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args, 841 Error); 842 } 843 } 844 845 std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes( 846 ArrayRef<std::pair<MatcherCtor, unsigned>> Context) { 847 return Registry::getAcceptedCompletionTypes(Context); 848 } 849 850 std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions( 851 ArrayRef<ArgKind> AcceptedTypes) { 852 return Registry::getMatcherCompletions(AcceptedTypes); 853 } 854 855 bool Parser::RegistrySema::isBuilderMatcher(MatcherCtor Ctor) const { 856 return Registry::isBuilderMatcher(Ctor); 857 } 858 859 ASTNodeKind Parser::RegistrySema::nodeMatcherType(MatcherCtor Ctor) const { 860 return Registry::nodeMatcherType(Ctor); 861 } 862 863 internal::MatcherDescriptorPtr 864 Parser::RegistrySema::buildMatcherCtor(MatcherCtor Ctor, SourceRange NameRange, 865 ArrayRef<ParserValue> Args, 866 Diagnostics *Error) const { 867 return Registry::buildMatcherCtor(Ctor, NameRange, Args, Error); 868 } 869 870 bool Parser::parseExpression(StringRef &Code, Sema *S, 871 const NamedValueMap *NamedValues, 872 VariantValue *Value, Diagnostics *Error) { 873 CodeTokenizer Tokenizer(Code, Error); 874 if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value)) 875 return false; 876 auto NT = Tokenizer.peekNextToken(); 877 if (NT.Kind != TokenInfo::TK_Eof && NT.Kind != TokenInfo::TK_NewLine) { 878 Error->addError(Tokenizer.peekNextToken().Range, 879 Error->ET_ParserTrailingCode); 880 return false; 881 } 882 return true; 883 } 884 885 std::vector<MatcherCompletion> 886 Parser::completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S, 887 const NamedValueMap *NamedValues) { 888 Diagnostics Error; 889 CodeTokenizer Tokenizer(Code, &Error, CompletionOffset); 890 Parser P(&Tokenizer, S, NamedValues, &Error); 891 VariantValue Dummy; 892 P.parseExpressionImpl(&Dummy); 893 894 // Sort by specificity, then by name. 895 llvm::sort(P.Completions, 896 [](const MatcherCompletion &A, const MatcherCompletion &B) { 897 if (A.Specificity != B.Specificity) 898 return A.Specificity > B.Specificity; 899 return A.TypedText < B.TypedText; 900 }); 901 902 return P.Completions; 903 } 904 905 std::optional<DynTypedMatcher> 906 Parser::parseMatcherExpression(StringRef &Code, Sema *S, 907 const NamedValueMap *NamedValues, 908 Diagnostics *Error) { 909 VariantValue Value; 910 if (!parseExpression(Code, S, NamedValues, &Value, Error)) 911 return std::nullopt; 912 if (!Value.isMatcher()) { 913 Error->addError(SourceRange(), Error->ET_ParserNotAMatcher); 914 return std::nullopt; 915 } 916 std::optional<DynTypedMatcher> Result = Value.getMatcher().getSingleMatcher(); 917 if (!Result) { 918 Error->addError(SourceRange(), Error->ET_ParserOverloadedType) 919 << Value.getTypeAsString(); 920 } 921 return Result; 922 } 923 924 } // namespace dynamic 925 } // namespace ast_matchers 926 } // namespace clang 927