1 //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "clang/Tooling/Syntax/BuildTree.h" 9 #include "clang/AST/ASTFwd.h" 10 #include "clang/AST/Decl.h" 11 #include "clang/AST/DeclBase.h" 12 #include "clang/AST/DeclCXX.h" 13 #include "clang/AST/DeclarationName.h" 14 #include "clang/AST/Expr.h" 15 #include "clang/AST/ExprCXX.h" 16 #include "clang/AST/RecursiveASTVisitor.h" 17 #include "clang/AST/Stmt.h" 18 #include "clang/AST/TypeLoc.h" 19 #include "clang/AST/TypeLocVisitor.h" 20 #include "clang/Basic/LLVM.h" 21 #include "clang/Basic/SourceLocation.h" 22 #include "clang/Basic/SourceManager.h" 23 #include "clang/Basic/Specifiers.h" 24 #include "clang/Basic/TokenKinds.h" 25 #include "clang/Lex/Lexer.h" 26 #include "clang/Lex/LiteralSupport.h" 27 #include "clang/Tooling/Syntax/Nodes.h" 28 #include "clang/Tooling/Syntax/Tokens.h" 29 #include "clang/Tooling/Syntax/Tree.h" 30 #include "llvm/ADT/ArrayRef.h" 31 #include "llvm/ADT/DenseMap.h" 32 #include "llvm/ADT/PointerUnion.h" 33 #include "llvm/ADT/STLExtras.h" 34 #include "llvm/ADT/ScopeExit.h" 35 #include "llvm/ADT/SmallVector.h" 36 #include "llvm/Support/Allocator.h" 37 #include "llvm/Support/Casting.h" 38 #include "llvm/Support/Compiler.h" 39 #include "llvm/Support/FormatVariadic.h" 40 #include "llvm/Support/MemoryBuffer.h" 41 #include "llvm/Support/raw_ostream.h" 42 #include <cstddef> 43 #include <map> 44 45 using namespace clang; 46 47 LLVM_ATTRIBUTE_UNUSED 48 static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; } 49 50 namespace { 51 /// Get start location of the Declarator from the TypeLoc. 52 /// E.g.: 53 /// loc of `(` in `int (a)` 54 /// loc of `*` in `int *(a)` 55 /// loc of the first `(` in `int (*a)(int)` 56 /// loc of the `*` in `int *(a)(int)` 57 /// loc of the first `*` in `const int *const *volatile a;` 58 /// 59 /// It is non-trivial to get the start location because TypeLocs are stored 60 /// inside out. In the example above `*volatile` is the TypeLoc returned 61 /// by `Decl.getTypeSourceInfo()`, and `*const` is what `.getPointeeLoc()` 62 /// returns. 63 struct GetStartLoc : TypeLocVisitor<GetStartLoc, SourceLocation> { 64 SourceLocation VisitParenTypeLoc(ParenTypeLoc T) { 65 auto L = Visit(T.getInnerLoc()); 66 if (L.isValid()) 67 return L; 68 return T.getLParenLoc(); 69 } 70 71 // Types spelled in the prefix part of the declarator. 72 SourceLocation VisitPointerTypeLoc(PointerTypeLoc T) { 73 return HandlePointer(T); 74 } 75 76 SourceLocation VisitMemberPointerTypeLoc(MemberPointerTypeLoc T) { 77 return HandlePointer(T); 78 } 79 80 SourceLocation VisitBlockPointerTypeLoc(BlockPointerTypeLoc T) { 81 return HandlePointer(T); 82 } 83 84 SourceLocation VisitReferenceTypeLoc(ReferenceTypeLoc T) { 85 return HandlePointer(T); 86 } 87 88 SourceLocation VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc T) { 89 return HandlePointer(T); 90 } 91 92 // All other cases are not important, as they are either part of declaration 93 // specifiers (e.g. inheritors of TypeSpecTypeLoc) or introduce modifiers on 94 // existing declarators (e.g. QualifiedTypeLoc). They cannot start the 95 // declarator themselves, but their underlying type can. 96 SourceLocation VisitTypeLoc(TypeLoc T) { 97 auto N = T.getNextTypeLoc(); 98 if (!N) 99 return SourceLocation(); 100 return Visit(N); 101 } 102 103 SourceLocation VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc T) { 104 if (T.getTypePtr()->hasTrailingReturn()) 105 return SourceLocation(); // avoid recursing into the suffix of declarator. 106 return VisitTypeLoc(T); 107 } 108 109 private: 110 template <class PtrLoc> SourceLocation HandlePointer(PtrLoc T) { 111 auto L = Visit(T.getPointeeLoc()); 112 if (L.isValid()) 113 return L; 114 return T.getLocalSourceRange().getBegin(); 115 } 116 }; 117 } // namespace 118 119 static syntax::NodeKind getOperatorNodeKind(const CXXOperatorCallExpr &E) { 120 switch (E.getOperator()) { 121 // Comparison 122 case OO_EqualEqual: 123 case OO_ExclaimEqual: 124 case OO_Greater: 125 case OO_GreaterEqual: 126 case OO_Less: 127 case OO_LessEqual: 128 case OO_Spaceship: 129 // Assignment 130 case OO_Equal: 131 case OO_SlashEqual: 132 case OO_PercentEqual: 133 case OO_CaretEqual: 134 case OO_PipeEqual: 135 case OO_LessLessEqual: 136 case OO_GreaterGreaterEqual: 137 case OO_PlusEqual: 138 case OO_MinusEqual: 139 case OO_StarEqual: 140 case OO_AmpEqual: 141 // Binary computation 142 case OO_Slash: 143 case OO_Percent: 144 case OO_Caret: 145 case OO_Pipe: 146 case OO_LessLess: 147 case OO_GreaterGreater: 148 case OO_AmpAmp: 149 case OO_PipePipe: 150 case OO_ArrowStar: 151 case OO_Comma: 152 return syntax::NodeKind::BinaryOperatorExpression; 153 case OO_Tilde: 154 case OO_Exclaim: 155 return syntax::NodeKind::PrefixUnaryOperatorExpression; 156 // Prefix/Postfix increment/decrement 157 case OO_PlusPlus: 158 case OO_MinusMinus: 159 switch (E.getNumArgs()) { 160 case 1: 161 return syntax::NodeKind::PrefixUnaryOperatorExpression; 162 case 2: 163 return syntax::NodeKind::PostfixUnaryOperatorExpression; 164 default: 165 llvm_unreachable("Invalid number of arguments for operator"); 166 } 167 // Operators that can be unary or binary 168 case OO_Plus: 169 case OO_Minus: 170 case OO_Star: 171 case OO_Amp: 172 switch (E.getNumArgs()) { 173 case 1: 174 return syntax::NodeKind::PrefixUnaryOperatorExpression; 175 case 2: 176 return syntax::NodeKind::BinaryOperatorExpression; 177 default: 178 llvm_unreachable("Invalid number of arguments for operator"); 179 } 180 return syntax::NodeKind::BinaryOperatorExpression; 181 // Not yet supported by SyntaxTree 182 case OO_New: 183 case OO_Delete: 184 case OO_Array_New: 185 case OO_Array_Delete: 186 case OO_Coawait: 187 case OO_Call: 188 case OO_Subscript: 189 case OO_Arrow: 190 return syntax::NodeKind::UnknownExpression; 191 case OO_Conditional: // not overloadable 192 case NUM_OVERLOADED_OPERATORS: 193 case OO_None: 194 llvm_unreachable("Not an overloadable operator"); 195 } 196 llvm_unreachable("Unknown OverloadedOperatorKind enum"); 197 } 198 199 /// Gets the range of declarator as defined by the C++ grammar. E.g. 200 /// `int a;` -> range of `a`, 201 /// `int *a;` -> range of `*a`, 202 /// `int a[10];` -> range of `a[10]`, 203 /// `int a[1][2][3];` -> range of `a[1][2][3]`, 204 /// `int *a = nullptr` -> range of `*a = nullptr`. 205 /// FIMXE: \p Name must be a source range, e.g. for `operator+`. 206 static SourceRange getDeclaratorRange(const SourceManager &SM, TypeLoc T, 207 SourceLocation Name, 208 SourceRange Initializer) { 209 SourceLocation Start = GetStartLoc().Visit(T); 210 SourceLocation End = T.getSourceRange().getEnd(); 211 assert(End.isValid()); 212 if (Name.isValid()) { 213 if (Start.isInvalid()) 214 Start = Name; 215 if (SM.isBeforeInTranslationUnit(End, Name)) 216 End = Name; 217 } 218 if (Initializer.isValid()) { 219 auto InitializerEnd = Initializer.getEnd(); 220 assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) || 221 End == InitializerEnd); 222 End = InitializerEnd; 223 } 224 return SourceRange(Start, End); 225 } 226 227 namespace { 228 /// All AST hierarchy roots that can be represented as pointers. 229 using ASTPtr = llvm::PointerUnion<Stmt *, Decl *>; 230 /// Maintains a mapping from AST to syntax tree nodes. This class will get more 231 /// complicated as we support more kinds of AST nodes, e.g. TypeLocs. 232 /// FIXME: expose this as public API. 233 class ASTToSyntaxMapping { 234 public: 235 void add(ASTPtr From, syntax::Tree *To) { 236 assert(To != nullptr); 237 assert(!From.isNull()); 238 239 bool Added = Nodes.insert({From, To}).second; 240 (void)Added; 241 assert(Added && "mapping added twice"); 242 } 243 244 syntax::Tree *find(ASTPtr P) const { return Nodes.lookup(P); } 245 246 private: 247 llvm::DenseMap<ASTPtr, syntax::Tree *> Nodes; 248 }; 249 } // namespace 250 251 /// A helper class for constructing the syntax tree while traversing a clang 252 /// AST. 253 /// 254 /// At each point of the traversal we maintain a list of pending nodes. 255 /// Initially all tokens are added as pending nodes. When processing a clang AST 256 /// node, the clients need to: 257 /// - create a corresponding syntax node, 258 /// - assign roles to all pending child nodes with 'markChild' and 259 /// 'markChildToken', 260 /// - replace the child nodes with the new syntax node in the pending list 261 /// with 'foldNode'. 262 /// 263 /// Note that all children are expected to be processed when building a node. 264 /// 265 /// Call finalize() to finish building the tree and consume the root node. 266 class syntax::TreeBuilder { 267 public: 268 TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) { 269 for (const auto &T : Arena.tokenBuffer().expandedTokens()) 270 LocationToToken.insert({T.location().getRawEncoding(), &T}); 271 } 272 273 llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); } 274 const SourceManager &sourceManager() const { return Arena.sourceManager(); } 275 276 /// Populate children for \p New node, assuming it covers tokens from \p 277 /// Range. 278 void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New, 279 ASTPtr From) { 280 assert(New); 281 Pending.foldChildren(Arena, Range, New); 282 if (From) 283 Mapping.add(From, New); 284 } 285 void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New, 286 TypeLoc L) { 287 // FIXME: add mapping for TypeLocs 288 foldNode(Range, New, nullptr); 289 } 290 291 /// Notifies that we should not consume trailing semicolon when computing 292 /// token range of \p D. 293 void noticeDeclWithoutSemicolon(Decl *D); 294 295 /// Mark the \p Child node with a corresponding \p Role. All marked children 296 /// should be consumed by foldNode. 297 /// When called on expressions (clang::Expr is derived from clang::Stmt), 298 /// wraps expressions into expression statement. 299 void markStmtChild(Stmt *Child, NodeRole Role); 300 /// Should be called for expressions in non-statement position to avoid 301 /// wrapping into expression statement. 302 void markExprChild(Expr *Child, NodeRole Role); 303 /// Set role for a token starting at \p Loc. 304 void markChildToken(SourceLocation Loc, NodeRole R); 305 /// Set role for \p T. 306 void markChildToken(const syntax::Token *T, NodeRole R); 307 308 /// Set role for \p N. 309 void markChild(syntax::Node *N, NodeRole R); 310 /// Set role for the syntax node matching \p N. 311 void markChild(ASTPtr N, NodeRole R); 312 313 /// Finish building the tree and consume the root node. 314 syntax::TranslationUnit *finalize() && { 315 auto Tokens = Arena.tokenBuffer().expandedTokens(); 316 assert(!Tokens.empty()); 317 assert(Tokens.back().kind() == tok::eof); 318 319 // Build the root of the tree, consuming all the children. 320 Pending.foldChildren(Arena, Tokens.drop_back(), 321 new (Arena.allocator()) syntax::TranslationUnit); 322 323 auto *TU = cast<syntax::TranslationUnit>(std::move(Pending).finalize()); 324 TU->assertInvariantsRecursive(); 325 return TU; 326 } 327 328 /// Finds a token starting at \p L. The token must exist if \p L is valid. 329 const syntax::Token *findToken(SourceLocation L) const; 330 331 /// Finds the syntax tokens corresponding to the \p SourceRange. 332 llvm::ArrayRef<syntax::Token> getRange(SourceRange Range) const { 333 assert(Range.isValid()); 334 return getRange(Range.getBegin(), Range.getEnd()); 335 } 336 337 /// Finds the syntax tokens corresponding to the passed source locations. 338 /// \p First is the start position of the first token and \p Last is the start 339 /// position of the last token. 340 llvm::ArrayRef<syntax::Token> getRange(SourceLocation First, 341 SourceLocation Last) const { 342 assert(First.isValid()); 343 assert(Last.isValid()); 344 assert(First == Last || 345 Arena.sourceManager().isBeforeInTranslationUnit(First, Last)); 346 return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); 347 } 348 349 llvm::ArrayRef<syntax::Token> 350 getTemplateRange(const ClassTemplateSpecializationDecl *D) const { 351 auto Tokens = getRange(D->getSourceRange()); 352 return maybeAppendSemicolon(Tokens, D); 353 } 354 355 /// Returns true if \p D is the last declarator in a chain and is thus 356 /// reponsible for creating SimpleDeclaration for the whole chain. 357 template <class T> 358 bool isResponsibleForCreatingDeclaration(const T *D) const { 359 static_assert((std::is_base_of<DeclaratorDecl, T>::value || 360 std::is_base_of<TypedefNameDecl, T>::value), 361 "only DeclaratorDecl and TypedefNameDecl are supported."); 362 363 const Decl *Next = D->getNextDeclInContext(); 364 365 // There's no next sibling, this one is responsible. 366 if (Next == nullptr) { 367 return true; 368 } 369 const auto *NextT = llvm::dyn_cast<T>(Next); 370 371 // Next sibling is not the same type, this one is responsible. 372 if (NextT == nullptr) { 373 return true; 374 } 375 // Next sibling doesn't begin at the same loc, it must be a different 376 // declaration, so this declarator is responsible. 377 if (NextT->getBeginLoc() != D->getBeginLoc()) { 378 return true; 379 } 380 381 // NextT is a member of the same declaration, and we need the last member to 382 // create declaration. This one is not responsible. 383 return false; 384 } 385 386 llvm::ArrayRef<syntax::Token> getDeclarationRange(Decl *D) { 387 llvm::ArrayRef<clang::syntax::Token> Tokens; 388 // We want to drop the template parameters for specializations. 389 if (const auto *S = llvm::dyn_cast<TagDecl>(D)) 390 Tokens = getRange(S->TypeDecl::getBeginLoc(), S->getEndLoc()); 391 else 392 Tokens = getRange(D->getSourceRange()); 393 return maybeAppendSemicolon(Tokens, D); 394 } 395 396 llvm::ArrayRef<syntax::Token> getExprRange(const Expr *E) const { 397 return getRange(E->getSourceRange()); 398 } 399 400 /// Find the adjusted range for the statement, consuming the trailing 401 /// semicolon when needed. 402 llvm::ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const { 403 auto Tokens = getRange(S->getSourceRange()); 404 if (isa<CompoundStmt>(S)) 405 return Tokens; 406 407 // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and 408 // all statements that end with those. Consume this semicolon here. 409 if (Tokens.back().kind() == tok::semi) 410 return Tokens; 411 return withTrailingSemicolon(Tokens); 412 } 413 414 private: 415 llvm::ArrayRef<syntax::Token> 416 maybeAppendSemicolon(llvm::ArrayRef<syntax::Token> Tokens, 417 const Decl *D) const { 418 if (llvm::isa<NamespaceDecl>(D)) 419 return Tokens; 420 if (DeclsWithoutSemicolons.count(D)) 421 return Tokens; 422 // FIXME: do not consume trailing semicolon on function definitions. 423 // Most declarations own a semicolon in syntax trees, but not in clang AST. 424 return withTrailingSemicolon(Tokens); 425 } 426 427 llvm::ArrayRef<syntax::Token> 428 withTrailingSemicolon(llvm::ArrayRef<syntax::Token> Tokens) const { 429 assert(!Tokens.empty()); 430 assert(Tokens.back().kind() != tok::eof); 431 // We never consume 'eof', so looking at the next token is ok. 432 if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) 433 return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1); 434 return Tokens; 435 } 436 437 void setRole(syntax::Node *N, NodeRole R) { 438 assert(N->role() == NodeRole::Detached); 439 N->setRole(R); 440 } 441 442 /// A collection of trees covering the input tokens. 443 /// When created, each tree corresponds to a single token in the file. 444 /// Clients call 'foldChildren' to attach one or more subtrees to a parent 445 /// node and update the list of trees accordingly. 446 /// 447 /// Ensures that added nodes properly nest and cover the whole token stream. 448 struct Forest { 449 Forest(syntax::Arena &A) { 450 assert(!A.tokenBuffer().expandedTokens().empty()); 451 assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof); 452 // Create all leaf nodes. 453 // Note that we do not have 'eof' in the tree. 454 for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) { 455 auto *L = new (A.allocator()) syntax::Leaf(&T); 456 L->Original = true; 457 L->CanModify = A.tokenBuffer().spelledForExpanded(T).hasValue(); 458 Trees.insert(Trees.end(), {&T, L}); 459 } 460 } 461 462 void assignRole(llvm::ArrayRef<syntax::Token> Range, 463 syntax::NodeRole Role) { 464 assert(!Range.empty()); 465 auto It = Trees.lower_bound(Range.begin()); 466 assert(It != Trees.end() && "no node found"); 467 assert(It->first == Range.begin() && "no child with the specified range"); 468 assert((std::next(It) == Trees.end() || 469 std::next(It)->first == Range.end()) && 470 "no child with the specified range"); 471 assert(It->second->role() == NodeRole::Detached && 472 "re-assigning role for a child"); 473 It->second->setRole(Role); 474 } 475 476 /// Add \p Node to the forest and attach child nodes based on \p Tokens. 477 void foldChildren(const syntax::Arena &A, 478 llvm::ArrayRef<syntax::Token> Tokens, 479 syntax::Tree *Node) { 480 // Attach children to `Node`. 481 assert(Node->firstChild() == nullptr && "node already has children"); 482 483 auto *FirstToken = Tokens.begin(); 484 auto BeginChildren = Trees.lower_bound(FirstToken); 485 486 assert((BeginChildren == Trees.end() || 487 BeginChildren->first == FirstToken) && 488 "fold crosses boundaries of existing subtrees"); 489 auto EndChildren = Trees.lower_bound(Tokens.end()); 490 assert( 491 (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) && 492 "fold crosses boundaries of existing subtrees"); 493 494 // We need to go in reverse order, because we can only prepend. 495 for (auto It = EndChildren; It != BeginChildren; --It) { 496 auto *C = std::prev(It)->second; 497 if (C->role() == NodeRole::Detached) 498 C->setRole(NodeRole::Unknown); 499 Node->prependChildLowLevel(C); 500 } 501 502 // Mark that this node came from the AST and is backed by the source code. 503 Node->Original = true; 504 Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue(); 505 506 Trees.erase(BeginChildren, EndChildren); 507 Trees.insert({FirstToken, Node}); 508 } 509 510 // EXPECTS: all tokens were consumed and are owned by a single root node. 511 syntax::Node *finalize() && { 512 assert(Trees.size() == 1); 513 auto *Root = Trees.begin()->second; 514 Trees = {}; 515 return Root; 516 } 517 518 std::string str(const syntax::Arena &A) const { 519 std::string R; 520 for (auto It = Trees.begin(); It != Trees.end(); ++It) { 521 unsigned CoveredTokens = 522 It != Trees.end() 523 ? (std::next(It)->first - It->first) 524 : A.tokenBuffer().expandedTokens().end() - It->first; 525 526 R += std::string(llvm::formatv( 527 "- '{0}' covers '{1}'+{2} tokens\n", It->second->kind(), 528 It->first->text(A.sourceManager()), CoveredTokens)); 529 R += It->second->dump(A); 530 } 531 return R; 532 } 533 534 private: 535 /// Maps from the start token to a subtree starting at that token. 536 /// Keys in the map are pointers into the array of expanded tokens, so 537 /// pointer order corresponds to the order of preprocessor tokens. 538 std::map<const syntax::Token *, syntax::Node *> Trees; 539 }; 540 541 /// For debugging purposes. 542 std::string str() { return Pending.str(Arena); } 543 544 syntax::Arena &Arena; 545 /// To quickly find tokens by their start location. 546 llvm::DenseMap</*SourceLocation*/ unsigned, const syntax::Token *> 547 LocationToToken; 548 Forest Pending; 549 llvm::DenseSet<Decl *> DeclsWithoutSemicolons; 550 ASTToSyntaxMapping Mapping; 551 }; 552 553 namespace { 554 class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> { 555 public: 556 explicit BuildTreeVisitor(ASTContext &Context, syntax::TreeBuilder &Builder) 557 : Builder(Builder), Context(Context) {} 558 559 bool shouldTraversePostOrder() const { return true; } 560 561 bool WalkUpFromDeclaratorDecl(DeclaratorDecl *DD) { 562 return processDeclaratorAndDeclaration(DD); 563 } 564 565 bool WalkUpFromTypedefNameDecl(TypedefNameDecl *TD) { 566 return processDeclaratorAndDeclaration(TD); 567 } 568 569 bool VisitDecl(Decl *D) { 570 assert(!D->isImplicit()); 571 Builder.foldNode(Builder.getDeclarationRange(D), 572 new (allocator()) syntax::UnknownDeclaration(), D); 573 return true; 574 } 575 576 // RAV does not call WalkUpFrom* on explicit instantiations, so we have to 577 // override Traverse. 578 // FIXME: make RAV call WalkUpFrom* instead. 579 bool 580 TraverseClassTemplateSpecializationDecl(ClassTemplateSpecializationDecl *C) { 581 if (!RecursiveASTVisitor::TraverseClassTemplateSpecializationDecl(C)) 582 return false; 583 if (C->isExplicitSpecialization()) 584 return true; // we are only interested in explicit instantiations. 585 auto *Declaration = 586 cast<syntax::SimpleDeclaration>(handleFreeStandingTagDecl(C)); 587 foldExplicitTemplateInstantiation( 588 Builder.getTemplateRange(C), Builder.findToken(C->getExternLoc()), 589 Builder.findToken(C->getTemplateKeywordLoc()), Declaration, C); 590 return true; 591 } 592 593 bool WalkUpFromTemplateDecl(TemplateDecl *S) { 594 foldTemplateDeclaration( 595 Builder.getDeclarationRange(S), 596 Builder.findToken(S->getTemplateParameters()->getTemplateLoc()), 597 Builder.getDeclarationRange(S->getTemplatedDecl()), S); 598 return true; 599 } 600 601 bool WalkUpFromTagDecl(TagDecl *C) { 602 // FIXME: build the ClassSpecifier node. 603 if (!C->isFreeStanding()) { 604 assert(C->getNumTemplateParameterLists() == 0); 605 return true; 606 } 607 handleFreeStandingTagDecl(C); 608 return true; 609 } 610 611 syntax::Declaration *handleFreeStandingTagDecl(TagDecl *C) { 612 assert(C->isFreeStanding()); 613 // Class is a declaration specifier and needs a spanning declaration node. 614 auto DeclarationRange = Builder.getDeclarationRange(C); 615 syntax::Declaration *Result = new (allocator()) syntax::SimpleDeclaration; 616 Builder.foldNode(DeclarationRange, Result, nullptr); 617 618 // Build TemplateDeclaration nodes if we had template parameters. 619 auto ConsumeTemplateParameters = [&](const TemplateParameterList &L) { 620 const auto *TemplateKW = Builder.findToken(L.getTemplateLoc()); 621 auto R = llvm::makeArrayRef(TemplateKW, DeclarationRange.end()); 622 Result = 623 foldTemplateDeclaration(R, TemplateKW, DeclarationRange, nullptr); 624 DeclarationRange = R; 625 }; 626 if (auto *S = llvm::dyn_cast<ClassTemplatePartialSpecializationDecl>(C)) 627 ConsumeTemplateParameters(*S->getTemplateParameters()); 628 for (unsigned I = C->getNumTemplateParameterLists(); 0 < I; --I) 629 ConsumeTemplateParameters(*C->getTemplateParameterList(I - 1)); 630 return Result; 631 } 632 633 bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) { 634 // We do not want to call VisitDecl(), the declaration for translation 635 // unit is built by finalize(). 636 return true; 637 } 638 639 bool WalkUpFromCompoundStmt(CompoundStmt *S) { 640 using NodeRole = syntax::NodeRole; 641 642 Builder.markChildToken(S->getLBracLoc(), NodeRole::OpenParen); 643 for (auto *Child : S->body()) 644 Builder.markStmtChild(Child, NodeRole::CompoundStatement_statement); 645 Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen); 646 647 Builder.foldNode(Builder.getStmtRange(S), 648 new (allocator()) syntax::CompoundStatement, S); 649 return true; 650 } 651 652 // Some statements are not yet handled by syntax trees. 653 bool WalkUpFromStmt(Stmt *S) { 654 Builder.foldNode(Builder.getStmtRange(S), 655 new (allocator()) syntax::UnknownStatement, S); 656 return true; 657 } 658 659 bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) { 660 // We override to traverse range initializer as VarDecl. 661 // RAV traverses it as a statement, we produce invalid node kinds in that 662 // case. 663 // FIXME: should do this in RAV instead? 664 bool Result = [&, this]() { 665 if (S->getInit() && !TraverseStmt(S->getInit())) 666 return false; 667 if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) 668 return false; 669 if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) 670 return false; 671 if (S->getBody() && !TraverseStmt(S->getBody())) 672 return false; 673 return true; 674 }(); 675 WalkUpFromCXXForRangeStmt(S); 676 return Result; 677 } 678 679 bool TraverseStmt(Stmt *S) { 680 if (auto *DS = llvm::dyn_cast_or_null<DeclStmt>(S)) { 681 // We want to consume the semicolon, make sure SimpleDeclaration does not. 682 for (auto *D : DS->decls()) 683 Builder.noticeDeclWithoutSemicolon(D); 684 } else if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) { 685 return RecursiveASTVisitor::TraverseStmt(E->IgnoreImplicit()); 686 } 687 return RecursiveASTVisitor::TraverseStmt(S); 688 } 689 690 // Some expressions are not yet handled by syntax trees. 691 bool WalkUpFromExpr(Expr *E) { 692 assert(!isImplicitExpr(E) && "should be handled by TraverseStmt"); 693 Builder.foldNode(Builder.getExprRange(E), 694 new (allocator()) syntax::UnknownExpression, E); 695 return true; 696 } 697 698 syntax::NestedNameSpecifier * 699 BuildNestedNameSpecifier(NestedNameSpecifierLoc QualifierLoc) { 700 if (!QualifierLoc) 701 return nullptr; 702 for (auto it = QualifierLoc; it; it = it.getPrefix()) { 703 auto *NS = new (allocator()) syntax::NameSpecifier; 704 Builder.foldNode(Builder.getRange(it.getLocalSourceRange()), NS, nullptr); 705 Builder.markChild(NS, syntax::NodeRole::NestedNameSpecifier_specifier); 706 } 707 auto *NNS = new (allocator()) syntax::NestedNameSpecifier; 708 Builder.foldNode(Builder.getRange(QualifierLoc.getSourceRange()), NNS, 709 nullptr); 710 return NNS; 711 } 712 713 bool TraverseUserDefinedLiteral(UserDefinedLiteral *S) { 714 // The semantic AST node `UserDefinedLiteral` (UDL) may have one child node 715 // referencing the location of the UDL suffix (`_w` in `1.2_w`). The 716 // UDL suffix location does not point to the beginning of a token, so we 717 // can't represent the UDL suffix as a separate syntax tree node. 718 719 return WalkUpFromUserDefinedLiteral(S); 720 } 721 722 syntax::UserDefinedLiteralExpression * 723 buildUserDefinedLiteral(UserDefinedLiteral *S) { 724 switch (S->getLiteralOperatorKind()) { 725 case clang::UserDefinedLiteral::LOK_Integer: 726 return new (allocator()) syntax::IntegerUserDefinedLiteralExpression; 727 case clang::UserDefinedLiteral::LOK_Floating: 728 return new (allocator()) syntax::FloatUserDefinedLiteralExpression; 729 case clang::UserDefinedLiteral::LOK_Character: 730 return new (allocator()) syntax::CharUserDefinedLiteralExpression; 731 case clang::UserDefinedLiteral::LOK_String: 732 return new (allocator()) syntax::StringUserDefinedLiteralExpression; 733 case clang::UserDefinedLiteral::LOK_Raw: 734 case clang::UserDefinedLiteral::LOK_Template: 735 // For raw literal operator and numeric literal operator template we 736 // cannot get the type of the operand in the semantic AST. We get this 737 // information from the token. As integer and floating point have the same 738 // token kind, we run `NumericLiteralParser` again to distinguish them. 739 auto TokLoc = S->getBeginLoc(); 740 auto TokSpelling = 741 Builder.findToken(TokLoc)->text(Context.getSourceManager()); 742 auto Literal = 743 NumericLiteralParser(TokSpelling, TokLoc, Context.getSourceManager(), 744 Context.getLangOpts(), Context.getTargetInfo(), 745 Context.getDiagnostics()); 746 if (Literal.isIntegerLiteral()) 747 return new (allocator()) syntax::IntegerUserDefinedLiteralExpression; 748 else { 749 assert(Literal.isFloatingLiteral()); 750 return new (allocator()) syntax::FloatUserDefinedLiteralExpression; 751 } 752 } 753 llvm_unreachable("Unknown literal operator kind."); 754 } 755 756 bool WalkUpFromUserDefinedLiteral(UserDefinedLiteral *S) { 757 Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken); 758 Builder.foldNode(Builder.getExprRange(S), buildUserDefinedLiteral(S), S); 759 return true; 760 } 761 762 bool WalkUpFromDeclRefExpr(DeclRefExpr *S) { 763 if (auto *NNS = BuildNestedNameSpecifier(S->getQualifierLoc())) 764 Builder.markChild(NNS, syntax::NodeRole::IdExpression_qualifier); 765 766 auto *unqualifiedId = new (allocator()) syntax::UnqualifiedId; 767 // Get `UnqualifiedId` from `DeclRefExpr`. 768 // FIXME: Extract this logic so that it can be used by `MemberExpr`, 769 // and other semantic constructs, now it is tied to `DeclRefExpr`. 770 if (!S->hasExplicitTemplateArgs()) { 771 Builder.foldNode(Builder.getRange(S->getNameInfo().getSourceRange()), 772 unqualifiedId, nullptr); 773 } else { 774 auto templateIdSourceRange = 775 SourceRange(S->getNameInfo().getBeginLoc(), S->getRAngleLoc()); 776 Builder.foldNode(Builder.getRange(templateIdSourceRange), unqualifiedId, 777 nullptr); 778 } 779 Builder.markChild(unqualifiedId, syntax::NodeRole::IdExpression_id); 780 781 Builder.foldNode(Builder.getExprRange(S), 782 new (allocator()) syntax::IdExpression, S); 783 return true; 784 } 785 786 bool WalkUpFromParenExpr(ParenExpr *S) { 787 Builder.markChildToken(S->getLParen(), syntax::NodeRole::OpenParen); 788 Builder.markExprChild(S->getSubExpr(), 789 syntax::NodeRole::ParenExpression_subExpression); 790 Builder.markChildToken(S->getRParen(), syntax::NodeRole::CloseParen); 791 Builder.foldNode(Builder.getExprRange(S), 792 new (allocator()) syntax::ParenExpression, S); 793 return true; 794 } 795 796 bool WalkUpFromIntegerLiteral(IntegerLiteral *S) { 797 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 798 Builder.foldNode(Builder.getExprRange(S), 799 new (allocator()) syntax::IntegerLiteralExpression, S); 800 return true; 801 } 802 803 bool WalkUpFromCharacterLiteral(CharacterLiteral *S) { 804 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 805 Builder.foldNode(Builder.getExprRange(S), 806 new (allocator()) syntax::CharacterLiteralExpression, S); 807 return true; 808 } 809 810 bool WalkUpFromFloatingLiteral(FloatingLiteral *S) { 811 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 812 Builder.foldNode(Builder.getExprRange(S), 813 new (allocator()) syntax::FloatingLiteralExpression, S); 814 return true; 815 } 816 817 bool WalkUpFromStringLiteral(StringLiteral *S) { 818 Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken); 819 Builder.foldNode(Builder.getExprRange(S), 820 new (allocator()) syntax::StringLiteralExpression, S); 821 return true; 822 } 823 824 bool WalkUpFromCXXBoolLiteralExpr(CXXBoolLiteralExpr *S) { 825 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 826 Builder.foldNode(Builder.getExprRange(S), 827 new (allocator()) syntax::BoolLiteralExpression, S); 828 return true; 829 } 830 831 bool WalkUpFromCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *S) { 832 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 833 Builder.foldNode(Builder.getExprRange(S), 834 new (allocator()) syntax::CxxNullPtrExpression, S); 835 return true; 836 } 837 838 bool WalkUpFromUnaryOperator(UnaryOperator *S) { 839 Builder.markChildToken(S->getOperatorLoc(), 840 syntax::NodeRole::OperatorExpression_operatorToken); 841 Builder.markExprChild(S->getSubExpr(), 842 syntax::NodeRole::UnaryOperatorExpression_operand); 843 844 if (S->isPostfix()) 845 Builder.foldNode(Builder.getExprRange(S), 846 new (allocator()) syntax::PostfixUnaryOperatorExpression, 847 S); 848 else 849 Builder.foldNode(Builder.getExprRange(S), 850 new (allocator()) syntax::PrefixUnaryOperatorExpression, 851 S); 852 853 return true; 854 } 855 856 bool WalkUpFromBinaryOperator(BinaryOperator *S) { 857 Builder.markExprChild( 858 S->getLHS(), syntax::NodeRole::BinaryOperatorExpression_leftHandSide); 859 Builder.markChildToken(S->getOperatorLoc(), 860 syntax::NodeRole::OperatorExpression_operatorToken); 861 Builder.markExprChild( 862 S->getRHS(), syntax::NodeRole::BinaryOperatorExpression_rightHandSide); 863 Builder.foldNode(Builder.getExprRange(S), 864 new (allocator()) syntax::BinaryOperatorExpression, S); 865 return true; 866 } 867 868 bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *S) { 869 if (getOperatorNodeKind(*S) == 870 syntax::NodeKind::PostfixUnaryOperatorExpression) { 871 // A postfix unary operator is declared as taking two operands. The 872 // second operand is used to distinguish from its prefix counterpart. In 873 // the semantic AST this "phantom" operand is represented as a 874 // `IntegerLiteral` with invalid `SourceLocation`. We skip visiting this 875 // operand because it does not correspond to anything written in source 876 // code 877 for (auto *child : S->children()) { 878 if (child->getSourceRange().isInvalid()) 879 continue; 880 if (!TraverseStmt(child)) 881 return false; 882 } 883 return WalkUpFromCXXOperatorCallExpr(S); 884 } else 885 return RecursiveASTVisitor::TraverseCXXOperatorCallExpr(S); 886 } 887 888 bool WalkUpFromCXXOperatorCallExpr(CXXOperatorCallExpr *S) { 889 switch (getOperatorNodeKind(*S)) { 890 case syntax::NodeKind::BinaryOperatorExpression: 891 Builder.markExprChild( 892 S->getArg(0), 893 syntax::NodeRole::BinaryOperatorExpression_leftHandSide); 894 Builder.markChildToken( 895 S->getOperatorLoc(), 896 syntax::NodeRole::OperatorExpression_operatorToken); 897 Builder.markExprChild( 898 S->getArg(1), 899 syntax::NodeRole::BinaryOperatorExpression_rightHandSide); 900 Builder.foldNode(Builder.getExprRange(S), 901 new (allocator()) syntax::BinaryOperatorExpression, S); 902 return true; 903 case syntax::NodeKind::PrefixUnaryOperatorExpression: 904 Builder.markChildToken( 905 S->getOperatorLoc(), 906 syntax::NodeRole::OperatorExpression_operatorToken); 907 Builder.markExprChild(S->getArg(0), 908 syntax::NodeRole::UnaryOperatorExpression_operand); 909 Builder.foldNode(Builder.getExprRange(S), 910 new (allocator()) syntax::PrefixUnaryOperatorExpression, 911 S); 912 return true; 913 case syntax::NodeKind::PostfixUnaryOperatorExpression: 914 Builder.markChildToken( 915 S->getOperatorLoc(), 916 syntax::NodeRole::OperatorExpression_operatorToken); 917 Builder.markExprChild(S->getArg(0), 918 syntax::NodeRole::UnaryOperatorExpression_operand); 919 Builder.foldNode(Builder.getExprRange(S), 920 new (allocator()) syntax::PostfixUnaryOperatorExpression, 921 S); 922 return true; 923 case syntax::NodeKind::UnknownExpression: 924 return RecursiveASTVisitor::WalkUpFromCXXOperatorCallExpr(S); 925 default: 926 llvm_unreachable("getOperatorNodeKind() does not return this value"); 927 } 928 } 929 930 bool WalkUpFromNamespaceDecl(NamespaceDecl *S) { 931 auto Tokens = Builder.getDeclarationRange(S); 932 if (Tokens.front().kind() == tok::coloncolon) { 933 // Handle nested namespace definitions. Those start at '::' token, e.g. 934 // namespace a^::b {} 935 // FIXME: build corresponding nodes for the name of this namespace. 936 return true; 937 } 938 Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition, S); 939 return true; 940 } 941 942 bool TraverseParenTypeLoc(ParenTypeLoc L) { 943 // We reverse order of traversal to get the proper syntax structure. 944 if (!WalkUpFromParenTypeLoc(L)) 945 return false; 946 return TraverseTypeLoc(L.getInnerLoc()); 947 } 948 949 bool WalkUpFromParenTypeLoc(ParenTypeLoc L) { 950 Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); 951 Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); 952 Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getRParenLoc()), 953 new (allocator()) syntax::ParenDeclarator, L); 954 return true; 955 } 956 957 // Declarator chunks, they are produced by type locs and some clang::Decls. 958 bool WalkUpFromArrayTypeLoc(ArrayTypeLoc L) { 959 Builder.markChildToken(L.getLBracketLoc(), syntax::NodeRole::OpenParen); 960 Builder.markExprChild(L.getSizeExpr(), 961 syntax::NodeRole::ArraySubscript_sizeExpression); 962 Builder.markChildToken(L.getRBracketLoc(), syntax::NodeRole::CloseParen); 963 Builder.foldNode(Builder.getRange(L.getLBracketLoc(), L.getRBracketLoc()), 964 new (allocator()) syntax::ArraySubscript, L); 965 return true; 966 } 967 968 bool WalkUpFromFunctionTypeLoc(FunctionTypeLoc L) { 969 Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); 970 for (auto *P : L.getParams()) { 971 Builder.markChild(P, syntax::NodeRole::ParametersAndQualifiers_parameter); 972 } 973 Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); 974 Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getEndLoc()), 975 new (allocator()) syntax::ParametersAndQualifiers, L); 976 return true; 977 } 978 979 bool WalkUpFromFunctionProtoTypeLoc(FunctionProtoTypeLoc L) { 980 if (!L.getTypePtr()->hasTrailingReturn()) 981 return WalkUpFromFunctionTypeLoc(L); 982 983 auto *TrailingReturnTokens = BuildTrailingReturn(L); 984 // Finish building the node for parameters. 985 Builder.markChild(TrailingReturnTokens, 986 syntax::NodeRole::ParametersAndQualifiers_trailingReturn); 987 return WalkUpFromFunctionTypeLoc(L); 988 } 989 990 bool WalkUpFromMemberPointerTypeLoc(MemberPointerTypeLoc L) { 991 auto SR = L.getLocalSourceRange(); 992 Builder.foldNode(Builder.getRange(SR), 993 new (allocator()) syntax::MemberPointer, L); 994 return true; 995 } 996 997 // The code below is very regular, it could even be generated with some 998 // preprocessor magic. We merely assign roles to the corresponding children 999 // and fold resulting nodes. 1000 bool WalkUpFromDeclStmt(DeclStmt *S) { 1001 Builder.foldNode(Builder.getStmtRange(S), 1002 new (allocator()) syntax::DeclarationStatement, S); 1003 return true; 1004 } 1005 1006 bool WalkUpFromNullStmt(NullStmt *S) { 1007 Builder.foldNode(Builder.getStmtRange(S), 1008 new (allocator()) syntax::EmptyStatement, S); 1009 return true; 1010 } 1011 1012 bool WalkUpFromSwitchStmt(SwitchStmt *S) { 1013 Builder.markChildToken(S->getSwitchLoc(), 1014 syntax::NodeRole::IntroducerKeyword); 1015 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1016 Builder.foldNode(Builder.getStmtRange(S), 1017 new (allocator()) syntax::SwitchStatement, S); 1018 return true; 1019 } 1020 1021 bool WalkUpFromCaseStmt(CaseStmt *S) { 1022 Builder.markChildToken(S->getKeywordLoc(), 1023 syntax::NodeRole::IntroducerKeyword); 1024 Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value); 1025 Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 1026 Builder.foldNode(Builder.getStmtRange(S), 1027 new (allocator()) syntax::CaseStatement, S); 1028 return true; 1029 } 1030 1031 bool WalkUpFromDefaultStmt(DefaultStmt *S) { 1032 Builder.markChildToken(S->getKeywordLoc(), 1033 syntax::NodeRole::IntroducerKeyword); 1034 Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 1035 Builder.foldNode(Builder.getStmtRange(S), 1036 new (allocator()) syntax::DefaultStatement, S); 1037 return true; 1038 } 1039 1040 bool WalkUpFromIfStmt(IfStmt *S) { 1041 Builder.markChildToken(S->getIfLoc(), syntax::NodeRole::IntroducerKeyword); 1042 Builder.markStmtChild(S->getThen(), 1043 syntax::NodeRole::IfStatement_thenStatement); 1044 Builder.markChildToken(S->getElseLoc(), 1045 syntax::NodeRole::IfStatement_elseKeyword); 1046 Builder.markStmtChild(S->getElse(), 1047 syntax::NodeRole::IfStatement_elseStatement); 1048 Builder.foldNode(Builder.getStmtRange(S), 1049 new (allocator()) syntax::IfStatement, S); 1050 return true; 1051 } 1052 1053 bool WalkUpFromForStmt(ForStmt *S) { 1054 Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 1055 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1056 Builder.foldNode(Builder.getStmtRange(S), 1057 new (allocator()) syntax::ForStatement, S); 1058 return true; 1059 } 1060 1061 bool WalkUpFromWhileStmt(WhileStmt *S) { 1062 Builder.markChildToken(S->getWhileLoc(), 1063 syntax::NodeRole::IntroducerKeyword); 1064 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1065 Builder.foldNode(Builder.getStmtRange(S), 1066 new (allocator()) syntax::WhileStatement, S); 1067 return true; 1068 } 1069 1070 bool WalkUpFromContinueStmt(ContinueStmt *S) { 1071 Builder.markChildToken(S->getContinueLoc(), 1072 syntax::NodeRole::IntroducerKeyword); 1073 Builder.foldNode(Builder.getStmtRange(S), 1074 new (allocator()) syntax::ContinueStatement, S); 1075 return true; 1076 } 1077 1078 bool WalkUpFromBreakStmt(BreakStmt *S) { 1079 Builder.markChildToken(S->getBreakLoc(), 1080 syntax::NodeRole::IntroducerKeyword); 1081 Builder.foldNode(Builder.getStmtRange(S), 1082 new (allocator()) syntax::BreakStatement, S); 1083 return true; 1084 } 1085 1086 bool WalkUpFromReturnStmt(ReturnStmt *S) { 1087 Builder.markChildToken(S->getReturnLoc(), 1088 syntax::NodeRole::IntroducerKeyword); 1089 Builder.markExprChild(S->getRetValue(), 1090 syntax::NodeRole::ReturnStatement_value); 1091 Builder.foldNode(Builder.getStmtRange(S), 1092 new (allocator()) syntax::ReturnStatement, S); 1093 return true; 1094 } 1095 1096 bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) { 1097 Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 1098 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1099 Builder.foldNode(Builder.getStmtRange(S), 1100 new (allocator()) syntax::RangeBasedForStatement, S); 1101 return true; 1102 } 1103 1104 bool WalkUpFromEmptyDecl(EmptyDecl *S) { 1105 Builder.foldNode(Builder.getDeclarationRange(S), 1106 new (allocator()) syntax::EmptyDeclaration, S); 1107 return true; 1108 } 1109 1110 bool WalkUpFromStaticAssertDecl(StaticAssertDecl *S) { 1111 Builder.markExprChild(S->getAssertExpr(), 1112 syntax::NodeRole::StaticAssertDeclaration_condition); 1113 Builder.markExprChild(S->getMessage(), 1114 syntax::NodeRole::StaticAssertDeclaration_message); 1115 Builder.foldNode(Builder.getDeclarationRange(S), 1116 new (allocator()) syntax::StaticAssertDeclaration, S); 1117 return true; 1118 } 1119 1120 bool WalkUpFromLinkageSpecDecl(LinkageSpecDecl *S) { 1121 Builder.foldNode(Builder.getDeclarationRange(S), 1122 new (allocator()) syntax::LinkageSpecificationDeclaration, 1123 S); 1124 return true; 1125 } 1126 1127 bool WalkUpFromNamespaceAliasDecl(NamespaceAliasDecl *S) { 1128 Builder.foldNode(Builder.getDeclarationRange(S), 1129 new (allocator()) syntax::NamespaceAliasDefinition, S); 1130 return true; 1131 } 1132 1133 bool WalkUpFromUsingDirectiveDecl(UsingDirectiveDecl *S) { 1134 Builder.foldNode(Builder.getDeclarationRange(S), 1135 new (allocator()) syntax::UsingNamespaceDirective, S); 1136 return true; 1137 } 1138 1139 bool WalkUpFromUsingDecl(UsingDecl *S) { 1140 Builder.foldNode(Builder.getDeclarationRange(S), 1141 new (allocator()) syntax::UsingDeclaration, S); 1142 return true; 1143 } 1144 1145 bool WalkUpFromUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *S) { 1146 Builder.foldNode(Builder.getDeclarationRange(S), 1147 new (allocator()) syntax::UsingDeclaration, S); 1148 return true; 1149 } 1150 1151 bool WalkUpFromUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *S) { 1152 Builder.foldNode(Builder.getDeclarationRange(S), 1153 new (allocator()) syntax::UsingDeclaration, S); 1154 return true; 1155 } 1156 1157 bool WalkUpFromTypeAliasDecl(TypeAliasDecl *S) { 1158 Builder.foldNode(Builder.getDeclarationRange(S), 1159 new (allocator()) syntax::TypeAliasDeclaration, S); 1160 return true; 1161 } 1162 1163 private: 1164 template <class T> SourceLocation getQualifiedNameStart(T *D) { 1165 static_assert((std::is_base_of<DeclaratorDecl, T>::value || 1166 std::is_base_of<TypedefNameDecl, T>::value), 1167 "only DeclaratorDecl and TypedefNameDecl are supported."); 1168 1169 auto DN = D->getDeclName(); 1170 bool IsAnonymous = DN.isIdentifier() && !DN.getAsIdentifierInfo(); 1171 if (IsAnonymous) 1172 return SourceLocation(); 1173 1174 if (const auto *DD = llvm::dyn_cast<DeclaratorDecl>(D)) { 1175 if (DD->getQualifierLoc()) { 1176 return DD->getQualifierLoc().getBeginLoc(); 1177 } 1178 } 1179 1180 return D->getLocation(); 1181 } 1182 1183 SourceRange getInitializerRange(Decl *D) { 1184 if (auto *V = llvm::dyn_cast<VarDecl>(D)) { 1185 auto *I = V->getInit(); 1186 // Initializers in range-based-for are not part of the declarator 1187 if (I && !V->isCXXForRangeDecl()) 1188 return I->getSourceRange(); 1189 } 1190 1191 return SourceRange(); 1192 } 1193 1194 /// Folds SimpleDeclarator node (if present) and in case this is the last 1195 /// declarator in the chain it also folds SimpleDeclaration node. 1196 template <class T> bool processDeclaratorAndDeclaration(T *D) { 1197 SourceRange Initializer = getInitializerRange(D); 1198 auto Range = getDeclaratorRange(Builder.sourceManager(), 1199 D->getTypeSourceInfo()->getTypeLoc(), 1200 getQualifiedNameStart(D), Initializer); 1201 1202 // There doesn't have to be a declarator (e.g. `void foo(int)` only has 1203 // declaration, but no declarator). 1204 if (Range.getBegin().isValid()) { 1205 auto *N = new (allocator()) syntax::SimpleDeclarator; 1206 Builder.foldNode(Builder.getRange(Range), N, nullptr); 1207 Builder.markChild(N, syntax::NodeRole::SimpleDeclaration_declarator); 1208 } 1209 1210 if (Builder.isResponsibleForCreatingDeclaration(D)) { 1211 Builder.foldNode(Builder.getDeclarationRange(D), 1212 new (allocator()) syntax::SimpleDeclaration, D); 1213 } 1214 return true; 1215 } 1216 1217 /// Returns the range of the built node. 1218 syntax::TrailingReturnType *BuildTrailingReturn(FunctionProtoTypeLoc L) { 1219 assert(L.getTypePtr()->hasTrailingReturn()); 1220 1221 auto ReturnedType = L.getReturnLoc(); 1222 // Build node for the declarator, if any. 1223 auto ReturnDeclaratorRange = 1224 getDeclaratorRange(this->Builder.sourceManager(), ReturnedType, 1225 /*Name=*/SourceLocation(), 1226 /*Initializer=*/SourceLocation()); 1227 syntax::SimpleDeclarator *ReturnDeclarator = nullptr; 1228 if (ReturnDeclaratorRange.isValid()) { 1229 ReturnDeclarator = new (allocator()) syntax::SimpleDeclarator; 1230 Builder.foldNode(Builder.getRange(ReturnDeclaratorRange), 1231 ReturnDeclarator, nullptr); 1232 } 1233 1234 // Build node for trailing return type. 1235 auto Return = Builder.getRange(ReturnedType.getSourceRange()); 1236 const auto *Arrow = Return.begin() - 1; 1237 assert(Arrow->kind() == tok::arrow); 1238 auto Tokens = llvm::makeArrayRef(Arrow, Return.end()); 1239 Builder.markChildToken(Arrow, syntax::NodeRole::ArrowToken); 1240 if (ReturnDeclarator) 1241 Builder.markChild(ReturnDeclarator, 1242 syntax::NodeRole::TrailingReturnType_declarator); 1243 auto *R = new (allocator()) syntax::TrailingReturnType; 1244 Builder.foldNode(Tokens, R, L); 1245 return R; 1246 } 1247 1248 void foldExplicitTemplateInstantiation( 1249 ArrayRef<syntax::Token> Range, const syntax::Token *ExternKW, 1250 const syntax::Token *TemplateKW, 1251 syntax::SimpleDeclaration *InnerDeclaration, Decl *From) { 1252 assert(!ExternKW || ExternKW->kind() == tok::kw_extern); 1253 assert(TemplateKW && TemplateKW->kind() == tok::kw_template); 1254 Builder.markChildToken(ExternKW, syntax::NodeRole::ExternKeyword); 1255 Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword); 1256 Builder.markChild( 1257 InnerDeclaration, 1258 syntax::NodeRole::ExplicitTemplateInstantiation_declaration); 1259 Builder.foldNode( 1260 Range, new (allocator()) syntax::ExplicitTemplateInstantiation, From); 1261 } 1262 1263 syntax::TemplateDeclaration *foldTemplateDeclaration( 1264 ArrayRef<syntax::Token> Range, const syntax::Token *TemplateKW, 1265 ArrayRef<syntax::Token> TemplatedDeclaration, Decl *From) { 1266 assert(TemplateKW && TemplateKW->kind() == tok::kw_template); 1267 Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword); 1268 1269 auto *N = new (allocator()) syntax::TemplateDeclaration; 1270 Builder.foldNode(Range, N, From); 1271 Builder.markChild(N, syntax::NodeRole::TemplateDeclaration_declaration); 1272 return N; 1273 } 1274 1275 /// A small helper to save some typing. 1276 llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } 1277 1278 syntax::TreeBuilder &Builder; 1279 const ASTContext &Context; 1280 }; 1281 } // namespace 1282 1283 void syntax::TreeBuilder::noticeDeclWithoutSemicolon(Decl *D) { 1284 DeclsWithoutSemicolons.insert(D); 1285 } 1286 1287 void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) { 1288 if (Loc.isInvalid()) 1289 return; 1290 Pending.assignRole(*findToken(Loc), Role); 1291 } 1292 1293 void syntax::TreeBuilder::markChildToken(const syntax::Token *T, NodeRole R) { 1294 if (!T) 1295 return; 1296 Pending.assignRole(*T, R); 1297 } 1298 1299 void syntax::TreeBuilder::markChild(syntax::Node *N, NodeRole R) { 1300 assert(N); 1301 setRole(N, R); 1302 } 1303 1304 void syntax::TreeBuilder::markChild(ASTPtr N, NodeRole R) { 1305 auto *SN = Mapping.find(N); 1306 assert(SN != nullptr); 1307 setRole(SN, R); 1308 } 1309 1310 void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { 1311 if (!Child) 1312 return; 1313 1314 syntax::Tree *ChildNode; 1315 if (Expr *ChildExpr = dyn_cast<Expr>(Child)) { 1316 // This is an expression in a statement position, consume the trailing 1317 // semicolon and form an 'ExpressionStatement' node. 1318 markExprChild(ChildExpr, NodeRole::ExpressionStatement_expression); 1319 ChildNode = new (allocator()) syntax::ExpressionStatement; 1320 // (!) 'getStmtRange()' ensures this covers a trailing semicolon. 1321 Pending.foldChildren(Arena, getStmtRange(Child), ChildNode); 1322 } else { 1323 ChildNode = Mapping.find(Child); 1324 } 1325 assert(ChildNode != nullptr); 1326 setRole(ChildNode, Role); 1327 } 1328 1329 void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { 1330 if (!Child) 1331 return; 1332 Child = Child->IgnoreImplicit(); 1333 1334 syntax::Tree *ChildNode = Mapping.find(Child); 1335 assert(ChildNode != nullptr); 1336 setRole(ChildNode, Role); 1337 } 1338 1339 const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { 1340 if (L.isInvalid()) 1341 return nullptr; 1342 auto It = LocationToToken.find(L.getRawEncoding()); 1343 assert(It != LocationToToken.end()); 1344 return It->second; 1345 } 1346 1347 syntax::TranslationUnit * 1348 syntax::buildSyntaxTree(Arena &A, const TranslationUnitDecl &TU) { 1349 TreeBuilder Builder(A); 1350 BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext()); 1351 return std::move(Builder).finalize(); 1352 } 1353