1 //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "clang/Tooling/Syntax/BuildTree.h" 9 #include "clang/AST/Decl.h" 10 #include "clang/AST/DeclBase.h" 11 #include "clang/AST/RecursiveASTVisitor.h" 12 #include "clang/AST/Stmt.h" 13 #include "clang/Basic/LLVM.h" 14 #include "clang/Basic/SourceLocation.h" 15 #include "clang/Basic/SourceManager.h" 16 #include "clang/Basic/TokenKinds.h" 17 #include "clang/Lex/Lexer.h" 18 #include "clang/Tooling/Syntax/Nodes.h" 19 #include "clang/Tooling/Syntax/Tokens.h" 20 #include "clang/Tooling/Syntax/Tree.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallVector.h" 24 #include "llvm/Support/Allocator.h" 25 #include "llvm/Support/Casting.h" 26 #include "llvm/Support/Compiler.h" 27 #include "llvm/Support/FormatVariadic.h" 28 #include "llvm/Support/MemoryBuffer.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include <map> 31 32 using namespace clang; 33 34 LLVM_ATTRIBUTE_UNUSED 35 static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; } 36 37 /// A helper class for constructing the syntax tree while traversing a clang 38 /// AST. 39 /// 40 /// At each point of the traversal we maintain a list of pending nodes. 41 /// Initially all tokens are added as pending nodes. When processing a clang AST 42 /// node, the clients need to: 43 /// - create a corresponding syntax node, 44 /// - assign roles to all pending child nodes with 'markChild' and 45 /// 'markChildToken', 46 /// - replace the child nodes with the new syntax node in the pending list 47 /// with 'foldNode'. 48 /// 49 /// Note that all children are expected to be processed when building a node. 50 /// 51 /// Call finalize() to finish building the tree and consume the root node. 52 class syntax::TreeBuilder { 53 public: 54 TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) { 55 for (const auto &T : Arena.tokenBuffer().expandedTokens()) 56 LocationToToken.insert({T.location().getRawEncoding(), &T}); 57 } 58 59 llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); } 60 61 /// Populate children for \p New node, assuming it covers tokens from \p 62 /// Range. 63 void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New); 64 65 /// Must be called with the range of each `DeclaratorDecl`. Ensures the 66 /// corresponding declarator nodes are covered by `SimpleDeclaration`. 67 void noticeDeclaratorRange(llvm::ArrayRef<syntax::Token> Range); 68 69 /// Notifies that we should not consume trailing semicolon when computing 70 /// token range of \p D. 71 void noticeDeclaratorWithoutSemicolon(Decl *D); 72 73 /// Mark the \p Child node with a corresponding \p Role. All marked children 74 /// should be consumed by foldNode. 75 /// (!) when called on expressions (clang::Expr is derived from clang::Stmt), 76 /// wraps expressions into expression statement. 77 void markStmtChild(Stmt *Child, NodeRole Role); 78 /// Should be called for expressions in non-statement position to avoid 79 /// wrapping into expression statement. 80 void markExprChild(Expr *Child, NodeRole Role); 81 82 /// Set role for a token starting at \p Loc. 83 void markChildToken(SourceLocation Loc, NodeRole R); 84 85 /// Finish building the tree and consume the root node. 86 syntax::TranslationUnit *finalize() && { 87 auto Tokens = Arena.tokenBuffer().expandedTokens(); 88 assert(!Tokens.empty()); 89 assert(Tokens.back().kind() == tok::eof); 90 91 // Build the root of the tree, consuming all the children. 92 Pending.foldChildren(Arena, Tokens.drop_back(), 93 new (Arena.allocator()) syntax::TranslationUnit); 94 95 auto *TU = cast<syntax::TranslationUnit>(std::move(Pending).finalize()); 96 TU->assertInvariantsRecursive(); 97 return TU; 98 } 99 100 /// getRange() finds the syntax tokens corresponding to the passed source 101 /// locations. 102 /// \p First is the start position of the first token and \p Last is the start 103 /// position of the last token. 104 llvm::ArrayRef<syntax::Token> getRange(SourceLocation First, 105 SourceLocation Last) const { 106 assert(First.isValid()); 107 assert(Last.isValid()); 108 assert(First == Last || 109 Arena.sourceManager().isBeforeInTranslationUnit(First, Last)); 110 return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); 111 } 112 llvm::ArrayRef<syntax::Token> getRange(const Decl *D) const { 113 auto Tokens = getRange(D->getBeginLoc(), D->getEndLoc()); 114 if (llvm::isa<NamespaceDecl>(D)) 115 return Tokens; 116 if (DeclsWithoutSemicolons.count(D)) 117 return Tokens; 118 // FIXME: do not consume trailing semicolon on function definitions. 119 // Most declarations own a semicolon in syntax trees, but not in clang AST. 120 return withTrailingSemicolon(Tokens); 121 } 122 llvm::ArrayRef<syntax::Token> getExprRange(const Expr *E) const { 123 return getRange(E->getBeginLoc(), E->getEndLoc()); 124 } 125 /// Find the adjusted range for the statement, consuming the trailing 126 /// semicolon when needed. 127 llvm::ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const { 128 auto Tokens = getRange(S->getBeginLoc(), S->getEndLoc()); 129 if (isa<CompoundStmt>(S)) 130 return Tokens; 131 132 // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and 133 // all statements that end with those. Consume this semicolon here. 134 if (Tokens.back().kind() == tok::semi) 135 return Tokens; 136 return withTrailingSemicolon(Tokens); 137 } 138 139 private: 140 llvm::ArrayRef<syntax::Token> 141 withTrailingSemicolon(llvm::ArrayRef<syntax::Token> Tokens) const { 142 assert(!Tokens.empty()); 143 assert(Tokens.back().kind() != tok::eof); 144 // (!) we never consume 'eof', so looking at the next token is ok. 145 if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) 146 return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1); 147 return Tokens; 148 } 149 150 /// Finds a token starting at \p L. The token must exist. 151 const syntax::Token *findToken(SourceLocation L) const; 152 153 /// A collection of trees covering the input tokens. 154 /// When created, each tree corresponds to a single token in the file. 155 /// Clients call 'foldChildren' to attach one or more subtrees to a parent 156 /// node and update the list of trees accordingly. 157 /// 158 /// Ensures that added nodes properly nest and cover the whole token stream. 159 struct Forest { 160 Forest(syntax::Arena &A) { 161 assert(!A.tokenBuffer().expandedTokens().empty()); 162 assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof); 163 // Create all leaf nodes. 164 // Note that we do not have 'eof' in the tree. 165 for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) { 166 auto *L = new (A.allocator()) syntax::Leaf(&T); 167 L->Original = true; 168 L->CanModify = A.tokenBuffer().spelledForExpanded(T).hasValue(); 169 Trees.insert(Trees.end(), {&T, NodeAndRole{L}}); 170 } 171 } 172 173 ~Forest() { assert(DelayedFolds.empty()); } 174 175 void assignRole(llvm::ArrayRef<syntax::Token> Range, 176 syntax::NodeRole Role) { 177 assert(!Range.empty()); 178 auto It = Trees.lower_bound(Range.begin()); 179 assert(It != Trees.end() && "no node found"); 180 assert(It->first == Range.begin() && "no child with the specified range"); 181 assert((std::next(It) == Trees.end() || 182 std::next(It)->first == Range.end()) && 183 "no child with the specified range"); 184 It->second.Role = Role; 185 } 186 187 /// Add \p Node to the forest and attach child nodes based on \p Tokens. 188 void foldChildren(const syntax::Arena &A, 189 llvm::ArrayRef<syntax::Token> Tokens, 190 syntax::Tree *Node) { 191 // Execute delayed folds inside `Tokens`. 192 auto BeginExecuted = DelayedFolds.lower_bound(Tokens.begin()); 193 auto It = BeginExecuted; 194 for (; It != DelayedFolds.end() && It->second.End <= Tokens.end(); ++It) 195 foldChildrenEager(A, llvm::makeArrayRef(It->first, It->second.End), 196 It->second.Node); 197 DelayedFolds.erase(BeginExecuted, It); 198 199 // Attach children to `Node`. 200 foldChildrenEager(A, Tokens, Node); 201 } 202 203 /// Schedule a call to `foldChildren` that will only be executed when 204 /// containing node is folded. The range of delayed nodes can be extended by 205 /// calling `extendDelayedFold`. Only one delayed node for each starting 206 /// token is allowed. 207 void foldChildrenDelayed(llvm::ArrayRef<syntax::Token> Tokens, 208 syntax::Tree *Node) { 209 assert(!Tokens.empty()); 210 bool Inserted = 211 DelayedFolds.insert({Tokens.begin(), DelayedFold{Tokens.end(), Node}}) 212 .second; 213 (void)Inserted; 214 assert(Inserted && "Multiple delayed folds start at the same token"); 215 } 216 217 /// If there a delayed fold, starting at `ExtendedRange.begin()`, extends 218 /// its endpoint to `ExtendedRange.end()` and returns true. 219 /// Otherwise, returns false. 220 bool extendDelayedFold(llvm::ArrayRef<syntax::Token> ExtendedRange) { 221 assert(!ExtendedRange.empty()); 222 auto It = DelayedFolds.find(ExtendedRange.data()); 223 if (It == DelayedFolds.end()) 224 return false; 225 assert(It->second.End <= ExtendedRange.end()); 226 It->second.End = ExtendedRange.end(); 227 return true; 228 } 229 230 // EXPECTS: all tokens were consumed and are owned by a single root node. 231 syntax::Node *finalize() && { 232 assert(Trees.size() == 1); 233 auto *Root = Trees.begin()->second.Node; 234 Trees = {}; 235 return Root; 236 } 237 238 std::string str(const syntax::Arena &A) const { 239 std::string R; 240 for (auto It = Trees.begin(); It != Trees.end(); ++It) { 241 unsigned CoveredTokens = 242 It != Trees.end() 243 ? (std::next(It)->first - It->first) 244 : A.tokenBuffer().expandedTokens().end() - It->first; 245 246 R += llvm::formatv("- '{0}' covers '{1}'+{2} tokens\n", 247 It->second.Node->kind(), 248 It->first->text(A.sourceManager()), CoveredTokens); 249 R += It->second.Node->dump(A); 250 } 251 return R; 252 } 253 254 private: 255 /// Implementation detail of `foldChildren`, does acutal folding ignoring 256 /// delayed folds. 257 void foldChildrenEager(const syntax::Arena &A, 258 llvm::ArrayRef<syntax::Token> Tokens, 259 syntax::Tree *Node) { 260 assert(Node->firstChild() == nullptr && "node already has children"); 261 262 auto *FirstToken = Tokens.begin(); 263 auto BeginChildren = Trees.lower_bound(FirstToken); 264 assert((BeginChildren == Trees.end() || 265 BeginChildren->first == FirstToken) && 266 "fold crosses boundaries of existing subtrees"); 267 auto EndChildren = Trees.lower_bound(Tokens.end()); 268 assert( 269 (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) && 270 "fold crosses boundaries of existing subtrees"); 271 272 // (!) we need to go in reverse order, because we can only prepend. 273 for (auto It = EndChildren; It != BeginChildren; --It) 274 Node->prependChildLowLevel(std::prev(It)->second.Node, 275 std::prev(It)->second.Role); 276 277 // Mark that this node came from the AST and is backed by the source code. 278 Node->Original = true; 279 Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue(); 280 281 Trees.erase(BeginChildren, EndChildren); 282 Trees.insert({FirstToken, NodeAndRole(Node)}); 283 } 284 /// A with a role that should be assigned to it when adding to a parent. 285 struct NodeAndRole { 286 explicit NodeAndRole(syntax::Node *Node) 287 : Node(Node), Role(NodeRole::Unknown) {} 288 289 syntax::Node *Node; 290 NodeRole Role; 291 }; 292 293 /// Maps from the start token to a subtree starting at that token. 294 /// Keys in the map are pointers into the array of expanded tokens, so 295 /// pointer order corresponds to the order of preprocessor tokens. 296 /// FIXME: storing the end tokens is redundant. 297 /// FIXME: the key of a map is redundant, it is also stored in NodeForRange. 298 std::map<const syntax::Token *, NodeAndRole> Trees; 299 300 /// See documentation of `foldChildrenDelayed` for details. 301 struct DelayedFold { 302 const syntax::Token *End = nullptr; 303 syntax::Tree *Node = nullptr; 304 }; 305 std::map<const syntax::Token *, DelayedFold> DelayedFolds; 306 }; 307 308 /// For debugging purposes. 309 std::string str() { return Pending.str(Arena); } 310 311 syntax::Arena &Arena; 312 /// To quickly find tokens by their start location. 313 llvm::DenseMap</*SourceLocation*/ unsigned, const syntax::Token *> 314 LocationToToken; 315 Forest Pending; 316 llvm::DenseSet<Decl *> DeclsWithoutSemicolons; 317 }; 318 319 namespace { 320 class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> { 321 public: 322 explicit BuildTreeVisitor(ASTContext &Ctx, syntax::TreeBuilder &Builder) 323 : Builder(Builder), LangOpts(Ctx.getLangOpts()) {} 324 325 bool shouldTraversePostOrder() const { return true; } 326 327 bool WalkUpFromDeclaratorDecl(DeclaratorDecl *D) { 328 // Ensure declarators are covered by SimpleDeclaration. 329 Builder.noticeDeclaratorRange(Builder.getRange(D)); 330 // FIXME: build nodes for the declarator too. 331 return true; 332 } 333 bool WalkUpFromTypedefNameDecl(TypedefNameDecl *D) { 334 // Also a declarator. 335 Builder.noticeDeclaratorRange(Builder.getRange(D)); 336 // FIXME: build nodes for the declarator too. 337 return true; 338 } 339 340 bool VisitDecl(Decl *D) { 341 assert(!D->isImplicit()); 342 Builder.foldNode(Builder.getRange(D), 343 new (allocator()) syntax::UnknownDeclaration()); 344 return true; 345 } 346 347 bool WalkUpFromTagDecl(TagDecl *C) { 348 // FIXME: build the ClassSpecifier node. 349 if (C->isFreeStanding()) { 350 // Class is a declaration specifier and needs a spanning declaration node. 351 Builder.foldNode(Builder.getRange(C), 352 new (allocator()) syntax::SimpleDeclaration); 353 return true; 354 } 355 return true; 356 } 357 358 bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) { 359 // (!) we do not want to call VisitDecl(), the declaration for translation 360 // unit is built by finalize(). 361 return true; 362 } 363 364 bool WalkUpFromCompoundStmt(CompoundStmt *S) { 365 using NodeRole = syntax::NodeRole; 366 367 Builder.markChildToken(S->getLBracLoc(), NodeRole::OpenParen); 368 for (auto *Child : S->body()) 369 Builder.markStmtChild(Child, NodeRole::CompoundStatement_statement); 370 Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen); 371 372 Builder.foldNode(Builder.getStmtRange(S), 373 new (allocator()) syntax::CompoundStatement); 374 return true; 375 } 376 377 // Some statements are not yet handled by syntax trees. 378 bool WalkUpFromStmt(Stmt *S) { 379 Builder.foldNode(Builder.getStmtRange(S), 380 new (allocator()) syntax::UnknownStatement); 381 return true; 382 } 383 384 bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) { 385 // We override to traverse range initializer as VarDecl. 386 // RAV traverses it as a statement, we produce invalid node kinds in that 387 // case. 388 // FIXME: should do this in RAV instead? 389 if (S->getInit() && !TraverseStmt(S->getInit())) 390 return false; 391 if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) 392 return false; 393 if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) 394 return false; 395 if (S->getBody() && !TraverseStmt(S->getBody())) 396 return false; 397 return true; 398 } 399 400 bool TraverseStmt(Stmt *S) { 401 if (auto *DS = llvm::dyn_cast_or_null<DeclStmt>(S)) { 402 // We want to consume the semicolon, make sure SimpleDeclaration does not. 403 for (auto *D : DS->decls()) 404 Builder.noticeDeclaratorWithoutSemicolon(D); 405 } else if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) { 406 // (!) do not recurse into subexpressions. 407 // we do not have syntax trees for expressions yet, so we only want to see 408 // the first top-level expression. 409 return WalkUpFromExpr(E->IgnoreImplicit()); 410 } 411 return RecursiveASTVisitor::TraverseStmt(S); 412 } 413 414 // Some expressions are not yet handled by syntax trees. 415 bool WalkUpFromExpr(Expr *E) { 416 assert(!isImplicitExpr(E) && "should be handled by TraverseStmt"); 417 Builder.foldNode(Builder.getExprRange(E), 418 new (allocator()) syntax::UnknownExpression); 419 return true; 420 } 421 422 bool WalkUpFromNamespaceDecl(NamespaceDecl *S) { 423 auto Tokens = Builder.getRange(S); 424 if (Tokens.front().kind() == tok::coloncolon) { 425 // Handle nested namespace definitions. Those start at '::' token, e.g. 426 // namespace a^::b {} 427 // FIXME: build corresponding nodes for the name of this namespace. 428 return true; 429 } 430 Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition); 431 return true; 432 } 433 434 // The code below is very regular, it could even be generated with some 435 // preprocessor magic. We merely assign roles to the corresponding children 436 // and fold resulting nodes. 437 bool WalkUpFromDeclStmt(DeclStmt *S) { 438 Builder.foldNode(Builder.getStmtRange(S), 439 new (allocator()) syntax::DeclarationStatement); 440 return true; 441 } 442 443 bool WalkUpFromNullStmt(NullStmt *S) { 444 Builder.foldNode(Builder.getStmtRange(S), 445 new (allocator()) syntax::EmptyStatement); 446 return true; 447 } 448 449 bool WalkUpFromSwitchStmt(SwitchStmt *S) { 450 Builder.markChildToken(S->getSwitchLoc(), 451 syntax::NodeRole::IntroducerKeyword); 452 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 453 Builder.foldNode(Builder.getStmtRange(S), 454 new (allocator()) syntax::SwitchStatement); 455 return true; 456 } 457 458 bool WalkUpFromCaseStmt(CaseStmt *S) { 459 Builder.markChildToken(S->getKeywordLoc(), 460 syntax::NodeRole::IntroducerKeyword); 461 Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value); 462 Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 463 Builder.foldNode(Builder.getStmtRange(S), 464 new (allocator()) syntax::CaseStatement); 465 return true; 466 } 467 468 bool WalkUpFromDefaultStmt(DefaultStmt *S) { 469 Builder.markChildToken(S->getKeywordLoc(), 470 syntax::NodeRole::IntroducerKeyword); 471 Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 472 Builder.foldNode(Builder.getStmtRange(S), 473 new (allocator()) syntax::DefaultStatement); 474 return true; 475 } 476 477 bool WalkUpFromIfStmt(IfStmt *S) { 478 Builder.markChildToken(S->getIfLoc(), syntax::NodeRole::IntroducerKeyword); 479 Builder.markStmtChild(S->getThen(), 480 syntax::NodeRole::IfStatement_thenStatement); 481 Builder.markChildToken(S->getElseLoc(), 482 syntax::NodeRole::IfStatement_elseKeyword); 483 Builder.markStmtChild(S->getElse(), 484 syntax::NodeRole::IfStatement_elseStatement); 485 Builder.foldNode(Builder.getStmtRange(S), 486 new (allocator()) syntax::IfStatement); 487 return true; 488 } 489 490 bool WalkUpFromForStmt(ForStmt *S) { 491 Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 492 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 493 Builder.foldNode(Builder.getStmtRange(S), 494 new (allocator()) syntax::ForStatement); 495 return true; 496 } 497 498 bool WalkUpFromWhileStmt(WhileStmt *S) { 499 Builder.markChildToken(S->getWhileLoc(), 500 syntax::NodeRole::IntroducerKeyword); 501 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 502 Builder.foldNode(Builder.getStmtRange(S), 503 new (allocator()) syntax::WhileStatement); 504 return true; 505 } 506 507 bool WalkUpFromContinueStmt(ContinueStmt *S) { 508 Builder.markChildToken(S->getContinueLoc(), 509 syntax::NodeRole::IntroducerKeyword); 510 Builder.foldNode(Builder.getStmtRange(S), 511 new (allocator()) syntax::ContinueStatement); 512 return true; 513 } 514 515 bool WalkUpFromBreakStmt(BreakStmt *S) { 516 Builder.markChildToken(S->getBreakLoc(), 517 syntax::NodeRole::IntroducerKeyword); 518 Builder.foldNode(Builder.getStmtRange(S), 519 new (allocator()) syntax::BreakStatement); 520 return true; 521 } 522 523 bool WalkUpFromReturnStmt(ReturnStmt *S) { 524 Builder.markChildToken(S->getReturnLoc(), 525 syntax::NodeRole::IntroducerKeyword); 526 Builder.markExprChild(S->getRetValue(), 527 syntax::NodeRole::ReturnStatement_value); 528 Builder.foldNode(Builder.getStmtRange(S), 529 new (allocator()) syntax::ReturnStatement); 530 return true; 531 } 532 533 bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) { 534 Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 535 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 536 Builder.foldNode(Builder.getStmtRange(S), 537 new (allocator()) syntax::RangeBasedForStatement); 538 return true; 539 } 540 541 bool WalkUpFromEmptyDecl(EmptyDecl *S) { 542 Builder.foldNode(Builder.getRange(S), 543 new (allocator()) syntax::EmptyDeclaration); 544 return true; 545 } 546 547 bool WalkUpFromStaticAssertDecl(StaticAssertDecl *S) { 548 Builder.markExprChild(S->getAssertExpr(), 549 syntax::NodeRole::StaticAssertDeclaration_condition); 550 Builder.markExprChild(S->getMessage(), 551 syntax::NodeRole::StaticAssertDeclaration_message); 552 Builder.foldNode(Builder.getRange(S), 553 new (allocator()) syntax::StaticAssertDeclaration); 554 return true; 555 } 556 557 bool WalkUpFromLinkageSpecDecl(LinkageSpecDecl *S) { 558 Builder.foldNode(Builder.getRange(S), 559 new (allocator()) syntax::LinkageSpecificationDeclaration); 560 return true; 561 } 562 563 bool WalkUpFromNamespaceAliasDecl(NamespaceAliasDecl *S) { 564 Builder.foldNode(Builder.getRange(S), 565 new (allocator()) syntax::NamespaceAliasDefinition); 566 return true; 567 } 568 569 bool WalkUpFromUsingDirectiveDecl(UsingDirectiveDecl *S) { 570 Builder.foldNode(Builder.getRange(S), 571 new (allocator()) syntax::UsingNamespaceDirective); 572 return true; 573 } 574 575 bool WalkUpFromUsingDecl(UsingDecl *S) { 576 Builder.foldNode(Builder.getRange(S), 577 new (allocator()) syntax::UsingDeclaration); 578 return true; 579 } 580 581 bool WalkUpFromUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *S) { 582 Builder.foldNode(Builder.getRange(S), 583 new (allocator()) syntax::UsingDeclaration); 584 return true; 585 } 586 587 bool WalkUpFromUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *S) { 588 Builder.foldNode(Builder.getRange(S), 589 new (allocator()) syntax::UsingDeclaration); 590 return true; 591 } 592 593 bool WalkUpFromTypeAliasDecl(TypeAliasDecl *S) { 594 Builder.foldNode(Builder.getRange(S), 595 new (allocator()) syntax::TypeAliasDeclaration); 596 return true; 597 } 598 599 private: 600 /// A small helper to save some typing. 601 llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } 602 603 syntax::TreeBuilder &Builder; 604 const LangOptions &LangOpts; 605 }; 606 } // namespace 607 608 void syntax::TreeBuilder::foldNode(llvm::ArrayRef<syntax::Token> Range, 609 syntax::Tree *New) { 610 Pending.foldChildren(Arena, Range, New); 611 } 612 613 void syntax::TreeBuilder::noticeDeclaratorRange( 614 llvm::ArrayRef<syntax::Token> Range) { 615 if (Pending.extendDelayedFold(Range)) 616 return; 617 Pending.foldChildrenDelayed(Range, 618 new (allocator()) syntax::SimpleDeclaration); 619 } 620 621 void syntax::TreeBuilder::noticeDeclaratorWithoutSemicolon(Decl *D) { 622 DeclsWithoutSemicolons.insert(D); 623 } 624 625 void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) { 626 if (Loc.isInvalid()) 627 return; 628 Pending.assignRole(*findToken(Loc), Role); 629 } 630 631 void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { 632 if (!Child) 633 return; 634 635 auto Range = getStmtRange(Child); 636 // This is an expression in a statement position, consume the trailing 637 // semicolon and form an 'ExpressionStatement' node. 638 if (auto *E = dyn_cast<Expr>(Child)) { 639 Pending.assignRole(getExprRange(E), 640 NodeRole::ExpressionStatement_expression); 641 // (!) 'getRange(Stmt)' ensures this already covers a trailing semicolon. 642 Pending.foldChildren(Arena, Range, 643 new (allocator()) syntax::ExpressionStatement); 644 } 645 Pending.assignRole(Range, Role); 646 } 647 648 void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { 649 if (!Child) 650 return; 651 652 Pending.assignRole(getExprRange(Child), Role); 653 } 654 655 const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { 656 auto It = LocationToToken.find(L.getRawEncoding()); 657 assert(It != LocationToToken.end()); 658 return It->second; 659 } 660 661 syntax::TranslationUnit * 662 syntax::buildSyntaxTree(Arena &A, const TranslationUnitDecl &TU) { 663 TreeBuilder Builder(A); 664 BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext()); 665 return std::move(Builder).finalize(); 666 } 667