xref: /freebsd/contrib/llvm-project/clang/lib/Tooling/Syntax/BuildTree.cpp (revision e40139ff33b48b56a24c808b166b04b8ee6f5b21)
1 //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "clang/Tooling/Syntax/BuildTree.h"
9 #include "clang/AST/RecursiveASTVisitor.h"
10 #include "clang/AST/Stmt.h"
11 #include "clang/Basic/LLVM.h"
12 #include "clang/Basic/SourceLocation.h"
13 #include "clang/Basic/SourceManager.h"
14 #include "clang/Basic/TokenKinds.h"
15 #include "clang/Lex/Lexer.h"
16 #include "clang/Tooling/Syntax/Nodes.h"
17 #include "clang/Tooling/Syntax/Tokens.h"
18 #include "clang/Tooling/Syntax/Tree.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/Support/Allocator.h"
23 #include "llvm/Support/Casting.h"
24 #include "llvm/Support/FormatVariadic.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include <map>
27 
28 using namespace clang;
29 
30 /// A helper class for constructing the syntax tree while traversing a clang
31 /// AST.
32 ///
33 /// At each point of the traversal we maintain a list of pending nodes.
34 /// Initially all tokens are added as pending nodes. When processing a clang AST
35 /// node, the clients need to:
36 ///   - create a corresponding syntax node,
37 ///   - assign roles to all pending child nodes with 'markChild' and
38 ///     'markChildToken',
39 ///   - replace the child nodes with the new syntax node in the pending list
40 ///     with 'foldNode'.
41 ///
42 /// Note that all children are expected to be processed when building a node.
43 ///
44 /// Call finalize() to finish building the tree and consume the root node.
45 class syntax::TreeBuilder {
46 public:
47   TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) {}
48 
49   llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); }
50 
51   /// Populate children for \p New node, assuming it covers tokens from \p
52   /// Range.
53   void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New);
54 
55   /// Set role for a token starting at \p Loc.
56   void markChildToken(SourceLocation Loc, tok::TokenKind Kind, NodeRole R);
57 
58   /// Finish building the tree and consume the root node.
59   syntax::TranslationUnit *finalize() && {
60     auto Tokens = Arena.tokenBuffer().expandedTokens();
61     assert(!Tokens.empty());
62     assert(Tokens.back().kind() == tok::eof);
63 
64     // Build the root of the tree, consuming all the children.
65     Pending.foldChildren(Tokens.drop_back(),
66                          new (Arena.allocator()) syntax::TranslationUnit);
67 
68     return cast<syntax::TranslationUnit>(std::move(Pending).finalize());
69   }
70 
71   /// getRange() finds the syntax tokens corresponding to the passed source
72   /// locations.
73   /// \p First is the start position of the first token and \p Last is the start
74   /// position of the last token.
75   llvm::ArrayRef<syntax::Token> getRange(SourceLocation First,
76                                          SourceLocation Last) const {
77     assert(First.isValid());
78     assert(Last.isValid());
79     assert(First == Last ||
80            Arena.sourceManager().isBeforeInTranslationUnit(First, Last));
81     return llvm::makeArrayRef(findToken(First), std::next(findToken(Last)));
82   }
83   llvm::ArrayRef<syntax::Token> getRange(const Decl *D) const {
84     return getRange(D->getBeginLoc(), D->getEndLoc());
85   }
86   llvm::ArrayRef<syntax::Token> getRange(const Stmt *S) const {
87     return getRange(S->getBeginLoc(), S->getEndLoc());
88   }
89 
90 private:
91   /// Finds a token starting at \p L. The token must exist.
92   const syntax::Token *findToken(SourceLocation L) const;
93 
94   /// A collection of trees covering the input tokens.
95   /// When created, each tree corresponds to a single token in the file.
96   /// Clients call 'foldChildren' to attach one or more subtrees to a parent
97   /// node and update the list of trees accordingly.
98   ///
99   /// Ensures that added nodes properly nest and cover the whole token stream.
100   struct Forest {
101     Forest(syntax::Arena &A) {
102       assert(!A.tokenBuffer().expandedTokens().empty());
103       assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof);
104       // Create all leaf nodes.
105       // Note that we do not have 'eof' in the tree.
106       for (auto &T : A.tokenBuffer().expandedTokens().drop_back())
107         Trees.insert(Trees.end(),
108                      {&T, NodeAndRole{new (A.allocator()) syntax::Leaf(&T)}});
109     }
110 
111     void assignRole(llvm::ArrayRef<syntax::Token> Range,
112                     syntax::NodeRole Role) {
113       assert(!Range.empty());
114       auto It = Trees.lower_bound(Range.begin());
115       assert(It != Trees.end() && "no node found");
116       assert(It->first == Range.begin() && "no child with the specified range");
117       assert((std::next(It) == Trees.end() ||
118               std::next(It)->first == Range.end()) &&
119              "no child with the specified range");
120       It->second.Role = Role;
121     }
122 
123     /// Add \p Node to the forest and fill its children nodes based on the \p
124     /// NodeRange.
125     void foldChildren(llvm::ArrayRef<syntax::Token> NodeTokens,
126                       syntax::Tree *Node) {
127       assert(!NodeTokens.empty());
128       assert(Node->firstChild() == nullptr && "node already has children");
129 
130       auto *FirstToken = NodeTokens.begin();
131       auto BeginChildren = Trees.lower_bound(FirstToken);
132       assert(BeginChildren != Trees.end() &&
133              BeginChildren->first == FirstToken &&
134              "fold crosses boundaries of existing subtrees");
135       auto EndChildren = Trees.lower_bound(NodeTokens.end());
136       assert((EndChildren == Trees.end() ||
137               EndChildren->first == NodeTokens.end()) &&
138              "fold crosses boundaries of existing subtrees");
139 
140       // (!) we need to go in reverse order, because we can only prepend.
141       for (auto It = EndChildren; It != BeginChildren; --It)
142         Node->prependChildLowLevel(std::prev(It)->second.Node,
143                                    std::prev(It)->second.Role);
144 
145       Trees.erase(BeginChildren, EndChildren);
146       Trees.insert({FirstToken, NodeAndRole(Node)});
147     }
148 
149     // EXPECTS: all tokens were consumed and are owned by a single root node.
150     syntax::Node *finalize() && {
151       assert(Trees.size() == 1);
152       auto *Root = Trees.begin()->second.Node;
153       Trees = {};
154       return Root;
155     }
156 
157     std::string str(const syntax::Arena &A) const {
158       std::string R;
159       for (auto It = Trees.begin(); It != Trees.end(); ++It) {
160         unsigned CoveredTokens =
161             It != Trees.end()
162                 ? (std::next(It)->first - It->first)
163                 : A.tokenBuffer().expandedTokens().end() - It->first;
164 
165         R += llvm::formatv("- '{0}' covers '{1}'+{2} tokens\n",
166                            It->second.Node->kind(),
167                            It->first->text(A.sourceManager()), CoveredTokens);
168         R += It->second.Node->dump(A);
169       }
170       return R;
171     }
172 
173   private:
174     /// A with a role that should be assigned to it when adding to a parent.
175     struct NodeAndRole {
176       explicit NodeAndRole(syntax::Node *Node)
177           : Node(Node), Role(NodeRole::Unknown) {}
178 
179       syntax::Node *Node;
180       NodeRole Role;
181     };
182 
183     /// Maps from the start token to a subtree starting at that token.
184     /// FIXME: storing the end tokens is redundant.
185     /// FIXME: the key of a map is redundant, it is also stored in NodeForRange.
186     std::map<const syntax::Token *, NodeAndRole> Trees;
187   };
188 
189   /// For debugging purposes.
190   std::string str() { return Pending.str(Arena); }
191 
192   syntax::Arena &Arena;
193   Forest Pending;
194 };
195 
196 namespace {
197 class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
198 public:
199   explicit BuildTreeVisitor(ASTContext &Ctx, syntax::TreeBuilder &Builder)
200       : Builder(Builder), LangOpts(Ctx.getLangOpts()) {}
201 
202   bool shouldTraversePostOrder() const { return true; }
203 
204   bool TraverseDecl(Decl *D) {
205     if (!D || isa<TranslationUnitDecl>(D))
206       return RecursiveASTVisitor::TraverseDecl(D);
207     if (!llvm::isa<TranslationUnitDecl>(D->getDeclContext()))
208       return true; // Only build top-level decls for now, do not recurse.
209     return RecursiveASTVisitor::TraverseDecl(D);
210   }
211 
212   bool VisitDecl(Decl *D) {
213     assert(llvm::isa<TranslationUnitDecl>(D->getDeclContext()) &&
214            "expected a top-level decl");
215     assert(!D->isImplicit());
216     Builder.foldNode(Builder.getRange(D),
217                      new (allocator()) syntax::TopLevelDeclaration());
218     return true;
219   }
220 
221   bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) {
222     // (!) we do not want to call VisitDecl(), the declaration for translation
223     // unit is built by finalize().
224     return true;
225   }
226 
227   bool WalkUpFromCompoundStmt(CompoundStmt *S) {
228     using NodeRole = syntax::NodeRole;
229 
230     Builder.markChildToken(S->getLBracLoc(), tok::l_brace,
231                            NodeRole::CompoundStatement_lbrace);
232     Builder.markChildToken(S->getRBracLoc(), tok::r_brace,
233                            NodeRole::CompoundStatement_rbrace);
234 
235     Builder.foldNode(Builder.getRange(S),
236                      new (allocator()) syntax::CompoundStatement);
237     return true;
238   }
239 
240 private:
241   /// A small helper to save some typing.
242   llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); }
243 
244   syntax::TreeBuilder &Builder;
245   const LangOptions &LangOpts;
246 };
247 } // namespace
248 
249 void syntax::TreeBuilder::foldNode(llvm::ArrayRef<syntax::Token> Range,
250                                    syntax::Tree *New) {
251   Pending.foldChildren(Range, New);
252 }
253 
254 void syntax::TreeBuilder::markChildToken(SourceLocation Loc,
255                                          tok::TokenKind Kind, NodeRole Role) {
256   if (Loc.isInvalid())
257     return;
258   Pending.assignRole(*findToken(Loc), Role);
259 }
260 
261 const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const {
262   auto Tokens = Arena.tokenBuffer().expandedTokens();
263   auto &SM = Arena.sourceManager();
264   auto It = llvm::partition_point(Tokens, [&](const syntax::Token &T) {
265     return SM.isBeforeInTranslationUnit(T.location(), L);
266   });
267   assert(It != Tokens.end());
268   assert(It->location() == L);
269   return &*It;
270 }
271 
272 syntax::TranslationUnit *
273 syntax::buildSyntaxTree(Arena &A, const TranslationUnitDecl &TU) {
274   TreeBuilder Builder(A);
275   BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext());
276   return std::move(Builder).finalize();
277 }
278