xref: /freebsd/contrib/llvm-project/clang/include/clang/Tooling/Syntax/Tree.h (revision 6f63e88c0166ed3e5f2805a9e667c7d24d304cf1)
1 //===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Defines the basic structure of the syntax tree. There are two kinds of nodes:
9 //   - leaf nodes correspond to a token in the expanded token stream,
10 //   - tree nodes correspond to language grammar constructs.
11 //
12 // The tree is initially built from an AST. Each node of a newly built tree
13 // covers a continous subrange of expanded tokens (i.e. tokens after
14 // preprocessing), the specific tokens coverered are stored in the leaf nodes of
15 // a tree. A post-order traversal of a tree will visit leaf nodes in an order
16 // corresponding the original order of expanded tokens.
17 //
18 // This is still work in progress and highly experimental, we leave room for
19 // ourselves to completely change the design and/or implementation.
20 //===----------------------------------------------------------------------===//
21 #ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H
22 #define LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H
23 
24 #include "clang/Basic/LangOptions.h"
25 #include "clang/Basic/SourceLocation.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/Basic/TokenKinds.h"
28 #include "clang/Tooling/Syntax/Tokens.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/Support/Allocator.h"
32 #include <cstdint>
33 
34 namespace clang {
35 namespace syntax {
36 
37 /// A memory arena for syntax trees. Also tracks the underlying token buffers,
38 /// source manager, etc.
39 class Arena {
40 public:
41   Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
42         TokenBuffer Tokens);
43 
44   const SourceManager &sourceManager() const { return SourceMgr; }
45   const LangOptions &langOptions() const { return LangOpts; }
46 
47   const TokenBuffer &tokenBuffer() const;
48   llvm::BumpPtrAllocator &allocator() { return Allocator; }
49 
50   /// Add \p Buffer to the underlying source manager, tokenize it and store the
51   /// resulting tokens. Useful when there is a need to materialize tokens that
52   /// were not written in user code.
53   std::pair<FileID, llvm::ArrayRef<syntax::Token>>
54   lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer);
55 
56 private:
57   SourceManager &SourceMgr;
58   const LangOptions &LangOpts;
59   TokenBuffer Tokens;
60   /// IDs and storage for additional tokenized files.
61   llvm::DenseMap<FileID, std::vector<syntax::Token>> ExtraTokens;
62   /// Keeps all the allocated nodes and their intermediate data structures.
63   llvm::BumpPtrAllocator Allocator;
64 };
65 
66 class Tree;
67 class TreeBuilder;
68 class FactoryImpl;
69 class MutationsImpl;
70 
71 enum class NodeKind : uint16_t;
72 enum class NodeRole : uint8_t;
73 
74 /// A node in a syntax tree. Each node is either a Leaf (representing tokens) or
75 /// a Tree (representing language constructrs).
76 class Node {
77 public:
78   /// Newly created nodes are detached from a tree, parent and sibling links are
79   /// set when the node is added as a child to another one.
80   Node(NodeKind Kind);
81 
82   NodeKind kind() const { return static_cast<NodeKind>(Kind); }
83   NodeRole role() const { return static_cast<NodeRole>(Role); }
84 
85   /// Whether the node is detached from a tree, i.e. does not have a parent.
86   bool isDetached() const;
87   /// Whether the node was created from the AST backed by the source code
88   /// rather than added later through mutation APIs or created with factory
89   /// functions.
90   /// When this flag is true, all subtrees are also original.
91   /// This flag is set to false on any modifications to the node or any of its
92   /// subtrees, even if this simply involves swapping existing subtrees.
93   bool isOriginal() const { return Original; }
94   /// If this function return false, the tree cannot be modified because there
95   /// is no reasonable way to produce the corresponding textual replacements.
96   /// This can happen when the node crosses macro expansion boundaries.
97   ///
98   /// Note that even if the node is not modifiable, its child nodes can be
99   /// modifiable.
100   bool canModify() const { return CanModify; }
101 
102   const Tree *parent() const { return Parent; }
103   Tree *parent() { return Parent; }
104 
105   const Node *nextSibling() const { return NextSibling; }
106   Node *nextSibling() { return NextSibling; }
107 
108   /// Dumps the structure of a subtree. For debugging and testing purposes.
109   std::string dump(const Arena &A) const;
110   /// Dumps the tokens forming this subtree.
111   std::string dumpTokens(const Arena &A) const;
112 
113   /// Asserts invariants on this node of the tree and its immediate children.
114   /// Will not recurse into the subtree. No-op if NDEBUG is set.
115   void assertInvariants() const;
116   /// Runs checkInvariants on all nodes in the subtree. No-op if NDEBUG is set.
117   void assertInvariantsRecursive() const;
118 
119 private:
120   // Tree is allowed to change the Parent link and Role.
121   friend class Tree;
122   // TreeBuilder is allowed to set the Original and CanModify flags.
123   friend class TreeBuilder;
124   // MutationsImpl sets roles and CanModify flag.
125   friend class MutationsImpl;
126   // FactoryImpl sets CanModify flag.
127   friend class FactoryImpl;
128 
129   Tree *Parent;
130   Node *NextSibling;
131   unsigned Kind : 16;
132   unsigned Role : 8;
133   unsigned Original : 1;
134   unsigned CanModify : 1;
135 };
136 
137 /// A leaf node points to a single token inside the expanded token stream.
138 class Leaf final : public Node {
139 public:
140   Leaf(const syntax::Token *T);
141   static bool classof(const Node *N);
142 
143   const syntax::Token *token() const { return Tok; }
144 
145 private:
146   const syntax::Token *Tok;
147 };
148 
149 /// A node that has children and represents a syntactic language construct.
150 class Tree : public Node {
151 public:
152   using Node::Node;
153   static bool classof(const Node *N);
154 
155   Node *firstChild() { return FirstChild; }
156   const Node *firstChild() const { return FirstChild; }
157 
158   Leaf *firstLeaf();
159   const Leaf *firstLeaf() const {
160     return const_cast<Tree *>(this)->firstLeaf();
161   }
162 
163   Leaf *lastLeaf();
164   const Leaf *lastLeaf() const { return const_cast<Tree *>(this)->lastLeaf(); }
165 
166 protected:
167   /// Find the first node with a corresponding role.
168   syntax::Node *findChild(NodeRole R);
169 
170 private:
171   /// Prepend \p Child to the list of children and and sets the parent pointer.
172   /// A very low-level operation that does not check any invariants, only used
173   /// by TreeBuilder and FactoryImpl.
174   /// EXPECTS: Role != NodeRoleDetached.
175   void prependChildLowLevel(Node *Child, NodeRole Role);
176   friend class TreeBuilder;
177   friend class FactoryImpl;
178 
179   /// Replace a range of children [BeforeBegin->NextSibling, End) with a list of
180   /// new nodes starting at \p New.
181   /// Only used by MutationsImpl to implement higher-level mutation operations.
182   /// (!) \p New can be null to model removal of the child range.
183   void replaceChildRangeLowLevel(Node *BeforeBegin, Node *End, Node *New);
184   friend class MutationsImpl;
185 
186   Node *FirstChild = nullptr;
187 };
188 
189 } // namespace syntax
190 } // namespace clang
191 
192 #endif
193