xref: /freebsd/contrib/llvm-project/clang/include/clang/Tooling/Syntax/Tree.h (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
10b57cec5SDimitry Andric //===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric // Defines the basic structure of the syntax tree. There are two kinds of nodes:
9fcaf7f86SDimitry Andric //   - leaf nodes correspond to tokens,
100b57cec5SDimitry Andric //   - tree nodes correspond to language grammar constructs.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric // The tree is initially built from an AST. Each node of a newly built tree
13*bdd1243dSDimitry Andric // covers a continuous subrange of expanded tokens (i.e. tokens after
140b57cec5SDimitry Andric // preprocessing), the specific tokens coverered are stored in the leaf nodes of
150b57cec5SDimitry Andric // a tree. A post-order traversal of a tree will visit leaf nodes in an order
160b57cec5SDimitry Andric // corresponding the original order of expanded tokens.
170b57cec5SDimitry Andric //
180b57cec5SDimitry Andric // This is still work in progress and highly experimental, we leave room for
190b57cec5SDimitry Andric // ourselves to completely change the design and/or implementation.
200b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
2104eeddc0SDimitry Andric #ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_H
2204eeddc0SDimitry Andric #define LLVM_CLANG_TOOLING_SYNTAX_TREE_H
230b57cec5SDimitry Andric 
240b57cec5SDimitry Andric #include "clang/Basic/TokenKinds.h"
25fcaf7f86SDimitry Andric #include "clang/Tooling/Syntax/TokenManager.h"
26e8d8bef9SDimitry Andric #include "llvm/ADT/iterator.h"
270b57cec5SDimitry Andric #include "llvm/Support/Allocator.h"
280b57cec5SDimitry Andric #include <cstdint>
29fcaf7f86SDimitry Andric #include <vector>
300b57cec5SDimitry Andric 
310b57cec5SDimitry Andric namespace clang {
320b57cec5SDimitry Andric namespace syntax {
330b57cec5SDimitry Andric 
34fcaf7f86SDimitry Andric /// A memory arena for syntax trees.
35fcaf7f86SDimitry Andric // FIXME: use BumpPtrAllocator directly.
360b57cec5SDimitry Andric class Arena {
370b57cec5SDimitry Andric public:
getAllocator()38e8d8bef9SDimitry Andric   llvm::BumpPtrAllocator &getAllocator() { return Allocator; }
39e8d8bef9SDimitry Andric private:
400b57cec5SDimitry Andric   /// Keeps all the allocated nodes and their intermediate data structures.
410b57cec5SDimitry Andric   llvm::BumpPtrAllocator Allocator;
420b57cec5SDimitry Andric };
430b57cec5SDimitry Andric 
440b57cec5SDimitry Andric class Tree;
450b57cec5SDimitry Andric class TreeBuilder;
46480093f4SDimitry Andric class FactoryImpl;
47480093f4SDimitry Andric class MutationsImpl;
48480093f4SDimitry Andric 
490b57cec5SDimitry Andric enum class NodeKind : uint16_t;
500b57cec5SDimitry Andric enum class NodeRole : uint8_t;
510b57cec5SDimitry Andric 
520b57cec5SDimitry Andric /// A node in a syntax tree. Each node is either a Leaf (representing tokens) or
530b57cec5SDimitry Andric /// a Tree (representing language constructrs).
540b57cec5SDimitry Andric class Node {
55e8d8bef9SDimitry Andric protected:
560b57cec5SDimitry Andric   /// Newly created nodes are detached from a tree, parent and sibling links are
570b57cec5SDimitry Andric   /// set when the node is added as a child to another one.
580b57cec5SDimitry Andric   Node(NodeKind Kind);
59e8d8bef9SDimitry Andric   /// Nodes are allocated on Arenas; the destructor is never called.
60e8d8bef9SDimitry Andric   ~Node() = default;
610b57cec5SDimitry Andric 
62e8d8bef9SDimitry Andric public:
63e8d8bef9SDimitry Andric   /// Nodes cannot simply be copied without violating tree invariants.
64e8d8bef9SDimitry Andric   Node(const Node &) = delete;
65e8d8bef9SDimitry Andric   Node &operator=(const Node &) = delete;
66e8d8bef9SDimitry Andric   /// Idiomatically, nodes are allocated on an Arena and never moved.
67e8d8bef9SDimitry Andric   Node(Node &&) = delete;
68e8d8bef9SDimitry Andric   Node &operator=(Node &&) = delete;
69e8d8bef9SDimitry Andric 
getKind()70e8d8bef9SDimitry Andric   NodeKind getKind() const { return static_cast<NodeKind>(Kind); }
getRole()71e8d8bef9SDimitry Andric   NodeRole getRole() const { return static_cast<NodeRole>(Role); }
720b57cec5SDimitry Andric 
73480093f4SDimitry Andric   /// Whether the node is detached from a tree, i.e. does not have a parent.
74480093f4SDimitry Andric   bool isDetached() const;
75480093f4SDimitry Andric   /// Whether the node was created from the AST backed by the source code
76480093f4SDimitry Andric   /// rather than added later through mutation APIs or created with factory
77480093f4SDimitry Andric   /// functions.
78480093f4SDimitry Andric   /// When this flag is true, all subtrees are also original.
79480093f4SDimitry Andric   /// This flag is set to false on any modifications to the node or any of its
80480093f4SDimitry Andric   /// subtrees, even if this simply involves swapping existing subtrees.
isOriginal()81480093f4SDimitry Andric   bool isOriginal() const { return Original; }
82480093f4SDimitry Andric   /// If this function return false, the tree cannot be modified because there
83480093f4SDimitry Andric   /// is no reasonable way to produce the corresponding textual replacements.
84480093f4SDimitry Andric   /// This can happen when the node crosses macro expansion boundaries.
85480093f4SDimitry Andric   ///
86480093f4SDimitry Andric   /// Note that even if the node is not modifiable, its child nodes can be
87480093f4SDimitry Andric   /// modifiable.
canModify()88480093f4SDimitry Andric   bool canModify() const { return CanModify; }
89480093f4SDimitry Andric 
getParent()90e8d8bef9SDimitry Andric   const Tree *getParent() const { return Parent; }
getParent()91e8d8bef9SDimitry Andric   Tree *getParent() { return Parent; }
920b57cec5SDimitry Andric 
getNextSibling()93e8d8bef9SDimitry Andric   const Node *getNextSibling() const { return NextSibling; }
getNextSibling()94e8d8bef9SDimitry Andric   Node *getNextSibling() { return NextSibling; }
getPreviousSibling()95e8d8bef9SDimitry Andric   const Node *getPreviousSibling() const { return PreviousSibling; }
getPreviousSibling()96e8d8bef9SDimitry Andric   Node *getPreviousSibling() { return PreviousSibling; }
970b57cec5SDimitry Andric 
980b57cec5SDimitry Andric   /// Dumps the structure of a subtree. For debugging and testing purposes.
99fcaf7f86SDimitry Andric   std::string dump(const TokenManager &SM) const;
1000b57cec5SDimitry Andric   /// Dumps the tokens forming this subtree.
101fcaf7f86SDimitry Andric   std::string dumpTokens(const TokenManager &SM) const;
1020b57cec5SDimitry Andric 
103480093f4SDimitry Andric   /// Asserts invariants on this node of the tree and its immediate children.
104480093f4SDimitry Andric   /// Will not recurse into the subtree. No-op if NDEBUG is set.
105480093f4SDimitry Andric   void assertInvariants() const;
106480093f4SDimitry Andric   /// Runs checkInvariants on all nodes in the subtree. No-op if NDEBUG is set.
107480093f4SDimitry Andric   void assertInvariantsRecursive() const;
108480093f4SDimitry Andric 
1090b57cec5SDimitry Andric private:
1100b57cec5SDimitry Andric   // Tree is allowed to change the Parent link and Role.
1110b57cec5SDimitry Andric   friend class Tree;
112480093f4SDimitry Andric   // TreeBuilder is allowed to set the Original and CanModify flags.
113480093f4SDimitry Andric   friend class TreeBuilder;
114480093f4SDimitry Andric   // MutationsImpl sets roles and CanModify flag.
115480093f4SDimitry Andric   friend class MutationsImpl;
116480093f4SDimitry Andric   // FactoryImpl sets CanModify flag.
117480093f4SDimitry Andric   friend class FactoryImpl;
1180b57cec5SDimitry Andric 
1195ffd83dbSDimitry Andric   void setRole(NodeRole NR);
1205ffd83dbSDimitry Andric 
1210b57cec5SDimitry Andric   Tree *Parent;
1220b57cec5SDimitry Andric   Node *NextSibling;
123e8d8bef9SDimitry Andric   Node *PreviousSibling;
1240b57cec5SDimitry Andric   unsigned Kind : 16;
1250b57cec5SDimitry Andric   unsigned Role : 8;
126480093f4SDimitry Andric   unsigned Original : 1;
127480093f4SDimitry Andric   unsigned CanModify : 1;
1280b57cec5SDimitry Andric };
1290b57cec5SDimitry Andric 
130fcaf7f86SDimitry Andric /// A leaf node points to a single token.
131fcaf7f86SDimitry Andric // FIXME: add TokenKind field (borrow some bits from the Node::kind).
1320b57cec5SDimitry Andric class Leaf final : public Node {
1330b57cec5SDimitry Andric public:
134fcaf7f86SDimitry Andric   Leaf(TokenManager::Key K);
1350b57cec5SDimitry Andric   static bool classof(const Node *N);
1360b57cec5SDimitry Andric 
getTokenKey()137fcaf7f86SDimitry Andric   TokenManager::Key getTokenKey() const { return K; }
1380b57cec5SDimitry Andric 
1390b57cec5SDimitry Andric private:
140fcaf7f86SDimitry Andric   TokenManager::Key K;
1410b57cec5SDimitry Andric };
1420b57cec5SDimitry Andric 
1430b57cec5SDimitry Andric /// A node that has children and represents a syntactic language construct.
1440b57cec5SDimitry Andric class Tree : public Node {
145e8d8bef9SDimitry Andric   /// Iterator over children (common base for const/non-const).
146e8d8bef9SDimitry Andric   /// Not invalidated by tree mutations (holds a stable node pointer).
147e8d8bef9SDimitry Andric   template <typename DerivedT, typename NodeT>
148e8d8bef9SDimitry Andric   class ChildIteratorBase
149e8d8bef9SDimitry Andric       : public llvm::iterator_facade_base<DerivedT, std::forward_iterator_tag,
150e8d8bef9SDimitry Andric                                           NodeT> {
151e8d8bef9SDimitry Andric   protected:
152e8d8bef9SDimitry Andric     NodeT *N = nullptr;
153e8d8bef9SDimitry Andric     using Base = ChildIteratorBase;
154e8d8bef9SDimitry Andric 
1550b57cec5SDimitry Andric   public:
156e8d8bef9SDimitry Andric     ChildIteratorBase() = default;
ChildIteratorBase(NodeT * N)157e8d8bef9SDimitry Andric     explicit ChildIteratorBase(NodeT *N) : N(N) {}
1580b57cec5SDimitry Andric 
15904eeddc0SDimitry Andric     friend bool operator==(const DerivedT &LHS, const DerivedT &RHS) {
16004eeddc0SDimitry Andric       return LHS.N == RHS.N;
16104eeddc0SDimitry Andric     }
16204eeddc0SDimitry Andric 
163e8d8bef9SDimitry Andric     NodeT &operator*() const { return *N; }
164e8d8bef9SDimitry Andric     DerivedT &operator++() {
165e8d8bef9SDimitry Andric       N = N->getNextSibling();
166e8d8bef9SDimitry Andric       return *static_cast<DerivedT *>(this);
167480093f4SDimitry Andric     }
168480093f4SDimitry Andric 
169e8d8bef9SDimitry Andric     /// Truthy if valid (not past-the-end).
170e8d8bef9SDimitry Andric     /// This allows: if (auto It = find_if(N.children(), ...) )
171e8d8bef9SDimitry Andric     explicit operator bool() const { return N != nullptr; }
172e8d8bef9SDimitry Andric     /// The element, or nullptr if past-the-end.
asPointer()173e8d8bef9SDimitry Andric     NodeT *asPointer() const { return N; }
174e8d8bef9SDimitry Andric   };
175e8d8bef9SDimitry Andric 
176e8d8bef9SDimitry Andric public:
177e8d8bef9SDimitry Andric   static bool classof(const Node *N);
178e8d8bef9SDimitry Andric 
getFirstChild()179e8d8bef9SDimitry Andric   Node *getFirstChild() { return FirstChild; }
getFirstChild()180e8d8bef9SDimitry Andric   const Node *getFirstChild() const { return FirstChild; }
getLastChild()181e8d8bef9SDimitry Andric   Node *getLastChild() { return LastChild; }
getLastChild()182e8d8bef9SDimitry Andric   const Node *getLastChild() const { return LastChild; }
183e8d8bef9SDimitry Andric 
184e8d8bef9SDimitry Andric   const Leaf *findFirstLeaf() const;
findFirstLeaf()185e8d8bef9SDimitry Andric   Leaf *findFirstLeaf() {
186e8d8bef9SDimitry Andric     return const_cast<Leaf *>(const_cast<const Tree *>(this)->findFirstLeaf());
187e8d8bef9SDimitry Andric   }
188e8d8bef9SDimitry Andric 
189e8d8bef9SDimitry Andric   const Leaf *findLastLeaf() const;
findLastLeaf()190e8d8bef9SDimitry Andric   Leaf *findLastLeaf() {
191e8d8bef9SDimitry Andric     return const_cast<Leaf *>(const_cast<const Tree *>(this)->findLastLeaf());
192e8d8bef9SDimitry Andric   }
193e8d8bef9SDimitry Andric 
194e8d8bef9SDimitry Andric   /// child_iterator is not invalidated by mutations.
195e8d8bef9SDimitry Andric   struct ChildIterator : ChildIteratorBase<ChildIterator, Node> {
196e8d8bef9SDimitry Andric     using Base::ChildIteratorBase;
197e8d8bef9SDimitry Andric   };
198e8d8bef9SDimitry Andric   struct ConstChildIterator
199e8d8bef9SDimitry Andric       : ChildIteratorBase<ConstChildIterator, const Node> {
200e8d8bef9SDimitry Andric     using Base::ChildIteratorBase;
201e8d8bef9SDimitry Andric     ConstChildIterator() = default;
ConstChildIteratorConstChildIterator202e8d8bef9SDimitry Andric     ConstChildIterator(const ChildIterator &I) : Base(I.asPointer()) {}
203e8d8bef9SDimitry Andric   };
204e8d8bef9SDimitry Andric 
getChildren()205e8d8bef9SDimitry Andric   llvm::iterator_range<ChildIterator> getChildren() {
206e8d8bef9SDimitry Andric     return {ChildIterator(getFirstChild()), ChildIterator()};
207e8d8bef9SDimitry Andric   }
getChildren()208e8d8bef9SDimitry Andric   llvm::iterator_range<ConstChildIterator> getChildren() const {
209e8d8bef9SDimitry Andric     return {ConstChildIterator(getFirstChild()), ConstChildIterator()};
210e8d8bef9SDimitry Andric   }
211e8d8bef9SDimitry Andric 
212e8d8bef9SDimitry Andric   /// Find the first node with a corresponding role.
213e8d8bef9SDimitry Andric   const Node *findChild(NodeRole R) const;
findChild(NodeRole R)214e8d8bef9SDimitry Andric   Node *findChild(NodeRole R) {
215e8d8bef9SDimitry Andric     return const_cast<Node *>(const_cast<const Tree *>(this)->findChild(R));
216e8d8bef9SDimitry Andric   }
217480093f4SDimitry Andric 
2180b57cec5SDimitry Andric protected:
219e8d8bef9SDimitry Andric   using Node::Node;
2200b57cec5SDimitry Andric 
2210b57cec5SDimitry Andric private:
222e8d8bef9SDimitry Andric   /// Append \p Child to the list of children and sets the parent pointer.
2230b57cec5SDimitry Andric   /// A very low-level operation that does not check any invariants, only used
224480093f4SDimitry Andric   /// by TreeBuilder and FactoryImpl.
2255ffd83dbSDimitry Andric   /// EXPECTS: Role != Detached.
226e8d8bef9SDimitry Andric   void appendChildLowLevel(Node *Child, NodeRole Role);
227e8d8bef9SDimitry Andric   /// Similar but prepends.
2280b57cec5SDimitry Andric   void prependChildLowLevel(Node *Child, NodeRole Role);
229e8d8bef9SDimitry Andric 
230e8d8bef9SDimitry Andric   /// Like the previous overloads, but does not set role for \p Child.
2315ffd83dbSDimitry Andric   /// EXPECTS: Child->Role != Detached
232e8d8bef9SDimitry Andric   void appendChildLowLevel(Node *Child);
2335ffd83dbSDimitry Andric   void prependChildLowLevel(Node *Child);
2340b57cec5SDimitry Andric   friend class TreeBuilder;
235480093f4SDimitry Andric   friend class FactoryImpl;
236480093f4SDimitry Andric 
237e8d8bef9SDimitry Andric   /// Replace a range of children [Begin, End) with a list of
238480093f4SDimitry Andric   /// new nodes starting at \p New.
239480093f4SDimitry Andric   /// Only used by MutationsImpl to implement higher-level mutation operations.
240480093f4SDimitry Andric   /// (!) \p New can be null to model removal of the child range.
241e8d8bef9SDimitry Andric   /// (!) \p End can be null to model one past the end.
242e8d8bef9SDimitry Andric   /// (!) \p Begin can be null to model an append.
243e8d8bef9SDimitry Andric   void replaceChildRangeLowLevel(Node *Begin, Node *End, Node *New);
244480093f4SDimitry Andric   friend class MutationsImpl;
2450b57cec5SDimitry Andric 
2460b57cec5SDimitry Andric   Node *FirstChild = nullptr;
247e8d8bef9SDimitry Andric   Node *LastChild = nullptr;
248e8d8bef9SDimitry Andric };
249e8d8bef9SDimitry Andric 
250e8d8bef9SDimitry Andric /// A list of Elements separated or terminated by a fixed token.
251e8d8bef9SDimitry Andric ///
252e8d8bef9SDimitry Andric /// This type models the following grammar construct:
253e8d8bef9SDimitry Andric /// delimited-list(element, delimiter, termination, canBeEmpty)
254e8d8bef9SDimitry Andric class List : public Tree {
255e8d8bef9SDimitry Andric public:
256e8d8bef9SDimitry Andric   template <typename Element> struct ElementAndDelimiter {
257e8d8bef9SDimitry Andric     Element *element;
258e8d8bef9SDimitry Andric     Leaf *delimiter;
259e8d8bef9SDimitry Andric   };
260e8d8bef9SDimitry Andric 
261e8d8bef9SDimitry Andric   enum class TerminationKind {
262e8d8bef9SDimitry Andric     Terminated,
263e8d8bef9SDimitry Andric     MaybeTerminated,
264e8d8bef9SDimitry Andric     Separated,
265e8d8bef9SDimitry Andric   };
266e8d8bef9SDimitry Andric 
267e8d8bef9SDimitry Andric   using Tree::Tree;
268e8d8bef9SDimitry Andric   static bool classof(const Node *N);
269e8d8bef9SDimitry Andric   /// Returns the elements and corresponding delimiters. Missing elements
270e8d8bef9SDimitry Andric   /// and delimiters are represented as null pointers.
271e8d8bef9SDimitry Andric   ///
272e8d8bef9SDimitry Andric   /// For example, in a separated list:
273e8d8bef9SDimitry Andric   /// "a, b, c"  <=> [("a" , ","), ("b" , "," ), ("c" , null)]
274e8d8bef9SDimitry Andric   /// "a,  , c"  <=> [("a" , ","), (null, "," ), ("c" , null)]
275e8d8bef9SDimitry Andric   /// "a, b  c"  <=> [("a" , ","), ("b" , null), ("c" , null)]
276e8d8bef9SDimitry Andric   /// "a, b,"    <=> [("a" , ","), ("b" , "," ), (null, null)]
277e8d8bef9SDimitry Andric   ///
278e8d8bef9SDimitry Andric   /// In a terminated or maybe-terminated list:
279e8d8bef9SDimitry Andric   /// "a; b; c;" <=> [("a" , ";"), ("b" , ";" ), ("c" , ";" )]
280e8d8bef9SDimitry Andric   /// "a;  ; c;" <=> [("a" , ";"), (null, ";" ), ("c" , ";" )]
281e8d8bef9SDimitry Andric   /// "a; b  c;" <=> [("a" , ";"), ("b" , null), ("c" , ";" )]
282e8d8bef9SDimitry Andric   /// "a; b; c"  <=> [("a" , ";"), ("b" , ";" ), ("c" , null)]
283e8d8bef9SDimitry Andric   std::vector<ElementAndDelimiter<Node>> getElementsAsNodesAndDelimiters();
284e8d8bef9SDimitry Andric 
285e8d8bef9SDimitry Andric   /// Returns the elements of the list. Missing elements are represented
286e8d8bef9SDimitry Andric   /// as null pointers in the same way as in the return value of
287e8d8bef9SDimitry Andric   /// `getElementsAsNodesAndDelimiters()`.
288e8d8bef9SDimitry Andric   std::vector<Node *> getElementsAsNodes();
289e8d8bef9SDimitry Andric 
290e8d8bef9SDimitry Andric   // These can't be implemented with the information we have!
291e8d8bef9SDimitry Andric 
292e8d8bef9SDimitry Andric   /// Returns the appropriate delimiter for this list.
293e8d8bef9SDimitry Andric   ///
294e8d8bef9SDimitry Andric   /// Useful for discovering the correct delimiter to use when adding
295e8d8bef9SDimitry Andric   /// elements to empty or one-element lists.
296e8d8bef9SDimitry Andric   clang::tok::TokenKind getDelimiterTokenKind() const;
297e8d8bef9SDimitry Andric 
298e8d8bef9SDimitry Andric   TerminationKind getTerminationKind() const;
299e8d8bef9SDimitry Andric 
300e8d8bef9SDimitry Andric   /// Whether this list can be empty in syntactically and semantically correct
301e8d8bef9SDimitry Andric   /// code.
302e8d8bef9SDimitry Andric   ///
303e8d8bef9SDimitry Andric   /// This list may be empty when the source code has errors even if
304e8d8bef9SDimitry Andric   /// canBeEmpty() returns false.
305e8d8bef9SDimitry Andric   bool canBeEmpty() const;
3060b57cec5SDimitry Andric };
3070b57cec5SDimitry Andric 
3080b57cec5SDimitry Andric } // namespace syntax
3090b57cec5SDimitry Andric } // namespace clang
3100b57cec5SDimitry Andric 
3110b57cec5SDimitry Andric #endif
312