1 //===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // Defines the basic structure of the syntax tree. There are two kinds of nodes: 9 // - leaf nodes correspond to tokens, 10 // - tree nodes correspond to language grammar constructs. 11 // 12 // The tree is initially built from an AST. Each node of a newly built tree 13 // covers a continous subrange of expanded tokens (i.e. tokens after 14 // preprocessing), the specific tokens coverered are stored in the leaf nodes of 15 // a tree. A post-order traversal of a tree will visit leaf nodes in an order 16 // corresponding the original order of expanded tokens. 17 // 18 // This is still work in progress and highly experimental, we leave room for 19 // ourselves to completely change the design and/or implementation. 20 //===----------------------------------------------------------------------===// 21 #ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_H 22 #define LLVM_CLANG_TOOLING_SYNTAX_TREE_H 23 24 #include "clang/Basic/TokenKinds.h" 25 #include "clang/Tooling/Syntax/TokenManager.h" 26 #include "llvm/ADT/iterator.h" 27 #include "llvm/Support/Allocator.h" 28 #include <cstdint> 29 #include <vector> 30 31 namespace clang { 32 namespace syntax { 33 34 /// A memory arena for syntax trees. 35 // FIXME: use BumpPtrAllocator directly. 36 class Arena { 37 public: 38 llvm::BumpPtrAllocator &getAllocator() { return Allocator; } 39 private: 40 /// Keeps all the allocated nodes and their intermediate data structures. 41 llvm::BumpPtrAllocator Allocator; 42 }; 43 44 class Tree; 45 class TreeBuilder; 46 class FactoryImpl; 47 class MutationsImpl; 48 49 enum class NodeKind : uint16_t; 50 enum class NodeRole : uint8_t; 51 52 /// A node in a syntax tree. Each node is either a Leaf (representing tokens) or 53 /// a Tree (representing language constructrs). 54 class Node { 55 protected: 56 /// Newly created nodes are detached from a tree, parent and sibling links are 57 /// set when the node is added as a child to another one. 58 Node(NodeKind Kind); 59 /// Nodes are allocated on Arenas; the destructor is never called. 60 ~Node() = default; 61 62 public: 63 /// Nodes cannot simply be copied without violating tree invariants. 64 Node(const Node &) = delete; 65 Node &operator=(const Node &) = delete; 66 /// Idiomatically, nodes are allocated on an Arena and never moved. 67 Node(Node &&) = delete; 68 Node &operator=(Node &&) = delete; 69 70 NodeKind getKind() const { return static_cast<NodeKind>(Kind); } 71 NodeRole getRole() const { return static_cast<NodeRole>(Role); } 72 73 /// Whether the node is detached from a tree, i.e. does not have a parent. 74 bool isDetached() const; 75 /// Whether the node was created from the AST backed by the source code 76 /// rather than added later through mutation APIs or created with factory 77 /// functions. 78 /// When this flag is true, all subtrees are also original. 79 /// This flag is set to false on any modifications to the node or any of its 80 /// subtrees, even if this simply involves swapping existing subtrees. 81 bool isOriginal() const { return Original; } 82 /// If this function return false, the tree cannot be modified because there 83 /// is no reasonable way to produce the corresponding textual replacements. 84 /// This can happen when the node crosses macro expansion boundaries. 85 /// 86 /// Note that even if the node is not modifiable, its child nodes can be 87 /// modifiable. 88 bool canModify() const { return CanModify; } 89 90 const Tree *getParent() const { return Parent; } 91 Tree *getParent() { return Parent; } 92 93 const Node *getNextSibling() const { return NextSibling; } 94 Node *getNextSibling() { return NextSibling; } 95 const Node *getPreviousSibling() const { return PreviousSibling; } 96 Node *getPreviousSibling() { return PreviousSibling; } 97 98 /// Dumps the structure of a subtree. For debugging and testing purposes. 99 std::string dump(const TokenManager &SM) const; 100 /// Dumps the tokens forming this subtree. 101 std::string dumpTokens(const TokenManager &SM) const; 102 103 /// Asserts invariants on this node of the tree and its immediate children. 104 /// Will not recurse into the subtree. No-op if NDEBUG is set. 105 void assertInvariants() const; 106 /// Runs checkInvariants on all nodes in the subtree. No-op if NDEBUG is set. 107 void assertInvariantsRecursive() const; 108 109 private: 110 // Tree is allowed to change the Parent link and Role. 111 friend class Tree; 112 // TreeBuilder is allowed to set the Original and CanModify flags. 113 friend class TreeBuilder; 114 // MutationsImpl sets roles and CanModify flag. 115 friend class MutationsImpl; 116 // FactoryImpl sets CanModify flag. 117 friend class FactoryImpl; 118 119 void setRole(NodeRole NR); 120 121 Tree *Parent; 122 Node *NextSibling; 123 Node *PreviousSibling; 124 unsigned Kind : 16; 125 unsigned Role : 8; 126 unsigned Original : 1; 127 unsigned CanModify : 1; 128 }; 129 130 /// A leaf node points to a single token. 131 // FIXME: add TokenKind field (borrow some bits from the Node::kind). 132 class Leaf final : public Node { 133 public: 134 Leaf(TokenManager::Key K); 135 static bool classof(const Node *N); 136 137 TokenManager::Key getTokenKey() const { return K; } 138 139 private: 140 TokenManager::Key K; 141 }; 142 143 /// A node that has children and represents a syntactic language construct. 144 class Tree : public Node { 145 /// Iterator over children (common base for const/non-const). 146 /// Not invalidated by tree mutations (holds a stable node pointer). 147 template <typename DerivedT, typename NodeT> 148 class ChildIteratorBase 149 : public llvm::iterator_facade_base<DerivedT, std::forward_iterator_tag, 150 NodeT> { 151 protected: 152 NodeT *N = nullptr; 153 using Base = ChildIteratorBase; 154 155 public: 156 ChildIteratorBase() = default; 157 explicit ChildIteratorBase(NodeT *N) : N(N) {} 158 159 friend bool operator==(const DerivedT &LHS, const DerivedT &RHS) { 160 return LHS.N == RHS.N; 161 } 162 163 NodeT &operator*() const { return *N; } 164 DerivedT &operator++() { 165 N = N->getNextSibling(); 166 return *static_cast<DerivedT *>(this); 167 } 168 169 /// Truthy if valid (not past-the-end). 170 /// This allows: if (auto It = find_if(N.children(), ...) ) 171 explicit operator bool() const { return N != nullptr; } 172 /// The element, or nullptr if past-the-end. 173 NodeT *asPointer() const { return N; } 174 }; 175 176 public: 177 static bool classof(const Node *N); 178 179 Node *getFirstChild() { return FirstChild; } 180 const Node *getFirstChild() const { return FirstChild; } 181 Node *getLastChild() { return LastChild; } 182 const Node *getLastChild() const { return LastChild; } 183 184 const Leaf *findFirstLeaf() const; 185 Leaf *findFirstLeaf() { 186 return const_cast<Leaf *>(const_cast<const Tree *>(this)->findFirstLeaf()); 187 } 188 189 const Leaf *findLastLeaf() const; 190 Leaf *findLastLeaf() { 191 return const_cast<Leaf *>(const_cast<const Tree *>(this)->findLastLeaf()); 192 } 193 194 /// child_iterator is not invalidated by mutations. 195 struct ChildIterator : ChildIteratorBase<ChildIterator, Node> { 196 using Base::ChildIteratorBase; 197 }; 198 struct ConstChildIterator 199 : ChildIteratorBase<ConstChildIterator, const Node> { 200 using Base::ChildIteratorBase; 201 ConstChildIterator() = default; 202 ConstChildIterator(const ChildIterator &I) : Base(I.asPointer()) {} 203 }; 204 205 llvm::iterator_range<ChildIterator> getChildren() { 206 return {ChildIterator(getFirstChild()), ChildIterator()}; 207 } 208 llvm::iterator_range<ConstChildIterator> getChildren() const { 209 return {ConstChildIterator(getFirstChild()), ConstChildIterator()}; 210 } 211 212 /// Find the first node with a corresponding role. 213 const Node *findChild(NodeRole R) const; 214 Node *findChild(NodeRole R) { 215 return const_cast<Node *>(const_cast<const Tree *>(this)->findChild(R)); 216 } 217 218 protected: 219 using Node::Node; 220 221 private: 222 /// Append \p Child to the list of children and sets the parent pointer. 223 /// A very low-level operation that does not check any invariants, only used 224 /// by TreeBuilder and FactoryImpl. 225 /// EXPECTS: Role != Detached. 226 void appendChildLowLevel(Node *Child, NodeRole Role); 227 /// Similar but prepends. 228 void prependChildLowLevel(Node *Child, NodeRole Role); 229 230 /// Like the previous overloads, but does not set role for \p Child. 231 /// EXPECTS: Child->Role != Detached 232 void appendChildLowLevel(Node *Child); 233 void prependChildLowLevel(Node *Child); 234 friend class TreeBuilder; 235 friend class FactoryImpl; 236 237 /// Replace a range of children [Begin, End) with a list of 238 /// new nodes starting at \p New. 239 /// Only used by MutationsImpl to implement higher-level mutation operations. 240 /// (!) \p New can be null to model removal of the child range. 241 /// (!) \p End can be null to model one past the end. 242 /// (!) \p Begin can be null to model an append. 243 void replaceChildRangeLowLevel(Node *Begin, Node *End, Node *New); 244 friend class MutationsImpl; 245 246 Node *FirstChild = nullptr; 247 Node *LastChild = nullptr; 248 }; 249 250 /// A list of Elements separated or terminated by a fixed token. 251 /// 252 /// This type models the following grammar construct: 253 /// delimited-list(element, delimiter, termination, canBeEmpty) 254 class List : public Tree { 255 public: 256 template <typename Element> struct ElementAndDelimiter { 257 Element *element; 258 Leaf *delimiter; 259 }; 260 261 enum class TerminationKind { 262 Terminated, 263 MaybeTerminated, 264 Separated, 265 }; 266 267 using Tree::Tree; 268 static bool classof(const Node *N); 269 /// Returns the elements and corresponding delimiters. Missing elements 270 /// and delimiters are represented as null pointers. 271 /// 272 /// For example, in a separated list: 273 /// "a, b, c" <=> [("a" , ","), ("b" , "," ), ("c" , null)] 274 /// "a, , c" <=> [("a" , ","), (null, "," ), ("c" , null)] 275 /// "a, b c" <=> [("a" , ","), ("b" , null), ("c" , null)] 276 /// "a, b," <=> [("a" , ","), ("b" , "," ), (null, null)] 277 /// 278 /// In a terminated or maybe-terminated list: 279 /// "a; b; c;" <=> [("a" , ";"), ("b" , ";" ), ("c" , ";" )] 280 /// "a; ; c;" <=> [("a" , ";"), (null, ";" ), ("c" , ";" )] 281 /// "a; b c;" <=> [("a" , ";"), ("b" , null), ("c" , ";" )] 282 /// "a; b; c" <=> [("a" , ";"), ("b" , ";" ), ("c" , null)] 283 std::vector<ElementAndDelimiter<Node>> getElementsAsNodesAndDelimiters(); 284 285 /// Returns the elements of the list. Missing elements are represented 286 /// as null pointers in the same way as in the return value of 287 /// `getElementsAsNodesAndDelimiters()`. 288 std::vector<Node *> getElementsAsNodes(); 289 290 // These can't be implemented with the information we have! 291 292 /// Returns the appropriate delimiter for this list. 293 /// 294 /// Useful for discovering the correct delimiter to use when adding 295 /// elements to empty or one-element lists. 296 clang::tok::TokenKind getDelimiterTokenKind() const; 297 298 TerminationKind getTerminationKind() const; 299 300 /// Whether this list can be empty in syntactically and semantically correct 301 /// code. 302 /// 303 /// This list may be empty when the source code has errors even if 304 /// canBeEmpty() returns false. 305 bool canBeEmpty() const; 306 }; 307 308 } // namespace syntax 309 } // namespace clang 310 311 #endif 312