10b57cec5SDimitry Andric //===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // Defines the basic structure of the syntax tree. There are two kinds of nodes: 9fcaf7f86SDimitry Andric // - leaf nodes correspond to tokens, 100b57cec5SDimitry Andric // - tree nodes correspond to language grammar constructs. 110b57cec5SDimitry Andric // 120b57cec5SDimitry Andric // The tree is initially built from an AST. Each node of a newly built tree 13*bdd1243dSDimitry Andric // covers a continuous subrange of expanded tokens (i.e. tokens after 140b57cec5SDimitry Andric // preprocessing), the specific tokens coverered are stored in the leaf nodes of 150b57cec5SDimitry Andric // a tree. A post-order traversal of a tree will visit leaf nodes in an order 160b57cec5SDimitry Andric // corresponding the original order of expanded tokens. 170b57cec5SDimitry Andric // 180b57cec5SDimitry Andric // This is still work in progress and highly experimental, we leave room for 190b57cec5SDimitry Andric // ourselves to completely change the design and/or implementation. 200b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 2104eeddc0SDimitry Andric #ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_H 2204eeddc0SDimitry Andric #define LLVM_CLANG_TOOLING_SYNTAX_TREE_H 230b57cec5SDimitry Andric 240b57cec5SDimitry Andric #include "clang/Basic/TokenKinds.h" 25fcaf7f86SDimitry Andric #include "clang/Tooling/Syntax/TokenManager.h" 26e8d8bef9SDimitry Andric #include "llvm/ADT/iterator.h" 270b57cec5SDimitry Andric #include "llvm/Support/Allocator.h" 280b57cec5SDimitry Andric #include <cstdint> 29fcaf7f86SDimitry Andric #include <vector> 300b57cec5SDimitry Andric 310b57cec5SDimitry Andric namespace clang { 320b57cec5SDimitry Andric namespace syntax { 330b57cec5SDimitry Andric 34fcaf7f86SDimitry Andric /// A memory arena for syntax trees. 35fcaf7f86SDimitry Andric // FIXME: use BumpPtrAllocator directly. 360b57cec5SDimitry Andric class Arena { 370b57cec5SDimitry Andric public: getAllocator()38e8d8bef9SDimitry Andric llvm::BumpPtrAllocator &getAllocator() { return Allocator; } 39e8d8bef9SDimitry Andric private: 400b57cec5SDimitry Andric /// Keeps all the allocated nodes and their intermediate data structures. 410b57cec5SDimitry Andric llvm::BumpPtrAllocator Allocator; 420b57cec5SDimitry Andric }; 430b57cec5SDimitry Andric 440b57cec5SDimitry Andric class Tree; 450b57cec5SDimitry Andric class TreeBuilder; 46480093f4SDimitry Andric class FactoryImpl; 47480093f4SDimitry Andric class MutationsImpl; 48480093f4SDimitry Andric 490b57cec5SDimitry Andric enum class NodeKind : uint16_t; 500b57cec5SDimitry Andric enum class NodeRole : uint8_t; 510b57cec5SDimitry Andric 520b57cec5SDimitry Andric /// A node in a syntax tree. Each node is either a Leaf (representing tokens) or 530b57cec5SDimitry Andric /// a Tree (representing language constructrs). 540b57cec5SDimitry Andric class Node { 55e8d8bef9SDimitry Andric protected: 560b57cec5SDimitry Andric /// Newly created nodes are detached from a tree, parent and sibling links are 570b57cec5SDimitry Andric /// set when the node is added as a child to another one. 580b57cec5SDimitry Andric Node(NodeKind Kind); 59e8d8bef9SDimitry Andric /// Nodes are allocated on Arenas; the destructor is never called. 60e8d8bef9SDimitry Andric ~Node() = default; 610b57cec5SDimitry Andric 62e8d8bef9SDimitry Andric public: 63e8d8bef9SDimitry Andric /// Nodes cannot simply be copied without violating tree invariants. 64e8d8bef9SDimitry Andric Node(const Node &) = delete; 65e8d8bef9SDimitry Andric Node &operator=(const Node &) = delete; 66e8d8bef9SDimitry Andric /// Idiomatically, nodes are allocated on an Arena and never moved. 67e8d8bef9SDimitry Andric Node(Node &&) = delete; 68e8d8bef9SDimitry Andric Node &operator=(Node &&) = delete; 69e8d8bef9SDimitry Andric getKind()70e8d8bef9SDimitry Andric NodeKind getKind() const { return static_cast<NodeKind>(Kind); } getRole()71e8d8bef9SDimitry Andric NodeRole getRole() const { return static_cast<NodeRole>(Role); } 720b57cec5SDimitry Andric 73480093f4SDimitry Andric /// Whether the node is detached from a tree, i.e. does not have a parent. 74480093f4SDimitry Andric bool isDetached() const; 75480093f4SDimitry Andric /// Whether the node was created from the AST backed by the source code 76480093f4SDimitry Andric /// rather than added later through mutation APIs or created with factory 77480093f4SDimitry Andric /// functions. 78480093f4SDimitry Andric /// When this flag is true, all subtrees are also original. 79480093f4SDimitry Andric /// This flag is set to false on any modifications to the node or any of its 80480093f4SDimitry Andric /// subtrees, even if this simply involves swapping existing subtrees. isOriginal()81480093f4SDimitry Andric bool isOriginal() const { return Original; } 82480093f4SDimitry Andric /// If this function return false, the tree cannot be modified because there 83480093f4SDimitry Andric /// is no reasonable way to produce the corresponding textual replacements. 84480093f4SDimitry Andric /// This can happen when the node crosses macro expansion boundaries. 85480093f4SDimitry Andric /// 86480093f4SDimitry Andric /// Note that even if the node is not modifiable, its child nodes can be 87480093f4SDimitry Andric /// modifiable. canModify()88480093f4SDimitry Andric bool canModify() const { return CanModify; } 89480093f4SDimitry Andric getParent()90e8d8bef9SDimitry Andric const Tree *getParent() const { return Parent; } getParent()91e8d8bef9SDimitry Andric Tree *getParent() { return Parent; } 920b57cec5SDimitry Andric getNextSibling()93e8d8bef9SDimitry Andric const Node *getNextSibling() const { return NextSibling; } getNextSibling()94e8d8bef9SDimitry Andric Node *getNextSibling() { return NextSibling; } getPreviousSibling()95e8d8bef9SDimitry Andric const Node *getPreviousSibling() const { return PreviousSibling; } getPreviousSibling()96e8d8bef9SDimitry Andric Node *getPreviousSibling() { return PreviousSibling; } 970b57cec5SDimitry Andric 980b57cec5SDimitry Andric /// Dumps the structure of a subtree. For debugging and testing purposes. 99fcaf7f86SDimitry Andric std::string dump(const TokenManager &SM) const; 1000b57cec5SDimitry Andric /// Dumps the tokens forming this subtree. 101fcaf7f86SDimitry Andric std::string dumpTokens(const TokenManager &SM) const; 1020b57cec5SDimitry Andric 103480093f4SDimitry Andric /// Asserts invariants on this node of the tree and its immediate children. 104480093f4SDimitry Andric /// Will not recurse into the subtree. No-op if NDEBUG is set. 105480093f4SDimitry Andric void assertInvariants() const; 106480093f4SDimitry Andric /// Runs checkInvariants on all nodes in the subtree. No-op if NDEBUG is set. 107480093f4SDimitry Andric void assertInvariantsRecursive() const; 108480093f4SDimitry Andric 1090b57cec5SDimitry Andric private: 1100b57cec5SDimitry Andric // Tree is allowed to change the Parent link and Role. 1110b57cec5SDimitry Andric friend class Tree; 112480093f4SDimitry Andric // TreeBuilder is allowed to set the Original and CanModify flags. 113480093f4SDimitry Andric friend class TreeBuilder; 114480093f4SDimitry Andric // MutationsImpl sets roles and CanModify flag. 115480093f4SDimitry Andric friend class MutationsImpl; 116480093f4SDimitry Andric // FactoryImpl sets CanModify flag. 117480093f4SDimitry Andric friend class FactoryImpl; 1180b57cec5SDimitry Andric 1195ffd83dbSDimitry Andric void setRole(NodeRole NR); 1205ffd83dbSDimitry Andric 1210b57cec5SDimitry Andric Tree *Parent; 1220b57cec5SDimitry Andric Node *NextSibling; 123e8d8bef9SDimitry Andric Node *PreviousSibling; 1240b57cec5SDimitry Andric unsigned Kind : 16; 1250b57cec5SDimitry Andric unsigned Role : 8; 126480093f4SDimitry Andric unsigned Original : 1; 127480093f4SDimitry Andric unsigned CanModify : 1; 1280b57cec5SDimitry Andric }; 1290b57cec5SDimitry Andric 130fcaf7f86SDimitry Andric /// A leaf node points to a single token. 131fcaf7f86SDimitry Andric // FIXME: add TokenKind field (borrow some bits from the Node::kind). 1320b57cec5SDimitry Andric class Leaf final : public Node { 1330b57cec5SDimitry Andric public: 134fcaf7f86SDimitry Andric Leaf(TokenManager::Key K); 1350b57cec5SDimitry Andric static bool classof(const Node *N); 1360b57cec5SDimitry Andric getTokenKey()137fcaf7f86SDimitry Andric TokenManager::Key getTokenKey() const { return K; } 1380b57cec5SDimitry Andric 1390b57cec5SDimitry Andric private: 140fcaf7f86SDimitry Andric TokenManager::Key K; 1410b57cec5SDimitry Andric }; 1420b57cec5SDimitry Andric 1430b57cec5SDimitry Andric /// A node that has children and represents a syntactic language construct. 1440b57cec5SDimitry Andric class Tree : public Node { 145e8d8bef9SDimitry Andric /// Iterator over children (common base for const/non-const). 146e8d8bef9SDimitry Andric /// Not invalidated by tree mutations (holds a stable node pointer). 147e8d8bef9SDimitry Andric template <typename DerivedT, typename NodeT> 148e8d8bef9SDimitry Andric class ChildIteratorBase 149e8d8bef9SDimitry Andric : public llvm::iterator_facade_base<DerivedT, std::forward_iterator_tag, 150e8d8bef9SDimitry Andric NodeT> { 151e8d8bef9SDimitry Andric protected: 152e8d8bef9SDimitry Andric NodeT *N = nullptr; 153e8d8bef9SDimitry Andric using Base = ChildIteratorBase; 154e8d8bef9SDimitry Andric 1550b57cec5SDimitry Andric public: 156e8d8bef9SDimitry Andric ChildIteratorBase() = default; ChildIteratorBase(NodeT * N)157e8d8bef9SDimitry Andric explicit ChildIteratorBase(NodeT *N) : N(N) {} 1580b57cec5SDimitry Andric 15904eeddc0SDimitry Andric friend bool operator==(const DerivedT &LHS, const DerivedT &RHS) { 16004eeddc0SDimitry Andric return LHS.N == RHS.N; 16104eeddc0SDimitry Andric } 16204eeddc0SDimitry Andric 163e8d8bef9SDimitry Andric NodeT &operator*() const { return *N; } 164e8d8bef9SDimitry Andric DerivedT &operator++() { 165e8d8bef9SDimitry Andric N = N->getNextSibling(); 166e8d8bef9SDimitry Andric return *static_cast<DerivedT *>(this); 167480093f4SDimitry Andric } 168480093f4SDimitry Andric 169e8d8bef9SDimitry Andric /// Truthy if valid (not past-the-end). 170e8d8bef9SDimitry Andric /// This allows: if (auto It = find_if(N.children(), ...) ) 171e8d8bef9SDimitry Andric explicit operator bool() const { return N != nullptr; } 172e8d8bef9SDimitry Andric /// The element, or nullptr if past-the-end. asPointer()173e8d8bef9SDimitry Andric NodeT *asPointer() const { return N; } 174e8d8bef9SDimitry Andric }; 175e8d8bef9SDimitry Andric 176e8d8bef9SDimitry Andric public: 177e8d8bef9SDimitry Andric static bool classof(const Node *N); 178e8d8bef9SDimitry Andric getFirstChild()179e8d8bef9SDimitry Andric Node *getFirstChild() { return FirstChild; } getFirstChild()180e8d8bef9SDimitry Andric const Node *getFirstChild() const { return FirstChild; } getLastChild()181e8d8bef9SDimitry Andric Node *getLastChild() { return LastChild; } getLastChild()182e8d8bef9SDimitry Andric const Node *getLastChild() const { return LastChild; } 183e8d8bef9SDimitry Andric 184e8d8bef9SDimitry Andric const Leaf *findFirstLeaf() const; findFirstLeaf()185e8d8bef9SDimitry Andric Leaf *findFirstLeaf() { 186e8d8bef9SDimitry Andric return const_cast<Leaf *>(const_cast<const Tree *>(this)->findFirstLeaf()); 187e8d8bef9SDimitry Andric } 188e8d8bef9SDimitry Andric 189e8d8bef9SDimitry Andric const Leaf *findLastLeaf() const; findLastLeaf()190e8d8bef9SDimitry Andric Leaf *findLastLeaf() { 191e8d8bef9SDimitry Andric return const_cast<Leaf *>(const_cast<const Tree *>(this)->findLastLeaf()); 192e8d8bef9SDimitry Andric } 193e8d8bef9SDimitry Andric 194e8d8bef9SDimitry Andric /// child_iterator is not invalidated by mutations. 195e8d8bef9SDimitry Andric struct ChildIterator : ChildIteratorBase<ChildIterator, Node> { 196e8d8bef9SDimitry Andric using Base::ChildIteratorBase; 197e8d8bef9SDimitry Andric }; 198e8d8bef9SDimitry Andric struct ConstChildIterator 199e8d8bef9SDimitry Andric : ChildIteratorBase<ConstChildIterator, const Node> { 200e8d8bef9SDimitry Andric using Base::ChildIteratorBase; 201e8d8bef9SDimitry Andric ConstChildIterator() = default; ConstChildIteratorConstChildIterator202e8d8bef9SDimitry Andric ConstChildIterator(const ChildIterator &I) : Base(I.asPointer()) {} 203e8d8bef9SDimitry Andric }; 204e8d8bef9SDimitry Andric getChildren()205e8d8bef9SDimitry Andric llvm::iterator_range<ChildIterator> getChildren() { 206e8d8bef9SDimitry Andric return {ChildIterator(getFirstChild()), ChildIterator()}; 207e8d8bef9SDimitry Andric } getChildren()208e8d8bef9SDimitry Andric llvm::iterator_range<ConstChildIterator> getChildren() const { 209e8d8bef9SDimitry Andric return {ConstChildIterator(getFirstChild()), ConstChildIterator()}; 210e8d8bef9SDimitry Andric } 211e8d8bef9SDimitry Andric 212e8d8bef9SDimitry Andric /// Find the first node with a corresponding role. 213e8d8bef9SDimitry Andric const Node *findChild(NodeRole R) const; findChild(NodeRole R)214e8d8bef9SDimitry Andric Node *findChild(NodeRole R) { 215e8d8bef9SDimitry Andric return const_cast<Node *>(const_cast<const Tree *>(this)->findChild(R)); 216e8d8bef9SDimitry Andric } 217480093f4SDimitry Andric 2180b57cec5SDimitry Andric protected: 219e8d8bef9SDimitry Andric using Node::Node; 2200b57cec5SDimitry Andric 2210b57cec5SDimitry Andric private: 222e8d8bef9SDimitry Andric /// Append \p Child to the list of children and sets the parent pointer. 2230b57cec5SDimitry Andric /// A very low-level operation that does not check any invariants, only used 224480093f4SDimitry Andric /// by TreeBuilder and FactoryImpl. 2255ffd83dbSDimitry Andric /// EXPECTS: Role != Detached. 226e8d8bef9SDimitry Andric void appendChildLowLevel(Node *Child, NodeRole Role); 227e8d8bef9SDimitry Andric /// Similar but prepends. 2280b57cec5SDimitry Andric void prependChildLowLevel(Node *Child, NodeRole Role); 229e8d8bef9SDimitry Andric 230e8d8bef9SDimitry Andric /// Like the previous overloads, but does not set role for \p Child. 2315ffd83dbSDimitry Andric /// EXPECTS: Child->Role != Detached 232e8d8bef9SDimitry Andric void appendChildLowLevel(Node *Child); 2335ffd83dbSDimitry Andric void prependChildLowLevel(Node *Child); 2340b57cec5SDimitry Andric friend class TreeBuilder; 235480093f4SDimitry Andric friend class FactoryImpl; 236480093f4SDimitry Andric 237e8d8bef9SDimitry Andric /// Replace a range of children [Begin, End) with a list of 238480093f4SDimitry Andric /// new nodes starting at \p New. 239480093f4SDimitry Andric /// Only used by MutationsImpl to implement higher-level mutation operations. 240480093f4SDimitry Andric /// (!) \p New can be null to model removal of the child range. 241e8d8bef9SDimitry Andric /// (!) \p End can be null to model one past the end. 242e8d8bef9SDimitry Andric /// (!) \p Begin can be null to model an append. 243e8d8bef9SDimitry Andric void replaceChildRangeLowLevel(Node *Begin, Node *End, Node *New); 244480093f4SDimitry Andric friend class MutationsImpl; 2450b57cec5SDimitry Andric 2460b57cec5SDimitry Andric Node *FirstChild = nullptr; 247e8d8bef9SDimitry Andric Node *LastChild = nullptr; 248e8d8bef9SDimitry Andric }; 249e8d8bef9SDimitry Andric 250e8d8bef9SDimitry Andric /// A list of Elements separated or terminated by a fixed token. 251e8d8bef9SDimitry Andric /// 252e8d8bef9SDimitry Andric /// This type models the following grammar construct: 253e8d8bef9SDimitry Andric /// delimited-list(element, delimiter, termination, canBeEmpty) 254e8d8bef9SDimitry Andric class List : public Tree { 255e8d8bef9SDimitry Andric public: 256e8d8bef9SDimitry Andric template <typename Element> struct ElementAndDelimiter { 257e8d8bef9SDimitry Andric Element *element; 258e8d8bef9SDimitry Andric Leaf *delimiter; 259e8d8bef9SDimitry Andric }; 260e8d8bef9SDimitry Andric 261e8d8bef9SDimitry Andric enum class TerminationKind { 262e8d8bef9SDimitry Andric Terminated, 263e8d8bef9SDimitry Andric MaybeTerminated, 264e8d8bef9SDimitry Andric Separated, 265e8d8bef9SDimitry Andric }; 266e8d8bef9SDimitry Andric 267e8d8bef9SDimitry Andric using Tree::Tree; 268e8d8bef9SDimitry Andric static bool classof(const Node *N); 269e8d8bef9SDimitry Andric /// Returns the elements and corresponding delimiters. Missing elements 270e8d8bef9SDimitry Andric /// and delimiters are represented as null pointers. 271e8d8bef9SDimitry Andric /// 272e8d8bef9SDimitry Andric /// For example, in a separated list: 273e8d8bef9SDimitry Andric /// "a, b, c" <=> [("a" , ","), ("b" , "," ), ("c" , null)] 274e8d8bef9SDimitry Andric /// "a, , c" <=> [("a" , ","), (null, "," ), ("c" , null)] 275e8d8bef9SDimitry Andric /// "a, b c" <=> [("a" , ","), ("b" , null), ("c" , null)] 276e8d8bef9SDimitry Andric /// "a, b," <=> [("a" , ","), ("b" , "," ), (null, null)] 277e8d8bef9SDimitry Andric /// 278e8d8bef9SDimitry Andric /// In a terminated or maybe-terminated list: 279e8d8bef9SDimitry Andric /// "a; b; c;" <=> [("a" , ";"), ("b" , ";" ), ("c" , ";" )] 280e8d8bef9SDimitry Andric /// "a; ; c;" <=> [("a" , ";"), (null, ";" ), ("c" , ";" )] 281e8d8bef9SDimitry Andric /// "a; b c;" <=> [("a" , ";"), ("b" , null), ("c" , ";" )] 282e8d8bef9SDimitry Andric /// "a; b; c" <=> [("a" , ";"), ("b" , ";" ), ("c" , null)] 283e8d8bef9SDimitry Andric std::vector<ElementAndDelimiter<Node>> getElementsAsNodesAndDelimiters(); 284e8d8bef9SDimitry Andric 285e8d8bef9SDimitry Andric /// Returns the elements of the list. Missing elements are represented 286e8d8bef9SDimitry Andric /// as null pointers in the same way as in the return value of 287e8d8bef9SDimitry Andric /// `getElementsAsNodesAndDelimiters()`. 288e8d8bef9SDimitry Andric std::vector<Node *> getElementsAsNodes(); 289e8d8bef9SDimitry Andric 290e8d8bef9SDimitry Andric // These can't be implemented with the information we have! 291e8d8bef9SDimitry Andric 292e8d8bef9SDimitry Andric /// Returns the appropriate delimiter for this list. 293e8d8bef9SDimitry Andric /// 294e8d8bef9SDimitry Andric /// Useful for discovering the correct delimiter to use when adding 295e8d8bef9SDimitry Andric /// elements to empty or one-element lists. 296e8d8bef9SDimitry Andric clang::tok::TokenKind getDelimiterTokenKind() const; 297e8d8bef9SDimitry Andric 298e8d8bef9SDimitry Andric TerminationKind getTerminationKind() const; 299e8d8bef9SDimitry Andric 300e8d8bef9SDimitry Andric /// Whether this list can be empty in syntactically and semantically correct 301e8d8bef9SDimitry Andric /// code. 302e8d8bef9SDimitry Andric /// 303e8d8bef9SDimitry Andric /// This list may be empty when the source code has errors even if 304e8d8bef9SDimitry Andric /// canBeEmpty() returns false. 305e8d8bef9SDimitry Andric bool canBeEmpty() const; 3060b57cec5SDimitry Andric }; 3070b57cec5SDimitry Andric 3080b57cec5SDimitry Andric } // namespace syntax 3090b57cec5SDimitry Andric } // namespace clang 3100b57cec5SDimitry Andric 3110b57cec5SDimitry Andric #endif 312