1 //===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // Defines the basic structure of the syntax tree. There are two kinds of nodes: 9 // - leaf nodes correspond to a token in the expanded token stream, 10 // - tree nodes correspond to language grammar constructs. 11 // 12 // The tree is initially built from an AST. Each node of a newly built tree 13 // covers a continous subrange of expanded tokens (i.e. tokens after 14 // preprocessing), the specific tokens coverered are stored in the leaf nodes of 15 // a tree. A post-order traversal of a tree will visit leaf nodes in an order 16 // corresponding the original order of expanded tokens. 17 // 18 // This is still work in progress and highly experimental, we leave room for 19 // ourselves to completely change the design and/or implementation. 20 //===----------------------------------------------------------------------===// 21 #ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H 22 #define LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H 23 24 #include "clang/Basic/LangOptions.h" 25 #include "clang/Basic/SourceLocation.h" 26 #include "clang/Basic/SourceManager.h" 27 #include "clang/Basic/TokenKinds.h" 28 #include "clang/Tooling/Syntax/Tokens.h" 29 #include "llvm/ADT/ArrayRef.h" 30 #include "llvm/ADT/DenseMap.h" 31 #include "llvm/Support/Allocator.h" 32 #include <cstdint> 33 34 namespace clang { 35 namespace syntax { 36 37 /// A memory arena for syntax trees. Also tracks the underlying token buffers, 38 /// source manager, etc. 39 class Arena { 40 public: 41 Arena(SourceManager &SourceMgr, const LangOptions &LangOpts, 42 TokenBuffer Tokens); 43 44 const SourceManager &sourceManager() const { return SourceMgr; } 45 const LangOptions &langOptions() const { return LangOpts; } 46 47 const TokenBuffer &tokenBuffer() const; 48 llvm::BumpPtrAllocator &allocator() { return Allocator; } 49 50 /// Add \p Buffer to the underlying source manager, tokenize it and store the 51 /// resulting tokens. Useful when there is a need to materialize tokens that 52 /// were not written in user code. 53 std::pair<FileID, llvm::ArrayRef<syntax::Token>> 54 lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer); 55 56 private: 57 SourceManager &SourceMgr; 58 const LangOptions &LangOpts; 59 TokenBuffer Tokens; 60 /// IDs and storage for additional tokenized files. 61 llvm::DenseMap<FileID, std::vector<syntax::Token>> ExtraTokens; 62 /// Keeps all the allocated nodes and their intermediate data structures. 63 llvm::BumpPtrAllocator Allocator; 64 }; 65 66 class Tree; 67 class TreeBuilder; 68 enum class NodeKind : uint16_t; 69 enum class NodeRole : uint8_t; 70 71 /// A node in a syntax tree. Each node is either a Leaf (representing tokens) or 72 /// a Tree (representing language constructrs). 73 class Node { 74 public: 75 /// Newly created nodes are detached from a tree, parent and sibling links are 76 /// set when the node is added as a child to another one. 77 Node(NodeKind Kind); 78 79 NodeKind kind() const { return static_cast<NodeKind>(Kind); } 80 NodeRole role() const { return static_cast<NodeRole>(Role); } 81 82 const Tree *parent() const { return Parent; } 83 Tree *parent() { return Parent; } 84 85 const Node *nextSibling() const { return NextSibling; } 86 Node *nextSibling() { return NextSibling; } 87 88 /// Dumps the structure of a subtree. For debugging and testing purposes. 89 std::string dump(const Arena &A) const; 90 /// Dumps the tokens forming this subtree. 91 std::string dumpTokens(const Arena &A) const; 92 93 private: 94 // Tree is allowed to change the Parent link and Role. 95 friend class Tree; 96 97 Tree *Parent; 98 Node *NextSibling; 99 unsigned Kind : 16; 100 unsigned Role : 8; 101 }; 102 103 /// A leaf node points to a single token inside the expanded token stream. 104 class Leaf final : public Node { 105 public: 106 Leaf(const syntax::Token *T); 107 static bool classof(const Node *N); 108 109 const syntax::Token *token() const { return Tok; } 110 111 private: 112 const syntax::Token *Tok; 113 }; 114 115 /// A node that has children and represents a syntactic language construct. 116 class Tree : public Node { 117 public: 118 using Node::Node; 119 static bool classof(const Node *N); 120 121 Node *firstChild() { return FirstChild; } 122 const Node *firstChild() const { return FirstChild; } 123 124 protected: 125 /// Find the first node with a corresponding role. 126 syntax::Node *findChild(NodeRole R); 127 128 private: 129 /// Prepend \p Child to the list of children and and sets the parent pointer. 130 /// A very low-level operation that does not check any invariants, only used 131 /// by TreeBuilder. 132 /// EXPECTS: Role != NodeRoleDetached. 133 void prependChildLowLevel(Node *Child, NodeRole Role); 134 friend class TreeBuilder; 135 136 Node *FirstChild = nullptr; 137 }; 138 139 } // namespace syntax 140 } // namespace clang 141 142 #endif 143