xref: /freebsd/contrib/llvm-project/clang/lib/Tooling/Syntax/Tree.cpp (revision 81b22a9892b1047e551fc3f1d6d58031bc59a4c3)
1  //===- Tree.cpp -----------------------------------------------*- C++ -*-=====//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  #include "clang/Tooling/Syntax/Tree.h"
9  #include "clang/Basic/TokenKinds.h"
10  #include "clang/Tooling/Syntax/Nodes.h"
11  #include "llvm/ADT/ArrayRef.h"
12  #include "llvm/ADT/STLExtras.h"
13  #include "llvm/Support/Casting.h"
14  #include <cassert>
15  
16  using namespace clang;
17  
18  namespace {
19  static void traverse(const syntax::Node *N,
20                       llvm::function_ref<void(const syntax::Node *)> Visit) {
21    if (auto *T = dyn_cast<syntax::Tree>(N)) {
22      for (const syntax::Node &C : T->getChildren())
23        traverse(&C, Visit);
24    }
25    Visit(N);
26  }
27  static void traverse(syntax::Node *N,
28                       llvm::function_ref<void(syntax::Node *)> Visit) {
29    traverse(static_cast<const syntax::Node *>(N), [&](const syntax::Node *N) {
30      Visit(const_cast<syntax::Node *>(N));
31    });
32  }
33  } // namespace
34  
35  syntax::Arena::Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
36                       const TokenBuffer &Tokens)
37      : SourceMgr(SourceMgr), LangOpts(LangOpts), Tokens(Tokens) {}
38  
39  const syntax::TokenBuffer &syntax::Arena::getTokenBuffer() const {
40    return Tokens;
41  }
42  
43  std::pair<FileID, ArrayRef<syntax::Token>>
44  syntax::Arena::lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Input) {
45    auto FID = SourceMgr.createFileID(std::move(Input));
46    auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SourceMgr, LangOpts));
47    assert(It.second && "duplicate FileID");
48    return {FID, It.first->second};
49  }
50  
51  syntax::Leaf::Leaf(const syntax::Token *Tok) : Node(NodeKind::Leaf), Tok(Tok) {
52    assert(Tok != nullptr);
53  }
54  
55  syntax::Node::Node(NodeKind Kind)
56      : Parent(nullptr), NextSibling(nullptr), PreviousSibling(nullptr),
57        Kind(static_cast<unsigned>(Kind)), Role(0), Original(false),
58        CanModify(false) {
59    this->setRole(NodeRole::Detached);
60  }
61  
62  bool syntax::Node::isDetached() const {
63    return getRole() == NodeRole::Detached;
64  }
65  
66  void syntax::Node::setRole(NodeRole NR) {
67    this->Role = static_cast<unsigned>(NR);
68  }
69  
70  void syntax::Tree::appendChildLowLevel(Node *Child, NodeRole Role) {
71    assert(Child->getRole() == NodeRole::Detached);
72    assert(Role != NodeRole::Detached);
73  
74    Child->setRole(Role);
75    appendChildLowLevel(Child);
76  }
77  
78  void syntax::Tree::appendChildLowLevel(Node *Child) {
79    assert(Child->Parent == nullptr);
80    assert(Child->NextSibling == nullptr);
81    assert(Child->PreviousSibling == nullptr);
82    assert(Child->getRole() != NodeRole::Detached);
83  
84    Child->Parent = this;
85    if (this->LastChild) {
86      Child->PreviousSibling = this->LastChild;
87      this->LastChild->NextSibling = Child;
88    } else
89      this->FirstChild = Child;
90  
91    this->LastChild = Child;
92  }
93  
94  void syntax::Tree::prependChildLowLevel(Node *Child, NodeRole Role) {
95    assert(Child->getRole() == NodeRole::Detached);
96    assert(Role != NodeRole::Detached);
97  
98    Child->setRole(Role);
99    prependChildLowLevel(Child);
100  }
101  
102  void syntax::Tree::prependChildLowLevel(Node *Child) {
103    assert(Child->Parent == nullptr);
104    assert(Child->NextSibling == nullptr);
105    assert(Child->PreviousSibling == nullptr);
106    assert(Child->getRole() != NodeRole::Detached);
107  
108    Child->Parent = this;
109    if (this->FirstChild) {
110      Child->NextSibling = this->FirstChild;
111      this->FirstChild->PreviousSibling = Child;
112    } else
113      this->LastChild = Child;
114  
115    this->FirstChild = Child;
116  }
117  
118  void syntax::Tree::replaceChildRangeLowLevel(Node *Begin, Node *End,
119                                               Node *New) {
120    assert((!Begin || Begin->Parent == this) &&
121           "`Begin` is not a child of `this`.");
122    assert((!End || End->Parent == this) && "`End` is not a child of `this`.");
123    assert(canModify() && "Cannot modify `this`.");
124  
125  #ifndef NDEBUG
126    for (auto *N = New; N; N = N->NextSibling) {
127      assert(N->Parent == nullptr);
128      assert(N->getRole() != NodeRole::Detached && "Roles must be set");
129      // FIXME: sanity-check the role.
130    }
131  
132    auto Reachable = [](Node *From, Node *N) {
133      if (!N)
134        return true;
135      for (auto *It = From; It; It = It->NextSibling)
136        if (It == N)
137          return true;
138      return false;
139    };
140    assert(Reachable(FirstChild, Begin) && "`Begin` is not reachable.");
141    assert(Reachable(Begin, End) && "`End` is not after `Begin`.");
142  #endif
143  
144    if (!New && Begin == End)
145      return;
146  
147    // Mark modification.
148    for (auto *T = this; T && T->Original; T = T->Parent)
149      T->Original = false;
150  
151    // Save the node before the range to be removed. Later we insert the `New`
152    // range after this node.
153    auto *BeforeBegin = Begin ? Begin->PreviousSibling : LastChild;
154  
155    // Detach old nodes.
156    for (auto *N = Begin; N != End;) {
157      auto *Next = N->NextSibling;
158  
159      N->setRole(NodeRole::Detached);
160      N->Parent = nullptr;
161      N->NextSibling = nullptr;
162      N->PreviousSibling = nullptr;
163      if (N->Original)
164        traverse(N, [](Node *C) { C->Original = false; });
165  
166      N = Next;
167    }
168  
169    // Attach new range.
170    auto *&NewFirst = BeforeBegin ? BeforeBegin->NextSibling : FirstChild;
171    auto *&NewLast = End ? End->PreviousSibling : LastChild;
172  
173    if (!New) {
174      NewFirst = End;
175      NewLast = BeforeBegin;
176      return;
177    }
178  
179    New->PreviousSibling = BeforeBegin;
180    NewFirst = New;
181  
182    Node *LastInNew;
183    for (auto *N = New; N != nullptr; N = N->NextSibling) {
184      LastInNew = N;
185      N->Parent = this;
186    }
187    LastInNew->NextSibling = End;
188    NewLast = LastInNew;
189  }
190  
191  namespace {
192  static void dumpLeaf(raw_ostream &OS, const syntax::Leaf *L,
193                       const SourceManager &SM) {
194    assert(L);
195    const auto *Token = L->getToken();
196    assert(Token);
197    // Handle 'eof' separately, calling text() on it produces an empty string.
198    if (Token->kind() == tok::eof)
199      OS << "<eof>";
200    else
201      OS << Token->text(SM);
202  }
203  
204  static void dumpNode(raw_ostream &OS, const syntax::Node *N,
205                       const SourceManager &SM, std::vector<bool> IndentMask) {
206    auto DumpExtraInfo = [&OS](const syntax::Node *N) {
207      if (N->getRole() != syntax::NodeRole::Unknown)
208        OS << " " << N->getRole();
209      if (!N->isOriginal())
210        OS << " synthesized";
211      if (!N->canModify())
212        OS << " unmodifiable";
213    };
214  
215    assert(N);
216    if (const auto *L = dyn_cast<syntax::Leaf>(N)) {
217      OS << "'";
218      dumpLeaf(OS, L, SM);
219      OS << "'";
220      DumpExtraInfo(N);
221      OS << "\n";
222      return;
223    }
224  
225    const auto *T = cast<syntax::Tree>(N);
226    OS << T->getKind();
227    DumpExtraInfo(N);
228    OS << "\n";
229  
230    for (const syntax::Node &It : T->getChildren()) {
231      for (bool Filled : IndentMask) {
232        if (Filled)
233          OS << "| ";
234        else
235          OS << "  ";
236      }
237      if (!It.getNextSibling()) {
238        OS << "`-";
239        IndentMask.push_back(false);
240      } else {
241        OS << "|-";
242        IndentMask.push_back(true);
243      }
244      dumpNode(OS, &It, SM, IndentMask);
245      IndentMask.pop_back();
246    }
247  }
248  } // namespace
249  
250  std::string syntax::Node::dump(const SourceManager &SM) const {
251    std::string Str;
252    llvm::raw_string_ostream OS(Str);
253    dumpNode(OS, this, SM, /*IndentMask=*/{});
254    return std::move(OS.str());
255  }
256  
257  std::string syntax::Node::dumpTokens(const SourceManager &SM) const {
258    std::string Storage;
259    llvm::raw_string_ostream OS(Storage);
260    traverse(this, [&](const syntax::Node *N) {
261      if (const auto *L = dyn_cast<syntax::Leaf>(N)) {
262        dumpLeaf(OS, L, SM);
263        OS << " ";
264      }
265    });
266    return OS.str();
267  }
268  
269  void syntax::Node::assertInvariants() const {
270  #ifndef NDEBUG
271    if (isDetached())
272      assert(getParent() == nullptr);
273    else
274      assert(getParent() != nullptr);
275  
276    const auto *T = dyn_cast<Tree>(this);
277    if (!T)
278      return;
279    for (const Node &C : T->getChildren()) {
280      if (T->isOriginal())
281        assert(C.isOriginal());
282      assert(!C.isDetached());
283      assert(C.getParent() == T);
284      const auto *Next = C.getNextSibling();
285      assert(!Next || &C == Next->getPreviousSibling());
286      if (!C.getNextSibling())
287        assert(&C == T->getLastChild() &&
288               "Last child is reachable by advancing from the first child.");
289    }
290  
291    const auto *L = dyn_cast<List>(T);
292    if (!L)
293      return;
294    for (const Node &C : T->getChildren()) {
295      assert(C.getRole() == NodeRole::ListElement ||
296             C.getRole() == NodeRole::ListDelimiter);
297      if (C.getRole() == NodeRole::ListDelimiter) {
298        assert(isa<Leaf>(C));
299        assert(cast<Leaf>(C).getToken()->kind() == L->getDelimiterTokenKind());
300      }
301    }
302  
303  #endif
304  }
305  
306  void syntax::Node::assertInvariantsRecursive() const {
307  #ifndef NDEBUG
308    traverse(this, [&](const syntax::Node *N) { N->assertInvariants(); });
309  #endif
310  }
311  
312  const syntax::Leaf *syntax::Tree::findFirstLeaf() const {
313    for (const Node &C : getChildren()) {
314      if (const auto *L = dyn_cast<syntax::Leaf>(&C))
315        return L;
316      if (const auto *L = cast<syntax::Tree>(C).findFirstLeaf())
317        return L;
318    }
319    return nullptr;
320  }
321  
322  const syntax::Leaf *syntax::Tree::findLastLeaf() const {
323    for (const auto *C = getLastChild(); C; C = C->getPreviousSibling()) {
324      if (const auto *L = dyn_cast<syntax::Leaf>(C))
325        return L;
326      if (const auto *L = cast<syntax::Tree>(C)->findLastLeaf())
327        return L;
328    }
329    return nullptr;
330  }
331  
332  const syntax::Node *syntax::Tree::findChild(NodeRole R) const {
333    for (const Node &C : getChildren()) {
334      if (C.getRole() == R)
335        return &C;
336    }
337    return nullptr;
338  }
339  
340  std::vector<syntax::List::ElementAndDelimiter<syntax::Node>>
341  syntax::List::getElementsAsNodesAndDelimiters() {
342    if (!getFirstChild())
343      return {};
344  
345    std::vector<syntax::List::ElementAndDelimiter<Node>> Children;
346    syntax::Node *ElementWithoutDelimiter = nullptr;
347    for (Node &C : getChildren()) {
348      switch (C.getRole()) {
349      case syntax::NodeRole::ListElement: {
350        if (ElementWithoutDelimiter) {
351          Children.push_back({ElementWithoutDelimiter, nullptr});
352        }
353        ElementWithoutDelimiter = &C;
354        break;
355      }
356      case syntax::NodeRole::ListDelimiter: {
357        Children.push_back({ElementWithoutDelimiter, cast<syntax::Leaf>(&C)});
358        ElementWithoutDelimiter = nullptr;
359        break;
360      }
361      default:
362        llvm_unreachable(
363            "A list can have only elements and delimiters as children.");
364      }
365    }
366  
367    switch (getTerminationKind()) {
368    case syntax::List::TerminationKind::Separated: {
369      Children.push_back({ElementWithoutDelimiter, nullptr});
370      break;
371    }
372    case syntax::List::TerminationKind::Terminated:
373    case syntax::List::TerminationKind::MaybeTerminated: {
374      if (ElementWithoutDelimiter) {
375        Children.push_back({ElementWithoutDelimiter, nullptr});
376      }
377      break;
378    }
379    }
380  
381    return Children;
382  }
383  
384  // Almost the same implementation of `getElementsAsNodesAndDelimiters` but
385  // ignoring delimiters
386  std::vector<syntax::Node *> syntax::List::getElementsAsNodes() {
387    if (!getFirstChild())
388      return {};
389  
390    std::vector<syntax::Node *> Children;
391    syntax::Node *ElementWithoutDelimiter = nullptr;
392    for (Node &C : getChildren()) {
393      switch (C.getRole()) {
394      case syntax::NodeRole::ListElement: {
395        if (ElementWithoutDelimiter) {
396          Children.push_back(ElementWithoutDelimiter);
397        }
398        ElementWithoutDelimiter = &C;
399        break;
400      }
401      case syntax::NodeRole::ListDelimiter: {
402        Children.push_back(ElementWithoutDelimiter);
403        ElementWithoutDelimiter = nullptr;
404        break;
405      }
406      default:
407        llvm_unreachable("A list has only elements or delimiters.");
408      }
409    }
410  
411    switch (getTerminationKind()) {
412    case syntax::List::TerminationKind::Separated: {
413      Children.push_back(ElementWithoutDelimiter);
414      break;
415    }
416    case syntax::List::TerminationKind::Terminated:
417    case syntax::List::TerminationKind::MaybeTerminated: {
418      if (ElementWithoutDelimiter) {
419        Children.push_back(ElementWithoutDelimiter);
420      }
421      break;
422    }
423    }
424  
425    return Children;
426  }
427  
428  clang::tok::TokenKind syntax::List::getDelimiterTokenKind() const {
429    switch (this->getKind()) {
430    case NodeKind::NestedNameSpecifier:
431      return clang::tok::coloncolon;
432    case NodeKind::CallArguments:
433    case NodeKind::ParameterDeclarationList:
434    case NodeKind::DeclaratorList:
435      return clang::tok::comma;
436    default:
437      llvm_unreachable("This is not a subclass of List, thus "
438                       "getDelimiterTokenKind() cannot be called");
439    }
440  }
441  
442  syntax::List::TerminationKind syntax::List::getTerminationKind() const {
443    switch (this->getKind()) {
444    case NodeKind::NestedNameSpecifier:
445      return TerminationKind::Terminated;
446    case NodeKind::CallArguments:
447    case NodeKind::ParameterDeclarationList:
448    case NodeKind::DeclaratorList:
449      return TerminationKind::Separated;
450    default:
451      llvm_unreachable("This is not a subclass of List, thus "
452                       "getTerminationKind() cannot be called");
453    }
454  }
455  
456  bool syntax::List::canBeEmpty() const {
457    switch (this->getKind()) {
458    case NodeKind::NestedNameSpecifier:
459      return false;
460    case NodeKind::CallArguments:
461      return true;
462    case NodeKind::ParameterDeclarationList:
463      return true;
464    case NodeKind::DeclaratorList:
465      return true;
466    default:
467      llvm_unreachable("This is not a subclass of List, thus canBeEmpty() "
468                       "cannot be called");
469    }
470  }
471