xref: /freebsd/contrib/llvm-project/clang/lib/Tooling/Syntax/Tree.cpp (revision e9a994639b2af232f994ba2ad23ca45a17718d2b)
1 //===- Tree.cpp -----------------------------------------------*- C++ -*-=====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "clang/Tooling/Syntax/Tree.h"
9 #include "clang/Basic/TokenKinds.h"
10 #include "clang/Tooling/Syntax/Nodes.h"
11 #include "llvm/ADT/ArrayRef.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/Support/Casting.h"
14 #include <cassert>
15 
16 using namespace clang;
17 
18 namespace {
19 static void traverse(const syntax::Node *N,
20                      llvm::function_ref<void(const syntax::Node *)> Visit) {
21   if (auto *T = dyn_cast<syntax::Tree>(N)) {
22     for (const syntax::Node &C : T->getChildren())
23       traverse(&C, Visit);
24   }
25   Visit(N);
26 }
27 static void traverse(syntax::Node *N,
28                      llvm::function_ref<void(syntax::Node *)> Visit) {
29   traverse(static_cast<const syntax::Node *>(N), [&](const syntax::Node *N) {
30     Visit(const_cast<syntax::Node *>(N));
31   });
32 }
33 } // namespace
34 
35 syntax::Arena::Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
36                      const TokenBuffer &Tokens)
37     : SourceMgr(SourceMgr), LangOpts(LangOpts), Tokens(Tokens) {}
38 
39 const syntax::TokenBuffer &syntax::Arena::getTokenBuffer() const {
40   return Tokens;
41 }
42 
43 std::pair<FileID, ArrayRef<syntax::Token>>
44 syntax::Arena::lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Input) {
45   auto FID = SourceMgr.createFileID(std::move(Input));
46   auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SourceMgr, LangOpts));
47   assert(It.second && "duplicate FileID");
48   return {FID, It.first->second};
49 }
50 
51 syntax::Leaf::Leaf(const syntax::Token *Tok) : Node(NodeKind::Leaf), Tok(Tok) {
52   assert(Tok != nullptr);
53 }
54 
55 syntax::Node::Node(NodeKind Kind)
56     : Parent(nullptr), NextSibling(nullptr), PreviousSibling(nullptr),
57       Kind(static_cast<unsigned>(Kind)), Role(0), Original(false),
58       CanModify(false) {
59   this->setRole(NodeRole::Detached);
60 }
61 
62 bool syntax::Node::isDetached() const {
63   return getRole() == NodeRole::Detached;
64 }
65 
66 void syntax::Node::setRole(NodeRole NR) {
67   this->Role = static_cast<unsigned>(NR);
68 }
69 
70 void syntax::Tree::appendChildLowLevel(Node *Child, NodeRole Role) {
71   assert(Child->getRole() == NodeRole::Detached);
72   assert(Role != NodeRole::Detached);
73 
74   Child->setRole(Role);
75   appendChildLowLevel(Child);
76 }
77 
78 void syntax::Tree::appendChildLowLevel(Node *Child) {
79   assert(Child->Parent == nullptr);
80   assert(Child->NextSibling == nullptr);
81   assert(Child->PreviousSibling == nullptr);
82   assert(Child->getRole() != NodeRole::Detached);
83 
84   Child->Parent = this;
85   if (this->LastChild) {
86     Child->PreviousSibling = this->LastChild;
87     this->LastChild->NextSibling = Child;
88   } else
89     this->FirstChild = Child;
90 
91   this->LastChild = Child;
92 }
93 
94 void syntax::Tree::prependChildLowLevel(Node *Child, NodeRole Role) {
95   assert(Child->getRole() == NodeRole::Detached);
96   assert(Role != NodeRole::Detached);
97 
98   Child->setRole(Role);
99   prependChildLowLevel(Child);
100 }
101 
102 void syntax::Tree::prependChildLowLevel(Node *Child) {
103   assert(Child->Parent == nullptr);
104   assert(Child->NextSibling == nullptr);
105   assert(Child->PreviousSibling == nullptr);
106   assert(Child->getRole() != NodeRole::Detached);
107 
108   Child->Parent = this;
109   if (this->FirstChild) {
110     Child->NextSibling = this->FirstChild;
111     this->FirstChild->PreviousSibling = Child;
112   } else
113     this->LastChild = Child;
114 
115   this->FirstChild = Child;
116 }
117 
118 void syntax::Tree::replaceChildRangeLowLevel(Node *Begin, Node *End,
119                                              Node *New) {
120   assert((!Begin || Begin->Parent == this) &&
121          "`Begin` is not a child of `this`.");
122   assert((!End || End->Parent == this) && "`End` is not a child of `this`.");
123   assert(canModify() && "Cannot modify `this`.");
124 
125 #ifndef NDEBUG
126   for (auto *N = New; N; N = N->NextSibling) {
127     assert(N->Parent == nullptr);
128     assert(N->getRole() != NodeRole::Detached && "Roles must be set");
129     // FIXME: sanity-check the role.
130   }
131 
132   auto Reachable = [](Node *From, Node *N) {
133     if (!N)
134       return true;
135     for (auto *It = From; It; It = It->NextSibling)
136       if (It == N)
137         return true;
138     return false;
139   };
140   assert(Reachable(FirstChild, Begin) && "`Begin` is not reachable.");
141   assert(Reachable(Begin, End) && "`End` is not after `Begin`.");
142 #endif
143 
144   if (!New && Begin == End)
145     return;
146 
147   // Mark modification.
148   for (auto *T = this; T && T->Original; T = T->Parent)
149     T->Original = false;
150 
151   // Save the node before the range to be removed. Later we insert the `New`
152   // range after this node.
153   auto *BeforeBegin = Begin ? Begin->PreviousSibling : LastChild;
154 
155   // Detach old nodes.
156   for (auto *N = Begin; N != End;) {
157     auto *Next = N->NextSibling;
158 
159     N->setRole(NodeRole::Detached);
160     N->Parent = nullptr;
161     N->NextSibling = nullptr;
162     N->PreviousSibling = nullptr;
163     if (N->Original)
164       traverse(N, [](Node *C) { C->Original = false; });
165 
166     N = Next;
167   }
168 
169   // Attach new range.
170   auto *&NewFirst = BeforeBegin ? BeforeBegin->NextSibling : FirstChild;
171   auto *&NewLast = End ? End->PreviousSibling : LastChild;
172 
173   if (!New) {
174     NewFirst = End;
175     NewLast = BeforeBegin;
176     return;
177   }
178 
179   New->PreviousSibling = BeforeBegin;
180   NewFirst = New;
181 
182   Node *LastInNew;
183   for (auto *N = New; N != nullptr; N = N->NextSibling) {
184     LastInNew = N;
185     N->Parent = this;
186   }
187   LastInNew->NextSibling = End;
188   NewLast = LastInNew;
189 }
190 
191 namespace {
192 static void dumpLeaf(raw_ostream &OS, const syntax::Leaf *L,
193                      const SourceManager &SM) {
194   assert(L);
195   const auto *Token = L->getToken();
196   assert(Token);
197   // Handle 'eof' separately, calling text() on it produces an empty string.
198   if (Token->kind() == tok::eof)
199     OS << "<eof>";
200   else
201     OS << Token->text(SM);
202 }
203 
204 static void dumpNode(raw_ostream &OS, const syntax::Node *N,
205                      const SourceManager &SM, std::vector<bool> IndentMask) {
206   auto DumpExtraInfo = [&OS](const syntax::Node *N) {
207     if (N->getRole() != syntax::NodeRole::Unknown)
208       OS << " " << N->getRole();
209     if (!N->isOriginal())
210       OS << " synthesized";
211     if (!N->canModify())
212       OS << " unmodifiable";
213   };
214 
215   assert(N);
216   if (const auto *L = dyn_cast<syntax::Leaf>(N)) {
217     OS << "'";
218     dumpLeaf(OS, L, SM);
219     OS << "'";
220     DumpExtraInfo(N);
221     OS << "\n";
222     return;
223   }
224 
225   const auto *T = cast<syntax::Tree>(N);
226   OS << T->getKind();
227   DumpExtraInfo(N);
228   OS << "\n";
229 
230   for (const syntax::Node &It : T->getChildren()) {
231     for (bool Filled : IndentMask) {
232       if (Filled)
233         OS << "| ";
234       else
235         OS << "  ";
236     }
237     if (!It.getNextSibling()) {
238       OS << "`-";
239       IndentMask.push_back(false);
240     } else {
241       OS << "|-";
242       IndentMask.push_back(true);
243     }
244     dumpNode(OS, &It, SM, IndentMask);
245     IndentMask.pop_back();
246   }
247 }
248 } // namespace
249 
250 std::string syntax::Node::dump(const SourceManager &SM) const {
251   std::string Str;
252   llvm::raw_string_ostream OS(Str);
253   dumpNode(OS, this, SM, /*IndentMask=*/{});
254   return std::move(OS.str());
255 }
256 
257 std::string syntax::Node::dumpTokens(const SourceManager &SM) const {
258   std::string Storage;
259   llvm::raw_string_ostream OS(Storage);
260   traverse(this, [&](const syntax::Node *N) {
261     if (const auto *L = dyn_cast<syntax::Leaf>(N)) {
262       dumpLeaf(OS, L, SM);
263       OS << " ";
264     }
265   });
266   return OS.str();
267 }
268 
269 void syntax::Node::assertInvariants() const {
270 #ifndef NDEBUG
271   if (isDetached())
272     assert(getParent() == nullptr);
273   else
274     assert(getParent() != nullptr);
275 
276   const auto *T = dyn_cast<Tree>(this);
277   if (!T)
278     return;
279   for (const Node &C : T->getChildren()) {
280     if (T->isOriginal())
281       assert(C.isOriginal());
282     assert(!C.isDetached());
283     assert(C.getParent() == T);
284     const auto *Next = C.getNextSibling();
285     assert(!Next || &C == Next->getPreviousSibling());
286     if (!C.getNextSibling())
287       assert(&C == T->getLastChild() &&
288              "Last child is reachable by advancing from the first child.");
289   }
290 
291   const auto *L = dyn_cast<List>(T);
292   if (!L)
293     return;
294   for (const Node &C : T->getChildren()) {
295     assert(C.getRole() == NodeRole::ListElement ||
296            C.getRole() == NodeRole::ListDelimiter);
297     if (C.getRole() == NodeRole::ListDelimiter) {
298       assert(isa<Leaf>(C));
299       assert(cast<Leaf>(C).getToken()->kind() == L->getDelimiterTokenKind());
300     }
301   }
302 
303 #endif
304 }
305 
306 void syntax::Node::assertInvariantsRecursive() const {
307 #ifndef NDEBUG
308   traverse(this, [&](const syntax::Node *N) { N->assertInvariants(); });
309 #endif
310 }
311 
312 const syntax::Leaf *syntax::Tree::findFirstLeaf() const {
313   for (const Node &C : getChildren()) {
314     if (const auto *L = dyn_cast<syntax::Leaf>(&C))
315       return L;
316     if (const auto *L = cast<syntax::Tree>(C).findFirstLeaf())
317       return L;
318   }
319   return nullptr;
320 }
321 
322 const syntax::Leaf *syntax::Tree::findLastLeaf() const {
323   for (const auto *C = getLastChild(); C; C = C->getPreviousSibling()) {
324     if (const auto *L = dyn_cast<syntax::Leaf>(C))
325       return L;
326     if (const auto *L = cast<syntax::Tree>(C)->findLastLeaf())
327       return L;
328   }
329   return nullptr;
330 }
331 
332 const syntax::Node *syntax::Tree::findChild(NodeRole R) const {
333   for (const Node &C : getChildren()) {
334     if (C.getRole() == R)
335       return &C;
336   }
337   return nullptr;
338 }
339 
340 std::vector<syntax::List::ElementAndDelimiter<syntax::Node>>
341 syntax::List::getElementsAsNodesAndDelimiters() {
342   if (!getFirstChild())
343     return {};
344 
345   std::vector<syntax::List::ElementAndDelimiter<Node>> Children;
346   syntax::Node *ElementWithoutDelimiter = nullptr;
347   for (Node &C : getChildren()) {
348     switch (C.getRole()) {
349     case syntax::NodeRole::ListElement: {
350       if (ElementWithoutDelimiter) {
351         Children.push_back({ElementWithoutDelimiter, nullptr});
352       }
353       ElementWithoutDelimiter = &C;
354       break;
355     }
356     case syntax::NodeRole::ListDelimiter: {
357       Children.push_back({ElementWithoutDelimiter, cast<syntax::Leaf>(&C)});
358       ElementWithoutDelimiter = nullptr;
359       break;
360     }
361     default:
362       llvm_unreachable(
363           "A list can have only elements and delimiters as children.");
364     }
365   }
366 
367   switch (getTerminationKind()) {
368   case syntax::List::TerminationKind::Separated: {
369     Children.push_back({ElementWithoutDelimiter, nullptr});
370     break;
371   }
372   case syntax::List::TerminationKind::Terminated:
373   case syntax::List::TerminationKind::MaybeTerminated: {
374     if (ElementWithoutDelimiter) {
375       Children.push_back({ElementWithoutDelimiter, nullptr});
376     }
377     break;
378   }
379   }
380 
381   return Children;
382 }
383 
384 // Almost the same implementation of `getElementsAsNodesAndDelimiters` but
385 // ignoring delimiters
386 std::vector<syntax::Node *> syntax::List::getElementsAsNodes() {
387   if (!getFirstChild())
388     return {};
389 
390   std::vector<syntax::Node *> Children;
391   syntax::Node *ElementWithoutDelimiter = nullptr;
392   for (Node &C : getChildren()) {
393     switch (C.getRole()) {
394     case syntax::NodeRole::ListElement: {
395       if (ElementWithoutDelimiter) {
396         Children.push_back(ElementWithoutDelimiter);
397       }
398       ElementWithoutDelimiter = &C;
399       break;
400     }
401     case syntax::NodeRole::ListDelimiter: {
402       Children.push_back(ElementWithoutDelimiter);
403       ElementWithoutDelimiter = nullptr;
404       break;
405     }
406     default:
407       llvm_unreachable("A list has only elements or delimiters.");
408     }
409   }
410 
411   switch (getTerminationKind()) {
412   case syntax::List::TerminationKind::Separated: {
413     Children.push_back(ElementWithoutDelimiter);
414     break;
415   }
416   case syntax::List::TerminationKind::Terminated:
417   case syntax::List::TerminationKind::MaybeTerminated: {
418     if (ElementWithoutDelimiter) {
419       Children.push_back(ElementWithoutDelimiter);
420     }
421     break;
422   }
423   }
424 
425   return Children;
426 }
427 
428 clang::tok::TokenKind syntax::List::getDelimiterTokenKind() const {
429   switch (this->getKind()) {
430   case NodeKind::NestedNameSpecifier:
431     return clang::tok::coloncolon;
432   case NodeKind::CallArguments:
433   case NodeKind::ParameterDeclarationList:
434   case NodeKind::DeclaratorList:
435     return clang::tok::comma;
436   default:
437     llvm_unreachable("This is not a subclass of List, thus "
438                      "getDelimiterTokenKind() cannot be called");
439   }
440 }
441 
442 syntax::List::TerminationKind syntax::List::getTerminationKind() const {
443   switch (this->getKind()) {
444   case NodeKind::NestedNameSpecifier:
445     return TerminationKind::Terminated;
446   case NodeKind::CallArguments:
447   case NodeKind::ParameterDeclarationList:
448   case NodeKind::DeclaratorList:
449     return TerminationKind::Separated;
450   default:
451     llvm_unreachable("This is not a subclass of List, thus "
452                      "getTerminationKind() cannot be called");
453   }
454 }
455 
456 bool syntax::List::canBeEmpty() const {
457   switch (this->getKind()) {
458   case NodeKind::NestedNameSpecifier:
459     return false;
460   case NodeKind::CallArguments:
461     return true;
462   case NodeKind::ParameterDeclarationList:
463     return true;
464   case NodeKind::DeclaratorList:
465     return true;
466   default:
467     llvm_unreachable("This is not a subclass of List, thus canBeEmpty() "
468                      "cannot be called");
469   }
470 }
471