1 //===- Tree.cpp -----------------------------------------------*- C++ -*-=====// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "clang/Tooling/Syntax/Tree.h" 9 #include "clang/Basic/TokenKinds.h" 10 #include "clang/Tooling/Syntax/Nodes.h" 11 #include "llvm/ADT/ArrayRef.h" 12 #include "llvm/ADT/BitVector.h" 13 #include "llvm/ADT/STLExtras.h" 14 #include "llvm/Support/Casting.h" 15 #include <cassert> 16 17 using namespace clang; 18 19 namespace { 20 static void traverse(const syntax::Node *N, 21 llvm::function_ref<void(const syntax::Node *)> Visit) { 22 if (auto *T = dyn_cast<syntax::Tree>(N)) { 23 for (const syntax::Node &C : T->getChildren()) 24 traverse(&C, Visit); 25 } 26 Visit(N); 27 } 28 static void traverse(syntax::Node *N, 29 llvm::function_ref<void(syntax::Node *)> Visit) { 30 traverse(static_cast<const syntax::Node *>(N), [&](const syntax::Node *N) { 31 Visit(const_cast<syntax::Node *>(N)); 32 }); 33 } 34 } // namespace 35 36 syntax::Arena::Arena(SourceManager &SourceMgr, const LangOptions &LangOpts, 37 const TokenBuffer &Tokens) 38 : SourceMgr(SourceMgr), LangOpts(LangOpts), Tokens(Tokens) {} 39 40 const syntax::TokenBuffer &syntax::Arena::getTokenBuffer() const { 41 return Tokens; 42 } 43 44 std::pair<FileID, ArrayRef<syntax::Token>> 45 syntax::Arena::lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Input) { 46 auto FID = SourceMgr.createFileID(std::move(Input)); 47 auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SourceMgr, LangOpts)); 48 assert(It.second && "duplicate FileID"); 49 return {FID, It.first->second}; 50 } 51 52 syntax::Leaf::Leaf(const syntax::Token *Tok) : Node(NodeKind::Leaf), Tok(Tok) { 53 assert(Tok != nullptr); 54 } 55 56 syntax::Node::Node(NodeKind Kind) 57 : Parent(nullptr), NextSibling(nullptr), PreviousSibling(nullptr), 58 Kind(static_cast<unsigned>(Kind)), Role(0), Original(false), 59 CanModify(false) { 60 this->setRole(NodeRole::Detached); 61 } 62 63 bool syntax::Node::isDetached() const { 64 return getRole() == NodeRole::Detached; 65 } 66 67 void syntax::Node::setRole(NodeRole NR) { 68 this->Role = static_cast<unsigned>(NR); 69 } 70 71 void syntax::Tree::appendChildLowLevel(Node *Child, NodeRole Role) { 72 assert(Child->getRole() == NodeRole::Detached); 73 assert(Role != NodeRole::Detached); 74 75 Child->setRole(Role); 76 appendChildLowLevel(Child); 77 } 78 79 void syntax::Tree::appendChildLowLevel(Node *Child) { 80 assert(Child->Parent == nullptr); 81 assert(Child->NextSibling == nullptr); 82 assert(Child->PreviousSibling == nullptr); 83 assert(Child->getRole() != NodeRole::Detached); 84 85 Child->Parent = this; 86 if (this->LastChild) { 87 Child->PreviousSibling = this->LastChild; 88 this->LastChild->NextSibling = Child; 89 } else 90 this->FirstChild = Child; 91 92 this->LastChild = Child; 93 } 94 95 void syntax::Tree::prependChildLowLevel(Node *Child, NodeRole Role) { 96 assert(Child->getRole() == NodeRole::Detached); 97 assert(Role != NodeRole::Detached); 98 99 Child->setRole(Role); 100 prependChildLowLevel(Child); 101 } 102 103 void syntax::Tree::prependChildLowLevel(Node *Child) { 104 assert(Child->Parent == nullptr); 105 assert(Child->NextSibling == nullptr); 106 assert(Child->PreviousSibling == nullptr); 107 assert(Child->getRole() != NodeRole::Detached); 108 109 Child->Parent = this; 110 if (this->FirstChild) { 111 Child->NextSibling = this->FirstChild; 112 this->FirstChild->PreviousSibling = Child; 113 } else 114 this->LastChild = Child; 115 116 this->FirstChild = Child; 117 } 118 119 void syntax::Tree::replaceChildRangeLowLevel(Node *Begin, Node *End, 120 Node *New) { 121 assert((!Begin || Begin->Parent == this) && 122 "`Begin` is not a child of `this`."); 123 assert((!End || End->Parent == this) && "`End` is not a child of `this`."); 124 assert(canModify() && "Cannot modify `this`."); 125 126 #ifndef NDEBUG 127 for (auto *N = New; N; N = N->NextSibling) { 128 assert(N->Parent == nullptr); 129 assert(N->getRole() != NodeRole::Detached && "Roles must be set"); 130 // FIXME: validate the role. 131 } 132 133 auto Reachable = [](Node *From, Node *N) { 134 if (!N) 135 return true; 136 for (auto *It = From; It; It = It->NextSibling) 137 if (It == N) 138 return true; 139 return false; 140 }; 141 assert(Reachable(FirstChild, Begin) && "`Begin` is not reachable."); 142 assert(Reachable(Begin, End) && "`End` is not after `Begin`."); 143 #endif 144 145 if (!New && Begin == End) 146 return; 147 148 // Mark modification. 149 for (auto *T = this; T && T->Original; T = T->Parent) 150 T->Original = false; 151 152 // Save the node before the range to be removed. Later we insert the `New` 153 // range after this node. 154 auto *BeforeBegin = Begin ? Begin->PreviousSibling : LastChild; 155 156 // Detach old nodes. 157 for (auto *N = Begin; N != End;) { 158 auto *Next = N->NextSibling; 159 160 N->setRole(NodeRole::Detached); 161 N->Parent = nullptr; 162 N->NextSibling = nullptr; 163 N->PreviousSibling = nullptr; 164 if (N->Original) 165 traverse(N, [](Node *C) { C->Original = false; }); 166 167 N = Next; 168 } 169 170 // Attach new range. 171 auto *&NewFirst = BeforeBegin ? BeforeBegin->NextSibling : FirstChild; 172 auto *&NewLast = End ? End->PreviousSibling : LastChild; 173 174 if (!New) { 175 NewFirst = End; 176 NewLast = BeforeBegin; 177 return; 178 } 179 180 New->PreviousSibling = BeforeBegin; 181 NewFirst = New; 182 183 Node *LastInNew; 184 for (auto *N = New; N != nullptr; N = N->NextSibling) { 185 LastInNew = N; 186 N->Parent = this; 187 } 188 LastInNew->NextSibling = End; 189 NewLast = LastInNew; 190 } 191 192 namespace { 193 static void dumpLeaf(raw_ostream &OS, const syntax::Leaf *L, 194 const SourceManager &SM) { 195 assert(L); 196 const auto *Token = L->getToken(); 197 assert(Token); 198 // Handle 'eof' separately, calling text() on it produces an empty string. 199 if (Token->kind() == tok::eof) 200 OS << "<eof>"; 201 else 202 OS << Token->text(SM); 203 } 204 205 static void dumpNode(raw_ostream &OS, const syntax::Node *N, 206 const SourceManager &SM, llvm::BitVector IndentMask) { 207 auto DumpExtraInfo = [&OS](const syntax::Node *N) { 208 if (N->getRole() != syntax::NodeRole::Unknown) 209 OS << " " << N->getRole(); 210 if (!N->isOriginal()) 211 OS << " synthesized"; 212 if (!N->canModify()) 213 OS << " unmodifiable"; 214 }; 215 216 assert(N); 217 if (const auto *L = dyn_cast<syntax::Leaf>(N)) { 218 OS << "'"; 219 dumpLeaf(OS, L, SM); 220 OS << "'"; 221 DumpExtraInfo(N); 222 OS << "\n"; 223 return; 224 } 225 226 const auto *T = cast<syntax::Tree>(N); 227 OS << T->getKind(); 228 DumpExtraInfo(N); 229 OS << "\n"; 230 231 for (const syntax::Node &It : T->getChildren()) { 232 for (unsigned Idx = 0; Idx < IndentMask.size(); ++Idx) { 233 if (IndentMask[Idx]) 234 OS << "| "; 235 else 236 OS << " "; 237 } 238 if (!It.getNextSibling()) { 239 OS << "`-"; 240 IndentMask.push_back(false); 241 } else { 242 OS << "|-"; 243 IndentMask.push_back(true); 244 } 245 dumpNode(OS, &It, SM, IndentMask); 246 IndentMask.pop_back(); 247 } 248 } 249 } // namespace 250 251 std::string syntax::Node::dump(const SourceManager &SM) const { 252 std::string Str; 253 llvm::raw_string_ostream OS(Str); 254 dumpNode(OS, this, SM, /*IndentMask=*/{}); 255 return std::move(OS.str()); 256 } 257 258 std::string syntax::Node::dumpTokens(const SourceManager &SM) const { 259 std::string Storage; 260 llvm::raw_string_ostream OS(Storage); 261 traverse(this, [&](const syntax::Node *N) { 262 if (const auto *L = dyn_cast<syntax::Leaf>(N)) { 263 dumpLeaf(OS, L, SM); 264 OS << " "; 265 } 266 }); 267 return Storage; 268 } 269 270 void syntax::Node::assertInvariants() const { 271 #ifndef NDEBUG 272 if (isDetached()) 273 assert(getParent() == nullptr); 274 else 275 assert(getParent() != nullptr); 276 277 const auto *T = dyn_cast<Tree>(this); 278 if (!T) 279 return; 280 for (const Node &C : T->getChildren()) { 281 if (T->isOriginal()) 282 assert(C.isOriginal()); 283 assert(!C.isDetached()); 284 assert(C.getParent() == T); 285 const auto *Next = C.getNextSibling(); 286 assert(!Next || &C == Next->getPreviousSibling()); 287 if (!C.getNextSibling()) 288 assert(&C == T->getLastChild() && 289 "Last child is reachable by advancing from the first child."); 290 } 291 292 const auto *L = dyn_cast<List>(T); 293 if (!L) 294 return; 295 for (const Node &C : T->getChildren()) { 296 assert(C.getRole() == NodeRole::ListElement || 297 C.getRole() == NodeRole::ListDelimiter); 298 if (C.getRole() == NodeRole::ListDelimiter) { 299 assert(isa<Leaf>(C)); 300 assert(cast<Leaf>(C).getToken()->kind() == L->getDelimiterTokenKind()); 301 } 302 } 303 304 #endif 305 } 306 307 void syntax::Node::assertInvariantsRecursive() const { 308 #ifndef NDEBUG 309 traverse(this, [&](const syntax::Node *N) { N->assertInvariants(); }); 310 #endif 311 } 312 313 const syntax::Leaf *syntax::Tree::findFirstLeaf() const { 314 for (const Node &C : getChildren()) { 315 if (const auto *L = dyn_cast<syntax::Leaf>(&C)) 316 return L; 317 if (const auto *L = cast<syntax::Tree>(C).findFirstLeaf()) 318 return L; 319 } 320 return nullptr; 321 } 322 323 const syntax::Leaf *syntax::Tree::findLastLeaf() const { 324 for (const auto *C = getLastChild(); C; C = C->getPreviousSibling()) { 325 if (const auto *L = dyn_cast<syntax::Leaf>(C)) 326 return L; 327 if (const auto *L = cast<syntax::Tree>(C)->findLastLeaf()) 328 return L; 329 } 330 return nullptr; 331 } 332 333 const syntax::Node *syntax::Tree::findChild(NodeRole R) const { 334 for (const Node &C : getChildren()) { 335 if (C.getRole() == R) 336 return &C; 337 } 338 return nullptr; 339 } 340 341 std::vector<syntax::List::ElementAndDelimiter<syntax::Node>> 342 syntax::List::getElementsAsNodesAndDelimiters() { 343 if (!getFirstChild()) 344 return {}; 345 346 std::vector<syntax::List::ElementAndDelimiter<Node>> Children; 347 syntax::Node *ElementWithoutDelimiter = nullptr; 348 for (Node &C : getChildren()) { 349 switch (C.getRole()) { 350 case syntax::NodeRole::ListElement: { 351 if (ElementWithoutDelimiter) { 352 Children.push_back({ElementWithoutDelimiter, nullptr}); 353 } 354 ElementWithoutDelimiter = &C; 355 break; 356 } 357 case syntax::NodeRole::ListDelimiter: { 358 Children.push_back({ElementWithoutDelimiter, cast<syntax::Leaf>(&C)}); 359 ElementWithoutDelimiter = nullptr; 360 break; 361 } 362 default: 363 llvm_unreachable( 364 "A list can have only elements and delimiters as children."); 365 } 366 } 367 368 switch (getTerminationKind()) { 369 case syntax::List::TerminationKind::Separated: { 370 Children.push_back({ElementWithoutDelimiter, nullptr}); 371 break; 372 } 373 case syntax::List::TerminationKind::Terminated: 374 case syntax::List::TerminationKind::MaybeTerminated: { 375 if (ElementWithoutDelimiter) { 376 Children.push_back({ElementWithoutDelimiter, nullptr}); 377 } 378 break; 379 } 380 } 381 382 return Children; 383 } 384 385 // Almost the same implementation of `getElementsAsNodesAndDelimiters` but 386 // ignoring delimiters 387 std::vector<syntax::Node *> syntax::List::getElementsAsNodes() { 388 if (!getFirstChild()) 389 return {}; 390 391 std::vector<syntax::Node *> Children; 392 syntax::Node *ElementWithoutDelimiter = nullptr; 393 for (Node &C : getChildren()) { 394 switch (C.getRole()) { 395 case syntax::NodeRole::ListElement: { 396 if (ElementWithoutDelimiter) { 397 Children.push_back(ElementWithoutDelimiter); 398 } 399 ElementWithoutDelimiter = &C; 400 break; 401 } 402 case syntax::NodeRole::ListDelimiter: { 403 Children.push_back(ElementWithoutDelimiter); 404 ElementWithoutDelimiter = nullptr; 405 break; 406 } 407 default: 408 llvm_unreachable("A list has only elements or delimiters."); 409 } 410 } 411 412 switch (getTerminationKind()) { 413 case syntax::List::TerminationKind::Separated: { 414 Children.push_back(ElementWithoutDelimiter); 415 break; 416 } 417 case syntax::List::TerminationKind::Terminated: 418 case syntax::List::TerminationKind::MaybeTerminated: { 419 if (ElementWithoutDelimiter) { 420 Children.push_back(ElementWithoutDelimiter); 421 } 422 break; 423 } 424 } 425 426 return Children; 427 } 428 429 clang::tok::TokenKind syntax::List::getDelimiterTokenKind() const { 430 switch (this->getKind()) { 431 case NodeKind::NestedNameSpecifier: 432 return clang::tok::coloncolon; 433 case NodeKind::CallArguments: 434 case NodeKind::ParameterDeclarationList: 435 case NodeKind::DeclaratorList: 436 return clang::tok::comma; 437 default: 438 llvm_unreachable("This is not a subclass of List, thus " 439 "getDelimiterTokenKind() cannot be called"); 440 } 441 } 442 443 syntax::List::TerminationKind syntax::List::getTerminationKind() const { 444 switch (this->getKind()) { 445 case NodeKind::NestedNameSpecifier: 446 return TerminationKind::Terminated; 447 case NodeKind::CallArguments: 448 case NodeKind::ParameterDeclarationList: 449 case NodeKind::DeclaratorList: 450 return TerminationKind::Separated; 451 default: 452 llvm_unreachable("This is not a subclass of List, thus " 453 "getTerminationKind() cannot be called"); 454 } 455 } 456 457 bool syntax::List::canBeEmpty() const { 458 switch (this->getKind()) { 459 case NodeKind::NestedNameSpecifier: 460 return false; 461 case NodeKind::CallArguments: 462 return true; 463 case NodeKind::ParameterDeclarationList: 464 return true; 465 case NodeKind::DeclaratorList: 466 return true; 467 default: 468 llvm_unreachable("This is not a subclass of List, thus canBeEmpty() " 469 "cannot be called"); 470 } 471 } 472