1 //===- Tree.cpp -----------------------------------------------*- C++ -*-=====// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "clang/Tooling/Syntax/Tree.h" 9 #include "clang/Basic/TokenKinds.h" 10 #include "clang/Tooling/Syntax/Nodes.h" 11 #include "llvm/ADT/ArrayRef.h" 12 #include "llvm/ADT/STLExtras.h" 13 #include "llvm/Support/Casting.h" 14 #include <cassert> 15 16 using namespace clang; 17 18 namespace { 19 static void traverse(const syntax::Node *N, 20 llvm::function_ref<void(const syntax::Node *)> Visit) { 21 if (auto *T = dyn_cast<syntax::Tree>(N)) { 22 for (const syntax::Node &C : T->getChildren()) 23 traverse(&C, Visit); 24 } 25 Visit(N); 26 } 27 static void traverse(syntax::Node *N, 28 llvm::function_ref<void(syntax::Node *)> Visit) { 29 traverse(static_cast<const syntax::Node *>(N), [&](const syntax::Node *N) { 30 Visit(const_cast<syntax::Node *>(N)); 31 }); 32 } 33 } // namespace 34 35 syntax::Arena::Arena(SourceManager &SourceMgr, const LangOptions &LangOpts, 36 const TokenBuffer &Tokens) 37 : SourceMgr(SourceMgr), LangOpts(LangOpts), Tokens(Tokens) {} 38 39 const syntax::TokenBuffer &syntax::Arena::getTokenBuffer() const { 40 return Tokens; 41 } 42 43 std::pair<FileID, ArrayRef<syntax::Token>> 44 syntax::Arena::lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Input) { 45 auto FID = SourceMgr.createFileID(std::move(Input)); 46 auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SourceMgr, LangOpts)); 47 assert(It.second && "duplicate FileID"); 48 return {FID, It.first->second}; 49 } 50 51 syntax::Leaf::Leaf(const syntax::Token *Tok) : Node(NodeKind::Leaf), Tok(Tok) { 52 assert(Tok != nullptr); 53 } 54 55 syntax::Node::Node(NodeKind Kind) 56 : Parent(nullptr), NextSibling(nullptr), PreviousSibling(nullptr), 57 Kind(static_cast<unsigned>(Kind)), Role(0), Original(false), 58 CanModify(false) { 59 this->setRole(NodeRole::Detached); 60 } 61 62 bool syntax::Node::isDetached() const { 63 return getRole() == NodeRole::Detached; 64 } 65 66 void syntax::Node::setRole(NodeRole NR) { 67 this->Role = static_cast<unsigned>(NR); 68 } 69 70 void syntax::Tree::appendChildLowLevel(Node *Child, NodeRole Role) { 71 assert(Child->getRole() == NodeRole::Detached); 72 assert(Role != NodeRole::Detached); 73 74 Child->setRole(Role); 75 appendChildLowLevel(Child); 76 } 77 78 void syntax::Tree::appendChildLowLevel(Node *Child) { 79 assert(Child->Parent == nullptr); 80 assert(Child->NextSibling == nullptr); 81 assert(Child->PreviousSibling == nullptr); 82 assert(Child->getRole() != NodeRole::Detached); 83 84 Child->Parent = this; 85 if (this->LastChild) { 86 Child->PreviousSibling = this->LastChild; 87 this->LastChild->NextSibling = Child; 88 } else 89 this->FirstChild = Child; 90 91 this->LastChild = Child; 92 } 93 94 void syntax::Tree::prependChildLowLevel(Node *Child, NodeRole Role) { 95 assert(Child->getRole() == NodeRole::Detached); 96 assert(Role != NodeRole::Detached); 97 98 Child->setRole(Role); 99 prependChildLowLevel(Child); 100 } 101 102 void syntax::Tree::prependChildLowLevel(Node *Child) { 103 assert(Child->Parent == nullptr); 104 assert(Child->NextSibling == nullptr); 105 assert(Child->PreviousSibling == nullptr); 106 assert(Child->getRole() != NodeRole::Detached); 107 108 Child->Parent = this; 109 if (this->FirstChild) { 110 Child->NextSibling = this->FirstChild; 111 this->FirstChild->PreviousSibling = Child; 112 } else 113 this->LastChild = Child; 114 115 this->FirstChild = Child; 116 } 117 118 void syntax::Tree::replaceChildRangeLowLevel(Node *Begin, Node *End, 119 Node *New) { 120 assert((!Begin || Begin->Parent == this) && 121 "`Begin` is not a child of `this`."); 122 assert((!End || End->Parent == this) && "`End` is not a child of `this`."); 123 assert(canModify() && "Cannot modify `this`."); 124 125 #ifndef NDEBUG 126 for (auto *N = New; N; N = N->NextSibling) { 127 assert(N->Parent == nullptr); 128 assert(N->getRole() != NodeRole::Detached && "Roles must be set"); 129 // FIXME: validate the role. 130 } 131 132 auto Reachable = [](Node *From, Node *N) { 133 if (!N) 134 return true; 135 for (auto *It = From; It; It = It->NextSibling) 136 if (It == N) 137 return true; 138 return false; 139 }; 140 assert(Reachable(FirstChild, Begin) && "`Begin` is not reachable."); 141 assert(Reachable(Begin, End) && "`End` is not after `Begin`."); 142 #endif 143 144 if (!New && Begin == End) 145 return; 146 147 // Mark modification. 148 for (auto *T = this; T && T->Original; T = T->Parent) 149 T->Original = false; 150 151 // Save the node before the range to be removed. Later we insert the `New` 152 // range after this node. 153 auto *BeforeBegin = Begin ? Begin->PreviousSibling : LastChild; 154 155 // Detach old nodes. 156 for (auto *N = Begin; N != End;) { 157 auto *Next = N->NextSibling; 158 159 N->setRole(NodeRole::Detached); 160 N->Parent = nullptr; 161 N->NextSibling = nullptr; 162 N->PreviousSibling = nullptr; 163 if (N->Original) 164 traverse(N, [](Node *C) { C->Original = false; }); 165 166 N = Next; 167 } 168 169 // Attach new range. 170 auto *&NewFirst = BeforeBegin ? BeforeBegin->NextSibling : FirstChild; 171 auto *&NewLast = End ? End->PreviousSibling : LastChild; 172 173 if (!New) { 174 NewFirst = End; 175 NewLast = BeforeBegin; 176 return; 177 } 178 179 New->PreviousSibling = BeforeBegin; 180 NewFirst = New; 181 182 Node *LastInNew; 183 for (auto *N = New; N != nullptr; N = N->NextSibling) { 184 LastInNew = N; 185 N->Parent = this; 186 } 187 LastInNew->NextSibling = End; 188 NewLast = LastInNew; 189 } 190 191 namespace { 192 static void dumpLeaf(raw_ostream &OS, const syntax::Leaf *L, 193 const SourceManager &SM) { 194 assert(L); 195 const auto *Token = L->getToken(); 196 assert(Token); 197 // Handle 'eof' separately, calling text() on it produces an empty string. 198 if (Token->kind() == tok::eof) 199 OS << "<eof>"; 200 else 201 OS << Token->text(SM); 202 } 203 204 static void dumpNode(raw_ostream &OS, const syntax::Node *N, 205 const SourceManager &SM, std::vector<bool> IndentMask) { 206 auto DumpExtraInfo = [&OS](const syntax::Node *N) { 207 if (N->getRole() != syntax::NodeRole::Unknown) 208 OS << " " << N->getRole(); 209 if (!N->isOriginal()) 210 OS << " synthesized"; 211 if (!N->canModify()) 212 OS << " unmodifiable"; 213 }; 214 215 assert(N); 216 if (const auto *L = dyn_cast<syntax::Leaf>(N)) { 217 OS << "'"; 218 dumpLeaf(OS, L, SM); 219 OS << "'"; 220 DumpExtraInfo(N); 221 OS << "\n"; 222 return; 223 } 224 225 const auto *T = cast<syntax::Tree>(N); 226 OS << T->getKind(); 227 DumpExtraInfo(N); 228 OS << "\n"; 229 230 for (const syntax::Node &It : T->getChildren()) { 231 for (bool Filled : IndentMask) { 232 if (Filled) 233 OS << "| "; 234 else 235 OS << " "; 236 } 237 if (!It.getNextSibling()) { 238 OS << "`-"; 239 IndentMask.push_back(false); 240 } else { 241 OS << "|-"; 242 IndentMask.push_back(true); 243 } 244 dumpNode(OS, &It, SM, IndentMask); 245 IndentMask.pop_back(); 246 } 247 } 248 } // namespace 249 250 std::string syntax::Node::dump(const SourceManager &SM) const { 251 std::string Str; 252 llvm::raw_string_ostream OS(Str); 253 dumpNode(OS, this, SM, /*IndentMask=*/{}); 254 return std::move(OS.str()); 255 } 256 257 std::string syntax::Node::dumpTokens(const SourceManager &SM) const { 258 std::string Storage; 259 llvm::raw_string_ostream OS(Storage); 260 traverse(this, [&](const syntax::Node *N) { 261 if (const auto *L = dyn_cast<syntax::Leaf>(N)) { 262 dumpLeaf(OS, L, SM); 263 OS << " "; 264 } 265 }); 266 return Storage; 267 } 268 269 void syntax::Node::assertInvariants() const { 270 #ifndef NDEBUG 271 if (isDetached()) 272 assert(getParent() == nullptr); 273 else 274 assert(getParent() != nullptr); 275 276 const auto *T = dyn_cast<Tree>(this); 277 if (!T) 278 return; 279 for (const Node &C : T->getChildren()) { 280 if (T->isOriginal()) 281 assert(C.isOriginal()); 282 assert(!C.isDetached()); 283 assert(C.getParent() == T); 284 const auto *Next = C.getNextSibling(); 285 assert(!Next || &C == Next->getPreviousSibling()); 286 if (!C.getNextSibling()) 287 assert(&C == T->getLastChild() && 288 "Last child is reachable by advancing from the first child."); 289 } 290 291 const auto *L = dyn_cast<List>(T); 292 if (!L) 293 return; 294 for (const Node &C : T->getChildren()) { 295 assert(C.getRole() == NodeRole::ListElement || 296 C.getRole() == NodeRole::ListDelimiter); 297 if (C.getRole() == NodeRole::ListDelimiter) { 298 assert(isa<Leaf>(C)); 299 assert(cast<Leaf>(C).getToken()->kind() == L->getDelimiterTokenKind()); 300 } 301 } 302 303 #endif 304 } 305 306 void syntax::Node::assertInvariantsRecursive() const { 307 #ifndef NDEBUG 308 traverse(this, [&](const syntax::Node *N) { N->assertInvariants(); }); 309 #endif 310 } 311 312 const syntax::Leaf *syntax::Tree::findFirstLeaf() const { 313 for (const Node &C : getChildren()) { 314 if (const auto *L = dyn_cast<syntax::Leaf>(&C)) 315 return L; 316 if (const auto *L = cast<syntax::Tree>(C).findFirstLeaf()) 317 return L; 318 } 319 return nullptr; 320 } 321 322 const syntax::Leaf *syntax::Tree::findLastLeaf() const { 323 for (const auto *C = getLastChild(); C; C = C->getPreviousSibling()) { 324 if (const auto *L = dyn_cast<syntax::Leaf>(C)) 325 return L; 326 if (const auto *L = cast<syntax::Tree>(C)->findLastLeaf()) 327 return L; 328 } 329 return nullptr; 330 } 331 332 const syntax::Node *syntax::Tree::findChild(NodeRole R) const { 333 for (const Node &C : getChildren()) { 334 if (C.getRole() == R) 335 return &C; 336 } 337 return nullptr; 338 } 339 340 std::vector<syntax::List::ElementAndDelimiter<syntax::Node>> 341 syntax::List::getElementsAsNodesAndDelimiters() { 342 if (!getFirstChild()) 343 return {}; 344 345 std::vector<syntax::List::ElementAndDelimiter<Node>> Children; 346 syntax::Node *ElementWithoutDelimiter = nullptr; 347 for (Node &C : getChildren()) { 348 switch (C.getRole()) { 349 case syntax::NodeRole::ListElement: { 350 if (ElementWithoutDelimiter) { 351 Children.push_back({ElementWithoutDelimiter, nullptr}); 352 } 353 ElementWithoutDelimiter = &C; 354 break; 355 } 356 case syntax::NodeRole::ListDelimiter: { 357 Children.push_back({ElementWithoutDelimiter, cast<syntax::Leaf>(&C)}); 358 ElementWithoutDelimiter = nullptr; 359 break; 360 } 361 default: 362 llvm_unreachable( 363 "A list can have only elements and delimiters as children."); 364 } 365 } 366 367 switch (getTerminationKind()) { 368 case syntax::List::TerminationKind::Separated: { 369 Children.push_back({ElementWithoutDelimiter, nullptr}); 370 break; 371 } 372 case syntax::List::TerminationKind::Terminated: 373 case syntax::List::TerminationKind::MaybeTerminated: { 374 if (ElementWithoutDelimiter) { 375 Children.push_back({ElementWithoutDelimiter, nullptr}); 376 } 377 break; 378 } 379 } 380 381 return Children; 382 } 383 384 // Almost the same implementation of `getElementsAsNodesAndDelimiters` but 385 // ignoring delimiters 386 std::vector<syntax::Node *> syntax::List::getElementsAsNodes() { 387 if (!getFirstChild()) 388 return {}; 389 390 std::vector<syntax::Node *> Children; 391 syntax::Node *ElementWithoutDelimiter = nullptr; 392 for (Node &C : getChildren()) { 393 switch (C.getRole()) { 394 case syntax::NodeRole::ListElement: { 395 if (ElementWithoutDelimiter) { 396 Children.push_back(ElementWithoutDelimiter); 397 } 398 ElementWithoutDelimiter = &C; 399 break; 400 } 401 case syntax::NodeRole::ListDelimiter: { 402 Children.push_back(ElementWithoutDelimiter); 403 ElementWithoutDelimiter = nullptr; 404 break; 405 } 406 default: 407 llvm_unreachable("A list has only elements or delimiters."); 408 } 409 } 410 411 switch (getTerminationKind()) { 412 case syntax::List::TerminationKind::Separated: { 413 Children.push_back(ElementWithoutDelimiter); 414 break; 415 } 416 case syntax::List::TerminationKind::Terminated: 417 case syntax::List::TerminationKind::MaybeTerminated: { 418 if (ElementWithoutDelimiter) { 419 Children.push_back(ElementWithoutDelimiter); 420 } 421 break; 422 } 423 } 424 425 return Children; 426 } 427 428 clang::tok::TokenKind syntax::List::getDelimiterTokenKind() const { 429 switch (this->getKind()) { 430 case NodeKind::NestedNameSpecifier: 431 return clang::tok::coloncolon; 432 case NodeKind::CallArguments: 433 case NodeKind::ParameterDeclarationList: 434 case NodeKind::DeclaratorList: 435 return clang::tok::comma; 436 default: 437 llvm_unreachable("This is not a subclass of List, thus " 438 "getDelimiterTokenKind() cannot be called"); 439 } 440 } 441 442 syntax::List::TerminationKind syntax::List::getTerminationKind() const { 443 switch (this->getKind()) { 444 case NodeKind::NestedNameSpecifier: 445 return TerminationKind::Terminated; 446 case NodeKind::CallArguments: 447 case NodeKind::ParameterDeclarationList: 448 case NodeKind::DeclaratorList: 449 return TerminationKind::Separated; 450 default: 451 llvm_unreachable("This is not a subclass of List, thus " 452 "getTerminationKind() cannot be called"); 453 } 454 } 455 456 bool syntax::List::canBeEmpty() const { 457 switch (this->getKind()) { 458 case NodeKind::NestedNameSpecifier: 459 return false; 460 case NodeKind::CallArguments: 461 return true; 462 case NodeKind::ParameterDeclarationList: 463 return true; 464 case NodeKind::DeclaratorList: 465 return true; 466 default: 467 llvm_unreachable("This is not a subclass of List, thus canBeEmpty() " 468 "cannot be called"); 469 } 470 } 471