1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "FormatToken.h" 17 #include "FormatTokenLexer.h" 18 #include "FormatTokenSource.h" 19 #include "Macros.h" 20 #include "TokenAnnotator.h" 21 #include "clang/Basic/TokenKinds.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/StringRef.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/raw_os_ostream.h" 26 #include "llvm/Support/raw_ostream.h" 27 28 #include <algorithm> 29 #include <utility> 30 31 #define DEBUG_TYPE "format-parser" 32 33 namespace clang { 34 namespace format { 35 36 namespace { 37 38 void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line, 39 StringRef Prefix = "", bool PrintText = false) { 40 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn 41 << ")" << (Line.InPPDirective ? 
" MACRO" : "") << ": "; 42 bool NewLine = false; 43 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 44 E = Line.Tokens.end(); 45 I != E; ++I) { 46 if (NewLine) { 47 OS << Prefix; 48 NewLine = false; 49 } 50 OS << I->Tok->Tok.getName() << "[" << "T=" << (unsigned)I->Tok->getType() 51 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText 52 << "\"] "; 53 for (SmallVectorImpl<UnwrappedLine>::const_iterator 54 CI = I->Children.begin(), 55 CE = I->Children.end(); 56 CI != CE; ++CI) { 57 OS << "\n"; 58 printLine(OS, *CI, (Prefix + " ").str()); 59 NewLine = true; 60 } 61 } 62 if (!NewLine) 63 OS << "\n"; 64 } 65 66 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) { 67 printLine(llvm::dbgs(), Line); 68 } 69 70 class ScopedDeclarationState { 71 public: 72 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack, 73 bool MustBeDeclaration) 74 : Line(Line), Stack(Stack) { 75 Line.MustBeDeclaration = MustBeDeclaration; 76 Stack.push_back(MustBeDeclaration); 77 } 78 ~ScopedDeclarationState() { 79 Stack.pop_back(); 80 if (!Stack.empty()) 81 Line.MustBeDeclaration = Stack.back(); 82 else 83 Line.MustBeDeclaration = true; 84 } 85 86 private: 87 UnwrappedLine &Line; 88 llvm::BitVector &Stack; 89 }; 90 91 } // end anonymous namespace 92 93 class ScopedLineState { 94 public: 95 ScopedLineState(UnwrappedLineParser &Parser, 96 bool SwitchToPreprocessorLines = false) 97 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 98 if (SwitchToPreprocessorLines) 99 Parser.CurrentLines = &Parser.PreprocessorDirectives; 100 else if (!Parser.Line->Tokens.empty()) 101 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 102 PreBlockLine = std::move(Parser.Line); 103 Parser.Line = std::make_unique<UnwrappedLine>(); 104 Parser.Line->Level = PreBlockLine->Level; 105 Parser.Line->PPLevel = PreBlockLine->PPLevel; 106 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 107 Parser.Line->InMacroBody = 
PreBlockLine->InMacroBody; 108 } 109 110 ~ScopedLineState() { 111 if (!Parser.Line->Tokens.empty()) 112 Parser.addUnwrappedLine(); 113 assert(Parser.Line->Tokens.empty()); 114 Parser.Line = std::move(PreBlockLine); 115 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 116 Parser.MustBreakBeforeNextToken = true; 117 Parser.CurrentLines = OriginalLines; 118 } 119 120 private: 121 UnwrappedLineParser &Parser; 122 123 std::unique_ptr<UnwrappedLine> PreBlockLine; 124 SmallVectorImpl<UnwrappedLine> *OriginalLines; 125 }; 126 127 class CompoundStatementIndenter { 128 public: 129 CompoundStatementIndenter(UnwrappedLineParser *Parser, 130 const FormatStyle &Style, unsigned &LineLevel) 131 : CompoundStatementIndenter(Parser, LineLevel, 132 Style.BraceWrapping.AfterControlStatement, 133 Style.BraceWrapping.IndentBraces) {} 134 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, 135 bool WrapBrace, bool IndentBrace) 136 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 137 if (WrapBrace) 138 Parser->addUnwrappedLine(); 139 if (IndentBrace) 140 ++LineLevel; 141 } 142 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 143 144 private: 145 unsigned &LineLevel; 146 unsigned OldLineLevel; 147 }; 148 149 UnwrappedLineParser::UnwrappedLineParser( 150 SourceManager &SourceMgr, const FormatStyle &Style, 151 const AdditionalKeywords &Keywords, unsigned FirstStartColumn, 152 ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback, 153 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator, 154 IdentifierTable &IdentTable) 155 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 156 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 157 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 158 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 159 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 160 ? 
IG_Rejected 161 : IG_Inited), 162 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn), 163 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {} 164 165 void UnwrappedLineParser::reset() { 166 PPBranchLevel = -1; 167 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 168 ? IG_Rejected 169 : IG_Inited; 170 IncludeGuardToken = nullptr; 171 Line.reset(new UnwrappedLine); 172 CommentsBeforeNextToken.clear(); 173 FormatTok = nullptr; 174 MustBreakBeforeNextToken = false; 175 IsDecltypeAutoFunction = false; 176 PreprocessorDirectives.clear(); 177 CurrentLines = &Lines; 178 DeclarationScopeStack.clear(); 179 NestedTooDeep.clear(); 180 NestedLambdas.clear(); 181 PPStack.clear(); 182 Line->FirstStartColumn = FirstStartColumn; 183 184 if (!Unexpanded.empty()) 185 for (FormatToken *Token : AllTokens) 186 Token->MacroCtx.reset(); 187 CurrentExpandedLines.clear(); 188 ExpandedLines.clear(); 189 Unexpanded.clear(); 190 InExpansion = false; 191 Reconstruct.reset(); 192 } 193 194 void UnwrappedLineParser::parse() { 195 IndexedTokenSource TokenSource(AllTokens); 196 Line->FirstStartColumn = FirstStartColumn; 197 do { 198 LLVM_DEBUG(llvm::dbgs() << "----\n"); 199 reset(); 200 Tokens = &TokenSource; 201 TokenSource.reset(); 202 203 readToken(); 204 parseFile(); 205 206 // If we found an include guard then all preprocessor directives (other than 207 // the guard) are over-indented by one. 208 if (IncludeGuard == IG_Found) { 209 for (auto &Line : Lines) 210 if (Line.InPPDirective && Line.Level > 0) 211 --Line.Level; 212 } 213 214 // Create line with eof token. 215 assert(eof()); 216 pushToken(FormatTok); 217 addUnwrappedLine(); 218 219 // In a first run, format everything with the lines containing macro calls 220 // replaced by the expansion. 
221 if (!ExpandedLines.empty()) { 222 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n"); 223 for (const auto &Line : Lines) { 224 if (!Line.Tokens.empty()) { 225 auto it = ExpandedLines.find(Line.Tokens.begin()->Tok); 226 if (it != ExpandedLines.end()) { 227 for (const auto &Expanded : it->second) { 228 LLVM_DEBUG(printDebugInfo(Expanded)); 229 Callback.consumeUnwrappedLine(Expanded); 230 } 231 continue; 232 } 233 } 234 LLVM_DEBUG(printDebugInfo(Line)); 235 Callback.consumeUnwrappedLine(Line); 236 } 237 Callback.finishRun(); 238 } 239 240 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n"); 241 for (const UnwrappedLine &Line : Lines) { 242 LLVM_DEBUG(printDebugInfo(Line)); 243 Callback.consumeUnwrappedLine(Line); 244 } 245 Callback.finishRun(); 246 Lines.clear(); 247 while (!PPLevelBranchIndex.empty() && 248 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 249 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 250 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 251 } 252 if (!PPLevelBranchIndex.empty()) { 253 ++PPLevelBranchIndex.back(); 254 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 255 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 256 } 257 } while (!PPLevelBranchIndex.empty()); 258 } 259 260 void UnwrappedLineParser::parseFile() { 261 // The top-level context in a file always has declarations, except for pre- 262 // processor directives and JavaScript files. 263 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript(); 264 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 265 MustBeDeclaration); 266 if (Style.Language == FormatStyle::LK_TextProto) 267 parseBracedList(); 268 else 269 parseLevel(); 270 // Make sure to format the remaining tokens. 271 // 272 // LK_TextProto is special since its top-level is parsed as the body of a 273 // braced list, which does not necessarily have natural line separators such 274 // as a semicolon. 
Comments after the last entry that have been determined to 275 // not belong to that line, as in: 276 // key: value 277 // // endfile comment 278 // do not have a chance to be put on a line of their own until this point. 279 // Here we add this newline before end-of-file comments. 280 if (Style.Language == FormatStyle::LK_TextProto && 281 !CommentsBeforeNextToken.empty()) { 282 addUnwrappedLine(); 283 } 284 flushComments(true); 285 addUnwrappedLine(); 286 } 287 288 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() { 289 do { 290 switch (FormatTok->Tok.getKind()) { 291 case tok::l_brace: 292 return; 293 default: 294 if (FormatTok->is(Keywords.kw_where)) { 295 addUnwrappedLine(); 296 nextToken(); 297 parseCSharpGenericTypeConstraint(); 298 break; 299 } 300 nextToken(); 301 break; 302 } 303 } while (!eof()); 304 } 305 306 void UnwrappedLineParser::parseCSharpAttribute() { 307 int UnpairedSquareBrackets = 1; 308 do { 309 switch (FormatTok->Tok.getKind()) { 310 case tok::r_square: 311 nextToken(); 312 --UnpairedSquareBrackets; 313 if (UnpairedSquareBrackets == 0) { 314 addUnwrappedLine(); 315 return; 316 } 317 break; 318 case tok::l_square: 319 ++UnpairedSquareBrackets; 320 nextToken(); 321 break; 322 default: 323 nextToken(); 324 break; 325 } 326 } while (!eof()); 327 } 328 329 bool UnwrappedLineParser::precededByCommentOrPPDirective() const { 330 if (!Lines.empty() && Lines.back().InPPDirective) 331 return true; 332 333 const FormatToken *Previous = Tokens->getPreviousToken(); 334 return Previous && Previous->is(tok::comment) && 335 (Previous->IsMultiline || Previous->NewlinesBefore > 0); 336 } 337 338 /// \brief Parses a level, that is ???. 339 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level. 340 /// \param IfKind The \p if statement kind in the level. 341 /// \param IfLeftBrace The left brace of the \p if block in the level. 342 /// \returns true if a simple block of if/else/for/while, or false otherwise. 
343 /// (A simple block has a single statement.) 344 bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace, 345 IfStmtKind *IfKind, 346 FormatToken **IfLeftBrace) { 347 const bool InRequiresExpression = 348 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace); 349 const bool IsPrecededByCommentOrPPDirective = 350 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective(); 351 FormatToken *IfLBrace = nullptr; 352 bool HasDoWhile = false; 353 bool HasLabel = false; 354 unsigned StatementCount = 0; 355 bool SwitchLabelEncountered = false; 356 357 do { 358 if (FormatTok->isAttribute()) { 359 nextToken(); 360 continue; 361 } 362 tok::TokenKind kind = FormatTok->Tok.getKind(); 363 if (FormatTok->getType() == TT_MacroBlockBegin) 364 kind = tok::l_brace; 365 else if (FormatTok->getType() == TT_MacroBlockEnd) 366 kind = tok::r_brace; 367 368 auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile, 369 &HasLabel, &StatementCount] { 370 parseStructuralElement(OpeningBrace, IfKind, &IfLBrace, 371 HasDoWhile ? nullptr : &HasDoWhile, 372 HasLabel ? nullptr : &HasLabel); 373 ++StatementCount; 374 assert(StatementCount > 0 && "StatementCount overflow!"); 375 }; 376 377 switch (kind) { 378 case tok::comment: 379 nextToken(); 380 addUnwrappedLine(); 381 break; 382 case tok::l_brace: 383 if (InRequiresExpression) { 384 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace); 385 } else if (FormatTok->Previous && 386 FormatTok->Previous->ClosesRequiresClause) { 387 // We need the 'default' case here to correctly parse a function 388 // l_brace. 
389 ParseDefault(); 390 continue; 391 } 392 if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin) && 393 tryToParseBracedList()) { 394 continue; 395 } 396 parseBlock(); 397 ++StatementCount; 398 assert(StatementCount > 0 && "StatementCount overflow!"); 399 addUnwrappedLine(); 400 break; 401 case tok::r_brace: 402 if (OpeningBrace) { 403 if (!Style.RemoveBracesLLVM || Line->InPPDirective || 404 !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) { 405 return false; 406 } 407 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel || 408 HasDoWhile || IsPrecededByCommentOrPPDirective || 409 precededByCommentOrPPDirective()) { 410 return false; 411 } 412 const FormatToken *Next = Tokens->peekNextToken(); 413 if (Next->is(tok::comment) && Next->NewlinesBefore == 0) 414 return false; 415 if (IfLeftBrace) 416 *IfLeftBrace = IfLBrace; 417 return true; 418 } 419 nextToken(); 420 addUnwrappedLine(); 421 break; 422 case tok::kw_default: { 423 unsigned StoredPosition = Tokens->getPosition(); 424 FormatToken *Next; 425 do { 426 Next = Tokens->getNextToken(); 427 assert(Next); 428 } while (Next->is(tok::comment)); 429 FormatTok = Tokens->setPosition(StoredPosition); 430 if (Next->isNot(tok::colon)) { 431 // default not followed by ':' is not a case label; treat it like 432 // an identifier. 433 parseStructuralElement(); 434 break; 435 } 436 // Else, if it is 'default:', fall through to the case handling. 437 [[fallthrough]]; 438 } 439 case tok::kw_case: 440 if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() || 441 (Style.isJavaScript() && Line->MustBeDeclaration)) { 442 // Proto: there are no switch/case statements 443 // Verilog: Case labels don't have this word. We handle case 444 // labels including default in TokenAnnotator. 445 // JavaScript: A 'case: string' style field declaration. 
446 ParseDefault(); 447 break; 448 } 449 if (!SwitchLabelEncountered && 450 (Style.IndentCaseLabels || 451 (Line->InPPDirective && Line->Level == 1))) { 452 ++Line->Level; 453 } 454 SwitchLabelEncountered = true; 455 parseStructuralElement(); 456 break; 457 case tok::l_square: 458 if (Style.isCSharp()) { 459 nextToken(); 460 parseCSharpAttribute(); 461 break; 462 } 463 if (handleCppAttributes()) 464 break; 465 [[fallthrough]]; 466 default: 467 ParseDefault(); 468 break; 469 } 470 } while (!eof()); 471 472 return false; 473 } 474 475 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 476 // We'll parse forward through the tokens until we hit 477 // a closing brace or eof - note that getNextToken() will 478 // parse macros, so this will magically work inside macro 479 // definitions, too. 480 unsigned StoredPosition = Tokens->getPosition(); 481 FormatToken *Tok = FormatTok; 482 const FormatToken *PrevTok = Tok->Previous; 483 // Keep a stack of positions of lbrace tokens. We will 484 // update information about whether an lbrace starts a 485 // braced init list or a different block during the loop. 486 struct StackEntry { 487 FormatToken *Tok; 488 const FormatToken *PrevTok; 489 }; 490 SmallVector<StackEntry, 8> LBraceStack; 491 assert(Tok->is(tok::l_brace)); 492 493 do { 494 FormatToken *NextTok; 495 do { 496 NextTok = Tokens->getNextToken(); 497 } while (NextTok->is(tok::comment)); 498 499 if (!Line->InMacroBody) { 500 // Skip PPDirective lines and comments. 
501 while (NextTok->is(tok::hash)) { 502 do { 503 NextTok = Tokens->getNextToken(); 504 } while (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof)); 505 506 while (NextTok->is(tok::comment)) 507 NextTok = Tokens->getNextToken(); 508 } 509 } 510 511 switch (Tok->Tok.getKind()) { 512 case tok::l_brace: 513 if (Style.isJavaScript() && PrevTok) { 514 if (PrevTok->isOneOf(tok::colon, tok::less)) { 515 // A ':' indicates this code is in a type, or a braced list 516 // following a label in an object literal ({a: {b: 1}}). 517 // A '<' could be an object used in a comparison, but that is nonsense 518 // code (can never return true), so more likely it is a generic type 519 // argument (`X<{a: string; b: number}>`). 520 // The code below could be confused by semicolons between the 521 // individual members in a type member list, which would normally 522 // trigger BK_Block. In both cases, this must be parsed as an inline 523 // braced init. 524 Tok->setBlockKind(BK_BracedInit); 525 } else if (PrevTok->is(tok::r_paren)) { 526 // `) { }` can only occur in function or method declarations in JS. 527 Tok->setBlockKind(BK_Block); 528 } 529 } else { 530 Tok->setBlockKind(BK_Unknown); 531 } 532 LBraceStack.push_back({Tok, PrevTok}); 533 break; 534 case tok::r_brace: 535 if (LBraceStack.empty()) 536 break; 537 if (LBraceStack.back().Tok->is(BK_Unknown)) { 538 bool ProbablyBracedList = false; 539 if (Style.Language == FormatStyle::LK_Proto) { 540 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 541 } else { 542 // Using OriginalColumn to distinguish between ObjC methods and 543 // binary operators is a bit hacky. 544 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 545 NextTok->OriginalColumn == 0; 546 547 // Try to detect a braced list. 
Note that regardless how we mark inner 548 // braces here, we will overwrite the BlockKind later if we parse a 549 // braced list (where all blocks inside are by default braced lists), 550 // or when we explicitly detect blocks (for example while parsing 551 // lambdas). 552 553 // If we already marked the opening brace as braced list, the closing 554 // must also be part of it. 555 ProbablyBracedList = LBraceStack.back().Tok->is(TT_BracedListLBrace); 556 557 ProbablyBracedList = ProbablyBracedList || 558 (Style.isJavaScript() && 559 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 560 Keywords.kw_as)); 561 ProbablyBracedList = ProbablyBracedList || 562 (Style.isCpp() && NextTok->is(tok::l_paren)); 563 564 // If there is a comma, semicolon or right paren after the closing 565 // brace, we assume this is a braced initializer list. 566 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 567 // braced list in JS. 568 ProbablyBracedList = 569 ProbablyBracedList || 570 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 571 tok::r_paren, tok::r_square, tok::ellipsis); 572 573 // Distinguish between braced list in a constructor initializer list 574 // followed by constructor body, or just adjacent blocks. 
575 ProbablyBracedList = 576 ProbablyBracedList || 577 (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok && 578 LBraceStack.back().PrevTok->isOneOf(tok::identifier, 579 tok::greater)); 580 581 ProbablyBracedList = 582 ProbablyBracedList || 583 (NextTok->is(tok::identifier) && 584 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)); 585 586 ProbablyBracedList = ProbablyBracedList || 587 (NextTok->is(tok::semi) && 588 (!ExpectClassBody || LBraceStack.size() != 1)); 589 590 ProbablyBracedList = 591 ProbablyBracedList || 592 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 593 594 if (!Style.isCSharp() && NextTok->is(tok::l_square)) { 595 // We can have an array subscript after a braced init 596 // list, but C++11 attributes are expected after blocks. 597 NextTok = Tokens->getNextToken(); 598 ProbablyBracedList = NextTok->isNot(tok::l_square); 599 } 600 601 // Cpp macro definition body that is a nonempty braced list or block: 602 if (Style.isCpp() && Line->InMacroBody && PrevTok != FormatTok && 603 !FormatTok->Previous && NextTok->is(tok::eof) && 604 // A statement can end with only `;` (simple statement), a block 605 // closing brace (compound statement), or `:` (label statement). 606 // If PrevTok is a block opening brace, Tok ends an empty block. 
607 !PrevTok->isOneOf(tok::semi, BK_Block, tok::colon)) { 608 ProbablyBracedList = true; 609 } 610 } 611 if (ProbablyBracedList) { 612 Tok->setBlockKind(BK_BracedInit); 613 LBraceStack.back().Tok->setBlockKind(BK_BracedInit); 614 } else { 615 Tok->setBlockKind(BK_Block); 616 LBraceStack.back().Tok->setBlockKind(BK_Block); 617 } 618 } 619 LBraceStack.pop_back(); 620 break; 621 case tok::identifier: 622 if (Tok->isNot(TT_StatementMacro)) 623 break; 624 [[fallthrough]]; 625 case tok::at: 626 case tok::semi: 627 case tok::kw_if: 628 case tok::kw_while: 629 case tok::kw_for: 630 case tok::kw_switch: 631 case tok::kw_try: 632 case tok::kw___try: 633 if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown)) 634 LBraceStack.back().Tok->setBlockKind(BK_Block); 635 break; 636 default: 637 break; 638 } 639 640 PrevTok = Tok; 641 Tok = NextTok; 642 } while (Tok->isNot(tok::eof) && !LBraceStack.empty()); 643 644 // Assume other blocks for all unclosed opening braces. 645 for (const auto &Entry : LBraceStack) 646 if (Entry.Tok->is(BK_Unknown)) 647 Entry.Tok->setBlockKind(BK_Block); 648 649 FormatTok = Tokens->setPosition(StoredPosition); 650 } 651 652 // Sets the token type of the directly previous right brace. 653 void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) { 654 if (auto Prev = FormatTok->getPreviousNonComment(); 655 Prev && Prev->is(tok::r_brace)) { 656 Prev->setFinalizedType(Type); 657 } 658 } 659 660 template <class T> 661 static inline void hash_combine(std::size_t &seed, const T &v) { 662 std::hash<T> hasher; 663 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 664 } 665 666 size_t UnwrappedLineParser::computePPHash() const { 667 size_t h = 0; 668 for (const auto &i : PPStack) { 669 hash_combine(h, size_t(i.Kind)); 670 hash_combine(h, i.Line); 671 } 672 return h; 673 } 674 675 // Checks whether \p ParsedLine might fit on a single line. 
If \p OpeningBrace 676 // is not null, subtracts its length (plus the preceding space) when computing 677 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before 678 // running the token annotator on it so that we can restore them afterward. 679 bool UnwrappedLineParser::mightFitOnOneLine( 680 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const { 681 const auto ColumnLimit = Style.ColumnLimit; 682 if (ColumnLimit == 0) 683 return true; 684 685 auto &Tokens = ParsedLine.Tokens; 686 assert(!Tokens.empty()); 687 688 const auto *LastToken = Tokens.back().Tok; 689 assert(LastToken); 690 691 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size()); 692 693 int Index = 0; 694 for (const auto &Token : Tokens) { 695 assert(Token.Tok); 696 auto &SavedToken = SavedTokens[Index++]; 697 SavedToken.Tok = new FormatToken; 698 SavedToken.Tok->copyFrom(*Token.Tok); 699 SavedToken.Children = std::move(Token.Children); 700 } 701 702 AnnotatedLine Line(ParsedLine); 703 assert(Line.Last == LastToken); 704 705 TokenAnnotator Annotator(Style, Keywords); 706 Annotator.annotate(Line); 707 Annotator.calculateFormattingInformation(Line); 708 709 auto Length = LastToken->TotalLength; 710 if (OpeningBrace) { 711 assert(OpeningBrace != Tokens.front().Tok); 712 if (auto Prev = OpeningBrace->Previous; 713 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) { 714 Length -= ColumnLimit; 715 } 716 Length -= OpeningBrace->TokenText.size() + 1; 717 } 718 719 if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) { 720 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace)); 721 Length -= FirstToken->TokenText.size() + 1; 722 } 723 724 Index = 0; 725 for (auto &Token : Tokens) { 726 const auto &SavedToken = SavedTokens[Index++]; 727 Token.Tok->copyFrom(*SavedToken.Tok); 728 Token.Children = std::move(SavedToken.Children); 729 delete SavedToken.Tok; 730 } 731 732 // If these change PPLevel needs to be used for get 
correct indentation. 733 assert(!Line.InMacroBody); 734 assert(!Line.InPPDirective); 735 return Line.Level * Style.IndentWidth + Length <= ColumnLimit; 736 } 737 738 FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration, 739 unsigned AddLevels, bool MunchSemi, 740 bool KeepBraces, 741 IfStmtKind *IfKind, 742 bool UnindentWhitesmithsBraces) { 743 auto HandleVerilogBlockLabel = [this]() { 744 // ":" name 745 if (Style.isVerilog() && FormatTok->is(tok::colon)) { 746 nextToken(); 747 if (Keywords.isVerilogIdentifier(*FormatTok)) 748 nextToken(); 749 } 750 }; 751 752 // Whether this is a Verilog-specific block that has a special header like a 753 // module. 754 const bool VerilogHierarchy = 755 Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok); 756 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) || 757 (Style.isVerilog() && 758 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) && 759 "'{' or macro block token expected"); 760 FormatToken *Tok = FormatTok; 761 const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment); 762 auto Index = CurrentLines->size(); 763 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 764 FormatTok->setBlockKind(BK_Block); 765 766 // For Whitesmiths mode, jump to the next level prior to skipping over the 767 // braces. 768 if (!VerilogHierarchy && AddLevels > 0 && 769 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 770 ++Line->Level; 771 } 772 773 size_t PPStartHash = computePPHash(); 774 775 const unsigned InitialLevel = Line->Level; 776 if (VerilogHierarchy) { 777 AddLevels += parseVerilogHierarchyHeader(); 778 } else { 779 nextToken(/*LevelDifference=*/AddLevels); 780 HandleVerilogBlockLabel(); 781 } 782 783 // Bail out if there are too many levels. Otherwise, the stack might overflow. 
784 if (Line->Level > 300) 785 return nullptr; 786 787 if (MacroBlock && FormatTok->is(tok::l_paren)) 788 parseParens(); 789 790 size_t NbPreprocessorDirectives = 791 !parsingPPDirective() ? PreprocessorDirectives.size() : 0; 792 addUnwrappedLine(); 793 size_t OpeningLineIndex = 794 CurrentLines->empty() 795 ? (UnwrappedLine::kInvalidIndex) 796 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 797 798 // Whitesmiths is weird here. The brace needs to be indented for the namespace 799 // block, but the block itself may not be indented depending on the style 800 // settings. This allows the format to back up one level in those cases. 801 if (UnindentWhitesmithsBraces) 802 --Line->Level; 803 804 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 805 MustBeDeclaration); 806 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths) 807 Line->Level += AddLevels; 808 809 FormatToken *IfLBrace = nullptr; 810 const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace); 811 812 if (eof()) 813 return IfLBrace; 814 815 if (MacroBlock ? 
FormatTok->isNot(TT_MacroBlockEnd) 816 : FormatTok->isNot(tok::r_brace)) { 817 Line->Level = InitialLevel; 818 FormatTok->setBlockKind(BK_Block); 819 return IfLBrace; 820 } 821 822 if (FormatTok->is(tok::r_brace) && Tok->is(TT_NamespaceLBrace)) 823 FormatTok->setFinalizedType(TT_NamespaceRBrace); 824 825 const bool IsFunctionRBrace = 826 FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace); 827 828 auto RemoveBraces = [=]() mutable { 829 if (!SimpleBlock) 830 return false; 831 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)); 832 assert(FormatTok->is(tok::r_brace)); 833 const bool WrappedOpeningBrace = !Tok->Previous; 834 if (WrappedOpeningBrace && FollowedByComment) 835 return false; 836 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional; 837 if (KeepBraces && !HasRequiredIfBraces) 838 return false; 839 if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) { 840 const FormatToken *Previous = Tokens->getPreviousToken(); 841 assert(Previous); 842 if (Previous->is(tok::r_brace) && !Previous->Optional) 843 return false; 844 } 845 assert(!CurrentLines->empty()); 846 auto &LastLine = CurrentLines->back(); 847 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine)) 848 return false; 849 if (Tok->is(TT_ElseLBrace)) 850 return true; 851 if (WrappedOpeningBrace) { 852 assert(Index > 0); 853 --Index; // The line above the wrapped l_brace. 854 Tok = nullptr; 855 } 856 return mightFitOnOneLine((*CurrentLines)[Index], Tok); 857 }; 858 if (RemoveBraces()) { 859 Tok->MatchingParen = FormatTok; 860 FormatTok->MatchingParen = Tok; 861 } 862 863 size_t PPEndHash = computePPHash(); 864 865 // Munch the closing brace. 866 nextToken(/*LevelDifference=*/-AddLevels); 867 868 // When this is a function block and there is an unnecessary semicolon 869 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of 870 // it later). 
871 if (Style.RemoveSemicolon && IsFunctionRBrace) { 872 while (FormatTok->is(tok::semi)) { 873 FormatTok->Optional = true; 874 nextToken(); 875 } 876 } 877 878 HandleVerilogBlockLabel(); 879 880 if (MacroBlock && FormatTok->is(tok::l_paren)) 881 parseParens(); 882 883 Line->Level = InitialLevel; 884 885 if (FormatTok->is(tok::kw_noexcept)) { 886 // A noexcept in a requires expression. 887 nextToken(); 888 } 889 890 if (FormatTok->is(tok::arrow)) { 891 // Following the } or noexcept we can find a trailing return type arrow 892 // as part of an implicit conversion constraint. 893 nextToken(); 894 parseStructuralElement(); 895 } 896 897 if (MunchSemi && FormatTok->is(tok::semi)) 898 nextToken(); 899 900 if (PPStartHash == PPEndHash) { 901 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 902 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 903 // Update the opening line to add the forward reference as well 904 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 905 CurrentLines->size() - 1; 906 } 907 } 908 909 return IfLBrace; 910 } 911 912 static bool isGoogScope(const UnwrappedLine &Line) { 913 // FIXME: Closure-library specific stuff should not be hard-coded but be 914 // configurable. 915 if (Line.Tokens.size() < 4) 916 return false; 917 auto I = Line.Tokens.begin(); 918 if (I->Tok->TokenText != "goog") 919 return false; 920 ++I; 921 if (I->Tok->isNot(tok::period)) 922 return false; 923 ++I; 924 if (I->Tok->TokenText != "scope") 925 return false; 926 ++I; 927 return I->Tok->is(tok::l_paren); 928 } 929 930 static bool isIIFE(const UnwrappedLine &Line, 931 const AdditionalKeywords &Keywords) { 932 // Look for the start of an immediately invoked anonymous function. 933 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 934 // This is commonly done in JavaScript to create a new, anonymous scope. 935 // Example: (function() { ... 
})() 936 if (Line.Tokens.size() < 3) 937 return false; 938 auto I = Line.Tokens.begin(); 939 if (I->Tok->isNot(tok::l_paren)) 940 return false; 941 ++I; 942 if (I->Tok->isNot(Keywords.kw_function)) 943 return false; 944 ++I; 945 return I->Tok->is(tok::l_paren); 946 } 947 948 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 949 const FormatToken &InitialToken) { 950 tok::TokenKind Kind = InitialToken.Tok.getKind(); 951 if (InitialToken.is(TT_NamespaceMacro)) 952 Kind = tok::kw_namespace; 953 954 switch (Kind) { 955 case tok::kw_namespace: 956 return Style.BraceWrapping.AfterNamespace; 957 case tok::kw_class: 958 return Style.BraceWrapping.AfterClass; 959 case tok::kw_union: 960 return Style.BraceWrapping.AfterUnion; 961 case tok::kw_struct: 962 return Style.BraceWrapping.AfterStruct; 963 case tok::kw_enum: 964 return Style.BraceWrapping.AfterEnum; 965 default: 966 return false; 967 } 968 } 969 970 void UnwrappedLineParser::parseChildBlock() { 971 assert(FormatTok->is(tok::l_brace)); 972 FormatTok->setBlockKind(BK_Block); 973 const FormatToken *OpeningBrace = FormatTok; 974 nextToken(); 975 { 976 bool SkipIndent = (Style.isJavaScript() && 977 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 978 ScopedLineState LineState(*this); 979 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 980 /*MustBeDeclaration=*/false); 981 Line->Level += SkipIndent ? 0 : 1; 982 parseLevel(OpeningBrace); 983 flushComments(isOnNewLine(*FormatTok)); 984 Line->Level -= SkipIndent ? 
0 : 1; 985 } 986 nextToken(); 987 } 988 989 void UnwrappedLineParser::parsePPDirective() { 990 assert(FormatTok->is(tok::hash) && "'#' expected"); 991 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 992 993 nextToken(); 994 995 if (!FormatTok->Tok.getIdentifierInfo()) { 996 parsePPUnknown(); 997 return; 998 } 999 1000 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 1001 case tok::pp_define: 1002 parsePPDefine(); 1003 return; 1004 case tok::pp_if: 1005 parsePPIf(/*IfDef=*/false); 1006 break; 1007 case tok::pp_ifdef: 1008 case tok::pp_ifndef: 1009 parsePPIf(/*IfDef=*/true); 1010 break; 1011 case tok::pp_else: 1012 case tok::pp_elifdef: 1013 case tok::pp_elifndef: 1014 case tok::pp_elif: 1015 parsePPElse(); 1016 break; 1017 case tok::pp_endif: 1018 parsePPEndIf(); 1019 break; 1020 case tok::pp_pragma: 1021 parsePPPragma(); 1022 break; 1023 default: 1024 parsePPUnknown(); 1025 break; 1026 } 1027 } 1028 1029 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 1030 size_t Line = CurrentLines->size(); 1031 if (CurrentLines == &PreprocessorDirectives) 1032 Line += Lines.size(); 1033 1034 if (Unreachable || 1035 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) { 1036 PPStack.push_back({PP_Unreachable, Line}); 1037 } else { 1038 PPStack.push_back({PP_Conditional, Line}); 1039 } 1040 } 1041 1042 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 1043 ++PPBranchLevel; 1044 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 1045 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 1046 PPLevelBranchIndex.push_back(0); 1047 PPLevelBranchCount.push_back(0); 1048 } 1049 PPChainBranchIndex.push(Unreachable ? 
-1 : 0); 1050 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 1051 conditionalCompilationCondition(Unreachable || Skip); 1052 } 1053 1054 void UnwrappedLineParser::conditionalCompilationAlternative() { 1055 if (!PPStack.empty()) 1056 PPStack.pop_back(); 1057 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 1058 if (!PPChainBranchIndex.empty()) 1059 ++PPChainBranchIndex.top(); 1060 conditionalCompilationCondition( 1061 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 1062 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 1063 } 1064 1065 void UnwrappedLineParser::conditionalCompilationEnd() { 1066 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 1067 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 1068 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) 1069 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 1070 } 1071 // Guard against #endif's without #if. 1072 if (PPBranchLevel > -1) 1073 --PPBranchLevel; 1074 if (!PPChainBranchIndex.empty()) 1075 PPChainBranchIndex.pop(); 1076 if (!PPStack.empty()) 1077 PPStack.pop_back(); 1078 } 1079 1080 void UnwrappedLineParser::parsePPIf(bool IfDef) { 1081 bool IfNDef = FormatTok->is(tok::pp_ifndef); 1082 nextToken(); 1083 bool Unreachable = false; 1084 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 1085 Unreachable = true; 1086 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 1087 Unreachable = true; 1088 conditionalCompilationStart(Unreachable); 1089 FormatToken *IfCondition = FormatTok; 1090 // If there's a #ifndef on the first line, and the only lines before it are 1091 // comments, it could be an include guard. 
1092 bool MaybeIncludeGuard = IfNDef; 1093 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1094 for (auto &Line : Lines) { 1095 if (Line.Tokens.front().Tok->isNot(tok::comment)) { 1096 MaybeIncludeGuard = false; 1097 IncludeGuard = IG_Rejected; 1098 break; 1099 } 1100 } 1101 } 1102 --PPBranchLevel; 1103 parsePPUnknown(); 1104 ++PPBranchLevel; 1105 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1106 IncludeGuard = IG_IfNdefed; 1107 IncludeGuardToken = IfCondition; 1108 } 1109 } 1110 1111 void UnwrappedLineParser::parsePPElse() { 1112 // If a potential include guard has an #else, it's not an include guard. 1113 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 1114 IncludeGuard = IG_Rejected; 1115 // Don't crash when there is an #else without an #if. 1116 assert(PPBranchLevel >= -1); 1117 if (PPBranchLevel == -1) 1118 conditionalCompilationStart(/*Unreachable=*/true); 1119 conditionalCompilationAlternative(); 1120 --PPBranchLevel; 1121 parsePPUnknown(); 1122 ++PPBranchLevel; 1123 } 1124 1125 void UnwrappedLineParser::parsePPEndIf() { 1126 conditionalCompilationEnd(); 1127 parsePPUnknown(); 1128 // If the #endif of a potential include guard is the last thing in the file, 1129 // then we found an include guard. 
1130 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && 1131 Style.IndentPPDirectives != FormatStyle::PPDIS_None) { 1132 IncludeGuard = IG_Found; 1133 } 1134 } 1135 1136 void UnwrappedLineParser::parsePPDefine() { 1137 nextToken(); 1138 1139 if (!FormatTok->Tok.getIdentifierInfo()) { 1140 IncludeGuard = IG_Rejected; 1141 IncludeGuardToken = nullptr; 1142 parsePPUnknown(); 1143 return; 1144 } 1145 1146 if (IncludeGuard == IG_IfNdefed && 1147 IncludeGuardToken->TokenText == FormatTok->TokenText) { 1148 IncludeGuard = IG_Defined; 1149 IncludeGuardToken = nullptr; 1150 for (auto &Line : Lines) { 1151 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 1152 IncludeGuard = IG_Rejected; 1153 break; 1154 } 1155 } 1156 } 1157 1158 // In the context of a define, even keywords should be treated as normal 1159 // identifiers. Setting the kind to identifier is not enough, because we need 1160 // to treat additional keywords like __except as well, which are already 1161 // identifiers. Setting the identifier info to null interferes with include 1162 // guard processing above, and changes preprocessing nesting. 1163 FormatTok->Tok.setKind(tok::identifier); 1164 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define); 1165 nextToken(); 1166 if (FormatTok->Tok.getKind() == tok::l_paren && 1167 !FormatTok->hasWhitespaceBefore()) { 1168 parseParens(); 1169 } 1170 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1171 Line->Level += PPBranchLevel + 1; 1172 addUnwrappedLine(); 1173 ++Line->Level; 1174 1175 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 
0 : 1); 1176 assert((int)Line->PPLevel >= 0); 1177 Line->InMacroBody = true; 1178 1179 if (Style.SkipMacroDefinitionBody) { 1180 do { 1181 FormatTok->Finalized = true; 1182 nextToken(); 1183 } while (!eof()); 1184 addUnwrappedLine(); 1185 return; 1186 } 1187 1188 if (FormatTok->is(tok::identifier) && 1189 Tokens->peekNextToken()->is(tok::colon)) { 1190 nextToken(); 1191 nextToken(); 1192 } 1193 1194 // Errors during a preprocessor directive can only affect the layout of the 1195 // preprocessor directive, and thus we ignore them. An alternative approach 1196 // would be to use the same approach we use on the file level (no 1197 // re-indentation if there was a structural error) within the macro 1198 // definition. 1199 parseFile(); 1200 } 1201 1202 void UnwrappedLineParser::parsePPPragma() { 1203 Line->InPragmaDirective = true; 1204 parsePPUnknown(); 1205 } 1206 1207 void UnwrappedLineParser::parsePPUnknown() { 1208 do { 1209 nextToken(); 1210 } while (!eof()); 1211 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1212 Line->Level += PPBranchLevel + 1; 1213 addUnwrappedLine(); 1214 } 1215 1216 // Here we exclude certain tokens that are not usually the first token in an 1217 // unwrapped line. This is used in attempt to distinguish macro calls without 1218 // trailing semicolons from other constructs split to several lines. 1219 static bool tokenCanStartNewLine(const FormatToken &Tok) { 1220 // Semicolon can be a null-statement, l_square can be a start of a macro or 1221 // a C++11 attribute, but this doesn't seem to be common. 1222 assert(Tok.isNot(TT_AttributeSquare)); 1223 return !Tok.isOneOf(tok::semi, tok::l_brace, 1224 // Tokens that can only be used as binary operators and a 1225 // part of overloaded operator names. 
1226 tok::period, tok::periodstar, tok::arrow, tok::arrowstar, 1227 tok::less, tok::greater, tok::slash, tok::percent, 1228 tok::lessless, tok::greatergreater, tok::equal, 1229 tok::plusequal, tok::minusequal, tok::starequal, 1230 tok::slashequal, tok::percentequal, tok::ampequal, 1231 tok::pipeequal, tok::caretequal, tok::greatergreaterequal, 1232 tok::lesslessequal, 1233 // Colon is used in labels, base class lists, initializer 1234 // lists, range-based for loops, ternary operator, but 1235 // should never be the first token in an unwrapped line. 1236 tok::colon, 1237 // 'noexcept' is a trailing annotation. 1238 tok::kw_noexcept); 1239 } 1240 1241 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 1242 const FormatToken *FormatTok) { 1243 // FIXME: This returns true for C/C++ keywords like 'struct'. 1244 return FormatTok->is(tok::identifier) && 1245 (!FormatTok->Tok.getIdentifierInfo() || 1246 !FormatTok->isOneOf( 1247 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 1248 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 1249 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 1250 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 1251 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 1252 Keywords.kw_instanceof, Keywords.kw_interface, 1253 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from)); 1254 } 1255 1256 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 1257 const FormatToken *FormatTok) { 1258 return FormatTok->Tok.isLiteral() || 1259 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 1260 mustBeJSIdent(Keywords, FormatTok); 1261 } 1262 1263 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 1264 // when encountered after a value (see mustBeJSIdentOrValue). 
1265 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 1266 const FormatToken *FormatTok) { 1267 return FormatTok->isOneOf( 1268 tok::kw_return, Keywords.kw_yield, 1269 // conditionals 1270 tok::kw_if, tok::kw_else, 1271 // loops 1272 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 1273 // switch/case 1274 tok::kw_switch, tok::kw_case, 1275 // exceptions 1276 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 1277 // declaration 1278 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 1279 Keywords.kw_async, Keywords.kw_function, 1280 // import/export 1281 Keywords.kw_import, tok::kw_export); 1282 } 1283 1284 // Checks whether a token is a type in K&R C (aka C78). 1285 static bool isC78Type(const FormatToken &Tok) { 1286 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long, 1287 tok::kw_unsigned, tok::kw_float, tok::kw_double, 1288 tok::identifier); 1289 } 1290 1291 // This function checks whether a token starts the first parameter declaration 1292 // in a K&R C (aka C78) function definition, e.g.: 1293 // int f(a, b) 1294 // short a, b; 1295 // { 1296 // return a + b; 1297 // } 1298 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, 1299 const FormatToken *FuncName) { 1300 assert(Tok); 1301 assert(Next); 1302 assert(FuncName); 1303 1304 if (FuncName->isNot(tok::identifier)) 1305 return false; 1306 1307 const FormatToken *Prev = FuncName->Previous; 1308 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev))) 1309 return false; 1310 1311 if (!isC78Type(*Tok) && 1312 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) { 1313 return false; 1314 } 1315 1316 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo()) 1317 return false; 1318 1319 Tok = Tok->Previous; 1320 if (!Tok || Tok->isNot(tok::r_paren)) 1321 return false; 1322 1323 Tok = Tok->Previous; 1324 if (!Tok || Tok->isNot(tok::identifier)) 1325 return false; 1326 1327 return 
Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma); 1328 } 1329 1330 bool UnwrappedLineParser::parseModuleImport() { 1331 assert(FormatTok->is(Keywords.kw_import) && "'import' expected"); 1332 1333 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true); 1334 !Token->Tok.getIdentifierInfo() && 1335 !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) { 1336 return false; 1337 } 1338 1339 nextToken(); 1340 while (!eof()) { 1341 if (FormatTok->is(tok::colon)) { 1342 FormatTok->setFinalizedType(TT_ModulePartitionColon); 1343 } 1344 // Handle import <foo/bar.h> as we would an include statement. 1345 else if (FormatTok->is(tok::less)) { 1346 nextToken(); 1347 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) { 1348 // Mark tokens up to the trailing line comments as implicit string 1349 // literals. 1350 if (FormatTok->isNot(tok::comment) && 1351 !FormatTok->TokenText.starts_with("//")) { 1352 FormatTok->setFinalizedType(TT_ImplicitStringLiteral); 1353 } 1354 nextToken(); 1355 } 1356 } 1357 if (FormatTok->is(tok::semi)) { 1358 nextToken(); 1359 break; 1360 } 1361 nextToken(); 1362 } 1363 1364 addUnwrappedLine(); 1365 return true; 1366 } 1367 1368 // readTokenWithJavaScriptASI reads the next token and terminates the current 1369 // line if JavaScript Automatic Semicolon Insertion must 1370 // happen between the current token and the next token. 1371 // 1372 // This method is conservative - it cannot cover all edge cases of JavaScript, 1373 // but only aims to correctly handle certain well known cases. It *must not* 1374 // return true in speculative cases. 1375 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 1376 FormatToken *Previous = FormatTok; 1377 readToken(); 1378 FormatToken *Next = FormatTok; 1379 1380 bool IsOnSameLine = 1381 CommentsBeforeNextToken.empty() 1382 ? 
Next->NewlinesBefore == 0 1383 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 1384 if (IsOnSameLine) 1385 return; 1386 1387 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 1388 bool PreviousStartsTemplateExpr = 1389 Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${"); 1390 if (PreviousMustBeValue || Previous->is(tok::r_paren)) { 1391 // If the line contains an '@' sign, the previous token might be an 1392 // annotation, which can precede another identifier/value. 1393 bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) { 1394 return LineNode.Tok->is(tok::at); 1395 }); 1396 if (HasAt) 1397 return; 1398 } 1399 if (Next->is(tok::exclaim) && PreviousMustBeValue) 1400 return addUnwrappedLine(); 1401 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); 1402 bool NextEndsTemplateExpr = 1403 Next->is(TT_TemplateString) && Next->TokenText.starts_with("}"); 1404 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && 1405 (PreviousMustBeValue || 1406 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, 1407 tok::minusminus))) { 1408 return addUnwrappedLine(); 1409 } 1410 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) && 1411 isJSDeclOrStmt(Keywords, Next)) { 1412 return addUnwrappedLine(); 1413 } 1414 } 1415 1416 void UnwrappedLineParser::parseStructuralElement( 1417 const FormatToken *OpeningBrace, IfStmtKind *IfKind, 1418 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) { 1419 if (Style.Language == FormatStyle::LK_TableGen && 1420 FormatTok->is(tok::pp_include)) { 1421 nextToken(); 1422 if (FormatTok->is(tok::string_literal)) 1423 nextToken(); 1424 addUnwrappedLine(); 1425 return; 1426 } 1427 1428 if (Style.isCpp()) { 1429 while (FormatTok->is(tok::l_square) && handleCppAttributes()) { 1430 } 1431 } else if (Style.isVerilog()) { 1432 if (Keywords.isVerilogStructuredProcedure(*FormatTok)) { 1433 parseForOrWhileLoop(/*HasParens=*/false); 1434 
return; 1435 } 1436 if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) { 1437 parseForOrWhileLoop(); 1438 return; 1439 } 1440 if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert, 1441 Keywords.kw_assume, Keywords.kw_cover)) { 1442 parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true); 1443 return; 1444 } 1445 1446 // Skip things that can exist before keywords like 'if' and 'case'. 1447 while (true) { 1448 if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique, 1449 Keywords.kw_unique0)) { 1450 nextToken(); 1451 } else if (FormatTok->is(tok::l_paren) && 1452 Tokens->peekNextToken()->is(tok::star)) { 1453 parseParens(); 1454 } else { 1455 break; 1456 } 1457 } 1458 } 1459 1460 // Tokens that only make sense at the beginning of a line. 1461 switch (FormatTok->Tok.getKind()) { 1462 case tok::kw_asm: 1463 nextToken(); 1464 if (FormatTok->is(tok::l_brace)) { 1465 FormatTok->setFinalizedType(TT_InlineASMBrace); 1466 nextToken(); 1467 while (FormatTok && !eof()) { 1468 if (FormatTok->is(tok::r_brace)) { 1469 FormatTok->setFinalizedType(TT_InlineASMBrace); 1470 nextToken(); 1471 addUnwrappedLine(); 1472 break; 1473 } 1474 FormatTok->Finalized = true; 1475 nextToken(); 1476 } 1477 } 1478 break; 1479 case tok::kw_namespace: 1480 parseNamespace(); 1481 return; 1482 case tok::kw_public: 1483 case tok::kw_protected: 1484 case tok::kw_private: 1485 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 1486 Style.isCSharp()) { 1487 nextToken(); 1488 } else { 1489 parseAccessSpecifier(); 1490 } 1491 return; 1492 case tok::kw_if: { 1493 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1494 // field/method declaration. 1495 break; 1496 } 1497 FormatToken *Tok = parseIfThenElse(IfKind); 1498 if (IfLeftBrace) 1499 *IfLeftBrace = Tok; 1500 return; 1501 } 1502 case tok::kw_for: 1503 case tok::kw_while: 1504 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1505 // field/method declaration. 
1506 break; 1507 } 1508 parseForOrWhileLoop(); 1509 return; 1510 case tok::kw_do: 1511 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1512 // field/method declaration. 1513 break; 1514 } 1515 parseDoWhile(); 1516 if (HasDoWhile) 1517 *HasDoWhile = true; 1518 return; 1519 case tok::kw_switch: 1520 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1521 // 'switch: string' field declaration. 1522 break; 1523 } 1524 parseSwitch(); 1525 return; 1526 case tok::kw_default: 1527 // In Verilog default along with other labels are handled in the next loop. 1528 if (Style.isVerilog()) 1529 break; 1530 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1531 // 'default: string' field declaration. 1532 break; 1533 } 1534 nextToken(); 1535 if (FormatTok->is(tok::colon)) { 1536 FormatTok->setFinalizedType(TT_CaseLabelColon); 1537 parseLabel(); 1538 return; 1539 } 1540 // e.g. "default void f() {}" in a Java interface. 1541 break; 1542 case tok::kw_case: 1543 // Proto: there are no switch/case statements. 1544 if (Style.Language == FormatStyle::LK_Proto) { 1545 nextToken(); 1546 return; 1547 } 1548 if (Style.isVerilog()) { 1549 parseBlock(); 1550 addUnwrappedLine(); 1551 return; 1552 } 1553 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1554 // 'case: string' field declaration. 1555 nextToken(); 1556 break; 1557 } 1558 parseCaseLabel(); 1559 return; 1560 case tok::kw_try: 1561 case tok::kw___try: 1562 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1563 // field/method declaration. 1564 break; 1565 } 1566 parseTryCatch(); 1567 return; 1568 case tok::kw_extern: 1569 nextToken(); 1570 if (Style.isVerilog()) { 1571 // In Verilog and extern module declaration looks like a start of module. 1572 // But there is no body and endmodule. So we handle it separately. 
1573 if (Keywords.isVerilogHierarchy(*FormatTok)) { 1574 parseVerilogHierarchyHeader(); 1575 return; 1576 } 1577 } else if (FormatTok->is(tok::string_literal)) { 1578 nextToken(); 1579 if (FormatTok->is(tok::l_brace)) { 1580 if (Style.BraceWrapping.AfterExternBlock) 1581 addUnwrappedLine(); 1582 // Either we indent or for backwards compatibility we follow the 1583 // AfterExternBlock style. 1584 unsigned AddLevels = 1585 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) || 1586 (Style.BraceWrapping.AfterExternBlock && 1587 Style.IndentExternBlock == 1588 FormatStyle::IEBS_AfterExternBlock) 1589 ? 1u 1590 : 0u; 1591 parseBlock(/*MustBeDeclaration=*/true, AddLevels); 1592 addUnwrappedLine(); 1593 return; 1594 } 1595 } 1596 break; 1597 case tok::kw_export: 1598 if (Style.isJavaScript()) { 1599 parseJavaScriptEs6ImportExport(); 1600 return; 1601 } 1602 if (Style.isCpp()) { 1603 nextToken(); 1604 if (FormatTok->is(tok::kw_namespace)) { 1605 parseNamespace(); 1606 return; 1607 } 1608 if (FormatTok->is(Keywords.kw_import) && parseModuleImport()) 1609 return; 1610 } 1611 break; 1612 case tok::kw_inline: 1613 nextToken(); 1614 if (FormatTok->is(tok::kw_namespace)) { 1615 parseNamespace(); 1616 return; 1617 } 1618 break; 1619 case tok::identifier: 1620 if (FormatTok->is(TT_ForEachMacro)) { 1621 parseForOrWhileLoop(); 1622 return; 1623 } 1624 if (FormatTok->is(TT_MacroBlockBegin)) { 1625 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 1626 /*MunchSemi=*/false); 1627 return; 1628 } 1629 if (FormatTok->is(Keywords.kw_import)) { 1630 if (Style.isJavaScript()) { 1631 parseJavaScriptEs6ImportExport(); 1632 return; 1633 } 1634 if (Style.Language == FormatStyle::LK_Proto) { 1635 nextToken(); 1636 if (FormatTok->is(tok::kw_public)) 1637 nextToken(); 1638 if (FormatTok->isNot(tok::string_literal)) 1639 return; 1640 nextToken(); 1641 if (FormatTok->is(tok::semi)) 1642 nextToken(); 1643 addUnwrappedLine(); 1644 return; 1645 } 1646 if (Style.isCpp() && parseModuleImport()) 
1647 return; 1648 } 1649 if (Style.isCpp() && 1650 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, 1651 Keywords.kw_slots, Keywords.kw_qslots)) { 1652 nextToken(); 1653 if (FormatTok->is(tok::colon)) { 1654 nextToken(); 1655 addUnwrappedLine(); 1656 return; 1657 } 1658 } 1659 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 1660 parseStatementMacro(); 1661 return; 1662 } 1663 if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) { 1664 parseNamespace(); 1665 return; 1666 } 1667 // In Verilog labels can be any expression, so we don't do them here. 1668 // JS doesn't have macros, and within classes colons indicate fields, not 1669 // labels. 1670 // TableGen doesn't have labels. 1671 if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() && 1672 Tokens->peekNextToken()->is(tok::colon) && !Line->MustBeDeclaration) { 1673 nextToken(); 1674 Line->Tokens.begin()->Tok->MustBreakBefore = true; 1675 FormatTok->setFinalizedType(TT_GotoLabelColon); 1676 parseLabel(!Style.IndentGotoLabels); 1677 if (HasLabel) 1678 *HasLabel = true; 1679 return; 1680 } 1681 // In all other cases, parse the declaration. 
1682 break; 1683 default: 1684 break; 1685 } 1686 1687 const bool InRequiresExpression = 1688 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace); 1689 do { 1690 const FormatToken *Previous = FormatTok->Previous; 1691 switch (FormatTok->Tok.getKind()) { 1692 case tok::at: 1693 nextToken(); 1694 if (FormatTok->is(tok::l_brace)) { 1695 nextToken(); 1696 parseBracedList(); 1697 break; 1698 } else if (Style.Language == FormatStyle::LK_Java && 1699 FormatTok->is(Keywords.kw_interface)) { 1700 nextToken(); 1701 break; 1702 } 1703 switch (FormatTok->Tok.getObjCKeywordID()) { 1704 case tok::objc_public: 1705 case tok::objc_protected: 1706 case tok::objc_package: 1707 case tok::objc_private: 1708 return parseAccessSpecifier(); 1709 case tok::objc_interface: 1710 case tok::objc_implementation: 1711 return parseObjCInterfaceOrImplementation(); 1712 case tok::objc_protocol: 1713 if (parseObjCProtocol()) 1714 return; 1715 break; 1716 case tok::objc_end: 1717 return; // Handled by the caller. 1718 case tok::objc_optional: 1719 case tok::objc_required: 1720 nextToken(); 1721 addUnwrappedLine(); 1722 return; 1723 case tok::objc_autoreleasepool: 1724 nextToken(); 1725 if (FormatTok->is(tok::l_brace)) { 1726 if (Style.BraceWrapping.AfterControlStatement == 1727 FormatStyle::BWACS_Always) { 1728 addUnwrappedLine(); 1729 } 1730 parseBlock(); 1731 } 1732 addUnwrappedLine(); 1733 return; 1734 case tok::objc_synchronized: 1735 nextToken(); 1736 if (FormatTok->is(tok::l_paren)) { 1737 // Skip synchronization object 1738 parseParens(); 1739 } 1740 if (FormatTok->is(tok::l_brace)) { 1741 if (Style.BraceWrapping.AfterControlStatement == 1742 FormatStyle::BWACS_Always) { 1743 addUnwrappedLine(); 1744 } 1745 parseBlock(); 1746 } 1747 addUnwrappedLine(); 1748 return; 1749 case tok::objc_try: 1750 // This branch isn't strictly necessary (the kw_try case below would 1751 // do this too after the tok::at is parsed above). But be explicit. 
1752 parseTryCatch(); 1753 return; 1754 default: 1755 break; 1756 } 1757 break; 1758 case tok::kw_requires: { 1759 if (Style.isCpp()) { 1760 bool ParsedClause = parseRequires(); 1761 if (ParsedClause) 1762 return; 1763 } else { 1764 nextToken(); 1765 } 1766 break; 1767 } 1768 case tok::kw_enum: 1769 // Ignore if this is part of "template <enum ...". 1770 if (Previous && Previous->is(tok::less)) { 1771 nextToken(); 1772 break; 1773 } 1774 1775 // parseEnum falls through and does not yet add an unwrapped line as an 1776 // enum definition can start a structural element. 1777 if (!parseEnum()) 1778 break; 1779 // This only applies to C++ and Verilog. 1780 if (!Style.isCpp() && !Style.isVerilog()) { 1781 addUnwrappedLine(); 1782 return; 1783 } 1784 break; 1785 case tok::kw_typedef: 1786 nextToken(); 1787 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1788 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS, 1789 Keywords.kw_CF_CLOSED_ENUM, 1790 Keywords.kw_NS_CLOSED_ENUM)) { 1791 parseEnum(); 1792 } 1793 break; 1794 case tok::kw_class: 1795 if (Style.isVerilog()) { 1796 parseBlock(); 1797 addUnwrappedLine(); 1798 return; 1799 } 1800 if (Style.isTableGen()) { 1801 // Do nothing special. In this case the l_brace becomes FunctionLBrace. 1802 // This is same as def and so on. 1803 nextToken(); 1804 break; 1805 } 1806 [[fallthrough]]; 1807 case tok::kw_struct: 1808 case tok::kw_union: 1809 if (parseStructLike()) 1810 return; 1811 break; 1812 case tok::kw_decltype: 1813 nextToken(); 1814 if (FormatTok->is(tok::l_paren)) { 1815 parseParens(); 1816 assert(FormatTok->Previous); 1817 if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto, 1818 tok::l_paren)) { 1819 Line->SeenDecltypeAuto = true; 1820 } 1821 } 1822 break; 1823 case tok::period: 1824 nextToken(); 1825 // In Java, classes have an implicit static member "class". 
1826 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1827 FormatTok->is(tok::kw_class)) { 1828 nextToken(); 1829 } 1830 if (Style.isJavaScript() && FormatTok && 1831 FormatTok->Tok.getIdentifierInfo()) { 1832 // JavaScript only has pseudo keywords, all keywords are allowed to 1833 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 1834 nextToken(); 1835 } 1836 break; 1837 case tok::semi: 1838 nextToken(); 1839 addUnwrappedLine(); 1840 return; 1841 case tok::r_brace: 1842 addUnwrappedLine(); 1843 return; 1844 case tok::l_paren: { 1845 parseParens(); 1846 // Break the unwrapped line if a K&R C function definition has a parameter 1847 // declaration. 1848 if (OpeningBrace || !Style.isCpp() || !Previous || eof()) 1849 break; 1850 if (isC78ParameterDecl(FormatTok, 1851 Tokens->peekNextToken(/*SkipComment=*/true), 1852 Previous)) { 1853 addUnwrappedLine(); 1854 return; 1855 } 1856 break; 1857 } 1858 case tok::kw_operator: 1859 nextToken(); 1860 if (FormatTok->isBinaryOperator()) 1861 nextToken(); 1862 break; 1863 case tok::caret: 1864 nextToken(); 1865 // Block return type. 1866 if (FormatTok->Tok.isAnyIdentifier() || 1867 FormatTok->isSimpleTypeSpecifier()) { 1868 nextToken(); 1869 // Return types: pointers are ok too. 1870 while (FormatTok->is(tok::star)) 1871 nextToken(); 1872 } 1873 // Block argument list. 1874 if (FormatTok->is(tok::l_paren)) 1875 parseParens(); 1876 // Block body. 1877 if (FormatTok->is(tok::l_brace)) 1878 parseChildBlock(); 1879 break; 1880 case tok::l_brace: 1881 if (InRequiresExpression) 1882 FormatTok->setFinalizedType(TT_BracedListLBrace); 1883 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) { 1884 IsDecltypeAutoFunction = Line->SeenDecltypeAuto; 1885 // A block outside of parentheses must be the last part of a 1886 // structural element. 1887 // FIXME: Figure out cases where this is not true, and add projections 1888 // for them (the one we know is missing are lambdas). 
1889 if (Style.Language == FormatStyle::LK_Java && 1890 Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) { 1891 // If necessary, we could set the type to something different than 1892 // TT_FunctionLBrace. 1893 if (Style.BraceWrapping.AfterControlStatement == 1894 FormatStyle::BWACS_Always) { 1895 addUnwrappedLine(); 1896 } 1897 } else if (Style.BraceWrapping.AfterFunction) { 1898 addUnwrappedLine(); 1899 } 1900 FormatTok->setFinalizedType(TT_FunctionLBrace); 1901 parseBlock(); 1902 IsDecltypeAutoFunction = false; 1903 addUnwrappedLine(); 1904 return; 1905 } 1906 // Otherwise this was a braced init list, and the structural 1907 // element continues. 1908 break; 1909 case tok::kw_try: 1910 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1911 // field/method declaration. 1912 nextToken(); 1913 break; 1914 } 1915 // We arrive here when parsing function-try blocks. 1916 if (Style.BraceWrapping.AfterFunction) 1917 addUnwrappedLine(); 1918 parseTryCatch(); 1919 return; 1920 case tok::identifier: { 1921 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) && 1922 Line->MustBeDeclaration) { 1923 addUnwrappedLine(); 1924 parseCSharpGenericTypeConstraint(); 1925 break; 1926 } 1927 if (FormatTok->is(TT_MacroBlockEnd)) { 1928 addUnwrappedLine(); 1929 return; 1930 } 1931 1932 // Function declarations (as opposed to function expressions) are parsed 1933 // on their own unwrapped line by continuing this loop. Function 1934 // expressions (functions that are not on their own line) must not create 1935 // a new unwrapped line, so they are special cased below. 
1936 size_t TokenCount = Line->Tokens.size(); 1937 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) && 1938 (TokenCount > 1 || 1939 (TokenCount == 1 && 1940 Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) { 1941 tryToParseJSFunction(); 1942 break; 1943 } 1944 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) && 1945 FormatTok->is(Keywords.kw_interface)) { 1946 if (Style.isJavaScript()) { 1947 // In JavaScript/TypeScript, "interface" can be used as a standalone 1948 // identifier, e.g. in `var interface = 1;`. If "interface" is 1949 // followed by another identifier, it is very like to be an actual 1950 // interface declaration. 1951 unsigned StoredPosition = Tokens->getPosition(); 1952 FormatToken *Next = Tokens->getNextToken(); 1953 FormatTok = Tokens->setPosition(StoredPosition); 1954 if (!mustBeJSIdent(Keywords, Next)) { 1955 nextToken(); 1956 break; 1957 } 1958 } 1959 parseRecord(); 1960 addUnwrappedLine(); 1961 return; 1962 } 1963 1964 if (Style.isVerilog()) { 1965 if (FormatTok->is(Keywords.kw_table)) { 1966 parseVerilogTable(); 1967 return; 1968 } 1969 if (Keywords.isVerilogBegin(*FormatTok) || 1970 Keywords.isVerilogHierarchy(*FormatTok)) { 1971 parseBlock(); 1972 addUnwrappedLine(); 1973 return; 1974 } 1975 } 1976 1977 if (!Style.isCpp() && FormatTok->is(Keywords.kw_interface)) { 1978 if (parseStructLike()) 1979 return; 1980 break; 1981 } 1982 1983 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 1984 parseStatementMacro(); 1985 return; 1986 } 1987 1988 // See if the following token should start a new unwrapped line. 1989 StringRef Text = FormatTok->TokenText; 1990 1991 FormatToken *PreviousToken = FormatTok; 1992 nextToken(); 1993 1994 // JS doesn't have macros, and within classes colons indicate fields, not 1995 // labels. 
1996 if (Style.isJavaScript()) 1997 break; 1998 1999 auto OneTokenSoFar = [&]() { 2000 auto I = Line->Tokens.begin(), E = Line->Tokens.end(); 2001 while (I != E && I->Tok->is(tok::comment)) 2002 ++I; 2003 if (Style.isVerilog()) 2004 while (I != E && I->Tok->is(tok::hash)) 2005 ++I; 2006 return I != E && (++I == E); 2007 }; 2008 if (OneTokenSoFar()) { 2009 // Recognize function-like macro usages without trailing semicolon as 2010 // well as free-standing macros like Q_OBJECT. 2011 bool FunctionLike = FormatTok->is(tok::l_paren); 2012 if (FunctionLike) 2013 parseParens(); 2014 2015 bool FollowedByNewline = 2016 CommentsBeforeNextToken.empty() 2017 ? FormatTok->NewlinesBefore > 0 2018 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 2019 2020 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 2021 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) { 2022 if (PreviousToken->isNot(TT_UntouchableMacroFunc)) 2023 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro); 2024 addUnwrappedLine(); 2025 return; 2026 } 2027 } 2028 break; 2029 } 2030 case tok::equal: 2031 if ((Style.isJavaScript() || Style.isCSharp()) && 2032 FormatTok->is(TT_FatArrow)) { 2033 tryToParseChildBlock(); 2034 break; 2035 } 2036 2037 nextToken(); 2038 if (FormatTok->is(tok::l_brace)) { 2039 // Block kind should probably be set to BK_BracedInit for any language. 2040 // C# needs this change to ensure that array initialisers and object 2041 // initialisers are indented the same way. 2042 if (Style.isCSharp()) 2043 FormatTok->setBlockKind(BK_BracedInit); 2044 // TableGen's defset statement has syntax of the form, 2045 // `defset <type> <name> = { <statement>... 
// }`
      if (Style.isTableGen() &&
          Line->Tokens.begin()->Tok->is(Keywords.kw_defset)) {
        FormatTok->setFinalizedType(TT_FunctionLBrace);
        parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                   /*MunchSemi=*/false);
        addUnwrappedLine();
        break;
      }
      nextToken();
      parseBracedList();
    } else if (Style.Language == FormatStyle::LK_Proto &&
               FormatTok->is(tok::less)) {
      nextToken();
      parseBracedList(/*IsAngleBracket=*/true);
    }
    break;
  case tok::l_square:
    parseSquare();
    break;
  case tok::kw_new:
    parseNew();
    break;
  case tok::kw_case:
    // Proto: there are no switch/case statements.
    if (Style.Language == FormatStyle::LK_Proto) {
      nextToken();
      return;
    }
    // In Verilog switch is called case.
    if (Style.isVerilog()) {
      parseBlock();
      addUnwrappedLine();
      return;
    }
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'case: string' field declaration.
      nextToken();
      break;
    }
    parseCaseLabel();
    break;
  case tok::kw_default:
    nextToken();
    if (Style.isVerilog()) {
      if (FormatTok->is(tok::colon)) {
        // The label will be handled in the next iteration.
        break;
      }
      if (FormatTok->is(Keywords.kw_clocking)) {
        // A default clocking block.
        parseBlock();
        addUnwrappedLine();
        return;
      }
      parseVerilogCaseLabel();
      return;
    }
    break;
  case tok::colon:
    nextToken();
    if (Style.isVerilog()) {
      parseVerilogCaseLabel();
      return;
    }
    break;
  default:
    nextToken();
    break;
  }
  } while (!eof());
}

/// Tries to parse a C# property accessor block starting at the current `{`.
///
/// Returns false, leaving the token position where it was, when the style is
/// not C#, when the token before the brace is not an identifier, or when the
/// lookahead finds none of `get`, `init` or `set`. Otherwise the accessor is
/// parsed through its closing `}` (and a trailing `= ...;` initializer if one
/// follows) and true is returned.
bool UnwrappedLineParser::tryToParsePropertyAccessor() {
  assert(FormatTok->is(tok::l_brace));
  if (!Style.isCSharp())
    return false;
  // See if it's a property accessor.
  if (FormatTok->Previous->isNot(tok::identifier))
    return false;

  // See if we are inside a property accessor.
  //
  // Record the current tokenPosition so that we can advance and
  // reset the current token. `Next` is not set yet so we need
  // another way to advance along the token stream.
  unsigned int StoredPosition = Tokens->getPosition();
  FormatToken *Tok = Tokens->getNextToken();

  // A trivial property accessor is of the form:
  // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
  // Track these as they do not require line breaks to be introduced.
  bool HasSpecialAccessor = false;
  bool IsTrivialPropertyAccessor = true;
  // Lookahead only: this loop advances the local `Tok`, never `FormatTok`.
  while (!eof()) {
    if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
                     tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
                     Keywords.kw_init, Keywords.kw_set)) {
      if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
        HasSpecialAccessor = true;
      Tok = Tokens->getNextToken();
      continue;
    }
    // The first token outside the accessor vocabulary ends the scan; anything
    // other than the closing brace means the accessor has a real body.
    if (Tok->isNot(tok::r_brace))
      IsTrivialPropertyAccessor = false;
    break;
  }

  if (!HasSpecialAccessor) {
    // Not an accessor after all: undo the lookahead.
    Tokens->setPosition(StoredPosition);
    return false;
  }

  // Try to parse the property accessor:
  // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
  Tokens->setPosition(StoredPosition);
  if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
    addUnwrappedLine();
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::r_brace:
      nextToken();
      // `{ get; set; } = <initializer>;` - swallow the initializer as well.
      if (FormatTok->is(tok::equal)) {
        while (!eof() && FormatTok->isNot(tok::semi))
          nextToken();
        nextToken();
      }
      addUnwrappedLine();
      return true;
    case tok::l_brace:
      // Accessor with a block body; parsed one level deeper.
      ++Line->Level;
      parseBlock(/*MustBeDeclaration=*/true);
      addUnwrappedLine();
      --Line->Level;
      break;
case tok::equal:
      // Expression-bodied accessor (`get => expr;`): consume through the `;`.
      if (FormatTok->is(TT_FatArrow)) {
        ++Line->Level;
        do {
          nextToken();
        } while (!eof() && FormatTok->isNot(tok::semi));
        nextToken();
        addUnwrappedLine();
        --Line->Level;
        break;
      }
      nextToken();
      break;
    default:
      if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
                             Keywords.kw_set) &&
          !IsTrivialPropertyAccessor) {
        // Non-trivial get/set needs to be on its own line.
        addUnwrappedLine();
      }
      nextToken();
    }
  } while (!eof());

  // Unreachable for well-formed code (paired '{' and '}').
  return true;
}

/// Tries to parse a C++ lambda whose introducer starts at the current `[`.
///
/// For non-C++ styles the `[` is consumed and false is returned. False is
/// also returned when tryToParseLambdaIntroducer() rejects the bracket. Every
/// other path returns true, including the early exits taken when the tokens
/// after the introducer cannot belong to a lambda; in that case neither the
/// `[` nor a `{` is typed as a lambda token. Only when a `{` is reached are
/// the brace and the `[` finalized as TT_LambdaLBrace / TT_LambdaLSquare and
/// the body parsed as a child block.
bool UnwrappedLineParser::tryToParseLambda() {
  assert(FormatTok->is(tok::l_square));
  if (!Style.isCpp()) {
    nextToken();
    return false;
  }
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  bool SeenArrow = false;
  bool InTemplateParameterList = false;

  // Walk the tokens between the introducer and the body's `{`.
  while (FormatTok->isNot(tok::l_brace)) {
    if (FormatTok->isSimpleTypeSpecifier()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      break;
    case tok::l_paren:
      parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::less:
      assert(FormatTok->Previous);
      // `[]<...>`: a `<` directly after the introducer opens an explicit
      // template parameter list.
      if (FormatTok->Previous->is(tok::r_square))
        InTemplateParameterList = true;
      nextToken();
      break;
    case tok::kw_auto:
    case tok::kw_class:
    case tok::kw_template:
    case tok::kw_typename:
    case tok::amp:
    case tok::star:
    case tok::kw_const:
    case tok::kw_constexpr:
    case tok::kw_consteval:
    case tok::comma:
    case tok::greater:
    case tok::identifier:
    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_mutable:
    case tok::kw_noexcept:
    case tok::kw_static:
      nextToken();
      break;
    // Specialization of a template with an integer parameter can contain
    // arithmetic, logical, comparison and ternary operators.
    //
    // FIXME: This also accepts sequences of operators that are not in the scope
    // of a template argument list.
    //
    // In a C++ lambda a template type can only occur after an arrow. We use
    // this as a heuristic to distinguish between Objective-C expressions
    // followed by an `a->b` expression, such as:
    // ([obj func:arg] + a->b)
    // Otherwise the code below would parse as a lambda.
    case tok::plus:
    case tok::minus:
    case tok::exclaim:
    case tok::tilde:
    case tok::slash:
    case tok::percent:
    case tok::lessless:
    case tok::pipe:
    case tok::pipepipe:
    case tok::ampamp:
    case tok::caret:
    case tok::equalequal:
    case tok::exclaimequal:
    case tok::greaterequal:
    case tok::lessequal:
    case tok::question:
    case tok::colon:
    case tok::ellipsis:
    case tok::kw_true:
    case tok::kw_false:
      if (SeenArrow || InTemplateParameterList) {
        nextToken();
        break;
      }
      return true;
    case tok::arrow:
      // This might or might not actually be a lambda arrow (this could be an
      // ObjC method invocation followed by a dereferencing arrow). We might
      // reset this back to TT_Unknown in TokenAnnotator.
2301 FormatTok->setFinalizedType(TT_TrailingReturnArrow); 2302 SeenArrow = true; 2303 nextToken(); 2304 break; 2305 case tok::kw_requires: { 2306 auto *RequiresToken = FormatTok; 2307 nextToken(); 2308 parseRequiresClause(RequiresToken); 2309 break; 2310 } 2311 case tok::equal: 2312 if (!InTemplateParameterList) 2313 return true; 2314 nextToken(); 2315 break; 2316 default: 2317 return true; 2318 } 2319 } 2320 2321 FormatTok->setFinalizedType(TT_LambdaLBrace); 2322 LSquare.setFinalizedType(TT_LambdaLSquare); 2323 2324 NestedLambdas.push_back(Line->SeenDecltypeAuto); 2325 parseChildBlock(); 2326 assert(!NestedLambdas.empty()); 2327 NestedLambdas.pop_back(); 2328 2329 return true; 2330 } 2331 2332 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 2333 const FormatToken *Previous = FormatTok->Previous; 2334 const FormatToken *LeftSquare = FormatTok; 2335 nextToken(); 2336 if ((Previous && ((Previous->Tok.getIdentifierInfo() && 2337 !Previous->isOneOf(tok::kw_return, tok::kw_co_await, 2338 tok::kw_co_yield, tok::kw_co_return)) || 2339 Previous->closesScope())) || 2340 LeftSquare->isCppStructuredBinding(Style)) { 2341 return false; 2342 } 2343 if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind())) 2344 return false; 2345 if (FormatTok->is(tok::r_square)) { 2346 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true); 2347 if (Next->is(tok::greater)) 2348 return false; 2349 } 2350 parseSquare(/*LambdaIntroducer=*/true); 2351 return true; 2352 } 2353 2354 void UnwrappedLineParser::tryToParseJSFunction() { 2355 assert(FormatTok->is(Keywords.kw_function)); 2356 if (FormatTok->is(Keywords.kw_async)) 2357 nextToken(); 2358 // Consume "function". 2359 nextToken(); 2360 2361 // Consume * (generator function). Treat it like C++'s overloaded operators. 2362 if (FormatTok->is(tok::star)) { 2363 FormatTok->setFinalizedType(TT_OverloadedOperator); 2364 nextToken(); 2365 } 2366 2367 // Consume function name. 
2368 if (FormatTok->is(tok::identifier)) 2369 nextToken(); 2370 2371 if (FormatTok->isNot(tok::l_paren)) 2372 return; 2373 2374 // Parse formal parameter list. 2375 parseParens(); 2376 2377 if (FormatTok->is(tok::colon)) { 2378 // Parse a type definition. 2379 nextToken(); 2380 2381 // Eat the type declaration. For braced inline object types, balance braces, 2382 // otherwise just parse until finding an l_brace for the function body. 2383 if (FormatTok->is(tok::l_brace)) 2384 tryToParseBracedList(); 2385 else 2386 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 2387 nextToken(); 2388 } 2389 2390 if (FormatTok->is(tok::semi)) 2391 return; 2392 2393 parseChildBlock(); 2394 } 2395 2396 bool UnwrappedLineParser::tryToParseBracedList() { 2397 if (FormatTok->is(BK_Unknown)) 2398 calculateBraceTypes(); 2399 assert(FormatTok->isNot(BK_Unknown)); 2400 if (FormatTok->is(BK_Block)) 2401 return false; 2402 nextToken(); 2403 parseBracedList(); 2404 return true; 2405 } 2406 2407 bool UnwrappedLineParser::tryToParseChildBlock() { 2408 assert(Style.isJavaScript() || Style.isCSharp()); 2409 assert(FormatTok->is(TT_FatArrow)); 2410 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow. 2411 // They always start an expression or a child block if followed by a curly 2412 // brace. 2413 nextToken(); 2414 if (FormatTok->isNot(tok::l_brace)) 2415 return false; 2416 parseChildBlock(); 2417 return true; 2418 } 2419 2420 bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) { 2421 bool HasError = false; 2422 2423 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 2424 // replace this by using parseAssignmentExpression() inside. 
2425 do { 2426 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) && 2427 tryToParseChildBlock()) { 2428 continue; 2429 } 2430 if (Style.isJavaScript()) { 2431 if (FormatTok->is(Keywords.kw_function)) { 2432 tryToParseJSFunction(); 2433 continue; 2434 } 2435 if (FormatTok->is(tok::l_brace)) { 2436 // Could be a method inside of a braced list `{a() { return 1; }}`. 2437 if (tryToParseBracedList()) 2438 continue; 2439 parseChildBlock(); 2440 } 2441 } 2442 if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) { 2443 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2444 addUnwrappedLine(); 2445 nextToken(); 2446 return !HasError; 2447 } 2448 switch (FormatTok->Tok.getKind()) { 2449 case tok::l_square: 2450 if (Style.isCSharp()) 2451 parseSquare(); 2452 else 2453 tryToParseLambda(); 2454 break; 2455 case tok::l_paren: 2456 parseParens(); 2457 // JavaScript can just have free standing methods and getters/setters in 2458 // object literals. Detect them by a "{" following ")". 2459 if (Style.isJavaScript()) { 2460 if (FormatTok->is(tok::l_brace)) 2461 parseChildBlock(); 2462 break; 2463 } 2464 break; 2465 case tok::l_brace: 2466 // Assume there are no blocks inside a braced init list apart 2467 // from the ones we explicitly parse out (like lambdas). 2468 FormatTok->setBlockKind(BK_BracedInit); 2469 nextToken(); 2470 parseBracedList(); 2471 break; 2472 case tok::less: 2473 nextToken(); 2474 if (IsAngleBracket) 2475 parseBracedList(/*IsAngleBracket=*/true); 2476 break; 2477 case tok::semi: 2478 // JavaScript (or more precisely TypeScript) can have semicolons in braced 2479 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 2480 // used for error recovery if we have otherwise determined that this is 2481 // a braced list. 
if (Style.isJavaScript()) {
        nextToken();
        break;
      }
      // Outside JavaScript a `;` means this was not a braced list. Only enum
      // bodies keep going; they still report the error at the closing token.
      HasError = true;
      if (!IsEnum)
        return false;
      nextToken();
      break;
    case tok::comma:
      nextToken();
      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
        addUnwrappedLine();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
  // Ran into EOF before the closing token.
  return false;
}

/// \brief Parses a pair of parentheses (and everything between them).
/// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
/// double ampersands. This applies for all nested scopes as well.
///
/// Returns whether there is a `=` token between the parentheses.
bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
  assert(FormatTok->is(tok::l_paren) && "'(' expected.");
  auto *LeftParen = FormatTok;
  bool SeenEqual = false;
  bool MightBeFoldExpr = false;
  // `({` - parentheses directly wrapping a brace are never marked optional.
  const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace);
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_paren:
      if (parseParens(AmpAmpTokenType))
        SeenEqual = true;
      if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::r_paren:
      // With Style.RemoveParentheses enabled, mark this pair as optional when
      // it is the inner pair of `((...))` (unless excluded below) or, in
      // RPS_ReturnStatement mode, when it wraps the operand of a
      // `return`/`co_return` that ends in `;`.
      if (!MightBeStmtExpr && !MightBeFoldExpr && !Line->InMacroBody &&
          Style.RemoveParentheses > FormatStyle::RPS_Leave) {
        const auto *Prev = LeftParen->Previous;
        const auto *Next = Tokens->peekNextToken();
        const bool DoubleParens =
            Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren);
        const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr;
        // Double parentheses are kept after `__attribute__` and `decltype`,
        // and around an assignment used as an `if`/`while` condition.
        const bool Blacklisted =
            PrevPrev &&
            (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
             (SeenEqual &&
              (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
               PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if))));
        const bool ReturnParens =
            Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
            ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
             (!NestedLambdas.empty() && !NestedLambdas.back())) &&
            Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next &&
            Next->is(tok::semi);
        if ((DoubleParens && !Blacklisted) || ReturnParens) {
          LeftParen->Optional = true;
          FormatTok->Optional = true;
        }
      }
      nextToken();
      return SeenEqual;
    case tok::r_brace:
      // A "}" inside parenthesis is an error if there wasn't a matching "{".
      return SeenEqual;
    case tok::l_square:
      tryToParseLambda();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList())
        parseChildBlock();
      break;
    case tok::at:
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }
      break;
    case tok::ellipsis:
      MightBeFoldExpr = true;
      nextToken();
      break;
    case tok::equal:
      SeenEqual = true;
      if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
        tryToParseChildBlock();
      else
        nextToken();
      break;
    case tok::kw_class:
      if (Style.isJavaScript())
        parseRecord(/*ParseAsExpr=*/true);
      else
        nextToken();
      break;
    case tok::identifier:
      if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function)))
        tryToParseJSFunction();
      else
        nextToken();
      break;
    case tok::kw_requires: {
      auto RequiresToken = FormatTok;
      nextToken();
      parseRequiresExpression(RequiresToken);
      break;
    }
    case tok::ampamp:
      if (AmpAmpTokenType != TT_Unknown)
FormatTok->setFinalizedType(AmpAmpTokenType); 2600 [[fallthrough]]; 2601 default: 2602 nextToken(); 2603 break; 2604 } 2605 } while (!eof()); 2606 return SeenEqual; 2607 } 2608 2609 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 2610 if (!LambdaIntroducer) { 2611 assert(FormatTok->is(tok::l_square) && "'[' expected."); 2612 if (tryToParseLambda()) 2613 return; 2614 } 2615 do { 2616 switch (FormatTok->Tok.getKind()) { 2617 case tok::l_paren: 2618 parseParens(); 2619 break; 2620 case tok::r_square: 2621 nextToken(); 2622 return; 2623 case tok::r_brace: 2624 // A "}" inside parenthesis is an error if there wasn't a matching "{". 2625 return; 2626 case tok::l_square: 2627 parseSquare(); 2628 break; 2629 case tok::l_brace: { 2630 if (!tryToParseBracedList()) 2631 parseChildBlock(); 2632 break; 2633 } 2634 case tok::at: 2635 nextToken(); 2636 if (FormatTok->is(tok::l_brace)) { 2637 nextToken(); 2638 parseBracedList(); 2639 } 2640 break; 2641 default: 2642 nextToken(); 2643 break; 2644 } 2645 } while (!eof()); 2646 } 2647 2648 void UnwrappedLineParser::keepAncestorBraces() { 2649 if (!Style.RemoveBracesLLVM) 2650 return; 2651 2652 const int MaxNestingLevels = 2; 2653 const int Size = NestedTooDeep.size(); 2654 if (Size >= MaxNestingLevels) 2655 NestedTooDeep[Size - MaxNestingLevels] = true; 2656 NestedTooDeep.push_back(false); 2657 } 2658 2659 static FormatToken *getLastNonComment(const UnwrappedLine &Line) { 2660 for (const auto &Token : llvm::reverse(Line.Tokens)) 2661 if (Token.Tok->isNot(tok::comment)) 2662 return Token.Tok; 2663 2664 return nullptr; 2665 } 2666 2667 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) { 2668 FormatToken *Tok = nullptr; 2669 2670 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() && 2671 PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) { 2672 Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never 2673 ? 
getLastNonComment(*Line) 2674 : Line->Tokens.back().Tok; 2675 assert(Tok); 2676 if (Tok->BraceCount < 0) { 2677 assert(Tok->BraceCount == -1); 2678 Tok = nullptr; 2679 } else { 2680 Tok->BraceCount = -1; 2681 } 2682 } 2683 2684 addUnwrappedLine(); 2685 ++Line->Level; 2686 parseStructuralElement(); 2687 2688 if (Tok) { 2689 assert(!Line->InPPDirective); 2690 Tok = nullptr; 2691 for (const auto &L : llvm::reverse(*CurrentLines)) { 2692 if (!L.InPPDirective && getLastNonComment(L)) { 2693 Tok = L.Tokens.back().Tok; 2694 break; 2695 } 2696 } 2697 assert(Tok); 2698 ++Tok->BraceCount; 2699 } 2700 2701 if (CheckEOF && eof()) 2702 addUnwrappedLine(); 2703 2704 --Line->Level; 2705 } 2706 2707 static void markOptionalBraces(FormatToken *LeftBrace) { 2708 if (!LeftBrace) 2709 return; 2710 2711 assert(LeftBrace->is(tok::l_brace)); 2712 2713 FormatToken *RightBrace = LeftBrace->MatchingParen; 2714 if (!RightBrace) { 2715 assert(!LeftBrace->Optional); 2716 return; 2717 } 2718 2719 assert(RightBrace->is(tok::r_brace)); 2720 assert(RightBrace->MatchingParen == LeftBrace); 2721 assert(LeftBrace->Optional == RightBrace->Optional); 2722 2723 LeftBrace->Optional = true; 2724 RightBrace->Optional = true; 2725 } 2726 2727 void UnwrappedLineParser::handleAttributes() { 2728 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`. 2729 if (FormatTok->isAttribute()) 2730 nextToken(); 2731 else if (FormatTok->is(tok::l_square)) 2732 handleCppAttributes(); 2733 } 2734 2735 bool UnwrappedLineParser::handleCppAttributes() { 2736 // Handle [[likely]] / [[unlikely]] attributes. 2737 assert(FormatTok->is(tok::l_square)); 2738 if (!tryToParseSimpleAttribute()) 2739 return false; 2740 parseSquare(); 2741 return true; 2742 } 2743 2744 /// Returns whether \c Tok begins a block. 2745 bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const { 2746 // FIXME: rename the function or make 2747 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work. 2748 return Style.isVerilog() ? 
Keywords.isVerilogBegin(Tok)
                           : Tok.is(tok::l_brace);
}

/// Parses an `if` statement with its optional `else` branch; in Verilog also
/// the assertion-style statements accepted by the assert below.
///
/// \param IfKind If non-null and Style.RemoveBracesLLVM is enabled, receives
/// IfOnly, IfElse or IfElseIf depending on what follows the `if` body.
/// \param KeepBraces Forces the braces of this statement to be kept when
/// Style.RemoveBracesLLVM is enabled.
/// \param IsVerilogAssert Enables the Verilog assert forms (`assert #0`,
/// `assert final`, a missing action before `;` or `else`).
///
/// \returns nullptr when Style.RemoveBracesLLVM is disabled or a Verilog
/// assert without an action was parsed; otherwise the opening brace of the
/// `if` body, which is null for an unbraced body.
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
                                                  bool KeepBraces,
                                                  bool IsVerilogAssert) {
  assert((FormatTok->is(tok::kw_if) ||
          (Style.isVerilog() &&
           FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
                              Keywords.kw_assume, Keywords.kw_cover))) &&
         "'if' expected");
  nextToken();

  if (IsVerilogAssert) {
    // Handle `assert #0` and `assert final`.
    if (FormatTok->is(Keywords.kw_verilogHash)) {
      nextToken();
      if (FormatTok->is(tok::numeric_constant))
        nextToken();
    } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
                                  Keywords.kw_sequence)) {
      nextToken();
    }
  }

  // TableGen's if statement has the form of `if <cond> then { ... }`.
  if (Style.isTableGen()) {
    while (!eof() && FormatTok->isNot(Keywords.kw_then)) {
      // Simply skip until then. This range only contains a value.
      nextToken();
    }
  }

  // Handle `if !consteval`.
  if (FormatTok->is(tok::exclaim))
    nextToken();

  bool KeepIfBraces = true;
  if (FormatTok->is(tok::kw_consteval)) {
    // `if consteval`: KeepIfBraces stays true.
    nextToken();
  } else {
    KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
    // Skip `constexpr` or an identifier between `if` and `(`.
    if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
      nextToken();
    if (FormatTok->is(tok::l_paren)) {
      FormatTok->setFinalizedType(TT_ConditionLParen);
      parseParens();
    }
  }
  handleAttributes();
  // The then action is optional in Verilog assert statements.
  if (IsVerilogAssert && FormatTok->is(tok::semi)) {
    nextToken();
    addUnwrappedLine();
    return nullptr;
  }

  bool NeedsUnwrappedLine = false;
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind IfBlockKind = IfStmtKind::NotIf;

  if (isBlockBegin(*FormatTok)) {
    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
               /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
    setPreviousRBraceType(TT_ControlStatementRBrace);
    // Without BeforeElse the line is left open so a following `else` can join
    // it; it is flushed further down if no `else` follows.
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
    addUnwrappedLine();
  } else {
    parseUnbracedBody();
  }

  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    KeepIfBraces = KeepIfBraces ||
                   (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
                   NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
                   IfBlockKind == IfStmtKind::IfElseIf;
  }

  bool KeepElseBraces = KeepIfBraces;
  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind Kind = IfStmtKind::IfOnly;

  if (FormatTok->is(tok::kw_else)) {
    if (Style.RemoveBracesLLVM) {
      NestedTooDeep.back() = false;
      Kind = IfStmtKind::IfElse;
    }
    nextToken();
    handleAttributes();
    if (isBlockBegin(*FormatTok)) {
      const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
      FormatTok->setFinalizedType(TT_ElseLBrace);
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
      FormatToken *IfLBrace =
          parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                     /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
      setPreviousRBraceType(TT_ElseRBrace);
      if (FormatTok->is(tok::kw_else)) {
        KeepElseBraces = KeepElseBraces ||
                         ElseBlockKind == IfStmtKind::IfOnly ||
                         ElseBlockKind == IfStmtKind::IfElseIf;
      } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
        // `else { if (...) {...} }` where the inner `if` keeps its braces:
        // the else braces themselves are marked optional here.
        KeepElseBraces = true;
        assert(ElseLeftBrace->MatchingParen);
        markOptionalBraces(ElseLeftBrace);
      }
      addUnwrappedLine();
    } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
      // `else if`: parse the nested statement recursively.
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      const bool IsPrecededByComment = Previous->is(tok::comment);
      if (IsPrecededByComment) {
        addUnwrappedLine();
        ++Line->Level;
      }
      bool TooDeep = true;
      if (Style.RemoveBracesLLVM) {
        Kind = IfStmtKind::IfElseIf;
        // This statement's entry is taken off the stack for the duration of
        // the recursive call and pushed back afterwards.
        TooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(TooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      parseUnbracedBody(/*CheckEOF=*/true);
    }
  } else {
    KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
    if (NeedsUnwrappedLine)
      addUnwrappedLine();
  }

  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  KeepElseBraces = KeepElseBraces ||
                   (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
                   NestedTooDeep.back();

  // Pops the entry pushed by keepAncestorBraces() above.
  NestedTooDeep.pop_back();

  if (!KeepIfBraces && !KeepElseBraces) {
    markOptionalBraces(IfLeftBrace);
    markOptionalBraces(ElseLeftBrace);
  } else if (IfLeftBrace) {
    // The braces are kept: unlink the pair from each other.
    FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
    if (IfRightBrace) {
      assert(IfRightBrace->MatchingParen == IfLeftBrace);
      assert(!IfLeftBrace->Optional);
      assert(!IfRightBrace->Optional);
      IfLeftBrace->MatchingParen = nullptr;
      IfRightBrace->MatchingParen = nullptr;
} 2917 } 2918 2919 if (IfKind) 2920 *IfKind = Kind; 2921 2922 return IfLeftBrace; 2923 } 2924 2925 void UnwrappedLineParser::parseTryCatch() { 2926 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 2927 nextToken(); 2928 bool NeedsUnwrappedLine = false; 2929 if (FormatTok->is(tok::colon)) { 2930 // We are in a function try block, what comes is an initializer list. 2931 nextToken(); 2932 2933 // In case identifiers were removed by clang-tidy, what might follow is 2934 // multiple commas in sequence - before the first identifier. 2935 while (FormatTok->is(tok::comma)) 2936 nextToken(); 2937 2938 while (FormatTok->is(tok::identifier)) { 2939 nextToken(); 2940 if (FormatTok->is(tok::l_paren)) 2941 parseParens(); 2942 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) && 2943 FormatTok->is(tok::l_brace)) { 2944 do { 2945 nextToken(); 2946 } while (FormatTok->isNot(tok::r_brace)); 2947 nextToken(); 2948 } 2949 2950 // In case identifiers were removed by clang-tidy, what might follow is 2951 // multiple commas in sequence - after the first identifier. 2952 while (FormatTok->is(tok::comma)) 2953 nextToken(); 2954 } 2955 } 2956 // Parse try with resource. 2957 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) 2958 parseParens(); 2959 2960 keepAncestorBraces(); 2961 2962 if (FormatTok->is(tok::l_brace)) { 2963 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2964 parseBlock(); 2965 if (Style.BraceWrapping.BeforeCatch) 2966 addUnwrappedLine(); 2967 else 2968 NeedsUnwrappedLine = true; 2969 } else if (FormatTok->isNot(tok::kw_catch)) { 2970 // The C++ standard requires a compound-statement after a try. 2971 // If there's none, we try to assume there's a structuralElement 2972 // and try to continue. 
addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }
  // One iteration per handler (`catch`, `__except`, `finally`, ...).
  while (true) {
    if (FormatTok->is(tok::at))
      nextToken();
    if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
                             tok::kw___finally) ||
          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
           FormatTok->is(Keywords.kw_finally)) ||
          (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
           FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
      break;
    }
    nextToken();
    // Skip ahead to the handler's `{`.
    while (FormatTok->isNot(tok::l_brace)) {
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
        continue;
      }
      if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
        // No handler body found: pop the NestedTooDeep entry before bailing
        // out, exactly as the normal exit below does.
        if (Style.RemoveBracesLLVM)
          NestedTooDeep.pop_back();
        return;
      }
      nextToken();
    }
    NeedsUnwrappedLine = false;
    Line->MustBeDeclaration = false;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock();
    if (Style.BraceWrapping.BeforeCatch)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  }

  if (Style.RemoveBracesLLVM)
    NestedTooDeep.pop_back();

  if (NeedsUnwrappedLine)
    addUnwrappedLine();
}

/// Parses a namespace, introduced either by the `namespace` keyword or by a
/// token typed TT_NamespaceMacro.
///
/// After the keyword, the name part is skipped (identifiers, `::`, `inline`,
/// `.`, bracketed and parenthesized groups; for C# also `union`). After a
/// macro, its parenthesized argument list is parsed. If a `{` follows, the
/// body is parsed as a declaration block; otherwise nothing more is consumed.
void UnwrappedLineParser::parseNamespace() {
  assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
         "'namespace' expected");

  const FormatToken &InitialToken = *FormatTok;
  nextToken();
  if (InitialToken.is(TT_NamespaceMacro)) {
    parseParens();
  } else {
    while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
                              tok::l_square, tok::period, tok::l_paren) ||
           (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
      if (FormatTok->is(tok::l_square))
        parseSquare();
      else if (FormatTok->is(tok::l_paren))
        parseParens();
      else
        nextToken();
    }
  }
  if (FormatTok->is(tok::l_brace)) {
FormatTok->setFinalizedType(TT_NamespaceLBrace);

    if (ShouldBreakBeforeBrace(Style, InitialToken))
      addUnwrappedLine();

    // The body is indented for NI_All, and for NI_Inner when
    // DeclarationScopeStack holds more than one entry.
    unsigned AddLevels =
        Style.NamespaceIndentation == FormatStyle::NI_All ||
                (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
                 DeclarationScopeStack.size() > 1)
            ? 1u
            : 0u;
    bool ManageWhitesmithsBraces =
        AddLevels == 0u &&
        Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;

    // If we're in Whitesmiths mode, indent the brace if we're not indenting
    // the whole block.
    if (ManageWhitesmithsBraces)
      ++Line->Level;

    // Munch the semicolon after a namespace. This is more common than one would
    // think. Putting the semicolon into its own line is very ugly.
    parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
               /*KeepBraces=*/true, /*IfKind=*/nullptr,
               ManageWhitesmithsBraces);

    addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);

    if (ManageWhitesmithsBraces)
      --Line->Level;
  }
  // FIXME: Add error handling.
}

/// Parses what follows a `new` keyword.
///
/// C#: consumes tokens up to, but not including, the next `;` or `,`, parsing
/// parenthesized constructor arguments and braced initializers on the way.
/// Java: consumes tokens up to and including the argument list, plus an
/// anonymous class body if one follows, stopping early at `;`, `{` or `}`.
/// Other languages: only the `new` keyword itself is consumed.
void UnwrappedLineParser::parseNew() {
  assert(FormatTok->is(tok::kw_new) && "'new' expected");
  nextToken();

  if (Style.isCSharp()) {
    do {
      // Handle constructor invocation, e.g. `new(field: value)`.
      if (FormatTok->is(tok::l_paren))
        parseParens();

      // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
      if (FormatTok->is(tok::l_brace))
        parseBracedList();

      if (FormatTok->isOneOf(tok::semi, tok::comma))
        return;

      nextToken();
    } while (!eof());
  }

  if (Style.Language != FormatStyle::LK_Java)
    return;

  // In Java, we can parse everything up to the parens, which aren't optional.
  do {
    // There should not be a ;, { or } before the new's open paren.
3101 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 3102 return; 3103 3104 // Consume the parens. 3105 if (FormatTok->is(tok::l_paren)) { 3106 parseParens(); 3107 3108 // If there is a class body of an anonymous class, consume that as child. 3109 if (FormatTok->is(tok::l_brace)) 3110 parseChildBlock(); 3111 return; 3112 } 3113 nextToken(); 3114 } while (!eof()); 3115 } 3116 3117 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) { 3118 keepAncestorBraces(); 3119 3120 if (isBlockBegin(*FormatTok)) { 3121 FormatTok->setFinalizedType(TT_ControlStatementLBrace); 3122 FormatToken *LeftBrace = FormatTok; 3123 CompoundStatementIndenter Indenter(this, Style, Line->Level); 3124 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 3125 /*MunchSemi=*/true, KeepBraces); 3126 setPreviousRBraceType(TT_ControlStatementRBrace); 3127 if (!KeepBraces) { 3128 assert(!NestedTooDeep.empty()); 3129 if (!NestedTooDeep.back()) 3130 markOptionalBraces(LeftBrace); 3131 } 3132 if (WrapRightBrace) 3133 addUnwrappedLine(); 3134 } else { 3135 parseUnbracedBody(); 3136 } 3137 3138 if (!KeepBraces) 3139 NestedTooDeep.pop_back(); 3140 } 3141 3142 void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) { 3143 assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) || 3144 (Style.isVerilog() && 3145 FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb, 3146 Keywords.kw_always_ff, Keywords.kw_always_latch, 3147 Keywords.kw_final, Keywords.kw_initial, 3148 Keywords.kw_foreach, Keywords.kw_forever, 3149 Keywords.kw_repeat))) && 3150 "'for', 'while' or foreach macro expected"); 3151 const bool KeepBraces = !Style.RemoveBracesLLVM || 3152 !FormatTok->isOneOf(tok::kw_for, tok::kw_while); 3153 3154 nextToken(); 3155 // JS' for await ( ... 
3156 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await)) 3157 nextToken(); 3158 if (Style.isCpp() && FormatTok->is(tok::kw_co_await)) 3159 nextToken(); 3160 if (HasParens && FormatTok->is(tok::l_paren)) { 3161 // The type is only set for Verilog basically because we were afraid to 3162 // change the existing behavior for loops. See the discussion on D121756 for 3163 // details. 3164 if (Style.isVerilog()) 3165 FormatTok->setFinalizedType(TT_ConditionLParen); 3166 parseParens(); 3167 } 3168 3169 if (Style.isVerilog()) { 3170 // Event control. 3171 parseVerilogSensitivityList(); 3172 } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) && 3173 Tokens->getPreviousToken()->is(tok::r_paren)) { 3174 nextToken(); 3175 addUnwrappedLine(); 3176 return; 3177 } 3178 3179 handleAttributes(); 3180 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true); 3181 } 3182 3183 void UnwrappedLineParser::parseDoWhile() { 3184 assert(FormatTok->is(tok::kw_do) && "'do' expected"); 3185 nextToken(); 3186 3187 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile); 3188 3189 // FIXME: Add error handling. 3190 if (FormatTok->isNot(tok::kw_while)) { 3191 addUnwrappedLine(); 3192 return; 3193 } 3194 3195 FormatTok->setFinalizedType(TT_DoWhile); 3196 3197 // If in Whitesmiths mode, the line with the while() needs to be indented 3198 // to the same level as the block. 
3199 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 3200 ++Line->Level; 3201 3202 nextToken(); 3203 parseStructuralElement(); 3204 } 3205 3206 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { 3207 nextToken(); 3208 unsigned OldLineLevel = Line->Level; 3209 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 3210 --Line->Level; 3211 if (LeftAlignLabel) 3212 Line->Level = 0; 3213 3214 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && 3215 FormatTok->is(tok::l_brace)) { 3216 3217 CompoundStatementIndenter Indenter(this, Line->Level, 3218 Style.BraceWrapping.AfterCaseLabel, 3219 Style.BraceWrapping.IndentBraces); 3220 parseBlock(); 3221 if (FormatTok->is(tok::kw_break)) { 3222 if (Style.BraceWrapping.AfterControlStatement == 3223 FormatStyle::BWACS_Always) { 3224 addUnwrappedLine(); 3225 if (!Style.IndentCaseBlocks && 3226 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 3227 ++Line->Level; 3228 } 3229 } 3230 parseStructuralElement(); 3231 } 3232 addUnwrappedLine(); 3233 } else { 3234 if (FormatTok->is(tok::semi)) 3235 nextToken(); 3236 addUnwrappedLine(); 3237 } 3238 Line->Level = OldLineLevel; 3239 if (FormatTok->isNot(tok::l_brace)) { 3240 parseStructuralElement(); 3241 addUnwrappedLine(); 3242 } 3243 } 3244 3245 void UnwrappedLineParser::parseCaseLabel() { 3246 assert(FormatTok->is(tok::kw_case) && "'case' expected"); 3247 3248 // FIXME: fix handling of complex expressions here. 
3249 do { 3250 nextToken(); 3251 if (FormatTok->is(tok::colon)) { 3252 FormatTok->setFinalizedType(TT_CaseLabelColon); 3253 break; 3254 } 3255 } while (!eof()); 3256 parseLabel(); 3257 } 3258 3259 void UnwrappedLineParser::parseSwitch() { 3260 assert(FormatTok->is(tok::kw_switch) && "'switch' expected"); 3261 nextToken(); 3262 if (FormatTok->is(tok::l_paren)) 3263 parseParens(); 3264 3265 keepAncestorBraces(); 3266 3267 if (FormatTok->is(tok::l_brace)) { 3268 CompoundStatementIndenter Indenter(this, Style, Line->Level); 3269 FormatTok->setFinalizedType(TT_ControlStatementLBrace); 3270 parseBlock(); 3271 setPreviousRBraceType(TT_ControlStatementRBrace); 3272 addUnwrappedLine(); 3273 } else { 3274 addUnwrappedLine(); 3275 ++Line->Level; 3276 parseStructuralElement(); 3277 --Line->Level; 3278 } 3279 3280 if (Style.RemoveBracesLLVM) 3281 NestedTooDeep.pop_back(); 3282 } 3283 3284 // Operators that can follow a C variable. 3285 static bool isCOperatorFollowingVar(tok::TokenKind kind) { 3286 switch (kind) { 3287 case tok::ampamp: 3288 case tok::ampequal: 3289 case tok::arrow: 3290 case tok::caret: 3291 case tok::caretequal: 3292 case tok::comma: 3293 case tok::ellipsis: 3294 case tok::equal: 3295 case tok::equalequal: 3296 case tok::exclaim: 3297 case tok::exclaimequal: 3298 case tok::greater: 3299 case tok::greaterequal: 3300 case tok::greatergreater: 3301 case tok::greatergreaterequal: 3302 case tok::l_paren: 3303 case tok::l_square: 3304 case tok::less: 3305 case tok::lessequal: 3306 case tok::lessless: 3307 case tok::lesslessequal: 3308 case tok::minus: 3309 case tok::minusequal: 3310 case tok::minusminus: 3311 case tok::percent: 3312 case tok::percentequal: 3313 case tok::period: 3314 case tok::pipe: 3315 case tok::pipeequal: 3316 case tok::pipepipe: 3317 case tok::plus: 3318 case tok::plusequal: 3319 case tok::plusplus: 3320 case tok::question: 3321 case tok::r_brace: 3322 case tok::r_paren: 3323 case tok::r_square: 3324 case tok::semi: 3325 case tok::slash: 3326 
case tok::slashequal: 3327 case tok::star: 3328 case tok::starequal: 3329 return true; 3330 default: 3331 return false; 3332 } 3333 } 3334 3335 void UnwrappedLineParser::parseAccessSpecifier() { 3336 FormatToken *AccessSpecifierCandidate = FormatTok; 3337 nextToken(); 3338 // Understand Qt's slots. 3339 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 3340 nextToken(); 3341 // Otherwise, we don't know what it is, and we'd better keep the next token. 3342 if (FormatTok->is(tok::colon)) { 3343 nextToken(); 3344 addUnwrappedLine(); 3345 } else if (FormatTok->isNot(tok::coloncolon) && 3346 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) { 3347 // Not a variable name nor namespace name. 3348 addUnwrappedLine(); 3349 } else if (AccessSpecifierCandidate) { 3350 // Consider the access specifier to be a C identifier. 3351 AccessSpecifierCandidate->Tok.setKind(tok::identifier); 3352 } 3353 } 3354 3355 /// \brief Parses a requires, decides if it is a clause or an expression. 3356 /// \pre The current token has to be the requires keyword. 3357 /// \returns true if it parsed a clause. 3358 bool clang::format::UnwrappedLineParser::parseRequires() { 3359 assert(FormatTok->is(tok::kw_requires) && "'requires' expected"); 3360 auto RequiresToken = FormatTok; 3361 3362 // We try to guess if it is a requires clause, or a requires expression. For 3363 // that we first consume the keyword and check the next token. 3364 nextToken(); 3365 3366 switch (FormatTok->Tok.getKind()) { 3367 case tok::l_brace: 3368 // This can only be an expression, never a clause. 3369 parseRequiresExpression(RequiresToken); 3370 return false; 3371 case tok::l_paren: 3372 // Clauses and expression can start with a paren, it's unclear what we have. 3373 break; 3374 default: 3375 // All other tokens can only be a clause. 
3376 parseRequiresClause(RequiresToken); 3377 return true; 3378 } 3379 3380 // Looking forward we would have to decide if there are function declaration 3381 // like arguments to the requires expression: 3382 // requires (T t) { 3383 // Or there is a constraint expression for the requires clause: 3384 // requires (C<T> && ... 3385 3386 // But first let's look behind. 3387 auto *PreviousNonComment = RequiresToken->getPreviousNonComment(); 3388 3389 if (!PreviousNonComment || 3390 PreviousNonComment->is(TT_RequiresExpressionLBrace)) { 3391 // If there is no token, or an expression left brace, we are a requires 3392 // clause within a requires expression. 3393 parseRequiresClause(RequiresToken); 3394 return true; 3395 } 3396 3397 switch (PreviousNonComment->Tok.getKind()) { 3398 case tok::greater: 3399 case tok::r_paren: 3400 case tok::kw_noexcept: 3401 case tok::kw_const: 3402 // This is a requires clause. 3403 parseRequiresClause(RequiresToken); 3404 return true; 3405 case tok::amp: 3406 case tok::ampamp: { 3407 // This can be either: 3408 // if (... && requires (T t) ...) 3409 // Or 3410 // void member(...) && requires (C<T> ... 3411 // We check the one token before that for a const: 3412 // void member(...) const && requires (C<T> ... 3413 auto PrevPrev = PreviousNonComment->getPreviousNonComment(); 3414 if (PrevPrev && PrevPrev->is(tok::kw_const)) { 3415 parseRequiresClause(RequiresToken); 3416 return true; 3417 } 3418 break; 3419 } 3420 default: 3421 if (PreviousNonComment->isTypeOrIdentifier()) { 3422 // This is a requires clause. 3423 parseRequiresClause(RequiresToken); 3424 return true; 3425 } 3426 // It's an expression. 3427 parseRequiresExpression(RequiresToken); 3428 return false; 3429 } 3430 3431 // Now we look forward and try to check if the paren content is a parameter 3432 // list. The parameters can be cv-qualified and contain references or 3433 // pointers. 
3434 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds 3435 // of stuff: typename, const, *, &, &&, ::, identifiers. 3436 3437 unsigned StoredPosition = Tokens->getPosition(); 3438 FormatToken *NextToken = Tokens->getNextToken(); 3439 int Lookahead = 0; 3440 auto PeekNext = [&Lookahead, &NextToken, this] { 3441 ++Lookahead; 3442 NextToken = Tokens->getNextToken(); 3443 }; 3444 3445 bool FoundType = false; 3446 bool LastWasColonColon = false; 3447 int OpenAngles = 0; 3448 3449 for (; Lookahead < 50; PeekNext()) { 3450 switch (NextToken->Tok.getKind()) { 3451 case tok::kw_volatile: 3452 case tok::kw_const: 3453 case tok::comma: 3454 if (OpenAngles == 0) { 3455 FormatTok = Tokens->setPosition(StoredPosition); 3456 parseRequiresExpression(RequiresToken); 3457 return false; 3458 } 3459 break; 3460 case tok::r_paren: 3461 case tok::pipepipe: 3462 FormatTok = Tokens->setPosition(StoredPosition); 3463 parseRequiresClause(RequiresToken); 3464 return true; 3465 case tok::eof: 3466 // Break out of the loop. 3467 Lookahead = 50; 3468 break; 3469 case tok::coloncolon: 3470 LastWasColonColon = true; 3471 break; 3472 case tok::identifier: 3473 if (FoundType && !LastWasColonColon && OpenAngles == 0) { 3474 FormatTok = Tokens->setPosition(StoredPosition); 3475 parseRequiresExpression(RequiresToken); 3476 return false; 3477 } 3478 FoundType = true; 3479 LastWasColonColon = false; 3480 break; 3481 case tok::less: 3482 ++OpenAngles; 3483 break; 3484 case tok::greater: 3485 --OpenAngles; 3486 break; 3487 default: 3488 if (NextToken->isSimpleTypeSpecifier()) { 3489 FormatTok = Tokens->setPosition(StoredPosition); 3490 parseRequiresExpression(RequiresToken); 3491 return false; 3492 } 3493 break; 3494 } 3495 } 3496 // This seems to be a complicated expression, just assume it's a clause. 3497 FormatTok = Tokens->setPosition(StoredPosition); 3498 parseRequiresClause(RequiresToken); 3499 return true; 3500 } 3501 3502 /// \brief Parses a requires clause. 
3503 /// \param RequiresToken The requires keyword token, which starts this clause. 3504 /// \pre We need to be on the next token after the requires keyword. 3505 /// \sa parseRequiresExpression 3506 /// 3507 /// Returns if it either has finished parsing the clause, or it detects, that 3508 /// the clause is incorrect. 3509 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) { 3510 assert(FormatTok->getPreviousNonComment() == RequiresToken); 3511 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); 3512 3513 // If there is no previous token, we are within a requires expression, 3514 // otherwise we will always have the template or function declaration in front 3515 // of it. 3516 bool InRequiresExpression = 3517 !RequiresToken->Previous || 3518 RequiresToken->Previous->is(TT_RequiresExpressionLBrace); 3519 3520 RequiresToken->setFinalizedType(InRequiresExpression 3521 ? TT_RequiresClauseInARequiresExpression 3522 : TT_RequiresClause); 3523 3524 // NOTE: parseConstraintExpression is only ever called from this function. 3525 // It could be inlined into here. 3526 parseConstraintExpression(); 3527 3528 if (!InRequiresExpression) 3529 FormatTok->Previous->ClosesRequiresClause = true; 3530 } 3531 3532 /// \brief Parses a requires expression. 3533 /// \param RequiresToken The requires keyword token, which starts this clause. 3534 /// \pre We need to be on the next token after the requires keyword. 3535 /// \sa parseRequiresClause 3536 /// 3537 /// Returns if it either has finished parsing the expression, or it detects, 3538 /// that the expression is incorrect. 
void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
  assert(FormatTok->getPreviousNonComment() == RequiresToken);
  assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");

  RequiresToken->setFinalizedType(TT_RequiresExpression);

  // Optional parameter list: requires (T t) { ... }
  if (FormatTok->is(tok::l_paren)) {
    FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
    parseParens();
  }

  // The requirement body is parsed as a child block.
  if (FormatTok->is(tok::l_brace)) {
    FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
    parseChildBlock();
  }
}

/// \brief Parses a constraint expression.
///
/// This is the body of a requires clause. It returns, when the parsing is
/// complete, or the expression is incorrect.
void UnwrappedLineParser::parseConstraintExpression() {
  // The special handling for lambdas is needed since tryToParseLambda() eats a
  // token and if a requires expression is the last part of a requires clause
  // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
  // not set on the correct token. Thus we need to be aware if we even expect a
  // lambda to be possible.
  // template <typename T> requires requires { ... } [[nodiscard]] ...;
  bool LambdaNextTimeAllowed = true;

  // Within lambda declarations, it is permitted to put a requires clause after
  // its template parameter list, which would place the requires clause right
  // before the parentheses of the parameters of the lambda declaration. Thus,
  // we track if we expect to see grouping parentheses at all.
  // Without this check, `requires foo<T> (T t)` in the below example would be
  // seen as the whole requires clause, accidentally eating the parameters of
  // the lambda.
  // [&]<typename T> requires foo<T> (T t) { ... };
  bool TopLevelParensAllowed = true;

  do {
    // Take this iteration's permission and reset the flag for the next one;
    // individual cases re-enable it when a lambda may follow.
    bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);

    switch (FormatTok->Tok.getKind()) {
    case tok::kw_requires: {
      auto RequiresToken = FormatTok;
      nextToken();
      parseRequiresExpression(RequiresToken);
      break;
    }

    case tok::l_paren:
      if (!TopLevelParensAllowed)
        return;
      parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
      TopLevelParensAllowed = false;
      break;

    case tok::l_square:
      if (!LambdaThisTimeAllowed || !tryToParseLambda())
        return;
      break;

    case tok::kw_const:
    case tok::semi:
    case tok::kw_class:
    case tok::kw_struct:
    case tok::kw_union:
      return;

    case tok::l_brace:
      // Potential function body.
      return;

    case tok::ampamp:
    case tok::pipepipe:
      FormatTok->setFinalizedType(TT_BinaryOperator);
      nextToken();
      LambdaNextTimeAllowed = true;
      TopLevelParensAllowed = true;
      break;

    case tok::comma:
    case tok::comment:
      // These do not change whether a lambda may follow: carry the current
      // permission over to the next iteration.
      LambdaNextTimeAllowed = LambdaThisTimeAllowed;
      nextToken();
      break;

    case tok::kw_sizeof:
    case tok::greater:
    case tok::greaterequal:
    case tok::greatergreater:
    case tok::less:
    case tok::lessequal:
    case tok::lessless:
    case tok::equalequal:
    case tok::exclaim:
    case tok::exclaimequal:
    case tok::plus:
    case tok::minus:
    case tok::star:
    case tok::slash:
      LambdaNextTimeAllowed = true;
      TopLevelParensAllowed = true;
      // Just eat them.
      nextToken();
      break;

    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_true:
    case tok::kw_false:
      TopLevelParensAllowed = false;
      // Just eat them.
      nextToken();
      break;

    case tok::kw_static_cast:
    case tok::kw_const_cast:
    case tok::kw_reinterpret_cast:
    case tok::kw_dynamic_cast:
      nextToken();
      if (FormatTok->isNot(tok::less))
        return;

      nextToken();
      parseBracedList(/*IsAngleBracket=*/true);
      break;

    default:
      if (!FormatTok->Tok.getIdentifierInfo()) {
        // Identifiers are part of the default case, we check for more than
        // tok::identifier to handle builtin type traits.
        return;
      }

      // We need to differentiate identifiers for a template deduction guide,
      // variables, or function return types (the constraint expression has
      // ended before that), and basically all other cases. But it's easier to
      // check the other way around.
      assert(FormatTok->Previous);
      switch (FormatTok->Previous->Tok.getKind()) {
      case tok::coloncolon:  // Nested identifier.
      case tok::ampamp:      // Start of a function or variable for the
      case tok::pipepipe:    // constraint expression. (binary)
      case tok::exclaim:     // The same as above, but unary.
      case tok::kw_requires: // Initial identifier of a requires clause.
      case tok::equal:       // Initial identifier of a concept declaration.
        break;
      default:
        return;
      }

      // Read identifier with optional template declaration.
      nextToken();
      if (FormatTok->is(tok::less)) {
        nextToken();
        parseBracedList(/*IsAngleBracket=*/true);
      }
      TopLevelParensAllowed = false;
      break;
    }
  } while (!eof());
}

/// Parses an enum declaration or definition.
///
/// \returns false when the tokens turn out not to start an enum (`enum` used
/// as a JavaScript property name or a protobuf field name, or two consecutive
/// identifiers in C++), true otherwise -- including a declaration without a
/// body.
bool UnwrappedLineParser::parseEnum() {
  const FormatToken &InitialToken = *FormatTok;

  // Won't be 'enum' for NS_ENUMs.
  if (FormatTok->is(tok::kw_enum))
    nextToken();

  // In TypeScript, "enum" can also be used as property name, e.g. in interface
  // declarations. An "enum" keyword followed by a colon would be a syntax
  // error and thus assume it is just an identifier.
  if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
    return false;

  // In protobuf, "enum" can be used as a field name.
  if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
    return false;

  // Eat up enum class ...
  if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
    nextToken();

  // Consume everything between the keyword(s) and the opening brace.
  while (FormatTok->Tok.getIdentifierInfo() ||
         FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
                            tok::greater, tok::comma, tok::question,
                            tok::l_square, tok::r_square)) {
    if (Style.isVerilog()) {
      FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
      nextToken();
      // In Verilog the base type can have dimensions.
      while (FormatTok->is(tok::l_square))
        parseSquare();
    } else {
      nextToken();
    }
    // We can have macros or attributes in between 'enum' and the enum name.
    if (FormatTok->is(tok::l_paren))
      parseParens();
    assert(FormatTok->isNot(TT_AttributeSquare));
    if (FormatTok->is(tok::identifier)) {
      nextToken();
      // If there are two identifiers in a row, this is likely an elaborate
      // return type. In Java, this can be "implements", etc.
      if (Style.isCpp() && FormatTok->is(tok::identifier))
        return false;
    }
  }

  // Just a declaration or something is wrong.
  if (FormatTok->isNot(tok::l_brace))
    return true;
  FormatTok->setFinalizedType(TT_EnumLBrace);
  FormatTok->setBlockKind(BK_Block);

  if (Style.Language == FormatStyle::LK_Java) {
    // Java enums are different.
    parseJavaEnumBody();
    return true;
  }
  if (Style.Language == FormatStyle::LK_Proto) {
    parseBlock(/*MustBeDeclaration=*/true);
    return true;
  }

  if (!Style.AllowShortEnumsOnASingleLine &&
      ShouldBreakBeforeBrace(Style, InitialToken)) {
    addUnwrappedLine();
  }
  // Parse enum body.
  nextToken();
  if (!Style.AllowShortEnumsOnASingleLine) {
    addUnwrappedLine();
    Line->Level += 1;
  }
  bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
  if (!Style.AllowShortEnumsOnASingleLine)
    Line->Level -= 1;
  if (HasError) {
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }
  setPreviousRBraceType(TT_EnumRBrace);
  return true;

  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "enum A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}

/// Parses a record via parseRecord() and, for Java, JavaScript and C#, also
/// finishes the line (consuming a trailing `;` if present).
///
/// \returns true if the unwrapped line was finished here (Java, JavaScript,
/// C#), false if the caller still has to continue the current line.
bool UnwrappedLineParser::parseStructLike() {
  // parseRecord falls through and does not yet add an unwrapped line as a
  // record declaration or definition can start a structural element.
  parseRecord();
  // This does not apply to Java, JavaScript and C#.
  if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
      Style.isCSharp()) {
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
    return true;
  }
  return false;
}

namespace {
// A class used to set and restore the Token position when peeking
// ahead in the token source.
class ScopedTokenPosition {
  // Position captured at construction; restored by the destructor.
  unsigned StoredPosition;
  // Token source being peeked into; not owned.
  FormatTokenSource *Tokens;

public:
  // Records the current position of `Tokens`, which must not be null.
  ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
    assert(Tokens && "Tokens expected to not be null");
    StoredPosition = Tokens->getPosition();
  }

  // Rewinds the token source to the recorded position.
  ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
};
} // namespace

// Look to see if we have [[ by looking ahead, if
// it's not, then rewind to the original position.
// The token source is rewound on every return path (via ScopedTokenPosition),
// so this only answers the question and never consumes tokens.
// Returns true if a `[` follows, the next `]` is directly followed by another
// `]`, and the token after that is not `;`.
bool UnwrappedLineParser::tryToParseSimpleAttribute() {
  ScopedTokenPosition AutoPosition(Tokens);
  FormatToken *Tok = Tokens->getNextToken();
  // We already read the first [ check for the second.
  if (Tok->isNot(tok::l_square))
    return false;
  // Double check that the attribute is just something
  // fairly simple.
  while (Tok->isNot(tok::eof)) {
    if (Tok->is(tok::r_square))
      break;
    Tok = Tokens->getNextToken();
  }
  if (Tok->is(tok::eof))
    return false;
  Tok = Tokens->getNextToken();
  if (Tok->isNot(tok::r_square))
    return false;
  Tok = Tokens->getNextToken();
  if (Tok->is(tok::semi))
    return false;
  return true;
}

/// Parses the body of a Java enum.
///
/// \pre The current token is the enum's opening brace.
///
/// A "simple" body (no `;` and no `{` before the closing brace) is parsed as a
/// braced list. Otherwise each constant is put on its own line and whatever
/// follows the constants is parsed with parseLevel().
void UnwrappedLineParser::parseJavaEnumBody() {
  assert(FormatTok->is(tok::l_brace));
  const FormatToken *OpeningBrace = FormatTok;

  // Determine whether the enum is simple, i.e. does not have a semicolon or
  // constants with class bodies. Simple enums can be formatted like braced
  // lists, contracted to a single line, etc.
  unsigned StoredPosition = Tokens->getPosition();
  bool IsSimple = true;
  FormatToken *Tok = Tokens->getNextToken();
  while (Tok->isNot(tok::eof)) {
    if (Tok->is(tok::r_brace))
      break;
    if (Tok->isOneOf(tok::l_brace, tok::semi)) {
      IsSimple = false;
      break;
    }
    // FIXME: This will also mark enums with braces in the arguments to enum
    // constants as "not simple". This is probably fine in practice, though.
    Tok = Tokens->getNextToken();
  }
  // Undo the lookahead before the real parse.
  FormatTok = Tokens->setPosition(StoredPosition);

  if (IsSimple) {
    nextToken();
    parseBracedList();
    addUnwrappedLine();
    return;
  }

  // Parse the body of a more complex enum.
  // First add a line for everything up to the "{".
  nextToken();
  addUnwrappedLine();
  ++Line->Level;

  // Parse the enum constants.
  while (!eof()) {
    if (FormatTok->is(tok::l_brace)) {
      // Parse the constant's class body.
      parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
    } else if (FormatTok->is(tok::l_paren)) {
      parseParens();
    } else if (FormatTok->is(tok::comma)) {
      nextToken();
      addUnwrappedLine();
    } else if (FormatTok->is(tok::semi)) {
      nextToken();
      addUnwrappedLine();
      break;
    } else if (FormatTok->is(tok::r_brace)) {
      addUnwrappedLine();
      break;
    } else {
      nextToken();
    }
  }

  // Parse the class body after the enum's ";" if any.
  parseLevel(OpeningBrace);
  nextToken();
  --Line->Level;
  addUnwrappedLine();
}

/// Parses a record (class, struct, union or similar) declaration or
/// definition, starting on the record keyword.
///
/// \param ParseAsExpr When true, a body is parsed as a child block instead of
///        a regular block.
///
/// Returns early, without touching a body, if a `;` is met while scanning the
/// part introduced by `:` or `<`.
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // An identifier counts as a macro when its text equals its upper-cased form.
  auto IsNonMacroIdentifier = [](const FormatToken *Tok) {
    return Tok->is(tok::identifier) && Tok->TokenText != Tok->TokenText.upper();
  };
  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw_alignas, tok::l_square) ||
         FormatTok->isAttribute() ||
         ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.isJavaScript() &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    if (FormatTok->is(tok::l_square) && handleCppAttributes())
      continue;
    nextToken();
    // We can have macros in between 'class' and the class name.
    if (!IsNonMacroIdentifier(FormatTok->Previous) &&
        FormatTok->is(tok::l_paren)) {
      parseParens();
    }
  }

  // Scan the part introduced by `:` or `<` up to the body.
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    int AngleNestingLevel = 0;
    do {
      if (FormatTok->is(tok::less))
        ++AngleNestingLevel;
      else if (FormatTok->is(tok::greater))
        --AngleNestingLevel;

      if (AngleNestingLevel == 0 && FormatTok->is(tok::l_paren) &&
          IsNonMacroIdentifier(FormatTok->Previous)) {
        break;
      }
      if (FormatTok->is(tok::l_brace)) {
        calculateBraceTypes(/*ExpectClassBody=*/true);
        // A brace that is not a braced list ends the scan; it is handled as
        // the record body below.
        if (!tryToParseBracedList())
          break;
      }
      if (FormatTok->is(tok::l_square)) {
        FormatToken *Previous = FormatTok->Previous;
        if (!Previous ||
            !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
          // Don't try parsing a lambda if we had a closing parenthesis before,
          // it was probably a pointer to an array: int (*)[].
          if (!tryToParseLambda())
            continue;
        } else {
          parseSquare();
          continue;
        }
      }
      if (FormatTok->is(tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    } while (!eof());
  }

  // Maps the record keyword to the token types used for its braces.
  auto GetBraceTypes =
      [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
    switch (RecordTok.Tok.getKind()) {
    case tok::kw_class:
      return {TT_ClassLBrace, TT_ClassRBrace};
    case tok::kw_struct:
      return {TT_StructLBrace, TT_StructRBrace};
    case tok::kw_union:
      return {TT_UnionLBrace, TT_UnionRBrace};
    default:
      // Useful for e.g. interface.
      return {TT_RecordLBrace, TT_RecordRBrace};
    }
  };
  if (FormatTok->is(tok::l_brace)) {
    auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
    FormatTok->setFinalizedType(OpenBraceType);
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
      parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
    }
    setPreviousRBraceType(ClosingBraceType);
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}

/// Parses an Objective-C method declaration or definition: tokens are consumed
/// until a `;` (declaration) or a `{ ... }` body (definition) has been
/// handled, or the input ends.
///
/// \pre The current token is `(` or an identifier.
void UnwrappedLineParser::parseObjCMethod() {
  assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
         "'(' or identifier expected.");
  do {
    if (FormatTok->is(tok::semi)) {
      nextToken();
      addUnwrappedLine();
      return;
    } else if (FormatTok->is(tok::l_brace)) {
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseBlock();
      addUnwrappedLine();
      return;
    } else {
      nextToken();
    }
  } while (!eof());
}

/// Skips an Objective-C protocol list `<...>`, including the closing `>`.
///
/// \pre The current token is `<`.
///
/// Returns early, leaving the offending token current, if `;`, `{` or `@end`
/// shows up before the closing `>`.
void UnwrappedLineParser::parseObjCProtocolList() {
  assert(FormatTok->is(tok::less) && "'<' expected.");
  do {
    nextToken();
    // Early exit in case someone forgot a close angle.
    if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
        FormatTok->isObjCAtKeyword(tok::objc_end)) {
      return;
    }
  } while (!eof() && FormatTok->isNot(tok::greater));
  nextToken(); // Skip '>'.
}

/// Parses the contents of an Objective-C container up to and including
/// `@end`, or to the end of input if `@end` is missing.
void UnwrappedLineParser::parseObjCUntilAtEnd() {
  do {
    if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
      nextToken();
      addUnwrappedLine();
      break;
    }
    if (FormatTok->is(tok::l_brace)) {
      parseBlock();
      // In ObjC interfaces, nothing should be following the "}".
      addUnwrappedLine();
    } else if (FormatTok->is(tok::r_brace)) {
      // Ignore stray "}". parseStructuralElement doesn't consume them.
      nextToken();
      addUnwrappedLine();
    } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
      // `-` / `+` introduces an instance / class method.
      nextToken();
      parseObjCMethod();
    } else {
      parseStructuralElement();
    }
  } while (!eof());
}

/// Parses an Objective-C `@interface` or `@implementation`: the header
/// (name, generics, base class or category, protocol list), an optional
/// braced block, and then everything up to `@end`.
///
/// \pre The current token's ObjC keyword is `interface` or `implementation`.
void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
         FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
  nextToken();
  nextToken(); // interface name

  // @interface can be followed by a lightweight generic
  // specialization list, then either a base class or a category.
  if (FormatTok->is(tok::less))
    parseObjCLightweightGenerics();
  if (FormatTok->is(tok::colon)) {
    nextToken();
    nextToken(); // base class name
    // The base class can also have lightweight generics applied to it.
    if (FormatTok->is(tok::less))
      parseObjCLightweightGenerics();
  } else if (FormatTok->is(tok::l_paren)) {
    // Skip category, if present.
    parseParens();
  }

  if (FormatTok->is(tok::less))
    parseObjCProtocolList();

  if (FormatTok->is(tok::l_brace)) {
    if (Style.BraceWrapping.AfterObjCDeclaration)
      addUnwrappedLine();
    parseBlock(/*MustBeDeclaration=*/true);
  }

  // With instance variables, this puts '}' on its own line. Without instance
  // variables, this ends the @interface line.
  addUnwrappedLine();

  parseObjCUntilAtEnd();
}

/// Skips an Objective-C lightweight generics list `<...>`, which may contain
/// nested angle brackets.
///
/// \pre The current token is `<`.
///
/// Stops scanning at `;`, `{` or `@end` if the angles are unbalanced. One more
/// token is consumed after the loop in either case.
void UnwrappedLineParser::parseObjCLightweightGenerics() {
  assert(FormatTok->is(tok::less));
  // Unlike protocol lists, generic parameterizations support
  // nested angles:
  //
  // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
  //     NSObject <NSCopying, NSSecureCoding>
  //
  // so we need to count how many open angles we have left.
  unsigned NumOpenAngles = 1;
  do {
    nextToken();
    // Early exit in case someone forgot a close angle.
    if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
        FormatTok->isObjCAtKeyword(tok::objc_end)) {
      break;
    }
    if (FormatTok->is(tok::less)) {
      ++NumOpenAngles;
    } else if (FormatTok->is(tok::greater)) {
      assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
      --NumOpenAngles;
    }
  } while (!eof() && NumOpenAngles != 0);
  nextToken(); // Skip '>'.
}

// Returns true for the declaration/definition form of @protocol,
// false for the expression form.
// In the expression form only the `@protocol` keyword itself is consumed.
bool UnwrappedLineParser::parseObjCProtocol() {
  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
  nextToken();

  if (FormatTok->is(tok::l_paren)) {
    // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
    return false;
  }

  // The definition/declaration form,
  // @protocol Foo
  // - (int)someMethod;
  // @end

  nextToken(); // protocol name

  if (FormatTok->is(tok::less))
    parseObjCProtocolList();

  // Check for protocol declaration.
  if (FormatTok->is(tok::semi)) {
    nextToken();
    addUnwrappedLine();
    return true;
  }

  addUnwrappedLine();
  parseObjCUntilAtEnd();
  return true;
}

/// Parses the start of a JavaScript ES6 `import` or `export` statement.
///
/// \pre The current token is `import` or `export`.
///
/// For imports and for the `export` forms checked below, the rest of the
/// statement is consumed up to, but not including, the terminating `;`. For
/// everything else only the leading keywords are consumed and the caller goes
/// on parsing the declaration or expression.
void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
  bool IsImport = FormatTok->is(Keywords.kw_import);
  assert(IsImport || FormatTok->is(tok::kw_export));
  nextToken();

  // Consume the "default" in "export default class/function".
  if (FormatTok->is(tok::kw_default))
    nextToken();

  // Consume "async function", "function" and "default function", so that these
  // get parsed as free-standing JS functions, i.e. do not require a trailing
  // semicolon.
  if (FormatTok->is(Keywords.kw_async))
    nextToken();
  if (FormatTok->is(Keywords.kw_function)) {
    nextToken();
    return;
  }

  // For imports, `export *`, `export {...}`, consume the rest of the line up
  // to the terminating `;`. For everything else, just return and continue
  // parsing the structural element, i.e. the declaration or expression for
  // `export default`.
  if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
      !FormatTok->isStringLiteral() &&
      !(FormatTok->is(Keywords.kw_type) &&
        Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
    return;
  }

  while (!eof()) {
    if (FormatTok->is(tok::semi))
      return;
    if (Line->Tokens.empty()) {
      // Common issue: Automatic Semicolon Insertion wrapped the line, so the
      // import statement should terminate.
      return;
    }
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setBlockKind(BK_Block);
      nextToken();
      parseBracedList();
    } else {
      nextToken();
    }
  }
}

/// Parses a statement macro: the macro name, an optional parenthesized
/// argument list and an optional trailing `;`, then finishes the line.
void UnwrappedLineParser::parseStatementMacro() {
  nextToken();
  if (FormatTok->is(tok::l_paren))
    parseParens();
  if (FormatTok->is(tok::semi))
    nextToken();
  addUnwrappedLine();
}

/// Consumes a Verilog hierarchical identifier, stopping at the first token
/// that cannot be part of one.
void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
  // consume things like a::`b.c[d:e] or a::*
  while (true) {
    if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
                           tok::coloncolon, tok::hash) ||
        Keywords.isVerilogIdentifier(*FormatTok)) {
      nextToken();
    } else if (FormatTok->is(tok::l_square)) {
      parseSquare();
    } else {
      break;
    }
  }
}

void UnwrappedLineParser::parseVerilogSensitivityList() {
  if (FormatTok->isNot(tok::at))
    return;
  nextToken();
  // A block event expression has 2 at signs.
4259 if (FormatTok->is(tok::at)) 4260 nextToken(); 4261 switch (FormatTok->Tok.getKind()) { 4262 case tok::star: 4263 nextToken(); 4264 break; 4265 case tok::l_paren: 4266 parseParens(); 4267 break; 4268 default: 4269 parseVerilogHierarchyIdentifier(); 4270 break; 4271 } 4272 } 4273 4274 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() { 4275 unsigned AddLevels = 0; 4276 4277 if (FormatTok->is(Keywords.kw_clocking)) { 4278 nextToken(); 4279 if (Keywords.isVerilogIdentifier(*FormatTok)) 4280 nextToken(); 4281 parseVerilogSensitivityList(); 4282 if (FormatTok->is(tok::semi)) 4283 nextToken(); 4284 } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex, 4285 Keywords.kw_casez, Keywords.kw_randcase, 4286 Keywords.kw_randsequence)) { 4287 if (Style.IndentCaseLabels) 4288 AddLevels++; 4289 nextToken(); 4290 if (FormatTok->is(tok::l_paren)) { 4291 FormatTok->setFinalizedType(TT_ConditionLParen); 4292 parseParens(); 4293 } 4294 if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches)) 4295 nextToken(); 4296 // The case header has no semicolon. 4297 } else { 4298 // "module" etc. 
4299 nextToken(); 4300 // all the words like the name of the module and specifiers like 4301 // "automatic" and the width of function return type 4302 while (true) { 4303 if (FormatTok->is(tok::l_square)) { 4304 auto Prev = FormatTok->getPreviousNonComment(); 4305 if (Prev && Keywords.isVerilogIdentifier(*Prev)) 4306 Prev->setFinalizedType(TT_VerilogDimensionedTypeName); 4307 parseSquare(); 4308 } else if (Keywords.isVerilogIdentifier(*FormatTok) || 4309 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) { 4310 nextToken(); 4311 } else { 4312 break; 4313 } 4314 } 4315 4316 auto NewLine = [this]() { 4317 addUnwrappedLine(); 4318 Line->IsContinuation = true; 4319 }; 4320 4321 // package imports 4322 while (FormatTok->is(Keywords.kw_import)) { 4323 NewLine(); 4324 nextToken(); 4325 parseVerilogHierarchyIdentifier(); 4326 if (FormatTok->is(tok::semi)) 4327 nextToken(); 4328 } 4329 4330 // parameters and ports 4331 if (FormatTok->is(Keywords.kw_verilogHash)) { 4332 NewLine(); 4333 nextToken(); 4334 if (FormatTok->is(tok::l_paren)) { 4335 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen); 4336 parseParens(); 4337 } 4338 } 4339 if (FormatTok->is(tok::l_paren)) { 4340 NewLine(); 4341 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen); 4342 parseParens(); 4343 } 4344 4345 // extends and implements 4346 if (FormatTok->is(Keywords.kw_extends)) { 4347 NewLine(); 4348 nextToken(); 4349 parseVerilogHierarchyIdentifier(); 4350 if (FormatTok->is(tok::l_paren)) 4351 parseParens(); 4352 } 4353 if (FormatTok->is(Keywords.kw_implements)) { 4354 NewLine(); 4355 do { 4356 nextToken(); 4357 parseVerilogHierarchyIdentifier(); 4358 } while (FormatTok->is(tok::comma)); 4359 } 4360 4361 // Coverage event for cover groups. 
4362 if (FormatTok->is(tok::at)) { 4363 NewLine(); 4364 parseVerilogSensitivityList(); 4365 } 4366 4367 if (FormatTok->is(tok::semi)) 4368 nextToken(/*LevelDifference=*/1); 4369 addUnwrappedLine(); 4370 } 4371 4372 return AddLevels; 4373 } 4374 4375 void UnwrappedLineParser::parseVerilogTable() { 4376 assert(FormatTok->is(Keywords.kw_table)); 4377 nextToken(/*LevelDifference=*/1); 4378 addUnwrappedLine(); 4379 4380 auto InitialLevel = Line->Level++; 4381 while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) { 4382 FormatToken *Tok = FormatTok; 4383 nextToken(); 4384 if (Tok->is(tok::semi)) 4385 addUnwrappedLine(); 4386 else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus)) 4387 Tok->setFinalizedType(TT_VerilogTableItem); 4388 } 4389 Line->Level = InitialLevel; 4390 nextToken(/*LevelDifference=*/-1); 4391 addUnwrappedLine(); 4392 } 4393 4394 void UnwrappedLineParser::parseVerilogCaseLabel() { 4395 // The label will get unindented in AnnotatingParser. If there are no leading 4396 // spaces, indent the rest here so that things inside the block will be 4397 // indented relative to things outside. We don't use parseLabel because we 4398 // don't know whether this colon is a label or a ternary expression at this 4399 // point. 4400 auto OrigLevel = Line->Level; 4401 auto FirstLine = CurrentLines->size(); 4402 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1)) 4403 ++Line->Level; 4404 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok)) 4405 --Line->Level; 4406 parseStructuralElement(); 4407 // Restore the indentation in both the new line and the line that has the 4408 // label. 
4409 if (CurrentLines->size() > FirstLine) 4410 (*CurrentLines)[FirstLine].Level = OrigLevel; 4411 Line->Level = OrigLevel; 4412 } 4413 4414 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const { 4415 for (const auto &N : Line.Tokens) { 4416 if (N.Tok->MacroCtx) 4417 return true; 4418 for (const UnwrappedLine &Child : N.Children) 4419 if (containsExpansion(Child)) 4420 return true; 4421 } 4422 return false; 4423 } 4424 4425 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { 4426 if (Line->Tokens.empty()) 4427 return; 4428 LLVM_DEBUG({ 4429 if (!parsingPPDirective()) { 4430 llvm::dbgs() << "Adding unwrapped line:\n"; 4431 printDebugInfo(*Line); 4432 } 4433 }); 4434 4435 // If this line closes a block when in Whitesmiths mode, remember that 4436 // information so that the level can be decreased after the line is added. 4437 // This has to happen after the addition of the line since the line itself 4438 // needs to be indented. 4439 bool ClosesWhitesmithsBlock = 4440 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && 4441 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 4442 4443 // If the current line was expanded from a macro call, we use it to 4444 // reconstruct an unwrapped line from the structure of the expanded unwrapped 4445 // line and the unexpanded token stream. 4446 if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) { 4447 if (!Reconstruct) 4448 Reconstruct.emplace(Line->Level, Unexpanded); 4449 Reconstruct->addLine(*Line); 4450 4451 // While the reconstructed unexpanded lines are stored in the normal 4452 // flow of lines, the expanded lines are stored on the side to be analyzed 4453 // in an extra step. 
4454 CurrentExpandedLines.push_back(std::move(*Line)); 4455 4456 if (Reconstruct->finished()) { 4457 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult(); 4458 assert(!Reconstructed.Tokens.empty() && 4459 "Reconstructed must at least contain the macro identifier."); 4460 assert(!parsingPPDirective()); 4461 LLVM_DEBUG({ 4462 llvm::dbgs() << "Adding unexpanded line:\n"; 4463 printDebugInfo(Reconstructed); 4464 }); 4465 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines; 4466 Lines.push_back(std::move(Reconstructed)); 4467 CurrentExpandedLines.clear(); 4468 Reconstruct.reset(); 4469 } 4470 } else { 4471 // At the top level we only get here when no unexpansion is going on, or 4472 // when conditional formatting led to unfinished macro reconstructions. 4473 assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0); 4474 CurrentLines->push_back(std::move(*Line)); 4475 } 4476 Line->Tokens.clear(); 4477 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 4478 Line->FirstStartColumn = 0; 4479 Line->IsContinuation = false; 4480 Line->SeenDecltypeAuto = false; 4481 4482 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) 4483 --Line->Level; 4484 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) { 4485 CurrentLines->append( 4486 std::make_move_iterator(PreprocessorDirectives.begin()), 4487 std::make_move_iterator(PreprocessorDirectives.end())); 4488 PreprocessorDirectives.clear(); 4489 } 4490 // Disconnect the current token from the last token on the previous line. 4491 FormatTok->Previous = nullptr; 4492 } 4493 4494 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); } 4495 4496 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 4497 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 4498 FormatTok.NewlinesBefore > 0; 4499 } 4500 4501 // Checks if \p FormatTok is a line comment that continues the line comment 4502 // section on \p Line. 
4503 static bool 4504 continuesLineCommentSection(const FormatToken &FormatTok, 4505 const UnwrappedLine &Line, 4506 const llvm::Regex &CommentPragmasRegex) { 4507 if (Line.Tokens.empty()) 4508 return false; 4509 4510 StringRef IndentContent = FormatTok.TokenText; 4511 if (FormatTok.TokenText.starts_with("//") || 4512 FormatTok.TokenText.starts_with("/*")) { 4513 IndentContent = FormatTok.TokenText.substr(2); 4514 } 4515 if (CommentPragmasRegex.match(IndentContent)) 4516 return false; 4517 4518 // If Line starts with a line comment, then FormatTok continues the comment 4519 // section if its original column is greater or equal to the original start 4520 // column of the line. 4521 // 4522 // Define the min column token of a line as follows: if a line ends in '{' or 4523 // contains a '{' followed by a line comment, then the min column token is 4524 // that '{'. Otherwise, the min column token of the line is the first token of 4525 // the line. 4526 // 4527 // If Line starts with a token other than a line comment, then FormatTok 4528 // continues the comment section if its original column is greater than the 4529 // original start column of the min column token of the line. 
4530 // 4531 // For example, the second line comment continues the first in these cases: 4532 // 4533 // // first line 4534 // // second line 4535 // 4536 // and: 4537 // 4538 // // first line 4539 // // second line 4540 // 4541 // and: 4542 // 4543 // int i; // first line 4544 // // second line 4545 // 4546 // and: 4547 // 4548 // do { // first line 4549 // // second line 4550 // int i; 4551 // } while (true); 4552 // 4553 // and: 4554 // 4555 // enum { 4556 // a, // first line 4557 // // second line 4558 // b 4559 // }; 4560 // 4561 // The second line comment doesn't continue the first in these cases: 4562 // 4563 // // first line 4564 // // second line 4565 // 4566 // and: 4567 // 4568 // int i; // first line 4569 // // second line 4570 // 4571 // and: 4572 // 4573 // do { // first line 4574 // // second line 4575 // int i; 4576 // } while (true); 4577 // 4578 // and: 4579 // 4580 // enum { 4581 // a, // first line 4582 // // second line 4583 // }; 4584 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 4585 4586 // Scan for '{//'. If found, use the column of '{' as a min column for line 4587 // comment section continuation. 4588 const FormatToken *PreviousToken = nullptr; 4589 for (const UnwrappedLineNode &Node : Line.Tokens) { 4590 if (PreviousToken && PreviousToken->is(tok::l_brace) && 4591 isLineComment(*Node.Tok)) { 4592 MinColumnToken = PreviousToken; 4593 break; 4594 } 4595 PreviousToken = Node.Tok; 4596 4597 // Grab the last newline preceding a token in this unwrapped line. 
4598 if (Node.Tok->NewlinesBefore > 0) 4599 MinColumnToken = Node.Tok; 4600 } 4601 if (PreviousToken && PreviousToken->is(tok::l_brace)) 4602 MinColumnToken = PreviousToken; 4603 4604 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 4605 MinColumnToken); 4606 } 4607 4608 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 4609 bool JustComments = Line->Tokens.empty(); 4610 for (FormatToken *Tok : CommentsBeforeNextToken) { 4611 // Line comments that belong to the same line comment section are put on the 4612 // same line since later we might want to reflow content between them. 4613 // Additional fine-grained breaking of line comment sections is controlled 4614 // by the class BreakableLineCommentSection in case it is desirable to keep 4615 // several line comment sections in the same unwrapped line. 4616 // 4617 // FIXME: Consider putting separate line comment sections as children to the 4618 // unwrapped line instead. 4619 Tok->ContinuesLineCommentSection = 4620 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex); 4621 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection) 4622 addUnwrappedLine(); 4623 pushToken(Tok); 4624 } 4625 if (NewlineBeforeNext && JustComments) 4626 addUnwrappedLine(); 4627 CommentsBeforeNextToken.clear(); 4628 } 4629 4630 void UnwrappedLineParser::nextToken(int LevelDifference) { 4631 if (eof()) 4632 return; 4633 flushComments(isOnNewLine(*FormatTok)); 4634 pushToken(FormatTok); 4635 FormatToken *Previous = FormatTok; 4636 if (!Style.isJavaScript()) 4637 readToken(LevelDifference); 4638 else 4639 readTokenWithJavaScriptASI(); 4640 FormatTok->Previous = Previous; 4641 if (Style.isVerilog()) { 4642 // Blocks in Verilog can have `begin` and `end` instead of braces. For 4643 // keywords like `begin`, we can't treat them the same as left braces 4644 // because some contexts require one of them. 
For example structs use 4645 // braces and if blocks use keywords, and a left brace can occur in an if 4646 // statement, but it is not a block. For keywords like `end`, we simply 4647 // treat them the same as right braces. 4648 if (Keywords.isVerilogEnd(*FormatTok)) 4649 FormatTok->Tok.setKind(tok::r_brace); 4650 } 4651 } 4652 4653 void UnwrappedLineParser::distributeComments( 4654 const SmallVectorImpl<FormatToken *> &Comments, 4655 const FormatToken *NextTok) { 4656 // Whether or not a line comment token continues a line is controlled by 4657 // the method continuesLineCommentSection, with the following caveat: 4658 // 4659 // Define a trail of Comments to be a nonempty proper postfix of Comments such 4660 // that each comment line from the trail is aligned with the next token, if 4661 // the next token exists. If a trail exists, the beginning of the maximal 4662 // trail is marked as a start of a new comment section. 4663 // 4664 // For example in this code: 4665 // 4666 // int a; // line about a 4667 // // line 1 about b 4668 // // line 2 about b 4669 // int b; 4670 // 4671 // the two lines about b form a maximal trail, so there are two sections, the 4672 // first one consisting of the single comment "// line about a" and the 4673 // second one consisting of the next two comments. 4674 if (Comments.empty()) 4675 return; 4676 bool ShouldPushCommentsInCurrentLine = true; 4677 bool HasTrailAlignedWithNextToken = false; 4678 unsigned StartOfTrailAlignedWithNextToken = 0; 4679 if (NextTok) { 4680 // We are skipping the first element intentionally. 
4681 for (unsigned i = Comments.size() - 1; i > 0; --i) { 4682 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 4683 HasTrailAlignedWithNextToken = true; 4684 StartOfTrailAlignedWithNextToken = i; 4685 } 4686 } 4687 } 4688 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 4689 FormatToken *FormatTok = Comments[i]; 4690 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 4691 FormatTok->ContinuesLineCommentSection = false; 4692 } else { 4693 FormatTok->ContinuesLineCommentSection = 4694 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 4695 } 4696 if (!FormatTok->ContinuesLineCommentSection && 4697 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 4698 ShouldPushCommentsInCurrentLine = false; 4699 } 4700 if (ShouldPushCommentsInCurrentLine) 4701 pushToken(FormatTok); 4702 else 4703 CommentsBeforeNextToken.push_back(FormatTok); 4704 } 4705 } 4706 4707 void UnwrappedLineParser::readToken(int LevelDifference) { 4708 SmallVector<FormatToken *, 1> Comments; 4709 bool PreviousWasComment = false; 4710 bool FirstNonCommentOnLine = false; 4711 do { 4712 FormatTok = Tokens->getNextToken(); 4713 assert(FormatTok); 4714 while (FormatTok->getType() == TT_ConflictStart || 4715 FormatTok->getType() == TT_ConflictEnd || 4716 FormatTok->getType() == TT_ConflictAlternative) { 4717 if (FormatTok->getType() == TT_ConflictStart) 4718 conditionalCompilationStart(/*Unreachable=*/false); 4719 else if (FormatTok->getType() == TT_ConflictAlternative) 4720 conditionalCompilationAlternative(); 4721 else if (FormatTok->getType() == TT_ConflictEnd) 4722 conditionalCompilationEnd(); 4723 FormatTok = Tokens->getNextToken(); 4724 FormatTok->MustBreakBefore = true; 4725 FormatTok->MustBreakBeforeFinalized = true; 4726 } 4727 4728 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine, 4729 const FormatToken &Tok, 4730 bool PreviousWasComment) { 4731 auto IsFirstOnLine = [](const FormatToken &Tok) { 4732 return 
Tok.HasUnescapedNewline || Tok.IsFirst; 4733 }; 4734 4735 // Consider preprocessor directives preceded by block comments as first 4736 // on line. 4737 if (PreviousWasComment) 4738 return FirstNonCommentOnLine || IsFirstOnLine(Tok); 4739 return IsFirstOnLine(Tok); 4740 }; 4741 4742 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4743 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4744 PreviousWasComment = FormatTok->is(tok::comment); 4745 4746 while (!Line->InPPDirective && FormatTok->is(tok::hash) && 4747 (!Style.isVerilog() || 4748 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) && 4749 FirstNonCommentOnLine) { 4750 distributeComments(Comments, FormatTok); 4751 Comments.clear(); 4752 // If there is an unfinished unwrapped line, we flush the preprocessor 4753 // directives only after that unwrapped line was finished later. 4754 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 4755 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 4756 assert((LevelDifference >= 0 || 4757 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 4758 "LevelDifference makes Line->Level negative"); 4759 Line->Level += LevelDifference; 4760 // Comments stored before the preprocessor directive need to be output 4761 // before the preprocessor directive, at the same level as the 4762 // preprocessor directive, as we consider them to apply to the directive. 
4763 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 4764 PPBranchLevel > 0) { 4765 Line->Level += PPBranchLevel; 4766 } 4767 flushComments(isOnNewLine(*FormatTok)); 4768 parsePPDirective(); 4769 PreviousWasComment = FormatTok->is(tok::comment); 4770 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4771 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4772 } 4773 4774 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 4775 !Line->InPPDirective) { 4776 continue; 4777 } 4778 4779 if (FormatTok->is(tok::identifier) && 4780 Macros.defined(FormatTok->TokenText) && 4781 // FIXME: Allow expanding macros in preprocessor directives. 4782 !Line->InPPDirective) { 4783 FormatToken *ID = FormatTok; 4784 unsigned Position = Tokens->getPosition(); 4785 4786 // To correctly parse the code, we need to replace the tokens of the macro 4787 // call with its expansion. 4788 auto PreCall = std::move(Line); 4789 Line.reset(new UnwrappedLine); 4790 bool OldInExpansion = InExpansion; 4791 InExpansion = true; 4792 // We parse the macro call into a new line. 4793 auto Args = parseMacroCall(); 4794 InExpansion = OldInExpansion; 4795 assert(Line->Tokens.front().Tok == ID); 4796 // And remember the unexpanded macro call tokens. 4797 auto UnexpandedLine = std::move(Line); 4798 // Reset to the old line. 4799 Line = std::move(PreCall); 4800 4801 LLVM_DEBUG({ 4802 llvm::dbgs() << "Macro call: " << ID->TokenText << "("; 4803 if (Args) { 4804 llvm::dbgs() << "("; 4805 for (const auto &Arg : Args.value()) 4806 for (const auto &T : Arg) 4807 llvm::dbgs() << T->TokenText << " "; 4808 llvm::dbgs() << ")"; 4809 } 4810 llvm::dbgs() << "\n"; 4811 }); 4812 if (Macros.objectLike(ID->TokenText) && Args && 4813 !Macros.hasArity(ID->TokenText, Args->size())) { 4814 // The macro is either 4815 // - object-like, but we got argumnets, or 4816 // - overloaded to be both object-like and function-like, but none of 4817 // the function-like arities match the number of arguments. 
4818 // Thus, expand as object-like macro. 4819 LLVM_DEBUG(llvm::dbgs() 4820 << "Macro \"" << ID->TokenText 4821 << "\" not overloaded for arity " << Args->size() 4822 << "or not function-like, using object-like overload."); 4823 Args.reset(); 4824 UnexpandedLine->Tokens.resize(1); 4825 Tokens->setPosition(Position); 4826 nextToken(); 4827 assert(!Args && Macros.objectLike(ID->TokenText)); 4828 } 4829 if ((!Args && Macros.objectLike(ID->TokenText)) || 4830 (Args && Macros.hasArity(ID->TokenText, Args->size()))) { 4831 // Next, we insert the expanded tokens in the token stream at the 4832 // current position, and continue parsing. 4833 Unexpanded[ID] = std::move(UnexpandedLine); 4834 SmallVector<FormatToken *, 8> Expansion = 4835 Macros.expand(ID, std::move(Args)); 4836 if (!Expansion.empty()) 4837 FormatTok = Tokens->insertTokens(Expansion); 4838 4839 LLVM_DEBUG({ 4840 llvm::dbgs() << "Expanded: "; 4841 for (const auto &T : Expansion) 4842 llvm::dbgs() << T->TokenText << " "; 4843 llvm::dbgs() << "\n"; 4844 }); 4845 } else { 4846 LLVM_DEBUG({ 4847 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText 4848 << "\", because it was used "; 4849 if (Args) 4850 llvm::dbgs() << "with " << Args->size(); 4851 else 4852 llvm::dbgs() << "without"; 4853 llvm::dbgs() << " arguments, which doesn't match any definition.\n"; 4854 }); 4855 Tokens->setPosition(Position); 4856 FormatTok = ID; 4857 } 4858 } 4859 4860 if (FormatTok->isNot(tok::comment)) { 4861 distributeComments(Comments, FormatTok); 4862 Comments.clear(); 4863 return; 4864 } 4865 4866 Comments.push_back(FormatTok); 4867 } while (!eof()); 4868 4869 distributeComments(Comments, nullptr); 4870 Comments.clear(); 4871 } 4872 4873 namespace { 4874 template <typename Iterator> 4875 void pushTokens(Iterator Begin, Iterator End, 4876 llvm::SmallVectorImpl<FormatToken *> &Into) { 4877 for (auto I = Begin; I != End; ++I) { 4878 Into.push_back(I->Tok); 4879 for (const auto &Child : I->Children) 4880 
pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into); 4881 } 4882 } 4883 } // namespace 4884 4885 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> 4886 UnwrappedLineParser::parseMacroCall() { 4887 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args; 4888 assert(Line->Tokens.empty()); 4889 nextToken(); 4890 if (FormatTok->isNot(tok::l_paren)) 4891 return Args; 4892 unsigned Position = Tokens->getPosition(); 4893 FormatToken *Tok = FormatTok; 4894 nextToken(); 4895 Args.emplace(); 4896 auto ArgStart = std::prev(Line->Tokens.end()); 4897 4898 int Parens = 0; 4899 do { 4900 switch (FormatTok->Tok.getKind()) { 4901 case tok::l_paren: 4902 ++Parens; 4903 nextToken(); 4904 break; 4905 case tok::r_paren: { 4906 if (Parens > 0) { 4907 --Parens; 4908 nextToken(); 4909 break; 4910 } 4911 Args->push_back({}); 4912 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); 4913 nextToken(); 4914 return Args; 4915 } 4916 case tok::comma: { 4917 if (Parens > 0) { 4918 nextToken(); 4919 break; 4920 } 4921 Args->push_back({}); 4922 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); 4923 nextToken(); 4924 ArgStart = std::prev(Line->Tokens.end()); 4925 break; 4926 } 4927 default: 4928 nextToken(); 4929 break; 4930 } 4931 } while (!eof()); 4932 Line->Tokens.resize(1); 4933 Tokens->setPosition(Position); 4934 FormatTok = Tok; 4935 return {}; 4936 } 4937 4938 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 4939 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 4940 if (MustBreakBeforeNextToken) { 4941 Line->Tokens.back().Tok->MustBreakBefore = true; 4942 Line->Tokens.back().Tok->MustBreakBeforeFinalized = true; 4943 MustBreakBeforeNextToken = false; 4944 } 4945 } 4946 4947 } // end namespace format 4948 } // end namespace clang 4949