1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "FormatToken.h" 17 #include "FormatTokenLexer.h" 18 #include "FormatTokenSource.h" 19 #include "Macros.h" 20 #include "TokenAnnotator.h" 21 #include "clang/Basic/TokenKinds.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/StringRef.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/raw_os_ostream.h" 26 #include "llvm/Support/raw_ostream.h" 27 28 #include <algorithm> 29 #include <utility> 30 31 #define DEBUG_TYPE "format-parser" 32 33 namespace clang { 34 namespace format { 35 36 namespace { 37 38 void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line, 39 StringRef Prefix = "", bool PrintText = false) { 40 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn 41 << ")" << (Line.InPPDirective ? 
" MACRO" : "") << ": "; 42 bool NewLine = false; 43 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 44 E = Line.Tokens.end(); 45 I != E; ++I) { 46 if (NewLine) { 47 OS << Prefix; 48 NewLine = false; 49 } 50 OS << I->Tok->Tok.getName() << "[" << "T=" << (unsigned)I->Tok->getType() 51 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText 52 << "\"] "; 53 for (SmallVectorImpl<UnwrappedLine>::const_iterator 54 CI = I->Children.begin(), 55 CE = I->Children.end(); 56 CI != CE; ++CI) { 57 OS << "\n"; 58 printLine(OS, *CI, (Prefix + " ").str()); 59 NewLine = true; 60 } 61 } 62 if (!NewLine) 63 OS << "\n"; 64 } 65 66 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) { 67 printLine(llvm::dbgs(), Line); 68 } 69 70 class ScopedDeclarationState { 71 public: 72 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack, 73 bool MustBeDeclaration) 74 : Line(Line), Stack(Stack) { 75 Line.MustBeDeclaration = MustBeDeclaration; 76 Stack.push_back(MustBeDeclaration); 77 } 78 ~ScopedDeclarationState() { 79 Stack.pop_back(); 80 if (!Stack.empty()) 81 Line.MustBeDeclaration = Stack.back(); 82 else 83 Line.MustBeDeclaration = true; 84 } 85 86 private: 87 UnwrappedLine &Line; 88 llvm::BitVector &Stack; 89 }; 90 91 } // end anonymous namespace 92 93 class ScopedLineState { 94 public: 95 ScopedLineState(UnwrappedLineParser &Parser, 96 bool SwitchToPreprocessorLines = false) 97 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 98 if (SwitchToPreprocessorLines) 99 Parser.CurrentLines = &Parser.PreprocessorDirectives; 100 else if (!Parser.Line->Tokens.empty()) 101 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 102 PreBlockLine = std::move(Parser.Line); 103 Parser.Line = std::make_unique<UnwrappedLine>(); 104 Parser.Line->Level = PreBlockLine->Level; 105 Parser.Line->PPLevel = PreBlockLine->PPLevel; 106 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 107 Parser.Line->InMacroBody = 
PreBlockLine->InMacroBody; 108 } 109 110 ~ScopedLineState() { 111 if (!Parser.Line->Tokens.empty()) 112 Parser.addUnwrappedLine(); 113 assert(Parser.Line->Tokens.empty()); 114 Parser.Line = std::move(PreBlockLine); 115 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 116 Parser.MustBreakBeforeNextToken = true; 117 Parser.CurrentLines = OriginalLines; 118 } 119 120 private: 121 UnwrappedLineParser &Parser; 122 123 std::unique_ptr<UnwrappedLine> PreBlockLine; 124 SmallVectorImpl<UnwrappedLine> *OriginalLines; 125 }; 126 127 class CompoundStatementIndenter { 128 public: 129 CompoundStatementIndenter(UnwrappedLineParser *Parser, 130 const FormatStyle &Style, unsigned &LineLevel) 131 : CompoundStatementIndenter(Parser, LineLevel, 132 Style.BraceWrapping.AfterControlStatement, 133 Style.BraceWrapping.IndentBraces) {} 134 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, 135 bool WrapBrace, bool IndentBrace) 136 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 137 if (WrapBrace) 138 Parser->addUnwrappedLine(); 139 if (IndentBrace) 140 ++LineLevel; 141 } 142 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 143 144 private: 145 unsigned &LineLevel; 146 unsigned OldLineLevel; 147 }; 148 149 UnwrappedLineParser::UnwrappedLineParser( 150 SourceManager &SourceMgr, const FormatStyle &Style, 151 const AdditionalKeywords &Keywords, unsigned FirstStartColumn, 152 ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback, 153 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator, 154 IdentifierTable &IdentTable) 155 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 156 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 157 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 158 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 159 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 160 ? 
IG_Rejected 161 : IG_Inited), 162 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn), 163 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {} 164 165 void UnwrappedLineParser::reset() { 166 PPBranchLevel = -1; 167 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 168 ? IG_Rejected 169 : IG_Inited; 170 IncludeGuardToken = nullptr; 171 Line.reset(new UnwrappedLine); 172 CommentsBeforeNextToken.clear(); 173 FormatTok = nullptr; 174 MustBreakBeforeNextToken = false; 175 IsDecltypeAutoFunction = false; 176 PreprocessorDirectives.clear(); 177 CurrentLines = &Lines; 178 DeclarationScopeStack.clear(); 179 NestedTooDeep.clear(); 180 NestedLambdas.clear(); 181 PPStack.clear(); 182 Line->FirstStartColumn = FirstStartColumn; 183 184 if (!Unexpanded.empty()) 185 for (FormatToken *Token : AllTokens) 186 Token->MacroCtx.reset(); 187 CurrentExpandedLines.clear(); 188 ExpandedLines.clear(); 189 Unexpanded.clear(); 190 InExpansion = false; 191 Reconstruct.reset(); 192 } 193 194 void UnwrappedLineParser::parse() { 195 IndexedTokenSource TokenSource(AllTokens); 196 Line->FirstStartColumn = FirstStartColumn; 197 do { 198 LLVM_DEBUG(llvm::dbgs() << "----\n"); 199 reset(); 200 Tokens = &TokenSource; 201 TokenSource.reset(); 202 203 readToken(); 204 parseFile(); 205 206 // If we found an include guard then all preprocessor directives (other than 207 // the guard) are over-indented by one. 208 if (IncludeGuard == IG_Found) { 209 for (auto &Line : Lines) 210 if (Line.InPPDirective && Line.Level > 0) 211 --Line.Level; 212 } 213 214 // Create line with eof token. 215 assert(eof()); 216 pushToken(FormatTok); 217 addUnwrappedLine(); 218 219 // In a first run, format everything with the lines containing macro calls 220 // replaced by the expansion. 
221 if (!ExpandedLines.empty()) { 222 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n"); 223 for (const auto &Line : Lines) { 224 if (!Line.Tokens.empty()) { 225 auto it = ExpandedLines.find(Line.Tokens.begin()->Tok); 226 if (it != ExpandedLines.end()) { 227 for (const auto &Expanded : it->second) { 228 LLVM_DEBUG(printDebugInfo(Expanded)); 229 Callback.consumeUnwrappedLine(Expanded); 230 } 231 continue; 232 } 233 } 234 LLVM_DEBUG(printDebugInfo(Line)); 235 Callback.consumeUnwrappedLine(Line); 236 } 237 Callback.finishRun(); 238 } 239 240 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n"); 241 for (const UnwrappedLine &Line : Lines) { 242 LLVM_DEBUG(printDebugInfo(Line)); 243 Callback.consumeUnwrappedLine(Line); 244 } 245 Callback.finishRun(); 246 Lines.clear(); 247 while (!PPLevelBranchIndex.empty() && 248 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 249 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 250 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 251 } 252 if (!PPLevelBranchIndex.empty()) { 253 ++PPLevelBranchIndex.back(); 254 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 255 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 256 } 257 } while (!PPLevelBranchIndex.empty()); 258 } 259 260 void UnwrappedLineParser::parseFile() { 261 // The top-level context in a file always has declarations, except for pre- 262 // processor directives and JavaScript files. 263 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript(); 264 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 265 MustBeDeclaration); 266 if (Style.Language == FormatStyle::LK_TextProto) 267 parseBracedList(); 268 else 269 parseLevel(); 270 // Make sure to format the remaining tokens. 271 // 272 // LK_TextProto is special since its top-level is parsed as the body of a 273 // braced list, which does not necessarily have natural line separators such 274 // as a semicolon. 
Comments after the last entry that have been determined to 275 // not belong to that line, as in: 276 // key: value 277 // // endfile comment 278 // do not have a chance to be put on a line of their own until this point. 279 // Here we add this newline before end-of-file comments. 280 if (Style.Language == FormatStyle::LK_TextProto && 281 !CommentsBeforeNextToken.empty()) { 282 addUnwrappedLine(); 283 } 284 flushComments(true); 285 addUnwrappedLine(); 286 } 287 288 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() { 289 do { 290 switch (FormatTok->Tok.getKind()) { 291 case tok::l_brace: 292 return; 293 default: 294 if (FormatTok->is(Keywords.kw_where)) { 295 addUnwrappedLine(); 296 nextToken(); 297 parseCSharpGenericTypeConstraint(); 298 break; 299 } 300 nextToken(); 301 break; 302 } 303 } while (!eof()); 304 } 305 306 void UnwrappedLineParser::parseCSharpAttribute() { 307 int UnpairedSquareBrackets = 1; 308 do { 309 switch (FormatTok->Tok.getKind()) { 310 case tok::r_square: 311 nextToken(); 312 --UnpairedSquareBrackets; 313 if (UnpairedSquareBrackets == 0) { 314 addUnwrappedLine(); 315 return; 316 } 317 break; 318 case tok::l_square: 319 ++UnpairedSquareBrackets; 320 nextToken(); 321 break; 322 default: 323 nextToken(); 324 break; 325 } 326 } while (!eof()); 327 } 328 329 bool UnwrappedLineParser::precededByCommentOrPPDirective() const { 330 if (!Lines.empty() && Lines.back().InPPDirective) 331 return true; 332 333 const FormatToken *Previous = Tokens->getPreviousToken(); 334 return Previous && Previous->is(tok::comment) && 335 (Previous->IsMultiline || Previous->NewlinesBefore > 0); 336 } 337 338 /// \brief Parses a level, that is ???. 339 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level. 340 /// \param IfKind The \p if statement kind in the level. 341 /// \param IfLeftBrace The left brace of the \p if block in the level. 342 /// \returns true if a simple block of if/else/for/while, or false otherwise. 
343 /// (A simple block has a single statement.) 344 bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace, 345 IfStmtKind *IfKind, 346 FormatToken **IfLeftBrace) { 347 const bool InRequiresExpression = 348 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace); 349 const bool IsPrecededByCommentOrPPDirective = 350 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective(); 351 FormatToken *IfLBrace = nullptr; 352 bool HasDoWhile = false; 353 bool HasLabel = false; 354 unsigned StatementCount = 0; 355 bool SwitchLabelEncountered = false; 356 357 do { 358 if (FormatTok->isAttribute()) { 359 nextToken(); 360 continue; 361 } 362 tok::TokenKind kind = FormatTok->Tok.getKind(); 363 if (FormatTok->getType() == TT_MacroBlockBegin) 364 kind = tok::l_brace; 365 else if (FormatTok->getType() == TT_MacroBlockEnd) 366 kind = tok::r_brace; 367 368 auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile, 369 &HasLabel, &StatementCount] { 370 parseStructuralElement(OpeningBrace, IfKind, &IfLBrace, 371 HasDoWhile ? nullptr : &HasDoWhile, 372 HasLabel ? nullptr : &HasLabel); 373 ++StatementCount; 374 assert(StatementCount > 0 && "StatementCount overflow!"); 375 }; 376 377 switch (kind) { 378 case tok::comment: 379 nextToken(); 380 addUnwrappedLine(); 381 break; 382 case tok::l_brace: 383 if (InRequiresExpression) { 384 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace); 385 } else if (FormatTok->Previous && 386 FormatTok->Previous->ClosesRequiresClause) { 387 // We need the 'default' case here to correctly parse a function 388 // l_brace. 
389 ParseDefault(); 390 continue; 391 } 392 if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin) && 393 tryToParseBracedList()) { 394 continue; 395 } 396 parseBlock(); 397 ++StatementCount; 398 assert(StatementCount > 0 && "StatementCount overflow!"); 399 addUnwrappedLine(); 400 break; 401 case tok::r_brace: 402 if (OpeningBrace) { 403 if (!Style.RemoveBracesLLVM || Line->InPPDirective || 404 !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) { 405 return false; 406 } 407 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel || 408 HasDoWhile || IsPrecededByCommentOrPPDirective || 409 precededByCommentOrPPDirective()) { 410 return false; 411 } 412 const FormatToken *Next = Tokens->peekNextToken(); 413 if (Next->is(tok::comment) && Next->NewlinesBefore == 0) 414 return false; 415 if (IfLeftBrace) 416 *IfLeftBrace = IfLBrace; 417 return true; 418 } 419 nextToken(); 420 addUnwrappedLine(); 421 break; 422 case tok::kw_default: { 423 unsigned StoredPosition = Tokens->getPosition(); 424 FormatToken *Next; 425 do { 426 Next = Tokens->getNextToken(); 427 assert(Next); 428 } while (Next->is(tok::comment)); 429 FormatTok = Tokens->setPosition(StoredPosition); 430 if (Next->isNot(tok::colon)) { 431 // default not followed by ':' is not a case label; treat it like 432 // an identifier. 433 parseStructuralElement(); 434 break; 435 } 436 // Else, if it is 'default:', fall through to the case handling. 437 [[fallthrough]]; 438 } 439 case tok::kw_case: 440 if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() || 441 (Style.isJavaScript() && Line->MustBeDeclaration)) { 442 // Proto: there are no switch/case statements 443 // Verilog: Case labels don't have this word. We handle case 444 // labels including default in TokenAnnotator. 445 // JavaScript: A 'case: string' style field declaration. 
446 ParseDefault(); 447 break; 448 } 449 if (!SwitchLabelEncountered && 450 (Style.IndentCaseLabels || 451 (Line->InPPDirective && Line->Level == 1))) { 452 ++Line->Level; 453 } 454 SwitchLabelEncountered = true; 455 parseStructuralElement(); 456 break; 457 case tok::l_square: 458 if (Style.isCSharp()) { 459 nextToken(); 460 parseCSharpAttribute(); 461 break; 462 } 463 if (handleCppAttributes()) 464 break; 465 [[fallthrough]]; 466 default: 467 ParseDefault(); 468 break; 469 } 470 } while (!eof()); 471 472 return false; 473 } 474 475 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 476 // We'll parse forward through the tokens until we hit 477 // a closing brace or eof - note that getNextToken() will 478 // parse macros, so this will magically work inside macro 479 // definitions, too. 480 unsigned StoredPosition = Tokens->getPosition(); 481 FormatToken *Tok = FormatTok; 482 const FormatToken *PrevTok = Tok->Previous; 483 // Keep a stack of positions of lbrace tokens. We will 484 // update information about whether an lbrace starts a 485 // braced init list or a different block during the loop. 486 struct StackEntry { 487 FormatToken *Tok; 488 const FormatToken *PrevTok; 489 }; 490 SmallVector<StackEntry, 8> LBraceStack; 491 assert(Tok->is(tok::l_brace)); 492 493 do { 494 FormatToken *NextTok; 495 do { 496 NextTok = Tokens->getNextToken(); 497 } while (NextTok->is(tok::comment)); 498 499 if (!Line->InMacroBody) { 500 // Skip PPDirective lines and comments. 
501 while (NextTok->is(tok::hash)) { 502 do { 503 NextTok = Tokens->getNextToken(); 504 } while (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof)); 505 506 while (NextTok->is(tok::comment)) 507 NextTok = Tokens->getNextToken(); 508 } 509 } 510 511 switch (Tok->Tok.getKind()) { 512 case tok::l_brace: 513 if (Style.isJavaScript() && PrevTok) { 514 if (PrevTok->isOneOf(tok::colon, tok::less)) { 515 // A ':' indicates this code is in a type, or a braced list 516 // following a label in an object literal ({a: {b: 1}}). 517 // A '<' could be an object used in a comparison, but that is nonsense 518 // code (can never return true), so more likely it is a generic type 519 // argument (`X<{a: string; b: number}>`). 520 // The code below could be confused by semicolons between the 521 // individual members in a type member list, which would normally 522 // trigger BK_Block. In both cases, this must be parsed as an inline 523 // braced init. 524 Tok->setBlockKind(BK_BracedInit); 525 } else if (PrevTok->is(tok::r_paren)) { 526 // `) { }` can only occur in function or method declarations in JS. 527 Tok->setBlockKind(BK_Block); 528 } 529 } else { 530 Tok->setBlockKind(BK_Unknown); 531 } 532 LBraceStack.push_back({Tok, PrevTok}); 533 break; 534 case tok::r_brace: 535 if (LBraceStack.empty()) 536 break; 537 if (LBraceStack.back().Tok->is(BK_Unknown)) { 538 bool ProbablyBracedList = false; 539 if (Style.Language == FormatStyle::LK_Proto) { 540 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 541 } else { 542 // Using OriginalColumn to distinguish between ObjC methods and 543 // binary operators is a bit hacky. 544 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 545 NextTok->OriginalColumn == 0; 546 547 // Try to detect a braced list. 
Note that regardless how we mark inner 548 // braces here, we will overwrite the BlockKind later if we parse a 549 // braced list (where all blocks inside are by default braced lists), 550 // or when we explicitly detect blocks (for example while parsing 551 // lambdas). 552 553 // If we already marked the opening brace as braced list, the closing 554 // must also be part of it. 555 ProbablyBracedList = LBraceStack.back().Tok->is(TT_BracedListLBrace); 556 557 ProbablyBracedList = ProbablyBracedList || 558 (Style.isJavaScript() && 559 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 560 Keywords.kw_as)); 561 ProbablyBracedList = ProbablyBracedList || 562 (Style.isCpp() && NextTok->is(tok::l_paren)); 563 564 // If there is a comma, semicolon or right paren after the closing 565 // brace, we assume this is a braced initializer list. 566 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 567 // braced list in JS. 568 ProbablyBracedList = 569 ProbablyBracedList || 570 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 571 tok::r_paren, tok::r_square, tok::ellipsis); 572 573 // Distinguish between braced list in a constructor initializer list 574 // followed by constructor body, or just adjacent blocks. 
575 ProbablyBracedList = 576 ProbablyBracedList || 577 (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok && 578 LBraceStack.back().PrevTok->isOneOf(tok::identifier, 579 tok::greater)); 580 581 ProbablyBracedList = 582 ProbablyBracedList || 583 (NextTok->is(tok::identifier) && 584 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)); 585 586 ProbablyBracedList = ProbablyBracedList || 587 (NextTok->is(tok::semi) && 588 (!ExpectClassBody || LBraceStack.size() != 1)); 589 590 ProbablyBracedList = 591 ProbablyBracedList || 592 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 593 594 if (!Style.isCSharp() && NextTok->is(tok::l_square)) { 595 // We can have an array subscript after a braced init 596 // list, but C++11 attributes are expected after blocks. 597 NextTok = Tokens->getNextToken(); 598 ProbablyBracedList = NextTok->isNot(tok::l_square); 599 } 600 601 // Cpp macro definition body that is a nonempty braced list or block: 602 if (Style.isCpp() && Line->InMacroBody && PrevTok != FormatTok && 603 !FormatTok->Previous && NextTok->is(tok::eof) && 604 // A statement can end with only `;` (simple statement), a block 605 // closing brace (compound statement), or `:` (label statement). 606 // If PrevTok is a block opening brace, Tok ends an empty block. 
607 !PrevTok->isOneOf(tok::semi, BK_Block, tok::colon)) { 608 ProbablyBracedList = true; 609 } 610 } 611 if (ProbablyBracedList) { 612 Tok->setBlockKind(BK_BracedInit); 613 LBraceStack.back().Tok->setBlockKind(BK_BracedInit); 614 } else { 615 Tok->setBlockKind(BK_Block); 616 LBraceStack.back().Tok->setBlockKind(BK_Block); 617 } 618 } 619 LBraceStack.pop_back(); 620 break; 621 case tok::identifier: 622 if (Tok->isNot(TT_StatementMacro)) 623 break; 624 [[fallthrough]]; 625 case tok::at: 626 case tok::semi: 627 case tok::kw_if: 628 case tok::kw_while: 629 case tok::kw_for: 630 case tok::kw_switch: 631 case tok::kw_try: 632 case tok::kw___try: 633 if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown)) 634 LBraceStack.back().Tok->setBlockKind(BK_Block); 635 break; 636 default: 637 break; 638 } 639 640 PrevTok = Tok; 641 Tok = NextTok; 642 } while (Tok->isNot(tok::eof) && !LBraceStack.empty()); 643 644 // Assume other blocks for all unclosed opening braces. 645 for (const auto &Entry : LBraceStack) 646 if (Entry.Tok->is(BK_Unknown)) 647 Entry.Tok->setBlockKind(BK_Block); 648 649 FormatTok = Tokens->setPosition(StoredPosition); 650 } 651 652 // Sets the token type of the directly previous right brace. 653 void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) { 654 if (auto Prev = FormatTok->getPreviousNonComment(); 655 Prev && Prev->is(tok::r_brace)) { 656 Prev->setFinalizedType(Type); 657 } 658 } 659 660 template <class T> 661 static inline void hash_combine(std::size_t &seed, const T &v) { 662 std::hash<T> hasher; 663 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 664 } 665 666 size_t UnwrappedLineParser::computePPHash() const { 667 size_t h = 0; 668 for (const auto &i : PPStack) { 669 hash_combine(h, size_t(i.Kind)); 670 hash_combine(h, i.Line); 671 } 672 return h; 673 } 674 675 // Checks whether \p ParsedLine might fit on a single line. 
If \p OpeningBrace 676 // is not null, subtracts its length (plus the preceding space) when computing 677 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before 678 // running the token annotator on it so that we can restore them afterward. 679 bool UnwrappedLineParser::mightFitOnOneLine( 680 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const { 681 const auto ColumnLimit = Style.ColumnLimit; 682 if (ColumnLimit == 0) 683 return true; 684 685 auto &Tokens = ParsedLine.Tokens; 686 assert(!Tokens.empty()); 687 688 const auto *LastToken = Tokens.back().Tok; 689 assert(LastToken); 690 691 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size()); 692 693 int Index = 0; 694 for (const auto &Token : Tokens) { 695 assert(Token.Tok); 696 auto &SavedToken = SavedTokens[Index++]; 697 SavedToken.Tok = new FormatToken; 698 SavedToken.Tok->copyFrom(*Token.Tok); 699 SavedToken.Children = std::move(Token.Children); 700 } 701 702 AnnotatedLine Line(ParsedLine); 703 assert(Line.Last == LastToken); 704 705 TokenAnnotator Annotator(Style, Keywords); 706 Annotator.annotate(Line); 707 Annotator.calculateFormattingInformation(Line); 708 709 auto Length = LastToken->TotalLength; 710 if (OpeningBrace) { 711 assert(OpeningBrace != Tokens.front().Tok); 712 if (auto Prev = OpeningBrace->Previous; 713 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) { 714 Length -= ColumnLimit; 715 } 716 Length -= OpeningBrace->TokenText.size() + 1; 717 } 718 719 if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) { 720 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace)); 721 Length -= FirstToken->TokenText.size() + 1; 722 } 723 724 Index = 0; 725 for (auto &Token : Tokens) { 726 const auto &SavedToken = SavedTokens[Index++]; 727 Token.Tok->copyFrom(*SavedToken.Tok); 728 Token.Children = std::move(SavedToken.Children); 729 delete SavedToken.Tok; 730 } 731 732 // If these change PPLevel needs to be used for get 
correct indentation. 733 assert(!Line.InMacroBody); 734 assert(!Line.InPPDirective); 735 return Line.Level * Style.IndentWidth + Length <= ColumnLimit; 736 } 737 738 FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration, 739 unsigned AddLevels, bool MunchSemi, 740 bool KeepBraces, 741 IfStmtKind *IfKind, 742 bool UnindentWhitesmithsBraces) { 743 auto HandleVerilogBlockLabel = [this]() { 744 // ":" name 745 if (Style.isVerilog() && FormatTok->is(tok::colon)) { 746 nextToken(); 747 if (Keywords.isVerilogIdentifier(*FormatTok)) 748 nextToken(); 749 } 750 }; 751 752 // Whether this is a Verilog-specific block that has a special header like a 753 // module. 754 const bool VerilogHierarchy = 755 Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok); 756 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) || 757 (Style.isVerilog() && 758 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) && 759 "'{' or macro block token expected"); 760 FormatToken *Tok = FormatTok; 761 const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment); 762 auto Index = CurrentLines->size(); 763 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 764 FormatTok->setBlockKind(BK_Block); 765 766 // For Whitesmiths mode, jump to the next level prior to skipping over the 767 // braces. 768 if (!VerilogHierarchy && AddLevels > 0 && 769 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 770 ++Line->Level; 771 } 772 773 size_t PPStartHash = computePPHash(); 774 775 const unsigned InitialLevel = Line->Level; 776 if (VerilogHierarchy) { 777 AddLevels += parseVerilogHierarchyHeader(); 778 } else { 779 nextToken(/*LevelDifference=*/AddLevels); 780 HandleVerilogBlockLabel(); 781 } 782 783 // Bail out if there are too many levels. Otherwise, the stack might overflow. 
784 if (Line->Level > 300) 785 return nullptr; 786 787 if (MacroBlock && FormatTok->is(tok::l_paren)) 788 parseParens(); 789 790 size_t NbPreprocessorDirectives = 791 !parsingPPDirective() ? PreprocessorDirectives.size() : 0; 792 addUnwrappedLine(); 793 size_t OpeningLineIndex = 794 CurrentLines->empty() 795 ? (UnwrappedLine::kInvalidIndex) 796 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 797 798 // Whitesmiths is weird here. The brace needs to be indented for the namespace 799 // block, but the block itself may not be indented depending on the style 800 // settings. This allows the format to back up one level in those cases. 801 if (UnindentWhitesmithsBraces) 802 --Line->Level; 803 804 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 805 MustBeDeclaration); 806 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths) 807 Line->Level += AddLevels; 808 809 FormatToken *IfLBrace = nullptr; 810 const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace); 811 812 if (eof()) 813 return IfLBrace; 814 815 if (MacroBlock ? 
FormatTok->isNot(TT_MacroBlockEnd) 816 : FormatTok->isNot(tok::r_brace)) { 817 Line->Level = InitialLevel; 818 FormatTok->setBlockKind(BK_Block); 819 return IfLBrace; 820 } 821 822 if (FormatTok->is(tok::r_brace) && Tok->is(TT_NamespaceLBrace)) 823 FormatTok->setFinalizedType(TT_NamespaceRBrace); 824 825 const bool IsFunctionRBrace = 826 FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace); 827 828 auto RemoveBraces = [=]() mutable { 829 if (!SimpleBlock) 830 return false; 831 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)); 832 assert(FormatTok->is(tok::r_brace)); 833 const bool WrappedOpeningBrace = !Tok->Previous; 834 if (WrappedOpeningBrace && FollowedByComment) 835 return false; 836 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional; 837 if (KeepBraces && !HasRequiredIfBraces) 838 return false; 839 if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) { 840 const FormatToken *Previous = Tokens->getPreviousToken(); 841 assert(Previous); 842 if (Previous->is(tok::r_brace) && !Previous->Optional) 843 return false; 844 } 845 assert(!CurrentLines->empty()); 846 auto &LastLine = CurrentLines->back(); 847 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine)) 848 return false; 849 if (Tok->is(TT_ElseLBrace)) 850 return true; 851 if (WrappedOpeningBrace) { 852 assert(Index > 0); 853 --Index; // The line above the wrapped l_brace. 854 Tok = nullptr; 855 } 856 return mightFitOnOneLine((*CurrentLines)[Index], Tok); 857 }; 858 if (RemoveBraces()) { 859 Tok->MatchingParen = FormatTok; 860 FormatTok->MatchingParen = Tok; 861 } 862 863 size_t PPEndHash = computePPHash(); 864 865 // Munch the closing brace. 866 nextToken(/*LevelDifference=*/-AddLevels); 867 868 // When this is a function block and there is an unnecessary semicolon 869 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of 870 // it later). 
871 if (Style.RemoveSemicolon && IsFunctionRBrace) { 872 while (FormatTok->is(tok::semi)) { 873 FormatTok->Optional = true; 874 nextToken(); 875 } 876 } 877 878 HandleVerilogBlockLabel(); 879 880 if (MacroBlock && FormatTok->is(tok::l_paren)) 881 parseParens(); 882 883 Line->Level = InitialLevel; 884 885 if (FormatTok->is(tok::kw_noexcept)) { 886 // A noexcept in a requires expression. 887 nextToken(); 888 } 889 890 if (FormatTok->is(tok::arrow)) { 891 // Following the } or noexcept we can find a trailing return type arrow 892 // as part of an implicit conversion constraint. 893 nextToken(); 894 parseStructuralElement(); 895 } 896 897 if (MunchSemi && FormatTok->is(tok::semi)) 898 nextToken(); 899 900 if (PPStartHash == PPEndHash) { 901 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 902 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 903 // Update the opening line to add the forward reference as well 904 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 905 CurrentLines->size() - 1; 906 } 907 } 908 909 return IfLBrace; 910 } 911 912 static bool isGoogScope(const UnwrappedLine &Line) { 913 // FIXME: Closure-library specific stuff should not be hard-coded but be 914 // configurable. 915 if (Line.Tokens.size() < 4) 916 return false; 917 auto I = Line.Tokens.begin(); 918 if (I->Tok->TokenText != "goog") 919 return false; 920 ++I; 921 if (I->Tok->isNot(tok::period)) 922 return false; 923 ++I; 924 if (I->Tok->TokenText != "scope") 925 return false; 926 ++I; 927 return I->Tok->is(tok::l_paren); 928 } 929 930 static bool isIIFE(const UnwrappedLine &Line, 931 const AdditionalKeywords &Keywords) { 932 // Look for the start of an immediately invoked anonymous function. 933 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 934 // This is commonly done in JavaScript to create a new, anonymous scope. 935 // Example: (function() { ... 
})() 936 if (Line.Tokens.size() < 3) 937 return false; 938 auto I = Line.Tokens.begin(); 939 if (I->Tok->isNot(tok::l_paren)) 940 return false; 941 ++I; 942 if (I->Tok->isNot(Keywords.kw_function)) 943 return false; 944 ++I; 945 return I->Tok->is(tok::l_paren); 946 } 947 948 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 949 const FormatToken &InitialToken) { 950 tok::TokenKind Kind = InitialToken.Tok.getKind(); 951 if (InitialToken.is(TT_NamespaceMacro)) 952 Kind = tok::kw_namespace; 953 954 switch (Kind) { 955 case tok::kw_namespace: 956 return Style.BraceWrapping.AfterNamespace; 957 case tok::kw_class: 958 return Style.BraceWrapping.AfterClass; 959 case tok::kw_union: 960 return Style.BraceWrapping.AfterUnion; 961 case tok::kw_struct: 962 return Style.BraceWrapping.AfterStruct; 963 case tok::kw_enum: 964 return Style.BraceWrapping.AfterEnum; 965 default: 966 return false; 967 } 968 } 969 970 void UnwrappedLineParser::parseChildBlock() { 971 assert(FormatTok->is(tok::l_brace)); 972 FormatTok->setBlockKind(BK_Block); 973 const FormatToken *OpeningBrace = FormatTok; 974 nextToken(); 975 { 976 bool SkipIndent = (Style.isJavaScript() && 977 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 978 ScopedLineState LineState(*this); 979 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 980 /*MustBeDeclaration=*/false); 981 Line->Level += SkipIndent ? 0 : 1; 982 parseLevel(OpeningBrace); 983 flushComments(isOnNewLine(*FormatTok)); 984 Line->Level -= SkipIndent ? 
0 : 1; 985 } 986 nextToken(); 987 } 988 989 void UnwrappedLineParser::parsePPDirective() { 990 assert(FormatTok->is(tok::hash) && "'#' expected"); 991 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 992 993 nextToken(); 994 995 if (!FormatTok->Tok.getIdentifierInfo()) { 996 parsePPUnknown(); 997 return; 998 } 999 1000 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 1001 case tok::pp_define: 1002 parsePPDefine(); 1003 return; 1004 case tok::pp_if: 1005 parsePPIf(/*IfDef=*/false); 1006 break; 1007 case tok::pp_ifdef: 1008 case tok::pp_ifndef: 1009 parsePPIf(/*IfDef=*/true); 1010 break; 1011 case tok::pp_else: 1012 case tok::pp_elifdef: 1013 case tok::pp_elifndef: 1014 case tok::pp_elif: 1015 parsePPElse(); 1016 break; 1017 case tok::pp_endif: 1018 parsePPEndIf(); 1019 break; 1020 case tok::pp_pragma: 1021 parsePPPragma(); 1022 break; 1023 default: 1024 parsePPUnknown(); 1025 break; 1026 } 1027 } 1028 1029 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 1030 size_t Line = CurrentLines->size(); 1031 if (CurrentLines == &PreprocessorDirectives) 1032 Line += Lines.size(); 1033 1034 if (Unreachable || 1035 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) { 1036 PPStack.push_back({PP_Unreachable, Line}); 1037 } else { 1038 PPStack.push_back({PP_Conditional, Line}); 1039 } 1040 } 1041 1042 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 1043 ++PPBranchLevel; 1044 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 1045 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 1046 PPLevelBranchIndex.push_back(0); 1047 PPLevelBranchCount.push_back(0); 1048 } 1049 PPChainBranchIndex.push(Unreachable ? 
-1 : 0); 1050 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 1051 conditionalCompilationCondition(Unreachable || Skip); 1052 } 1053 1054 void UnwrappedLineParser::conditionalCompilationAlternative() { 1055 if (!PPStack.empty()) 1056 PPStack.pop_back(); 1057 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 1058 if (!PPChainBranchIndex.empty()) 1059 ++PPChainBranchIndex.top(); 1060 conditionalCompilationCondition( 1061 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 1062 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 1063 } 1064 1065 void UnwrappedLineParser::conditionalCompilationEnd() { 1066 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 1067 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 1068 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) 1069 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 1070 } 1071 // Guard against #endif's without #if. 1072 if (PPBranchLevel > -1) 1073 --PPBranchLevel; 1074 if (!PPChainBranchIndex.empty()) 1075 PPChainBranchIndex.pop(); 1076 if (!PPStack.empty()) 1077 PPStack.pop_back(); 1078 } 1079 1080 void UnwrappedLineParser::parsePPIf(bool IfDef) { 1081 bool IfNDef = FormatTok->is(tok::pp_ifndef); 1082 nextToken(); 1083 bool Unreachable = false; 1084 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 1085 Unreachable = true; 1086 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 1087 Unreachable = true; 1088 conditionalCompilationStart(Unreachable); 1089 FormatToken *IfCondition = FormatTok; 1090 // If there's a #ifndef on the first line, and the only lines before it are 1091 // comments, it could be an include guard. 
1092 bool MaybeIncludeGuard = IfNDef; 1093 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1094 for (auto &Line : Lines) { 1095 if (Line.Tokens.front().Tok->isNot(tok::comment)) { 1096 MaybeIncludeGuard = false; 1097 IncludeGuard = IG_Rejected; 1098 break; 1099 } 1100 } 1101 } 1102 --PPBranchLevel; 1103 parsePPUnknown(); 1104 ++PPBranchLevel; 1105 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1106 IncludeGuard = IG_IfNdefed; 1107 IncludeGuardToken = IfCondition; 1108 } 1109 } 1110 1111 void UnwrappedLineParser::parsePPElse() { 1112 // If a potential include guard has an #else, it's not an include guard. 1113 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 1114 IncludeGuard = IG_Rejected; 1115 // Don't crash when there is an #else without an #if. 1116 assert(PPBranchLevel >= -1); 1117 if (PPBranchLevel == -1) 1118 conditionalCompilationStart(/*Unreachable=*/true); 1119 conditionalCompilationAlternative(); 1120 --PPBranchLevel; 1121 parsePPUnknown(); 1122 ++PPBranchLevel; 1123 } 1124 1125 void UnwrappedLineParser::parsePPEndIf() { 1126 conditionalCompilationEnd(); 1127 parsePPUnknown(); 1128 // If the #endif of a potential include guard is the last thing in the file, 1129 // then we found an include guard. 
1130 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && 1131 Style.IndentPPDirectives != FormatStyle::PPDIS_None) { 1132 IncludeGuard = IG_Found; 1133 } 1134 } 1135 1136 void UnwrappedLineParser::parsePPDefine() { 1137 nextToken(); 1138 1139 if (!FormatTok->Tok.getIdentifierInfo()) { 1140 IncludeGuard = IG_Rejected; 1141 IncludeGuardToken = nullptr; 1142 parsePPUnknown(); 1143 return; 1144 } 1145 1146 if (IncludeGuard == IG_IfNdefed && 1147 IncludeGuardToken->TokenText == FormatTok->TokenText) { 1148 IncludeGuard = IG_Defined; 1149 IncludeGuardToken = nullptr; 1150 for (auto &Line : Lines) { 1151 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 1152 IncludeGuard = IG_Rejected; 1153 break; 1154 } 1155 } 1156 } 1157 1158 // In the context of a define, even keywords should be treated as normal 1159 // identifiers. Setting the kind to identifier is not enough, because we need 1160 // to treat additional keywords like __except as well, which are already 1161 // identifiers. Setting the identifier info to null interferes with include 1162 // guard processing above, and changes preprocessing nesting. 1163 FormatTok->Tok.setKind(tok::identifier); 1164 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define); 1165 nextToken(); 1166 if (FormatTok->Tok.getKind() == tok::l_paren && 1167 !FormatTok->hasWhitespaceBefore()) { 1168 parseParens(); 1169 } 1170 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1171 Line->Level += PPBranchLevel + 1; 1172 addUnwrappedLine(); 1173 ++Line->Level; 1174 1175 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 
0 : 1); 1176 assert((int)Line->PPLevel >= 0); 1177 Line->InMacroBody = true; 1178 1179 if (Style.SkipMacroDefinitionBody) { 1180 do { 1181 FormatTok->Finalized = true; 1182 nextToken(); 1183 } while (!eof()); 1184 addUnwrappedLine(); 1185 return; 1186 } 1187 1188 if (FormatTok->is(tok::identifier) && 1189 Tokens->peekNextToken()->is(tok::colon)) { 1190 nextToken(); 1191 nextToken(); 1192 } 1193 1194 // Errors during a preprocessor directive can only affect the layout of the 1195 // preprocessor directive, and thus we ignore them. An alternative approach 1196 // would be to use the same approach we use on the file level (no 1197 // re-indentation if there was a structural error) within the macro 1198 // definition. 1199 parseFile(); 1200 } 1201 1202 void UnwrappedLineParser::parsePPPragma() { 1203 Line->InPragmaDirective = true; 1204 parsePPUnknown(); 1205 } 1206 1207 void UnwrappedLineParser::parsePPUnknown() { 1208 do { 1209 nextToken(); 1210 } while (!eof()); 1211 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1212 Line->Level += PPBranchLevel + 1; 1213 addUnwrappedLine(); 1214 } 1215 1216 // Here we exclude certain tokens that are not usually the first token in an 1217 // unwrapped line. This is used in attempt to distinguish macro calls without 1218 // trailing semicolons from other constructs split to several lines. 1219 static bool tokenCanStartNewLine(const FormatToken &Tok) { 1220 // Semicolon can be a null-statement, l_square can be a start of a macro or 1221 // a C++11 attribute, but this doesn't seem to be common. 1222 assert(Tok.isNot(TT_AttributeSquare)); 1223 return !Tok.isOneOf(tok::semi, tok::l_brace, 1224 // Tokens that can only be used as binary operators and a 1225 // part of overloaded operator names. 
1226 tok::period, tok::periodstar, tok::arrow, tok::arrowstar, 1227 tok::less, tok::greater, tok::slash, tok::percent, 1228 tok::lessless, tok::greatergreater, tok::equal, 1229 tok::plusequal, tok::minusequal, tok::starequal, 1230 tok::slashequal, tok::percentequal, tok::ampequal, 1231 tok::pipeequal, tok::caretequal, tok::greatergreaterequal, 1232 tok::lesslessequal, 1233 // Colon is used in labels, base class lists, initializer 1234 // lists, range-based for loops, ternary operator, but 1235 // should never be the first token in an unwrapped line. 1236 tok::colon, 1237 // 'noexcept' is a trailing annotation. 1238 tok::kw_noexcept); 1239 } 1240 1241 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 1242 const FormatToken *FormatTok) { 1243 // FIXME: This returns true for C/C++ keywords like 'struct'. 1244 return FormatTok->is(tok::identifier) && 1245 (!FormatTok->Tok.getIdentifierInfo() || 1246 !FormatTok->isOneOf( 1247 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 1248 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 1249 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 1250 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 1251 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 1252 Keywords.kw_instanceof, Keywords.kw_interface, 1253 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from)); 1254 } 1255 1256 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 1257 const FormatToken *FormatTok) { 1258 return FormatTok->Tok.isLiteral() || 1259 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 1260 mustBeJSIdent(Keywords, FormatTok); 1261 } 1262 1263 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 1264 // when encountered after a value (see mustBeJSIdentOrValue). 
1265 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 1266 const FormatToken *FormatTok) { 1267 return FormatTok->isOneOf( 1268 tok::kw_return, Keywords.kw_yield, 1269 // conditionals 1270 tok::kw_if, tok::kw_else, 1271 // loops 1272 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 1273 // switch/case 1274 tok::kw_switch, tok::kw_case, 1275 // exceptions 1276 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 1277 // declaration 1278 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 1279 Keywords.kw_async, Keywords.kw_function, 1280 // import/export 1281 Keywords.kw_import, tok::kw_export); 1282 } 1283 1284 // Checks whether a token is a type in K&R C (aka C78). 1285 static bool isC78Type(const FormatToken &Tok) { 1286 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long, 1287 tok::kw_unsigned, tok::kw_float, tok::kw_double, 1288 tok::identifier); 1289 } 1290 1291 // This function checks whether a token starts the first parameter declaration 1292 // in a K&R C (aka C78) function definition, e.g.: 1293 // int f(a, b) 1294 // short a, b; 1295 // { 1296 // return a + b; 1297 // } 1298 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, 1299 const FormatToken *FuncName) { 1300 assert(Tok); 1301 assert(Next); 1302 assert(FuncName); 1303 1304 if (FuncName->isNot(tok::identifier)) 1305 return false; 1306 1307 const FormatToken *Prev = FuncName->Previous; 1308 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev))) 1309 return false; 1310 1311 if (!isC78Type(*Tok) && 1312 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) { 1313 return false; 1314 } 1315 1316 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo()) 1317 return false; 1318 1319 Tok = Tok->Previous; 1320 if (!Tok || Tok->isNot(tok::r_paren)) 1321 return false; 1322 1323 Tok = Tok->Previous; 1324 if (!Tok || Tok->isNot(tok::identifier)) 1325 return false; 1326 1327 return 
Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma); 1328 } 1329 1330 bool UnwrappedLineParser::parseModuleImport() { 1331 assert(FormatTok->is(Keywords.kw_import) && "'import' expected"); 1332 1333 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true); 1334 !Token->Tok.getIdentifierInfo() && 1335 !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) { 1336 return false; 1337 } 1338 1339 nextToken(); 1340 while (!eof()) { 1341 if (FormatTok->is(tok::colon)) { 1342 FormatTok->setFinalizedType(TT_ModulePartitionColon); 1343 } 1344 // Handle import <foo/bar.h> as we would an include statement. 1345 else if (FormatTok->is(tok::less)) { 1346 nextToken(); 1347 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) { 1348 // Mark tokens up to the trailing line comments as implicit string 1349 // literals. 1350 if (FormatTok->isNot(tok::comment) && 1351 !FormatTok->TokenText.starts_with("//")) { 1352 FormatTok->setFinalizedType(TT_ImplicitStringLiteral); 1353 } 1354 nextToken(); 1355 } 1356 } 1357 if (FormatTok->is(tok::semi)) { 1358 nextToken(); 1359 break; 1360 } 1361 nextToken(); 1362 } 1363 1364 addUnwrappedLine(); 1365 return true; 1366 } 1367 1368 // readTokenWithJavaScriptASI reads the next token and terminates the current 1369 // line if JavaScript Automatic Semicolon Insertion must 1370 // happen between the current token and the next token. 1371 // 1372 // This method is conservative - it cannot cover all edge cases of JavaScript, 1373 // but only aims to correctly handle certain well known cases. It *must not* 1374 // return true in speculative cases. 1375 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 1376 FormatToken *Previous = FormatTok; 1377 readToken(); 1378 FormatToken *Next = FormatTok; 1379 1380 bool IsOnSameLine = 1381 CommentsBeforeNextToken.empty() 1382 ? 
/// Parses one structural element starting at the current token.
///
/// The function works in three stages:
///  1. Language specific prefixes: a TableGen `include`, leading C++
///     attributes, and Verilog constructs/qualifiers.
///  2. A switch on tokens that only make sense at the beginning of a line
///     (`namespace`, `if`, `for`, `switch`, access specifiers, goto labels,
///     ...). Most cases hand off to a dedicated parse routine and return; a
///     `break` falls through to stage 3.
///  3. A token loop that consumes the remainder of the element until one of
///     the cases finishes it (usually by adding an unwrapped line and
///     returning) or eof() is reached.
///
/// \param OpeningBrace The brace of the enclosing block, or null. Used to
///        recognize the body of a requires expression and to disable the K&R
///        parameter declaration check inside blocks.
/// \param IfKind Forwarded to parseIfThenElse().
/// \param IfLeftBrace If non-null, receives the token returned by
///        parseIfThenElse() when an `if` statement is parsed.
/// \param HasDoWhile If non-null, set to true when a do-while is parsed.
/// \param HasLabel If non-null, set to true when a goto label is parsed.
void UnwrappedLineParser::parseStructuralElement(
    const FormatToken *OpeningBrace, IfStmtKind *IfKind,
    FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
  // TableGen: `include "file"` forms a line of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }

  if (Style.isCpp()) {
    // Consume any number of leading attribute specifiers.
    while (FormatTok->is(tok::l_square) && handleCppAttributes()) {
    }
  } else if (Style.isVerilog()) {
    if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
      parseForOrWhileLoop(/*HasParens=*/false);
      return;
    }
    if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
                           Keywords.kw_assume, Keywords.kw_cover)) {
      parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
      return;
    }

    // Skip things that can exist before keywords like 'if' and 'case'.
    while (true) {
      if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
                             Keywords.kw_unique0)) {
        nextToken();
      } else if (FormatTok->is(tok::l_paren) &&
                 Tokens->peekNextToken()->is(tok::star)) {
        parseParens();
      } else {
        break;
      }
    }
  }

  // Tokens that only make sense at the beginning of a line.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setFinalizedType(TT_InlineASMBrace);
      nextToken();
      // Everything up to the closing brace is finalized.
      while (FormatTok && !eof()) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setFinalizedType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
        Style.isCSharp()) {
      nextToken();
    } else {
      parseAccessSpecifier();
    }
    return;
  case tok::kw_if: {
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    FormatToken *Tok = parseIfThenElse(IfKind);
    if (IfLeftBrace)
      *IfLeftBrace = Tok;
    return;
  }
  case tok::kw_for:
  case tok::kw_while:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    parseDoWhile();
    if (HasDoWhile)
      *HasDoWhile = true;
    return;
  case tok::kw_switch:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'switch: string' field declaration.
      break;
    }
    parseSwitch();
    return;
  case tok::kw_default:
    // In Verilog default along with other labels are handled in the next loop.
    if (Style.isVerilog())
      break;
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'default: string' field declaration.
      break;
    }
    nextToken();
    if (FormatTok->is(tok::colon)) {
      FormatTok->setFinalizedType(TT_CaseLabelColon);
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    // Proto: there are no switch/case statements.
    if (Style.Language == FormatStyle::LK_Proto) {
      nextToken();
      return;
    }
    if (Style.isVerilog()) {
      parseBlock();
      addUnwrappedLine();
      return;
    }
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'case: string' field declaration.
      nextToken();
      break;
    }
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    if (Style.isVerilog()) {
      // In Verilog an extern module declaration looks like the start of a
      // module. But there is no body and endmodule. So we handle it separately.
      if (Keywords.isVerilogHierarchy(*FormatTok)) {
        parseVerilogHierarchyHeader();
        return;
      }
    } else if (FormatTok->is(tok::string_literal)) {
      // `extern "..."`, possibly followed by a braced block.
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock)
          addUnwrappedLine();
        // Either we indent or for backwards compatibility we follow the
        // AfterExternBlock style.
        unsigned AddLevels =
            (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
                    (Style.BraceWrapping.AfterExternBlock &&
                     Style.IndentExternBlock ==
                         FormatStyle::IEBS_AfterExternBlock)
                ? 1u
                : 0u;
        parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.isJavaScript()) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (Style.isCpp()) {
      nextToken();
      if (FormatTok->is(tok::kw_namespace)) {
        parseNamespace();
        return;
      }
      if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
        return;
    }
    break;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.isJavaScript()) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        // `import [public] "file";`
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (FormatTok->isNot(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      if (Style.isCpp() && parseModuleImport())
        return;
    }
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      // `signals:` / `slots:` (and their Q_ variants) end the line after the
      // colon.
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In Verilog labels can be any expression, so we don't do them here.
    // JS doesn't have macros, and within classes colons indicate fields, not
    // labels.
    // TableGen doesn't have labels.
    if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() &&
        Tokens->peekNextToken()->is(tok::colon) && !Line->MustBeDeclaration) {
      nextToken();
      Line->Tokens.begin()->Tok->MustBreakBefore = true;
      FormatTok->setFinalizedType(TT_GotoLabelColon);
      parseLabel(!Style.IndentGotoLabels);
      if (HasLabel)
        *HasLabel = true;
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }

  const bool InRequiresExpression =
      OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
  // Main token loop: each case consumes at least the current token, so the
  // loop advances until a case returns or eof() is reached.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always) {
            addUnwrappedLine();
          }
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->is(tok::l_paren)) {
          // Skip synchronization object
          parseParens();
        }
        if (FormatTok->is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always) {
            addUnwrappedLine();
          }
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_requires: {
      if (Style.isCpp()) {
        bool ParsedClause = parseRequires();
        if (ParsedClause)
          return;
      } else {
        nextToken();
      }
      break;
    }
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies to C++ and Verilog.
      if (!Style.isCpp() && !Style.isVerilog()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM)) {
        parseEnum();
      }
      break;
    case tok::kw_class:
      if (Style.isVerilog()) {
        parseBlock();
        addUnwrappedLine();
        return;
      }
      if (Style.isTableGen()) {
        // Do nothing special. In this case the l_brace becomes FunctionLBrace.
        // This is same as def and so on.
        nextToken();
        break;
      }
      [[fallthrough]];
    case tok::kw_struct:
    case tok::kw_union:
      if (parseStructLike())
        return;
      break;
    case tok::kw_decltype:
      nextToken();
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
        assert(FormatTok->Previous);
        // Remember `decltype(auto)` on the line.
        if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto,
                                              tok::l_paren)) {
          Line->SeenDecltypeAuto = true;
        }
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class)) {
        nextToken();
      }
      if (Style.isJavaScript() && FormatTok &&
          FormatTok->Tok.getIdentifierInfo()) {
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      }
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      addUnwrappedLine();
      return;
    case tok::l_paren: {
      parseParens();
      // Break the unwrapped line if a K&R C function definition has a parameter
      // declaration.
      if (OpeningBrace || !Style.isCpp() || !Previous || eof())
        break;
      if (isC78ParameterDecl(FormatTok,
                             Tokens->peekNextToken(/*SkipComment=*/true),
                             Previous)) {
        addUnwrappedLine();
        return;
      }
      break;
    }
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      nextToken();
      // Block return type.
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier()) {
        nextToken();
        // Return types: pointers are ok too.
        while (FormatTok->is(tok::star))
          nextToken();
      }
      // Block argument list.
      if (FormatTok->is(tok::l_paren))
        parseParens();
      // Block body.
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (InRequiresExpression)
        FormatTok->setFinalizedType(TT_BracedListLBrace);
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.Language == FormatStyle::LK_Java &&
            Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
          // If necessary, we could set the type to something different than
          // TT_FunctionLBrace.
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always) {
            addUnwrappedLine();
          }
        } else if (Style.BraceWrapping.AfterFunction) {
          addUnwrappedLine();
        }
        FormatTok->setFinalizedType(TT_FunctionLBrace);
        parseBlock();
        IsDecltypeAutoFunction = false;
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 ||
           (TokenCount == 1 &&
            Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.isJavaScript()) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (!mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (Style.isVerilog()) {
        if (FormatTok->is(Keywords.kw_table)) {
          parseVerilogTable();
          return;
        }
        if (Keywords.isVerilogBegin(*FormatTok) ||
            Keywords.isVerilogHierarchy(*FormatTok)) {
          parseBlock();
          addUnwrappedLine();
          return;
        }
      }

      if (!Style.isCpp() && FormatTok->is(Keywords.kw_interface)) {
        if (parseStructLike())
          return;
        break;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;

      FormatToken *PreviousToken = FormatTok;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.isJavaScript())
        break;

      // True if the line holds exactly one token once leading comments (and,
      // in Verilog, leading '#' tokens) are skipped.
      auto OneTokenSoFar = [&]() {
        auto I = Line->Tokens.begin(), E = Line->Tokens.end();
        while (I != E && I->Tok->is(tok::comment))
          ++I;
        if (Style.isVerilog())
          while (I != E && I->Tok->is(tok::hash))
            ++I;
        return I != E && (++I == E);
      };
      if (OneTokenSoFar()) {
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: an all-uppercase name that is either long enough or
        // function-like, followed by a line break and a token that can start
        // a line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          if (PreviousToken->isNot(TT_UntouchableMacroFunc))
            PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      if ((Style.isJavaScript() || Style.isCSharp()) &&
          FormatTok->is(TT_FatArrow)) {
        tryToParseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->setBlockKind(BK_BracedInit);
        // TableGen's defset statement has syntax of the form,
        // `defset <type> <name> = { <statement>... }`
        if (Style.isTableGen() &&
            Line->Tokens.begin()->Tok->is(Keywords.kw_defset)) {
          FormatTok->setFinalizedType(TT_FunctionLBrace);
          parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                     /*MunchSemi=*/false);
          addUnwrappedLine();
          break;
        }
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->is(tok::less)) {
        nextToken();
        parseBracedList(/*IsAngleBracket=*/true);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    case tok::kw_case:
      // Proto: there are no switch/case statements.
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        return;
      }
      // In Verilog switch is called case.
      if (Style.isVerilog()) {
        parseBlock();
        addUnwrappedLine();
        return;
      }
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // 'case: string' field declaration.
        nextToken();
        break;
      }
      parseCaseLabel();
      break;
    case tok::kw_default:
      nextToken();
      if (Style.isVerilog()) {
        if (FormatTok->is(tok::colon)) {
          // The label will be handled in the next iteration.
          break;
        }
        if (FormatTok->is(Keywords.kw_clocking)) {
          // A default clocking block.
          parseBlock();
          addUnwrappedLine();
          return;
        }
        parseVerilogCaseLabel();
        return;
      }
      break;
    case tok::colon:
      nextToken();
      if (Style.isVerilog()) {
        parseVerilogCaseLabel();
        return;
      }
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
2123 if (FormatTok->Previous->isNot(tok::identifier)) 2124 return false; 2125 2126 // See if we are inside a property accessor. 2127 // 2128 // Record the current tokenPosition so that we can advance and 2129 // reset the current token. `Next` is not set yet so we need 2130 // another way to advance along the token stream. 2131 unsigned int StoredPosition = Tokens->getPosition(); 2132 FormatToken *Tok = Tokens->getNextToken(); 2133 2134 // A trivial property accessor is of the form: 2135 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] } 2136 // Track these as they do not require line breaks to be introduced. 2137 bool HasSpecialAccessor = false; 2138 bool IsTrivialPropertyAccessor = true; 2139 while (!eof()) { 2140 if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private, 2141 tok::kw_protected, Keywords.kw_internal, Keywords.kw_get, 2142 Keywords.kw_init, Keywords.kw_set)) { 2143 if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set)) 2144 HasSpecialAccessor = true; 2145 Tok = Tokens->getNextToken(); 2146 continue; 2147 } 2148 if (Tok->isNot(tok::r_brace)) 2149 IsTrivialPropertyAccessor = false; 2150 break; 2151 } 2152 2153 if (!HasSpecialAccessor) { 2154 Tokens->setPosition(StoredPosition); 2155 return false; 2156 } 2157 2158 // Try to parse the property accessor: 2159 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties 2160 Tokens->setPosition(StoredPosition); 2161 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction) 2162 addUnwrappedLine(); 2163 nextToken(); 2164 do { 2165 switch (FormatTok->Tok.getKind()) { 2166 case tok::r_brace: 2167 nextToken(); 2168 if (FormatTok->is(tok::equal)) { 2169 while (!eof() && FormatTok->isNot(tok::semi)) 2170 nextToken(); 2171 nextToken(); 2172 } 2173 addUnwrappedLine(); 2174 return true; 2175 case tok::l_brace: 2176 ++Line->Level; 2177 parseBlock(/*MustBeDeclaration=*/true); 2178 addUnwrappedLine(); 2179 --Line->Level; 2180 break; 2181 
case tok::equal: 2182 if (FormatTok->is(TT_FatArrow)) { 2183 ++Line->Level; 2184 do { 2185 nextToken(); 2186 } while (!eof() && FormatTok->isNot(tok::semi)); 2187 nextToken(); 2188 addUnwrappedLine(); 2189 --Line->Level; 2190 break; 2191 } 2192 nextToken(); 2193 break; 2194 default: 2195 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init, 2196 Keywords.kw_set) && 2197 !IsTrivialPropertyAccessor) { 2198 // Non-trivial get/set needs to be on its own line. 2199 addUnwrappedLine(); 2200 } 2201 nextToken(); 2202 } 2203 } while (!eof()); 2204 2205 // Unreachable for well-formed code (paired '{' and '}'). 2206 return true; 2207 } 2208 2209 bool UnwrappedLineParser::tryToParseLambda() { 2210 assert(FormatTok->is(tok::l_square)); 2211 if (!Style.isCpp()) { 2212 nextToken(); 2213 return false; 2214 } 2215 FormatToken &LSquare = *FormatTok; 2216 if (!tryToParseLambdaIntroducer()) 2217 return false; 2218 2219 bool SeenArrow = false; 2220 bool InTemplateParameterList = false; 2221 2222 while (FormatTok->isNot(tok::l_brace)) { 2223 if (FormatTok->isSimpleTypeSpecifier()) { 2224 nextToken(); 2225 continue; 2226 } 2227 switch (FormatTok->Tok.getKind()) { 2228 case tok::l_brace: 2229 break; 2230 case tok::l_paren: 2231 parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference); 2232 break; 2233 case tok::l_square: 2234 parseSquare(); 2235 break; 2236 case tok::less: 2237 assert(FormatTok->Previous); 2238 if (FormatTok->Previous->is(tok::r_square)) 2239 InTemplateParameterList = true; 2240 nextToken(); 2241 break; 2242 case tok::kw_auto: 2243 case tok::kw_class: 2244 case tok::kw_template: 2245 case tok::kw_typename: 2246 case tok::amp: 2247 case tok::star: 2248 case tok::kw_const: 2249 case tok::kw_constexpr: 2250 case tok::kw_consteval: 2251 case tok::comma: 2252 case tok::greater: 2253 case tok::identifier: 2254 case tok::numeric_constant: 2255 case tok::coloncolon: 2256 case tok::kw_mutable: 2257 case tok::kw_noexcept: 2258 case tok::kw_static: 2259 nextToken(); 2260 break; 
2261 // Specialization of a template with an integer parameter can contain 2262 // arithmetic, logical, comparison and ternary operators. 2263 // 2264 // FIXME: This also accepts sequences of operators that are not in the scope 2265 // of a template argument list. 2266 // 2267 // In a C++ lambda a template type can only occur after an arrow. We use 2268 // this as an heuristic to distinguish between Objective-C expressions 2269 // followed by an `a->b` expression, such as: 2270 // ([obj func:arg] + a->b) 2271 // Otherwise the code below would parse as a lambda. 2272 case tok::plus: 2273 case tok::minus: 2274 case tok::exclaim: 2275 case tok::tilde: 2276 case tok::slash: 2277 case tok::percent: 2278 case tok::lessless: 2279 case tok::pipe: 2280 case tok::pipepipe: 2281 case tok::ampamp: 2282 case tok::caret: 2283 case tok::equalequal: 2284 case tok::exclaimequal: 2285 case tok::greaterequal: 2286 case tok::lessequal: 2287 case tok::question: 2288 case tok::colon: 2289 case tok::ellipsis: 2290 case tok::kw_true: 2291 case tok::kw_false: 2292 if (SeenArrow || InTemplateParameterList) { 2293 nextToken(); 2294 break; 2295 } 2296 return true; 2297 case tok::arrow: 2298 // This might or might not actually be a lambda arrow (this could be an 2299 // ObjC method invocation followed by a dereferencing arrow). We might 2300 // reset this back to TT_Unknown in TokenAnnotator. 
2301 FormatTok->setFinalizedType(TT_TrailingReturnArrow); 2302 SeenArrow = true; 2303 nextToken(); 2304 break; 2305 case tok::kw_requires: { 2306 auto *RequiresToken = FormatTok; 2307 nextToken(); 2308 parseRequiresClause(RequiresToken); 2309 break; 2310 } 2311 case tok::equal: 2312 if (!InTemplateParameterList) 2313 return true; 2314 nextToken(); 2315 break; 2316 default: 2317 return true; 2318 } 2319 } 2320 2321 FormatTok->setFinalizedType(TT_LambdaLBrace); 2322 LSquare.setFinalizedType(TT_LambdaLSquare); 2323 2324 NestedLambdas.push_back(Line->SeenDecltypeAuto); 2325 parseChildBlock(); 2326 assert(!NestedLambdas.empty()); 2327 NestedLambdas.pop_back(); 2328 2329 return true; 2330 } 2331 2332 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 2333 const FormatToken *Previous = FormatTok->Previous; 2334 const FormatToken *LeftSquare = FormatTok; 2335 nextToken(); 2336 if ((Previous && ((Previous->Tok.getIdentifierInfo() && 2337 !Previous->isOneOf(tok::kw_return, tok::kw_co_await, 2338 tok::kw_co_yield, tok::kw_co_return)) || 2339 Previous->closesScope())) || 2340 LeftSquare->isCppStructuredBinding(Style)) { 2341 return false; 2342 } 2343 if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind())) 2344 return false; 2345 if (FormatTok->is(tok::r_square)) { 2346 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true); 2347 if (Next->is(tok::greater)) 2348 return false; 2349 } 2350 parseSquare(/*LambdaIntroducer=*/true); 2351 return true; 2352 } 2353 2354 void UnwrappedLineParser::tryToParseJSFunction() { 2355 assert(FormatTok->is(Keywords.kw_function)); 2356 if (FormatTok->is(Keywords.kw_async)) 2357 nextToken(); 2358 // Consume "function". 2359 nextToken(); 2360 2361 // Consume * (generator function). Treat it like C++'s overloaded operators. 2362 if (FormatTok->is(tok::star)) { 2363 FormatTok->setFinalizedType(TT_OverloadedOperator); 2364 nextToken(); 2365 } 2366 2367 // Consume function name. 
2368 if (FormatTok->is(tok::identifier)) 2369 nextToken(); 2370 2371 if (FormatTok->isNot(tok::l_paren)) 2372 return; 2373 2374 // Parse formal parameter list. 2375 parseParens(); 2376 2377 if (FormatTok->is(tok::colon)) { 2378 // Parse a type definition. 2379 nextToken(); 2380 2381 // Eat the type declaration. For braced inline object types, balance braces, 2382 // otherwise just parse until finding an l_brace for the function body. 2383 if (FormatTok->is(tok::l_brace)) 2384 tryToParseBracedList(); 2385 else 2386 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 2387 nextToken(); 2388 } 2389 2390 if (FormatTok->is(tok::semi)) 2391 return; 2392 2393 parseChildBlock(); 2394 } 2395 2396 bool UnwrappedLineParser::tryToParseBracedList() { 2397 if (FormatTok->is(BK_Unknown)) 2398 calculateBraceTypes(); 2399 assert(FormatTok->isNot(BK_Unknown)); 2400 if (FormatTok->is(BK_Block)) 2401 return false; 2402 nextToken(); 2403 parseBracedList(); 2404 return true; 2405 } 2406 2407 bool UnwrappedLineParser::tryToParseChildBlock() { 2408 assert(Style.isJavaScript() || Style.isCSharp()); 2409 assert(FormatTok->is(TT_FatArrow)); 2410 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow. 2411 // They always start an expression or a child block if followed by a curly 2412 // brace. 2413 nextToken(); 2414 if (FormatTok->isNot(tok::l_brace)) 2415 return false; 2416 parseChildBlock(); 2417 return true; 2418 } 2419 2420 bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) { 2421 bool HasError = false; 2422 2423 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 2424 // replace this by using parseAssignmentExpression() inside. 
2425 do { 2426 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) && 2427 tryToParseChildBlock()) { 2428 continue; 2429 } 2430 if (Style.isJavaScript()) { 2431 if (FormatTok->is(Keywords.kw_function)) { 2432 tryToParseJSFunction(); 2433 continue; 2434 } 2435 if (FormatTok->is(tok::l_brace)) { 2436 // Could be a method inside of a braced list `{a() { return 1; }}`. 2437 if (tryToParseBracedList()) 2438 continue; 2439 parseChildBlock(); 2440 } 2441 } 2442 if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) { 2443 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2444 addUnwrappedLine(); 2445 nextToken(); 2446 return !HasError; 2447 } 2448 switch (FormatTok->Tok.getKind()) { 2449 case tok::l_square: 2450 if (Style.isCSharp()) 2451 parseSquare(); 2452 else 2453 tryToParseLambda(); 2454 break; 2455 case tok::l_paren: 2456 parseParens(); 2457 // JavaScript can just have free standing methods and getters/setters in 2458 // object literals. Detect them by a "{" following ")". 2459 if (Style.isJavaScript()) { 2460 if (FormatTok->is(tok::l_brace)) 2461 parseChildBlock(); 2462 break; 2463 } 2464 break; 2465 case tok::l_brace: 2466 // Assume there are no blocks inside a braced init list apart 2467 // from the ones we explicitly parse out (like lambdas). 2468 FormatTok->setBlockKind(BK_BracedInit); 2469 nextToken(); 2470 parseBracedList(); 2471 break; 2472 case tok::less: 2473 nextToken(); 2474 if (IsAngleBracket) 2475 parseBracedList(/*IsAngleBracket=*/true); 2476 break; 2477 case tok::semi: 2478 // JavaScript (or more precisely TypeScript) can have semicolons in braced 2479 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 2480 // used for error recovery if we have otherwise determined that this is 2481 // a braced list. 
2482 if (Style.isJavaScript()) { 2483 nextToken(); 2484 break; 2485 } 2486 HasError = true; 2487 if (!IsEnum) 2488 return false; 2489 nextToken(); 2490 break; 2491 case tok::comma: 2492 nextToken(); 2493 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2494 addUnwrappedLine(); 2495 break; 2496 default: 2497 nextToken(); 2498 break; 2499 } 2500 } while (!eof()); 2501 return false; 2502 } 2503 2504 /// \brief Parses a pair of parentheses (and everything between them). 2505 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all 2506 /// double ampersands. This applies for all nested scopes as well. 2507 /// 2508 /// Returns whether there is a `=` token between the parentheses. 2509 bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { 2510 assert(FormatTok->is(tok::l_paren) && "'(' expected."); 2511 auto *LeftParen = FormatTok; 2512 bool SeenEqual = false; 2513 const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace); 2514 nextToken(); 2515 do { 2516 switch (FormatTok->Tok.getKind()) { 2517 case tok::l_paren: 2518 if (parseParens(AmpAmpTokenType)) 2519 SeenEqual = true; 2520 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 2521 parseChildBlock(); 2522 break; 2523 case tok::r_paren: 2524 if (!MightBeStmtExpr && !Line->InMacroBody && 2525 Style.RemoveParentheses > FormatStyle::RPS_Leave) { 2526 const auto *Prev = LeftParen->Previous; 2527 const auto *Next = Tokens->peekNextToken(); 2528 const bool DoubleParens = 2529 Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren); 2530 const auto *PrevPrev = Prev ? 
Prev->getPreviousNonComment() : nullptr; 2531 const bool Blacklisted = 2532 PrevPrev && 2533 (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) || 2534 (SeenEqual && 2535 (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) || 2536 PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if)))); 2537 const bool ReturnParens = 2538 Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement && 2539 ((NestedLambdas.empty() && !IsDecltypeAutoFunction) || 2540 (!NestedLambdas.empty() && !NestedLambdas.back())) && 2541 Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next && 2542 Next->is(tok::semi); 2543 if ((DoubleParens && !Blacklisted) || ReturnParens) { 2544 LeftParen->Optional = true; 2545 FormatTok->Optional = true; 2546 } 2547 } 2548 nextToken(); 2549 return SeenEqual; 2550 case tok::r_brace: 2551 // A "}" inside parenthesis is an error if there wasn't a matching "{". 2552 return SeenEqual; 2553 case tok::l_square: 2554 tryToParseLambda(); 2555 break; 2556 case tok::l_brace: 2557 if (!tryToParseBracedList()) 2558 parseChildBlock(); 2559 break; 2560 case tok::at: 2561 nextToken(); 2562 if (FormatTok->is(tok::l_brace)) { 2563 nextToken(); 2564 parseBracedList(); 2565 } 2566 break; 2567 case tok::equal: 2568 SeenEqual = true; 2569 if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) 2570 tryToParseChildBlock(); 2571 else 2572 nextToken(); 2573 break; 2574 case tok::kw_class: 2575 if (Style.isJavaScript()) 2576 parseRecord(/*ParseAsExpr=*/true); 2577 else 2578 nextToken(); 2579 break; 2580 case tok::identifier: 2581 if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function))) 2582 tryToParseJSFunction(); 2583 else 2584 nextToken(); 2585 break; 2586 case tok::kw_requires: { 2587 auto RequiresToken = FormatTok; 2588 nextToken(); 2589 parseRequiresExpression(RequiresToken); 2590 break; 2591 } 2592 case tok::ampamp: 2593 if (AmpAmpTokenType != TT_Unknown) 2594 FormatTok->setFinalizedType(AmpAmpTokenType); 2595 [[fallthrough]]; 2596 default: 2597 nextToken(); 
2598 break; 2599 } 2600 } while (!eof()); 2601 return SeenEqual; 2602 } 2603 2604 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 2605 if (!LambdaIntroducer) { 2606 assert(FormatTok->is(tok::l_square) && "'[' expected."); 2607 if (tryToParseLambda()) 2608 return; 2609 } 2610 do { 2611 switch (FormatTok->Tok.getKind()) { 2612 case tok::l_paren: 2613 parseParens(); 2614 break; 2615 case tok::r_square: 2616 nextToken(); 2617 return; 2618 case tok::r_brace: 2619 // A "}" inside parenthesis is an error if there wasn't a matching "{". 2620 return; 2621 case tok::l_square: 2622 parseSquare(); 2623 break; 2624 case tok::l_brace: { 2625 if (!tryToParseBracedList()) 2626 parseChildBlock(); 2627 break; 2628 } 2629 case tok::at: 2630 nextToken(); 2631 if (FormatTok->is(tok::l_brace)) { 2632 nextToken(); 2633 parseBracedList(); 2634 } 2635 break; 2636 default: 2637 nextToken(); 2638 break; 2639 } 2640 } while (!eof()); 2641 } 2642 2643 void UnwrappedLineParser::keepAncestorBraces() { 2644 if (!Style.RemoveBracesLLVM) 2645 return; 2646 2647 const int MaxNestingLevels = 2; 2648 const int Size = NestedTooDeep.size(); 2649 if (Size >= MaxNestingLevels) 2650 NestedTooDeep[Size - MaxNestingLevels] = true; 2651 NestedTooDeep.push_back(false); 2652 } 2653 2654 static FormatToken *getLastNonComment(const UnwrappedLine &Line) { 2655 for (const auto &Token : llvm::reverse(Line.Tokens)) 2656 if (Token.Tok->isNot(tok::comment)) 2657 return Token.Tok; 2658 2659 return nullptr; 2660 } 2661 2662 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) { 2663 FormatToken *Tok = nullptr; 2664 2665 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() && 2666 PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) { 2667 Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never 2668 ? 
getLastNonComment(*Line) 2669 : Line->Tokens.back().Tok; 2670 assert(Tok); 2671 if (Tok->BraceCount < 0) { 2672 assert(Tok->BraceCount == -1); 2673 Tok = nullptr; 2674 } else { 2675 Tok->BraceCount = -1; 2676 } 2677 } 2678 2679 addUnwrappedLine(); 2680 ++Line->Level; 2681 parseStructuralElement(); 2682 2683 if (Tok) { 2684 assert(!Line->InPPDirective); 2685 Tok = nullptr; 2686 for (const auto &L : llvm::reverse(*CurrentLines)) { 2687 if (!L.InPPDirective && getLastNonComment(L)) { 2688 Tok = L.Tokens.back().Tok; 2689 break; 2690 } 2691 } 2692 assert(Tok); 2693 ++Tok->BraceCount; 2694 } 2695 2696 if (CheckEOF && eof()) 2697 addUnwrappedLine(); 2698 2699 --Line->Level; 2700 } 2701 2702 static void markOptionalBraces(FormatToken *LeftBrace) { 2703 if (!LeftBrace) 2704 return; 2705 2706 assert(LeftBrace->is(tok::l_brace)); 2707 2708 FormatToken *RightBrace = LeftBrace->MatchingParen; 2709 if (!RightBrace) { 2710 assert(!LeftBrace->Optional); 2711 return; 2712 } 2713 2714 assert(RightBrace->is(tok::r_brace)); 2715 assert(RightBrace->MatchingParen == LeftBrace); 2716 assert(LeftBrace->Optional == RightBrace->Optional); 2717 2718 LeftBrace->Optional = true; 2719 RightBrace->Optional = true; 2720 } 2721 2722 void UnwrappedLineParser::handleAttributes() { 2723 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`. 2724 if (FormatTok->isAttribute()) 2725 nextToken(); 2726 else if (FormatTok->is(tok::l_square)) 2727 handleCppAttributes(); 2728 } 2729 2730 bool UnwrappedLineParser::handleCppAttributes() { 2731 // Handle [[likely]] / [[unlikely]] attributes. 2732 assert(FormatTok->is(tok::l_square)); 2733 if (!tryToParseSimpleAttribute()) 2734 return false; 2735 parseSquare(); 2736 return true; 2737 } 2738 2739 /// Returns whether \c Tok begins a block. 2740 bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const { 2741 // FIXME: rename the function or make 2742 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work. 2743 return Style.isVerilog() ? 
Keywords.isVerilogBegin(Tok) 2744 : Tok.is(tok::l_brace); 2745 } 2746 2747 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind, 2748 bool KeepBraces, 2749 bool IsVerilogAssert) { 2750 assert((FormatTok->is(tok::kw_if) || 2751 (Style.isVerilog() && 2752 FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert, 2753 Keywords.kw_assume, Keywords.kw_cover))) && 2754 "'if' expected"); 2755 nextToken(); 2756 2757 if (IsVerilogAssert) { 2758 // Handle `assert #0` and `assert final`. 2759 if (FormatTok->is(Keywords.kw_verilogHash)) { 2760 nextToken(); 2761 if (FormatTok->is(tok::numeric_constant)) 2762 nextToken(); 2763 } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property, 2764 Keywords.kw_sequence)) { 2765 nextToken(); 2766 } 2767 } 2768 2769 // TableGen's if statement has the form of `if <cond> then { ... }`. 2770 if (Style.isTableGen()) { 2771 while (!eof() && FormatTok->isNot(Keywords.kw_then)) { 2772 // Simply skip until then. This range only contains a value. 2773 nextToken(); 2774 } 2775 } 2776 2777 // Handle `if !consteval`. 2778 if (FormatTok->is(tok::exclaim)) 2779 nextToken(); 2780 2781 bool KeepIfBraces = true; 2782 if (FormatTok->is(tok::kw_consteval)) { 2783 nextToken(); 2784 } else { 2785 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces; 2786 if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier)) 2787 nextToken(); 2788 if (FormatTok->is(tok::l_paren)) { 2789 FormatTok->setFinalizedType(TT_ConditionLParen); 2790 parseParens(); 2791 } 2792 } 2793 handleAttributes(); 2794 // The then action is optional in Verilog assert statements. 
2795 if (IsVerilogAssert && FormatTok->is(tok::semi)) { 2796 nextToken(); 2797 addUnwrappedLine(); 2798 return nullptr; 2799 } 2800 2801 bool NeedsUnwrappedLine = false; 2802 keepAncestorBraces(); 2803 2804 FormatToken *IfLeftBrace = nullptr; 2805 IfStmtKind IfBlockKind = IfStmtKind::NotIf; 2806 2807 if (isBlockBegin(*FormatTok)) { 2808 FormatTok->setFinalizedType(TT_ControlStatementLBrace); 2809 IfLeftBrace = FormatTok; 2810 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2811 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2812 /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind); 2813 setPreviousRBraceType(TT_ControlStatementRBrace); 2814 if (Style.BraceWrapping.BeforeElse) 2815 addUnwrappedLine(); 2816 else 2817 NeedsUnwrappedLine = true; 2818 } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) { 2819 addUnwrappedLine(); 2820 } else { 2821 parseUnbracedBody(); 2822 } 2823 2824 if (Style.RemoveBracesLLVM) { 2825 assert(!NestedTooDeep.empty()); 2826 KeepIfBraces = KeepIfBraces || 2827 (IfLeftBrace && !IfLeftBrace->MatchingParen) || 2828 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly || 2829 IfBlockKind == IfStmtKind::IfElseIf; 2830 } 2831 2832 bool KeepElseBraces = KeepIfBraces; 2833 FormatToken *ElseLeftBrace = nullptr; 2834 IfStmtKind Kind = IfStmtKind::IfOnly; 2835 2836 if (FormatTok->is(tok::kw_else)) { 2837 if (Style.RemoveBracesLLVM) { 2838 NestedTooDeep.back() = false; 2839 Kind = IfStmtKind::IfElse; 2840 } 2841 nextToken(); 2842 handleAttributes(); 2843 if (isBlockBegin(*FormatTok)) { 2844 const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if); 2845 FormatTok->setFinalizedType(TT_ElseLBrace); 2846 ElseLeftBrace = FormatTok; 2847 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2848 IfStmtKind ElseBlockKind = IfStmtKind::NotIf; 2849 FormatToken *IfLBrace = 2850 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2851 /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind); 2852 
setPreviousRBraceType(TT_ElseRBrace); 2853 if (FormatTok->is(tok::kw_else)) { 2854 KeepElseBraces = KeepElseBraces || 2855 ElseBlockKind == IfStmtKind::IfOnly || 2856 ElseBlockKind == IfStmtKind::IfElseIf; 2857 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) { 2858 KeepElseBraces = true; 2859 assert(ElseLeftBrace->MatchingParen); 2860 markOptionalBraces(ElseLeftBrace); 2861 } 2862 addUnwrappedLine(); 2863 } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) { 2864 const FormatToken *Previous = Tokens->getPreviousToken(); 2865 assert(Previous); 2866 const bool IsPrecededByComment = Previous->is(tok::comment); 2867 if (IsPrecededByComment) { 2868 addUnwrappedLine(); 2869 ++Line->Level; 2870 } 2871 bool TooDeep = true; 2872 if (Style.RemoveBracesLLVM) { 2873 Kind = IfStmtKind::IfElseIf; 2874 TooDeep = NestedTooDeep.pop_back_val(); 2875 } 2876 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces); 2877 if (Style.RemoveBracesLLVM) 2878 NestedTooDeep.push_back(TooDeep); 2879 if (IsPrecededByComment) 2880 --Line->Level; 2881 } else { 2882 parseUnbracedBody(/*CheckEOF=*/true); 2883 } 2884 } else { 2885 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse; 2886 if (NeedsUnwrappedLine) 2887 addUnwrappedLine(); 2888 } 2889 2890 if (!Style.RemoveBracesLLVM) 2891 return nullptr; 2892 2893 assert(!NestedTooDeep.empty()); 2894 KeepElseBraces = KeepElseBraces || 2895 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || 2896 NestedTooDeep.back(); 2897 2898 NestedTooDeep.pop_back(); 2899 2900 if (!KeepIfBraces && !KeepElseBraces) { 2901 markOptionalBraces(IfLeftBrace); 2902 markOptionalBraces(ElseLeftBrace); 2903 } else if (IfLeftBrace) { 2904 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen; 2905 if (IfRightBrace) { 2906 assert(IfRightBrace->MatchingParen == IfLeftBrace); 2907 assert(!IfLeftBrace->Optional); 2908 assert(!IfRightBrace->Optional); 2909 IfLeftBrace->MatchingParen = nullptr; 2910 IfRightBrace->MatchingParen = nullptr; 2911 
} 2912 } 2913 2914 if (IfKind) 2915 *IfKind = Kind; 2916 2917 return IfLeftBrace; 2918 } 2919 2920 void UnwrappedLineParser::parseTryCatch() { 2921 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 2922 nextToken(); 2923 bool NeedsUnwrappedLine = false; 2924 if (FormatTok->is(tok::colon)) { 2925 // We are in a function try block, what comes is an initializer list. 2926 nextToken(); 2927 2928 // In case identifiers were removed by clang-tidy, what might follow is 2929 // multiple commas in sequence - before the first identifier. 2930 while (FormatTok->is(tok::comma)) 2931 nextToken(); 2932 2933 while (FormatTok->is(tok::identifier)) { 2934 nextToken(); 2935 if (FormatTok->is(tok::l_paren)) 2936 parseParens(); 2937 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) && 2938 FormatTok->is(tok::l_brace)) { 2939 do { 2940 nextToken(); 2941 } while (FormatTok->isNot(tok::r_brace)); 2942 nextToken(); 2943 } 2944 2945 // In case identifiers were removed by clang-tidy, what might follow is 2946 // multiple commas in sequence - after the first identifier. 2947 while (FormatTok->is(tok::comma)) 2948 nextToken(); 2949 } 2950 } 2951 // Parse try with resource. 2952 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) 2953 parseParens(); 2954 2955 keepAncestorBraces(); 2956 2957 if (FormatTok->is(tok::l_brace)) { 2958 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2959 parseBlock(); 2960 if (Style.BraceWrapping.BeforeCatch) 2961 addUnwrappedLine(); 2962 else 2963 NeedsUnwrappedLine = true; 2964 } else if (FormatTok->isNot(tok::kw_catch)) { 2965 // The C++ standard requires a compound-statement after a try. 2966 // If there's none, we try to assume there's a structuralElement 2967 // and try to continue. 
2968 addUnwrappedLine(); 2969 ++Line->Level; 2970 parseStructuralElement(); 2971 --Line->Level; 2972 } 2973 while (true) { 2974 if (FormatTok->is(tok::at)) 2975 nextToken(); 2976 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 2977 tok::kw___finally) || 2978 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 2979 FormatTok->is(Keywords.kw_finally)) || 2980 (FormatTok->isObjCAtKeyword(tok::objc_catch) || 2981 FormatTok->isObjCAtKeyword(tok::objc_finally)))) { 2982 break; 2983 } 2984 nextToken(); 2985 while (FormatTok->isNot(tok::l_brace)) { 2986 if (FormatTok->is(tok::l_paren)) { 2987 parseParens(); 2988 continue; 2989 } 2990 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) { 2991 if (Style.RemoveBracesLLVM) 2992 NestedTooDeep.pop_back(); 2993 return; 2994 } 2995 nextToken(); 2996 } 2997 NeedsUnwrappedLine = false; 2998 Line->MustBeDeclaration = false; 2999 CompoundStatementIndenter Indenter(this, Style, Line->Level); 3000 parseBlock(); 3001 if (Style.BraceWrapping.BeforeCatch) 3002 addUnwrappedLine(); 3003 else 3004 NeedsUnwrappedLine = true; 3005 } 3006 3007 if (Style.RemoveBracesLLVM) 3008 NestedTooDeep.pop_back(); 3009 3010 if (NeedsUnwrappedLine) 3011 addUnwrappedLine(); 3012 } 3013 3014 void UnwrappedLineParser::parseNamespace() { 3015 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) && 3016 "'namespace' expected"); 3017 3018 const FormatToken &InitialToken = *FormatTok; 3019 nextToken(); 3020 if (InitialToken.is(TT_NamespaceMacro)) { 3021 parseParens(); 3022 } else { 3023 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline, 3024 tok::l_square, tok::period, tok::l_paren) || 3025 (Style.isCSharp() && FormatTok->is(tok::kw_union))) { 3026 if (FormatTok->is(tok::l_square)) 3027 parseSquare(); 3028 else if (FormatTok->is(tok::l_paren)) 3029 parseParens(); 3030 else 3031 nextToken(); 3032 } 3033 } 3034 if (FormatTok->is(tok::l_brace)) { 3035 
FormatTok->setFinalizedType(TT_NamespaceLBrace); 3036 3037 if (ShouldBreakBeforeBrace(Style, InitialToken)) 3038 addUnwrappedLine(); 3039 3040 unsigned AddLevels = 3041 Style.NamespaceIndentation == FormatStyle::NI_All || 3042 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 3043 DeclarationScopeStack.size() > 1) 3044 ? 1u 3045 : 0u; 3046 bool ManageWhitesmithsBraces = 3047 AddLevels == 0u && 3048 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 3049 3050 // If we're in Whitesmiths mode, indent the brace if we're not indenting 3051 // the whole block. 3052 if (ManageWhitesmithsBraces) 3053 ++Line->Level; 3054 3055 // Munch the semicolon after a namespace. This is more common than one would 3056 // think. Putting the semicolon into its own line is very ugly. 3057 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true, 3058 /*KeepBraces=*/true, /*IfKind=*/nullptr, 3059 ManageWhitesmithsBraces); 3060 3061 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep); 3062 3063 if (ManageWhitesmithsBraces) 3064 --Line->Level; 3065 } 3066 // FIXME: Add error handling. 3067 } 3068 3069 void UnwrappedLineParser::parseNew() { 3070 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 3071 nextToken(); 3072 3073 if (Style.isCSharp()) { 3074 do { 3075 // Handle constructor invocation, e.g. `new(field: value)`. 3076 if (FormatTok->is(tok::l_paren)) 3077 parseParens(); 3078 3079 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`. 3080 if (FormatTok->is(tok::l_brace)) 3081 parseBracedList(); 3082 3083 if (FormatTok->isOneOf(tok::semi, tok::comma)) 3084 return; 3085 3086 nextToken(); 3087 } while (!eof()); 3088 } 3089 3090 if (Style.Language != FormatStyle::LK_Java) 3091 return; 3092 3093 // In Java, we can parse everything up to the parens, which aren't optional. 3094 do { 3095 // There should not be a ;, { or } before the new's open paren. 
3096 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 3097 return; 3098 3099 // Consume the parens. 3100 if (FormatTok->is(tok::l_paren)) { 3101 parseParens(); 3102 3103 // If there is a class body of an anonymous class, consume that as child. 3104 if (FormatTok->is(tok::l_brace)) 3105 parseChildBlock(); 3106 return; 3107 } 3108 nextToken(); 3109 } while (!eof()); 3110 } 3111 3112 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) { 3113 keepAncestorBraces(); 3114 3115 if (isBlockBegin(*FormatTok)) { 3116 FormatTok->setFinalizedType(TT_ControlStatementLBrace); 3117 FormatToken *LeftBrace = FormatTok; 3118 CompoundStatementIndenter Indenter(this, Style, Line->Level); 3119 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 3120 /*MunchSemi=*/true, KeepBraces); 3121 setPreviousRBraceType(TT_ControlStatementRBrace); 3122 if (!KeepBraces) { 3123 assert(!NestedTooDeep.empty()); 3124 if (!NestedTooDeep.back()) 3125 markOptionalBraces(LeftBrace); 3126 } 3127 if (WrapRightBrace) 3128 addUnwrappedLine(); 3129 } else { 3130 parseUnbracedBody(); 3131 } 3132 3133 if (!KeepBraces) 3134 NestedTooDeep.pop_back(); 3135 } 3136 3137 void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) { 3138 assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) || 3139 (Style.isVerilog() && 3140 FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb, 3141 Keywords.kw_always_ff, Keywords.kw_always_latch, 3142 Keywords.kw_final, Keywords.kw_initial, 3143 Keywords.kw_foreach, Keywords.kw_forever, 3144 Keywords.kw_repeat))) && 3145 "'for', 'while' or foreach macro expected"); 3146 const bool KeepBraces = !Style.RemoveBracesLLVM || 3147 !FormatTok->isOneOf(tok::kw_for, tok::kw_while); 3148 3149 nextToken(); 3150 // JS' for await ( ... 
3151 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await)) 3152 nextToken(); 3153 if (Style.isCpp() && FormatTok->is(tok::kw_co_await)) 3154 nextToken(); 3155 if (HasParens && FormatTok->is(tok::l_paren)) { 3156 // The type is only set for Verilog basically because we were afraid to 3157 // change the existing behavior for loops. See the discussion on D121756 for 3158 // details. 3159 if (Style.isVerilog()) 3160 FormatTok->setFinalizedType(TT_ConditionLParen); 3161 parseParens(); 3162 } 3163 3164 if (Style.isVerilog()) { 3165 // Event control. 3166 parseVerilogSensitivityList(); 3167 } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) && 3168 Tokens->getPreviousToken()->is(tok::r_paren)) { 3169 nextToken(); 3170 addUnwrappedLine(); 3171 return; 3172 } 3173 3174 handleAttributes(); 3175 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true); 3176 } 3177 3178 void UnwrappedLineParser::parseDoWhile() { 3179 assert(FormatTok->is(tok::kw_do) && "'do' expected"); 3180 nextToken(); 3181 3182 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile); 3183 3184 // FIXME: Add error handling. 3185 if (FormatTok->isNot(tok::kw_while)) { 3186 addUnwrappedLine(); 3187 return; 3188 } 3189 3190 FormatTok->setFinalizedType(TT_DoWhile); 3191 3192 // If in Whitesmiths mode, the line with the while() needs to be indented 3193 // to the same level as the block. 
3194 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 3195 ++Line->Level; 3196 3197 nextToken(); 3198 parseStructuralElement(); 3199 } 3200 3201 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { 3202 nextToken(); 3203 unsigned OldLineLevel = Line->Level; 3204 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 3205 --Line->Level; 3206 if (LeftAlignLabel) 3207 Line->Level = 0; 3208 3209 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && 3210 FormatTok->is(tok::l_brace)) { 3211 3212 CompoundStatementIndenter Indenter(this, Line->Level, 3213 Style.BraceWrapping.AfterCaseLabel, 3214 Style.BraceWrapping.IndentBraces); 3215 parseBlock(); 3216 if (FormatTok->is(tok::kw_break)) { 3217 if (Style.BraceWrapping.AfterControlStatement == 3218 FormatStyle::BWACS_Always) { 3219 addUnwrappedLine(); 3220 if (!Style.IndentCaseBlocks && 3221 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 3222 ++Line->Level; 3223 } 3224 } 3225 parseStructuralElement(); 3226 } 3227 addUnwrappedLine(); 3228 } else { 3229 if (FormatTok->is(tok::semi)) 3230 nextToken(); 3231 addUnwrappedLine(); 3232 } 3233 Line->Level = OldLineLevel; 3234 if (FormatTok->isNot(tok::l_brace)) { 3235 parseStructuralElement(); 3236 addUnwrappedLine(); 3237 } 3238 } 3239 3240 void UnwrappedLineParser::parseCaseLabel() { 3241 assert(FormatTok->is(tok::kw_case) && "'case' expected"); 3242 3243 // FIXME: fix handling of complex expressions here. 
3244 do { 3245 nextToken(); 3246 if (FormatTok->is(tok::colon)) { 3247 FormatTok->setFinalizedType(TT_CaseLabelColon); 3248 break; 3249 } 3250 } while (!eof()); 3251 parseLabel(); 3252 } 3253 3254 void UnwrappedLineParser::parseSwitch() { 3255 assert(FormatTok->is(tok::kw_switch) && "'switch' expected"); 3256 nextToken(); 3257 if (FormatTok->is(tok::l_paren)) 3258 parseParens(); 3259 3260 keepAncestorBraces(); 3261 3262 if (FormatTok->is(tok::l_brace)) { 3263 CompoundStatementIndenter Indenter(this, Style, Line->Level); 3264 FormatTok->setFinalizedType(TT_ControlStatementLBrace); 3265 parseBlock(); 3266 setPreviousRBraceType(TT_ControlStatementRBrace); 3267 addUnwrappedLine(); 3268 } else { 3269 addUnwrappedLine(); 3270 ++Line->Level; 3271 parseStructuralElement(); 3272 --Line->Level; 3273 } 3274 3275 if (Style.RemoveBracesLLVM) 3276 NestedTooDeep.pop_back(); 3277 } 3278 3279 // Operators that can follow a C variable. 3280 static bool isCOperatorFollowingVar(tok::TokenKind kind) { 3281 switch (kind) { 3282 case tok::ampamp: 3283 case tok::ampequal: 3284 case tok::arrow: 3285 case tok::caret: 3286 case tok::caretequal: 3287 case tok::comma: 3288 case tok::ellipsis: 3289 case tok::equal: 3290 case tok::equalequal: 3291 case tok::exclaim: 3292 case tok::exclaimequal: 3293 case tok::greater: 3294 case tok::greaterequal: 3295 case tok::greatergreater: 3296 case tok::greatergreaterequal: 3297 case tok::l_paren: 3298 case tok::l_square: 3299 case tok::less: 3300 case tok::lessequal: 3301 case tok::lessless: 3302 case tok::lesslessequal: 3303 case tok::minus: 3304 case tok::minusequal: 3305 case tok::minusminus: 3306 case tok::percent: 3307 case tok::percentequal: 3308 case tok::period: 3309 case tok::pipe: 3310 case tok::pipeequal: 3311 case tok::pipepipe: 3312 case tok::plus: 3313 case tok::plusequal: 3314 case tok::plusplus: 3315 case tok::question: 3316 case tok::r_brace: 3317 case tok::r_paren: 3318 case tok::r_square: 3319 case tok::semi: 3320 case tok::slash: 3321 
case tok::slashequal: 3322 case tok::star: 3323 case tok::starequal: 3324 return true; 3325 default: 3326 return false; 3327 } 3328 } 3329 3330 void UnwrappedLineParser::parseAccessSpecifier() { 3331 FormatToken *AccessSpecifierCandidate = FormatTok; 3332 nextToken(); 3333 // Understand Qt's slots. 3334 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 3335 nextToken(); 3336 // Otherwise, we don't know what it is, and we'd better keep the next token. 3337 if (FormatTok->is(tok::colon)) { 3338 nextToken(); 3339 addUnwrappedLine(); 3340 } else if (FormatTok->isNot(tok::coloncolon) && 3341 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) { 3342 // Not a variable name nor namespace name. 3343 addUnwrappedLine(); 3344 } else if (AccessSpecifierCandidate) { 3345 // Consider the access specifier to be a C identifier. 3346 AccessSpecifierCandidate->Tok.setKind(tok::identifier); 3347 } 3348 } 3349 3350 /// \brief Parses a requires, decides if it is a clause or an expression. 3351 /// \pre The current token has to be the requires keyword. 3352 /// \returns true if it parsed a clause. 3353 bool clang::format::UnwrappedLineParser::parseRequires() { 3354 assert(FormatTok->is(tok::kw_requires) && "'requires' expected"); 3355 auto RequiresToken = FormatTok; 3356 3357 // We try to guess if it is a requires clause, or a requires expression. For 3358 // that we first consume the keyword and check the next token. 3359 nextToken(); 3360 3361 switch (FormatTok->Tok.getKind()) { 3362 case tok::l_brace: 3363 // This can only be an expression, never a clause. 3364 parseRequiresExpression(RequiresToken); 3365 return false; 3366 case tok::l_paren: 3367 // Clauses and expression can start with a paren, it's unclear what we have. 3368 break; 3369 default: 3370 // All other tokens can only be a clause. 
3371 parseRequiresClause(RequiresToken); 3372 return true; 3373 } 3374 3375 // Looking forward we would have to decide if there are function declaration 3376 // like arguments to the requires expression: 3377 // requires (T t) { 3378 // Or there is a constraint expression for the requires clause: 3379 // requires (C<T> && ... 3380 3381 // But first let's look behind. 3382 auto *PreviousNonComment = RequiresToken->getPreviousNonComment(); 3383 3384 if (!PreviousNonComment || 3385 PreviousNonComment->is(TT_RequiresExpressionLBrace)) { 3386 // If there is no token, or an expression left brace, we are a requires 3387 // clause within a requires expression. 3388 parseRequiresClause(RequiresToken); 3389 return true; 3390 } 3391 3392 switch (PreviousNonComment->Tok.getKind()) { 3393 case tok::greater: 3394 case tok::r_paren: 3395 case tok::kw_noexcept: 3396 case tok::kw_const: 3397 // This is a requires clause. 3398 parseRequiresClause(RequiresToken); 3399 return true; 3400 case tok::amp: 3401 case tok::ampamp: { 3402 // This can be either: 3403 // if (... && requires (T t) ...) 3404 // Or 3405 // void member(...) && requires (C<T> ... 3406 // We check the one token before that for a const: 3407 // void member(...) const && requires (C<T> ... 3408 auto PrevPrev = PreviousNonComment->getPreviousNonComment(); 3409 if (PrevPrev && PrevPrev->is(tok::kw_const)) { 3410 parseRequiresClause(RequiresToken); 3411 return true; 3412 } 3413 break; 3414 } 3415 default: 3416 if (PreviousNonComment->isTypeOrIdentifier()) { 3417 // This is a requires clause. 3418 parseRequiresClause(RequiresToken); 3419 return true; 3420 } 3421 // It's an expression. 3422 parseRequiresExpression(RequiresToken); 3423 return false; 3424 } 3425 3426 // Now we look forward and try to check if the paren content is a parameter 3427 // list. The parameters can be cv-qualified and contain references or 3428 // pointers. 
3429 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds 3430 // of stuff: typename, const, *, &, &&, ::, identifiers. 3431 3432 unsigned StoredPosition = Tokens->getPosition(); 3433 FormatToken *NextToken = Tokens->getNextToken(); 3434 int Lookahead = 0; 3435 auto PeekNext = [&Lookahead, &NextToken, this] { 3436 ++Lookahead; 3437 NextToken = Tokens->getNextToken(); 3438 }; 3439 3440 bool FoundType = false; 3441 bool LastWasColonColon = false; 3442 int OpenAngles = 0; 3443 3444 for (; Lookahead < 50; PeekNext()) { 3445 switch (NextToken->Tok.getKind()) { 3446 case tok::kw_volatile: 3447 case tok::kw_const: 3448 case tok::comma: 3449 if (OpenAngles == 0) { 3450 FormatTok = Tokens->setPosition(StoredPosition); 3451 parseRequiresExpression(RequiresToken); 3452 return false; 3453 } 3454 break; 3455 case tok::r_paren: 3456 case tok::pipepipe: 3457 FormatTok = Tokens->setPosition(StoredPosition); 3458 parseRequiresClause(RequiresToken); 3459 return true; 3460 case tok::eof: 3461 // Break out of the loop. 3462 Lookahead = 50; 3463 break; 3464 case tok::coloncolon: 3465 LastWasColonColon = true; 3466 break; 3467 case tok::identifier: 3468 if (FoundType && !LastWasColonColon && OpenAngles == 0) { 3469 FormatTok = Tokens->setPosition(StoredPosition); 3470 parseRequiresExpression(RequiresToken); 3471 return false; 3472 } 3473 FoundType = true; 3474 LastWasColonColon = false; 3475 break; 3476 case tok::less: 3477 ++OpenAngles; 3478 break; 3479 case tok::greater: 3480 --OpenAngles; 3481 break; 3482 default: 3483 if (NextToken->isSimpleTypeSpecifier()) { 3484 FormatTok = Tokens->setPosition(StoredPosition); 3485 parseRequiresExpression(RequiresToken); 3486 return false; 3487 } 3488 break; 3489 } 3490 } 3491 // This seems to be a complicated expression, just assume it's a clause. 3492 FormatTok = Tokens->setPosition(StoredPosition); 3493 parseRequiresClause(RequiresToken); 3494 return true; 3495 } 3496 3497 /// \brief Parses a requires clause. 
3498 /// \param RequiresToken The requires keyword token, which starts this clause. 3499 /// \pre We need to be on the next token after the requires keyword. 3500 /// \sa parseRequiresExpression 3501 /// 3502 /// Returns if it either has finished parsing the clause, or it detects, that 3503 /// the clause is incorrect. 3504 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) { 3505 assert(FormatTok->getPreviousNonComment() == RequiresToken); 3506 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); 3507 3508 // If there is no previous token, we are within a requires expression, 3509 // otherwise we will always have the template or function declaration in front 3510 // of it. 3511 bool InRequiresExpression = 3512 !RequiresToken->Previous || 3513 RequiresToken->Previous->is(TT_RequiresExpressionLBrace); 3514 3515 RequiresToken->setFinalizedType(InRequiresExpression 3516 ? TT_RequiresClauseInARequiresExpression 3517 : TT_RequiresClause); 3518 3519 // NOTE: parseConstraintExpression is only ever called from this function. 3520 // It could be inlined into here. 3521 parseConstraintExpression(); 3522 3523 if (!InRequiresExpression) 3524 FormatTok->Previous->ClosesRequiresClause = true; 3525 } 3526 3527 /// \brief Parses a requires expression. 3528 /// \param RequiresToken The requires keyword token, which starts this clause. 3529 /// \pre We need to be on the next token after the requires keyword. 3530 /// \sa parseRequiresClause 3531 /// 3532 /// Returns if it either has finished parsing the expression, or it detects, 3533 /// that the expression is incorrect. 
3534 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) { 3535 assert(FormatTok->getPreviousNonComment() == RequiresToken); 3536 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); 3537 3538 RequiresToken->setFinalizedType(TT_RequiresExpression); 3539 3540 if (FormatTok->is(tok::l_paren)) { 3541 FormatTok->setFinalizedType(TT_RequiresExpressionLParen); 3542 parseParens(); 3543 } 3544 3545 if (FormatTok->is(tok::l_brace)) { 3546 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace); 3547 parseChildBlock(); 3548 } 3549 } 3550 3551 /// \brief Parses a constraint expression. 3552 /// 3553 /// This is the body of a requires clause. It returns, when the parsing is 3554 /// complete, or the expression is incorrect. 3555 void UnwrappedLineParser::parseConstraintExpression() { 3556 // The special handling for lambdas is needed since tryToParseLambda() eats a 3557 // token and if a requires expression is the last part of a requires clause 3558 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is 3559 // not set on the correct token. Thus we need to be aware if we even expect a 3560 // lambda to be possible. 3561 // template <typename T> requires requires { ... } [[nodiscard]] ...; 3562 bool LambdaNextTimeAllowed = true; 3563 3564 // Within lambda declarations, it is permitted to put a requires clause after 3565 // its template parameter list, which would place the requires clause right 3566 // before the parentheses of the parameters of the lambda declaration. Thus, 3567 // we track if we expect to see grouping parentheses at all. 3568 // Without this check, `requires foo<T> (T t)` in the below example would be 3569 // seen as the whole requires clause, accidentally eating the parameters of 3570 // the lambda. 3571 // [&]<typename T> requires foo<T> (T t) { ... 
}; 3572 bool TopLevelParensAllowed = true; 3573 3574 do { 3575 bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false); 3576 3577 switch (FormatTok->Tok.getKind()) { 3578 case tok::kw_requires: { 3579 auto RequiresToken = FormatTok; 3580 nextToken(); 3581 parseRequiresExpression(RequiresToken); 3582 break; 3583 } 3584 3585 case tok::l_paren: 3586 if (!TopLevelParensAllowed) 3587 return; 3588 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator); 3589 TopLevelParensAllowed = false; 3590 break; 3591 3592 case tok::l_square: 3593 if (!LambdaThisTimeAllowed || !tryToParseLambda()) 3594 return; 3595 break; 3596 3597 case tok::kw_const: 3598 case tok::semi: 3599 case tok::kw_class: 3600 case tok::kw_struct: 3601 case tok::kw_union: 3602 return; 3603 3604 case tok::l_brace: 3605 // Potential function body. 3606 return; 3607 3608 case tok::ampamp: 3609 case tok::pipepipe: 3610 FormatTok->setFinalizedType(TT_BinaryOperator); 3611 nextToken(); 3612 LambdaNextTimeAllowed = true; 3613 TopLevelParensAllowed = true; 3614 break; 3615 3616 case tok::comma: 3617 case tok::comment: 3618 LambdaNextTimeAllowed = LambdaThisTimeAllowed; 3619 nextToken(); 3620 break; 3621 3622 case tok::kw_sizeof: 3623 case tok::greater: 3624 case tok::greaterequal: 3625 case tok::greatergreater: 3626 case tok::less: 3627 case tok::lessequal: 3628 case tok::lessless: 3629 case tok::equalequal: 3630 case tok::exclaim: 3631 case tok::exclaimequal: 3632 case tok::plus: 3633 case tok::minus: 3634 case tok::star: 3635 case tok::slash: 3636 LambdaNextTimeAllowed = true; 3637 TopLevelParensAllowed = true; 3638 // Just eat them. 3639 nextToken(); 3640 break; 3641 3642 case tok::numeric_constant: 3643 case tok::coloncolon: 3644 case tok::kw_true: 3645 case tok::kw_false: 3646 TopLevelParensAllowed = false; 3647 // Just eat them. 
3648 nextToken(); 3649 break; 3650 3651 case tok::kw_static_cast: 3652 case tok::kw_const_cast: 3653 case tok::kw_reinterpret_cast: 3654 case tok::kw_dynamic_cast: 3655 nextToken(); 3656 if (FormatTok->isNot(tok::less)) 3657 return; 3658 3659 nextToken(); 3660 parseBracedList(/*IsAngleBracket=*/true); 3661 break; 3662 3663 default: 3664 if (!FormatTok->Tok.getIdentifierInfo()) { 3665 // Identifiers are part of the default case, we check for more then 3666 // tok::identifier to handle builtin type traits. 3667 return; 3668 } 3669 3670 // We need to differentiate identifiers for a template deduction guide, 3671 // variables, or function return types (the constraint expression has 3672 // ended before that), and basically all other cases. But it's easier to 3673 // check the other way around. 3674 assert(FormatTok->Previous); 3675 switch (FormatTok->Previous->Tok.getKind()) { 3676 case tok::coloncolon: // Nested identifier. 3677 case tok::ampamp: // Start of a function or variable for the 3678 case tok::pipepipe: // constraint expression. (binary) 3679 case tok::exclaim: // The same as above, but unary. 3680 case tok::kw_requires: // Initial identifier of a requires clause. 3681 case tok::equal: // Initial identifier of a concept declaration. 3682 break; 3683 default: 3684 return; 3685 } 3686 3687 // Read identifier with optional template declaration. 3688 nextToken(); 3689 if (FormatTok->is(tok::less)) { 3690 nextToken(); 3691 parseBracedList(/*IsAngleBracket=*/true); 3692 } 3693 TopLevelParensAllowed = false; 3694 break; 3695 } 3696 } while (!eof()); 3697 } 3698 3699 bool UnwrappedLineParser::parseEnum() { 3700 const FormatToken &InitialToken = *FormatTok; 3701 3702 // Won't be 'enum' for NS_ENUMs. 3703 if (FormatTok->is(tok::kw_enum)) 3704 nextToken(); 3705 3706 // In TypeScript, "enum" can also be used as property name, e.g. in interface 3707 // declarations. 
An "enum" keyword followed by a colon would be a syntax 3708 // error and thus assume it is just an identifier. 3709 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question)) 3710 return false; 3711 3712 // In protobuf, "enum" can be used as a field name. 3713 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) 3714 return false; 3715 3716 // Eat up enum class ... 3717 if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct)) 3718 nextToken(); 3719 3720 while (FormatTok->Tok.getIdentifierInfo() || 3721 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 3722 tok::greater, tok::comma, tok::question, 3723 tok::l_square, tok::r_square)) { 3724 if (Style.isVerilog()) { 3725 FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName); 3726 nextToken(); 3727 // In Verilog the base type can have dimensions. 3728 while (FormatTok->is(tok::l_square)) 3729 parseSquare(); 3730 } else { 3731 nextToken(); 3732 } 3733 // We can have macros or attributes in between 'enum' and the enum name. 3734 if (FormatTok->is(tok::l_paren)) 3735 parseParens(); 3736 assert(FormatTok->isNot(TT_AttributeSquare)); 3737 if (FormatTok->is(tok::identifier)) { 3738 nextToken(); 3739 // If there are two identifiers in a row, this is likely an elaborate 3740 // return type. In Java, this can be "implements", etc. 3741 if (Style.isCpp() && FormatTok->is(tok::identifier)) 3742 return false; 3743 } 3744 } 3745 3746 // Just a declaration or something is wrong. 3747 if (FormatTok->isNot(tok::l_brace)) 3748 return true; 3749 FormatTok->setFinalizedType(TT_EnumLBrace); 3750 FormatTok->setBlockKind(BK_Block); 3751 3752 if (Style.Language == FormatStyle::LK_Java) { 3753 // Java enums are different. 
3754 parseJavaEnumBody(); 3755 return true; 3756 } 3757 if (Style.Language == FormatStyle::LK_Proto) { 3758 parseBlock(/*MustBeDeclaration=*/true); 3759 return true; 3760 } 3761 3762 if (!Style.AllowShortEnumsOnASingleLine && 3763 ShouldBreakBeforeBrace(Style, InitialToken)) { 3764 addUnwrappedLine(); 3765 } 3766 // Parse enum body. 3767 nextToken(); 3768 if (!Style.AllowShortEnumsOnASingleLine) { 3769 addUnwrappedLine(); 3770 Line->Level += 1; 3771 } 3772 bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true); 3773 if (!Style.AllowShortEnumsOnASingleLine) 3774 Line->Level -= 1; 3775 if (HasError) { 3776 if (FormatTok->is(tok::semi)) 3777 nextToken(); 3778 addUnwrappedLine(); 3779 } 3780 setPreviousRBraceType(TT_EnumRBrace); 3781 return true; 3782 3783 // There is no addUnwrappedLine() here so that we fall through to parsing a 3784 // structural element afterwards. Thus, in "enum A {} n, m;", 3785 // "} n, m;" will end up in one unwrapped line. 3786 } 3787 3788 bool UnwrappedLineParser::parseStructLike() { 3789 // parseRecord falls through and does not yet add an unwrapped line as a 3790 // record declaration or definition can start a structural element. 3791 parseRecord(); 3792 // This does not apply to Java, JavaScript and C#. 3793 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 3794 Style.isCSharp()) { 3795 if (FormatTok->is(tok::semi)) 3796 nextToken(); 3797 addUnwrappedLine(); 3798 return true; 3799 } 3800 return false; 3801 } 3802 3803 namespace { 3804 // A class used to set and restore the Token position when peeking 3805 // ahead in the token source. 
3806 class ScopedTokenPosition { 3807 unsigned StoredPosition; 3808 FormatTokenSource *Tokens; 3809 3810 public: 3811 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) { 3812 assert(Tokens && "Tokens expected to not be null"); 3813 StoredPosition = Tokens->getPosition(); 3814 } 3815 3816 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); } 3817 }; 3818 } // namespace 3819 3820 // Look to see if we have [[ by looking ahead, if 3821 // its not then rewind to the original position. 3822 bool UnwrappedLineParser::tryToParseSimpleAttribute() { 3823 ScopedTokenPosition AutoPosition(Tokens); 3824 FormatToken *Tok = Tokens->getNextToken(); 3825 // We already read the first [ check for the second. 3826 if (Tok->isNot(tok::l_square)) 3827 return false; 3828 // Double check that the attribute is just something 3829 // fairly simple. 3830 while (Tok->isNot(tok::eof)) { 3831 if (Tok->is(tok::r_square)) 3832 break; 3833 Tok = Tokens->getNextToken(); 3834 } 3835 if (Tok->is(tok::eof)) 3836 return false; 3837 Tok = Tokens->getNextToken(); 3838 if (Tok->isNot(tok::r_square)) 3839 return false; 3840 Tok = Tokens->getNextToken(); 3841 if (Tok->is(tok::semi)) 3842 return false; 3843 return true; 3844 } 3845 3846 void UnwrappedLineParser::parseJavaEnumBody() { 3847 assert(FormatTok->is(tok::l_brace)); 3848 const FormatToken *OpeningBrace = FormatTok; 3849 3850 // Determine whether the enum is simple, i.e. does not have a semicolon or 3851 // constants with class bodies. Simple enums can be formatted like braced 3852 // lists, contracted to a single line, etc. 
3853 unsigned StoredPosition = Tokens->getPosition(); 3854 bool IsSimple = true; 3855 FormatToken *Tok = Tokens->getNextToken(); 3856 while (Tok->isNot(tok::eof)) { 3857 if (Tok->is(tok::r_brace)) 3858 break; 3859 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 3860 IsSimple = false; 3861 break; 3862 } 3863 // FIXME: This will also mark enums with braces in the arguments to enum 3864 // constants as "not simple". This is probably fine in practice, though. 3865 Tok = Tokens->getNextToken(); 3866 } 3867 FormatTok = Tokens->setPosition(StoredPosition); 3868 3869 if (IsSimple) { 3870 nextToken(); 3871 parseBracedList(); 3872 addUnwrappedLine(); 3873 return; 3874 } 3875 3876 // Parse the body of a more complex enum. 3877 // First add a line for everything up to the "{". 3878 nextToken(); 3879 addUnwrappedLine(); 3880 ++Line->Level; 3881 3882 // Parse the enum constants. 3883 while (!eof()) { 3884 if (FormatTok->is(tok::l_brace)) { 3885 // Parse the constant's class body. 3886 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u, 3887 /*MunchSemi=*/false); 3888 } else if (FormatTok->is(tok::l_paren)) { 3889 parseParens(); 3890 } else if (FormatTok->is(tok::comma)) { 3891 nextToken(); 3892 addUnwrappedLine(); 3893 } else if (FormatTok->is(tok::semi)) { 3894 nextToken(); 3895 addUnwrappedLine(); 3896 break; 3897 } else if (FormatTok->is(tok::r_brace)) { 3898 addUnwrappedLine(); 3899 break; 3900 } else { 3901 nextToken(); 3902 } 3903 } 3904 3905 // Parse the class body after the enum's ";" if any. 
3906 parseLevel(OpeningBrace); 3907 nextToken(); 3908 --Line->Level; 3909 addUnwrappedLine(); 3910 } 3911 3912 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 3913 const FormatToken &InitialToken = *FormatTok; 3914 nextToken(); 3915 3916 auto IsNonMacroIdentifier = [](const FormatToken *Tok) { 3917 return Tok->is(tok::identifier) && Tok->TokenText != Tok->TokenText.upper(); 3918 }; 3919 // The actual identifier can be a nested name specifier, and in macros 3920 // it is often token-pasted. 3921 // An [[attribute]] can be before the identifier. 3922 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 3923 tok::kw_alignas, tok::l_square) || 3924 FormatTok->isAttribute() || 3925 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 3926 FormatTok->isOneOf(tok::period, tok::comma))) { 3927 if (Style.isJavaScript() && 3928 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 3929 // JavaScript/TypeScript supports inline object types in 3930 // extends/implements positions: 3931 // class Foo implements {bar: number} { } 3932 nextToken(); 3933 if (FormatTok->is(tok::l_brace)) { 3934 tryToParseBracedList(); 3935 continue; 3936 } 3937 } 3938 if (FormatTok->is(tok::l_square) && handleCppAttributes()) 3939 continue; 3940 nextToken(); 3941 // We can have macros in between 'class' and the class name. 
3942 if (!IsNonMacroIdentifier(FormatTok->Previous) && 3943 FormatTok->is(tok::l_paren)) { 3944 parseParens(); 3945 } 3946 } 3947 3948 if (FormatTok->isOneOf(tok::colon, tok::less)) { 3949 int AngleNestingLevel = 0; 3950 do { 3951 if (FormatTok->is(tok::less)) 3952 ++AngleNestingLevel; 3953 else if (FormatTok->is(tok::greater)) 3954 --AngleNestingLevel; 3955 3956 if (AngleNestingLevel == 0 && FormatTok->is(tok::l_paren) && 3957 IsNonMacroIdentifier(FormatTok->Previous)) { 3958 break; 3959 } 3960 if (FormatTok->is(tok::l_brace)) { 3961 calculateBraceTypes(/*ExpectClassBody=*/true); 3962 if (!tryToParseBracedList()) 3963 break; 3964 } 3965 if (FormatTok->is(tok::l_square)) { 3966 FormatToken *Previous = FormatTok->Previous; 3967 if (!Previous || 3968 !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) { 3969 // Don't try parsing a lambda if we had a closing parenthesis before, 3970 // it was probably a pointer to an array: int (*)[]. 3971 if (!tryToParseLambda()) 3972 continue; 3973 } else { 3974 parseSquare(); 3975 continue; 3976 } 3977 } 3978 if (FormatTok->is(tok::semi)) 3979 return; 3980 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) { 3981 addUnwrappedLine(); 3982 nextToken(); 3983 parseCSharpGenericTypeConstraint(); 3984 break; 3985 } 3986 nextToken(); 3987 } while (!eof()); 3988 } 3989 3990 auto GetBraceTypes = 3991 [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> { 3992 switch (RecordTok.Tok.getKind()) { 3993 case tok::kw_class: 3994 return {TT_ClassLBrace, TT_ClassRBrace}; 3995 case tok::kw_struct: 3996 return {TT_StructLBrace, TT_StructRBrace}; 3997 case tok::kw_union: 3998 return {TT_UnionLBrace, TT_UnionRBrace}; 3999 default: 4000 // Useful for e.g. interface. 
4001 return {TT_RecordLBrace, TT_RecordRBrace}; 4002 } 4003 }; 4004 if (FormatTok->is(tok::l_brace)) { 4005 auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken); 4006 FormatTok->setFinalizedType(OpenBraceType); 4007 if (ParseAsExpr) { 4008 parseChildBlock(); 4009 } else { 4010 if (ShouldBreakBeforeBrace(Style, InitialToken)) 4011 addUnwrappedLine(); 4012 4013 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u; 4014 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false); 4015 } 4016 setPreviousRBraceType(ClosingBraceType); 4017 } 4018 // There is no addUnwrappedLine() here so that we fall through to parsing a 4019 // structural element afterwards. Thus, in "class A {} n, m;", 4020 // "} n, m;" will end up in one unwrapped line. 4021 } 4022 4023 void UnwrappedLineParser::parseObjCMethod() { 4024 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) && 4025 "'(' or identifier expected."); 4026 do { 4027 if (FormatTok->is(tok::semi)) { 4028 nextToken(); 4029 addUnwrappedLine(); 4030 return; 4031 } else if (FormatTok->is(tok::l_brace)) { 4032 if (Style.BraceWrapping.AfterFunction) 4033 addUnwrappedLine(); 4034 parseBlock(); 4035 addUnwrappedLine(); 4036 return; 4037 } else { 4038 nextToken(); 4039 } 4040 } while (!eof()); 4041 } 4042 4043 void UnwrappedLineParser::parseObjCProtocolList() { 4044 assert(FormatTok->is(tok::less) && "'<' expected."); 4045 do { 4046 nextToken(); 4047 // Early exit in case someone forgot a close angle. 4048 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 4049 FormatTok->isObjCAtKeyword(tok::objc_end)) { 4050 return; 4051 } 4052 } while (!eof() && FormatTok->isNot(tok::greater)); 4053 nextToken(); // Skip '>'. 
4054 } 4055 4056 void UnwrappedLineParser::parseObjCUntilAtEnd() { 4057 do { 4058 if (FormatTok->isObjCAtKeyword(tok::objc_end)) { 4059 nextToken(); 4060 addUnwrappedLine(); 4061 break; 4062 } 4063 if (FormatTok->is(tok::l_brace)) { 4064 parseBlock(); 4065 // In ObjC interfaces, nothing should be following the "}". 4066 addUnwrappedLine(); 4067 } else if (FormatTok->is(tok::r_brace)) { 4068 // Ignore stray "}". parseStructuralElement doesn't consume them. 4069 nextToken(); 4070 addUnwrappedLine(); 4071 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 4072 nextToken(); 4073 parseObjCMethod(); 4074 } else { 4075 parseStructuralElement(); 4076 } 4077 } while (!eof()); 4078 } 4079 4080 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 4081 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 4082 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 4083 nextToken(); 4084 nextToken(); // interface name 4085 4086 // @interface can be followed by a lightweight generic 4087 // specialization list, then either a base class or a category. 4088 if (FormatTok->is(tok::less)) 4089 parseObjCLightweightGenerics(); 4090 if (FormatTok->is(tok::colon)) { 4091 nextToken(); 4092 nextToken(); // base class name 4093 // The base class can also have lightweight generics applied to it. 4094 if (FormatTok->is(tok::less)) 4095 parseObjCLightweightGenerics(); 4096 } else if (FormatTok->is(tok::l_paren)) { 4097 // Skip category, if present. 4098 parseParens(); 4099 } 4100 4101 if (FormatTok->is(tok::less)) 4102 parseObjCProtocolList(); 4103 4104 if (FormatTok->is(tok::l_brace)) { 4105 if (Style.BraceWrapping.AfterObjCDeclaration) 4106 addUnwrappedLine(); 4107 parseBlock(/*MustBeDeclaration=*/true); 4108 } 4109 4110 // With instance variables, this puts '}' on its own line. Without instance 4111 // variables, this ends the @interface line. 
4112 addUnwrappedLine(); 4113 4114 parseObjCUntilAtEnd(); 4115 } 4116 4117 void UnwrappedLineParser::parseObjCLightweightGenerics() { 4118 assert(FormatTok->is(tok::less)); 4119 // Unlike protocol lists, generic parameterizations support 4120 // nested angles: 4121 // 4122 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 4123 // NSObject <NSCopying, NSSecureCoding> 4124 // 4125 // so we need to count how many open angles we have left. 4126 unsigned NumOpenAngles = 1; 4127 do { 4128 nextToken(); 4129 // Early exit in case someone forgot a close angle. 4130 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 4131 FormatTok->isObjCAtKeyword(tok::objc_end)) { 4132 break; 4133 } 4134 if (FormatTok->is(tok::less)) { 4135 ++NumOpenAngles; 4136 } else if (FormatTok->is(tok::greater)) { 4137 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 4138 --NumOpenAngles; 4139 } 4140 } while (!eof() && NumOpenAngles != 0); 4141 nextToken(); // Skip '>'. 4142 } 4143 4144 // Returns true for the declaration/definition form of @protocol, 4145 // false for the expression form. 4146 bool UnwrappedLineParser::parseObjCProtocol() { 4147 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 4148 nextToken(); 4149 4150 if (FormatTok->is(tok::l_paren)) { 4151 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 4152 return false; 4153 } 4154 4155 // The definition/declaration form, 4156 // @protocol Foo 4157 // - (int)someMethod; 4158 // @end 4159 4160 nextToken(); // protocol name 4161 4162 if (FormatTok->is(tok::less)) 4163 parseObjCProtocolList(); 4164 4165 // Check for protocol declaration. 
4166 if (FormatTok->is(tok::semi)) { 4167 nextToken(); 4168 addUnwrappedLine(); 4169 return true; 4170 } 4171 4172 addUnwrappedLine(); 4173 parseObjCUntilAtEnd(); 4174 return true; 4175 } 4176 4177 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 4178 bool IsImport = FormatTok->is(Keywords.kw_import); 4179 assert(IsImport || FormatTok->is(tok::kw_export)); 4180 nextToken(); 4181 4182 // Consume the "default" in "export default class/function". 4183 if (FormatTok->is(tok::kw_default)) 4184 nextToken(); 4185 4186 // Consume "async function", "function" and "default function", so that these 4187 // get parsed as free-standing JS functions, i.e. do not require a trailing 4188 // semicolon. 4189 if (FormatTok->is(Keywords.kw_async)) 4190 nextToken(); 4191 if (FormatTok->is(Keywords.kw_function)) { 4192 nextToken(); 4193 return; 4194 } 4195 4196 // For imports, `export *`, `export {...}`, consume the rest of the line up 4197 // to the terminating `;`. For everything else, just return and continue 4198 // parsing the structural element, i.e. the declaration or expression for 4199 // `export default`. 4200 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 4201 !FormatTok->isStringLiteral() && 4202 !(FormatTok->is(Keywords.kw_type) && 4203 Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) { 4204 return; 4205 } 4206 4207 while (!eof()) { 4208 if (FormatTok->is(tok::semi)) 4209 return; 4210 if (Line->Tokens.empty()) { 4211 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 4212 // import statement should terminate. 
4213 return; 4214 } 4215 if (FormatTok->is(tok::l_brace)) { 4216 FormatTok->setBlockKind(BK_Block); 4217 nextToken(); 4218 parseBracedList(); 4219 } else { 4220 nextToken(); 4221 } 4222 } 4223 } 4224 4225 void UnwrappedLineParser::parseStatementMacro() { 4226 nextToken(); 4227 if (FormatTok->is(tok::l_paren)) 4228 parseParens(); 4229 if (FormatTok->is(tok::semi)) 4230 nextToken(); 4231 addUnwrappedLine(); 4232 } 4233 4234 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() { 4235 // consume things like a::`b.c[d:e] or a::* 4236 while (true) { 4237 if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar, 4238 tok::coloncolon, tok::hash) || 4239 Keywords.isVerilogIdentifier(*FormatTok)) { 4240 nextToken(); 4241 } else if (FormatTok->is(tok::l_square)) { 4242 parseSquare(); 4243 } else { 4244 break; 4245 } 4246 } 4247 } 4248 4249 void UnwrappedLineParser::parseVerilogSensitivityList() { 4250 if (FormatTok->isNot(tok::at)) 4251 return; 4252 nextToken(); 4253 // A block event expression has 2 at signs. 
4254 if (FormatTok->is(tok::at)) 4255 nextToken(); 4256 switch (FormatTok->Tok.getKind()) { 4257 case tok::star: 4258 nextToken(); 4259 break; 4260 case tok::l_paren: 4261 parseParens(); 4262 break; 4263 default: 4264 parseVerilogHierarchyIdentifier(); 4265 break; 4266 } 4267 } 4268 4269 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() { 4270 unsigned AddLevels = 0; 4271 4272 if (FormatTok->is(Keywords.kw_clocking)) { 4273 nextToken(); 4274 if (Keywords.isVerilogIdentifier(*FormatTok)) 4275 nextToken(); 4276 parseVerilogSensitivityList(); 4277 if (FormatTok->is(tok::semi)) 4278 nextToken(); 4279 } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex, 4280 Keywords.kw_casez, Keywords.kw_randcase, 4281 Keywords.kw_randsequence)) { 4282 if (Style.IndentCaseLabels) 4283 AddLevels++; 4284 nextToken(); 4285 if (FormatTok->is(tok::l_paren)) { 4286 FormatTok->setFinalizedType(TT_ConditionLParen); 4287 parseParens(); 4288 } 4289 if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches)) 4290 nextToken(); 4291 // The case header has no semicolon. 4292 } else { 4293 // "module" etc. 
4294 nextToken(); 4295 // all the words like the name of the module and specifiers like 4296 // "automatic" and the width of function return type 4297 while (true) { 4298 if (FormatTok->is(tok::l_square)) { 4299 auto Prev = FormatTok->getPreviousNonComment(); 4300 if (Prev && Keywords.isVerilogIdentifier(*Prev)) 4301 Prev->setFinalizedType(TT_VerilogDimensionedTypeName); 4302 parseSquare(); 4303 } else if (Keywords.isVerilogIdentifier(*FormatTok) || 4304 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) { 4305 nextToken(); 4306 } else { 4307 break; 4308 } 4309 } 4310 4311 auto NewLine = [this]() { 4312 addUnwrappedLine(); 4313 Line->IsContinuation = true; 4314 }; 4315 4316 // package imports 4317 while (FormatTok->is(Keywords.kw_import)) { 4318 NewLine(); 4319 nextToken(); 4320 parseVerilogHierarchyIdentifier(); 4321 if (FormatTok->is(tok::semi)) 4322 nextToken(); 4323 } 4324 4325 // parameters and ports 4326 if (FormatTok->is(Keywords.kw_verilogHash)) { 4327 NewLine(); 4328 nextToken(); 4329 if (FormatTok->is(tok::l_paren)) { 4330 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen); 4331 parseParens(); 4332 } 4333 } 4334 if (FormatTok->is(tok::l_paren)) { 4335 NewLine(); 4336 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen); 4337 parseParens(); 4338 } 4339 4340 // extends and implements 4341 if (FormatTok->is(Keywords.kw_extends)) { 4342 NewLine(); 4343 nextToken(); 4344 parseVerilogHierarchyIdentifier(); 4345 if (FormatTok->is(tok::l_paren)) 4346 parseParens(); 4347 } 4348 if (FormatTok->is(Keywords.kw_implements)) { 4349 NewLine(); 4350 do { 4351 nextToken(); 4352 parseVerilogHierarchyIdentifier(); 4353 } while (FormatTok->is(tok::comma)); 4354 } 4355 4356 // Coverage event for cover groups. 
4357 if (FormatTok->is(tok::at)) { 4358 NewLine(); 4359 parseVerilogSensitivityList(); 4360 } 4361 4362 if (FormatTok->is(tok::semi)) 4363 nextToken(/*LevelDifference=*/1); 4364 addUnwrappedLine(); 4365 } 4366 4367 return AddLevels; 4368 } 4369 4370 void UnwrappedLineParser::parseVerilogTable() { 4371 assert(FormatTok->is(Keywords.kw_table)); 4372 nextToken(/*LevelDifference=*/1); 4373 addUnwrappedLine(); 4374 4375 auto InitialLevel = Line->Level++; 4376 while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) { 4377 FormatToken *Tok = FormatTok; 4378 nextToken(); 4379 if (Tok->is(tok::semi)) 4380 addUnwrappedLine(); 4381 else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus)) 4382 Tok->setFinalizedType(TT_VerilogTableItem); 4383 } 4384 Line->Level = InitialLevel; 4385 nextToken(/*LevelDifference=*/-1); 4386 addUnwrappedLine(); 4387 } 4388 4389 void UnwrappedLineParser::parseVerilogCaseLabel() { 4390 // The label will get unindented in AnnotatingParser. If there are no leading 4391 // spaces, indent the rest here so that things inside the block will be 4392 // indented relative to things outside. We don't use parseLabel because we 4393 // don't know whether this colon is a label or a ternary expression at this 4394 // point. 4395 auto OrigLevel = Line->Level; 4396 auto FirstLine = CurrentLines->size(); 4397 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1)) 4398 ++Line->Level; 4399 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok)) 4400 --Line->Level; 4401 parseStructuralElement(); 4402 // Restore the indentation in both the new line and the line that has the 4403 // label. 
4404 if (CurrentLines->size() > FirstLine) 4405 (*CurrentLines)[FirstLine].Level = OrigLevel; 4406 Line->Level = OrigLevel; 4407 } 4408 4409 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const { 4410 for (const auto &N : Line.Tokens) { 4411 if (N.Tok->MacroCtx) 4412 return true; 4413 for (const UnwrappedLine &Child : N.Children) 4414 if (containsExpansion(Child)) 4415 return true; 4416 } 4417 return false; 4418 } 4419 4420 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { 4421 if (Line->Tokens.empty()) 4422 return; 4423 LLVM_DEBUG({ 4424 if (!parsingPPDirective()) { 4425 llvm::dbgs() << "Adding unwrapped line:\n"; 4426 printDebugInfo(*Line); 4427 } 4428 }); 4429 4430 // If this line closes a block when in Whitesmiths mode, remember that 4431 // information so that the level can be decreased after the line is added. 4432 // This has to happen after the addition of the line since the line itself 4433 // needs to be indented. 4434 bool ClosesWhitesmithsBlock = 4435 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && 4436 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 4437 4438 // If the current line was expanded from a macro call, we use it to 4439 // reconstruct an unwrapped line from the structure of the expanded unwrapped 4440 // line and the unexpanded token stream. 4441 if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) { 4442 if (!Reconstruct) 4443 Reconstruct.emplace(Line->Level, Unexpanded); 4444 Reconstruct->addLine(*Line); 4445 4446 // While the reconstructed unexpanded lines are stored in the normal 4447 // flow of lines, the expanded lines are stored on the side to be analyzed 4448 // in an extra step. 
4449 CurrentExpandedLines.push_back(std::move(*Line)); 4450 4451 if (Reconstruct->finished()) { 4452 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult(); 4453 assert(!Reconstructed.Tokens.empty() && 4454 "Reconstructed must at least contain the macro identifier."); 4455 assert(!parsingPPDirective()); 4456 LLVM_DEBUG({ 4457 llvm::dbgs() << "Adding unexpanded line:\n"; 4458 printDebugInfo(Reconstructed); 4459 }); 4460 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines; 4461 Lines.push_back(std::move(Reconstructed)); 4462 CurrentExpandedLines.clear(); 4463 Reconstruct.reset(); 4464 } 4465 } else { 4466 // At the top level we only get here when no unexpansion is going on, or 4467 // when conditional formatting led to unfinished macro reconstructions. 4468 assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0); 4469 CurrentLines->push_back(std::move(*Line)); 4470 } 4471 Line->Tokens.clear(); 4472 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 4473 Line->FirstStartColumn = 0; 4474 Line->IsContinuation = false; 4475 Line->SeenDecltypeAuto = false; 4476 4477 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) 4478 --Line->Level; 4479 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) { 4480 CurrentLines->append( 4481 std::make_move_iterator(PreprocessorDirectives.begin()), 4482 std::make_move_iterator(PreprocessorDirectives.end())); 4483 PreprocessorDirectives.clear(); 4484 } 4485 // Disconnect the current token from the last token on the previous line. 4486 FormatTok->Previous = nullptr; 4487 } 4488 4489 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); } 4490 4491 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 4492 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 4493 FormatTok.NewlinesBefore > 0; 4494 } 4495 4496 // Checks if \p FormatTok is a line comment that continues the line comment 4497 // section on \p Line. 
4498 static bool 4499 continuesLineCommentSection(const FormatToken &FormatTok, 4500 const UnwrappedLine &Line, 4501 const llvm::Regex &CommentPragmasRegex) { 4502 if (Line.Tokens.empty()) 4503 return false; 4504 4505 StringRef IndentContent = FormatTok.TokenText; 4506 if (FormatTok.TokenText.starts_with("//") || 4507 FormatTok.TokenText.starts_with("/*")) { 4508 IndentContent = FormatTok.TokenText.substr(2); 4509 } 4510 if (CommentPragmasRegex.match(IndentContent)) 4511 return false; 4512 4513 // If Line starts with a line comment, then FormatTok continues the comment 4514 // section if its original column is greater or equal to the original start 4515 // column of the line. 4516 // 4517 // Define the min column token of a line as follows: if a line ends in '{' or 4518 // contains a '{' followed by a line comment, then the min column token is 4519 // that '{'. Otherwise, the min column token of the line is the first token of 4520 // the line. 4521 // 4522 // If Line starts with a token other than a line comment, then FormatTok 4523 // continues the comment section if its original column is greater than the 4524 // original start column of the min column token of the line. 
4525 // 4526 // For example, the second line comment continues the first in these cases: 4527 // 4528 // // first line 4529 // // second line 4530 // 4531 // and: 4532 // 4533 // // first line 4534 // // second line 4535 // 4536 // and: 4537 // 4538 // int i; // first line 4539 // // second line 4540 // 4541 // and: 4542 // 4543 // do { // first line 4544 // // second line 4545 // int i; 4546 // } while (true); 4547 // 4548 // and: 4549 // 4550 // enum { 4551 // a, // first line 4552 // // second line 4553 // b 4554 // }; 4555 // 4556 // The second line comment doesn't continue the first in these cases: 4557 // 4558 // // first line 4559 // // second line 4560 // 4561 // and: 4562 // 4563 // int i; // first line 4564 // // second line 4565 // 4566 // and: 4567 // 4568 // do { // first line 4569 // // second line 4570 // int i; 4571 // } while (true); 4572 // 4573 // and: 4574 // 4575 // enum { 4576 // a, // first line 4577 // // second line 4578 // }; 4579 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 4580 4581 // Scan for '{//'. If found, use the column of '{' as a min column for line 4582 // comment section continuation. 4583 const FormatToken *PreviousToken = nullptr; 4584 for (const UnwrappedLineNode &Node : Line.Tokens) { 4585 if (PreviousToken && PreviousToken->is(tok::l_brace) && 4586 isLineComment(*Node.Tok)) { 4587 MinColumnToken = PreviousToken; 4588 break; 4589 } 4590 PreviousToken = Node.Tok; 4591 4592 // Grab the last newline preceding a token in this unwrapped line. 
4593 if (Node.Tok->NewlinesBefore > 0) 4594 MinColumnToken = Node.Tok; 4595 } 4596 if (PreviousToken && PreviousToken->is(tok::l_brace)) 4597 MinColumnToken = PreviousToken; 4598 4599 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 4600 MinColumnToken); 4601 } 4602 4603 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 4604 bool JustComments = Line->Tokens.empty(); 4605 for (FormatToken *Tok : CommentsBeforeNextToken) { 4606 // Line comments that belong to the same line comment section are put on the 4607 // same line since later we might want to reflow content between them. 4608 // Additional fine-grained breaking of line comment sections is controlled 4609 // by the class BreakableLineCommentSection in case it is desirable to keep 4610 // several line comment sections in the same unwrapped line. 4611 // 4612 // FIXME: Consider putting separate line comment sections as children to the 4613 // unwrapped line instead. 4614 Tok->ContinuesLineCommentSection = 4615 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex); 4616 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection) 4617 addUnwrappedLine(); 4618 pushToken(Tok); 4619 } 4620 if (NewlineBeforeNext && JustComments) 4621 addUnwrappedLine(); 4622 CommentsBeforeNextToken.clear(); 4623 } 4624 4625 void UnwrappedLineParser::nextToken(int LevelDifference) { 4626 if (eof()) 4627 return; 4628 flushComments(isOnNewLine(*FormatTok)); 4629 pushToken(FormatTok); 4630 FormatToken *Previous = FormatTok; 4631 if (!Style.isJavaScript()) 4632 readToken(LevelDifference); 4633 else 4634 readTokenWithJavaScriptASI(); 4635 FormatTok->Previous = Previous; 4636 if (Style.isVerilog()) { 4637 // Blocks in Verilog can have `begin` and `end` instead of braces. For 4638 // keywords like `begin`, we can't treat them the same as left braces 4639 // because some contexts require one of them. 
For example structs use 4640 // braces and if blocks use keywords, and a left brace can occur in an if 4641 // statement, but it is not a block. For keywords like `end`, we simply 4642 // treat them the same as right braces. 4643 if (Keywords.isVerilogEnd(*FormatTok)) 4644 FormatTok->Tok.setKind(tok::r_brace); 4645 } 4646 } 4647 4648 void UnwrappedLineParser::distributeComments( 4649 const SmallVectorImpl<FormatToken *> &Comments, 4650 const FormatToken *NextTok) { 4651 // Whether or not a line comment token continues a line is controlled by 4652 // the method continuesLineCommentSection, with the following caveat: 4653 // 4654 // Define a trail of Comments to be a nonempty proper postfix of Comments such 4655 // that each comment line from the trail is aligned with the next token, if 4656 // the next token exists. If a trail exists, the beginning of the maximal 4657 // trail is marked as a start of a new comment section. 4658 // 4659 // For example in this code: 4660 // 4661 // int a; // line about a 4662 // // line 1 about b 4663 // // line 2 about b 4664 // int b; 4665 // 4666 // the two lines about b form a maximal trail, so there are two sections, the 4667 // first one consisting of the single comment "// line about a" and the 4668 // second one consisting of the next two comments. 4669 if (Comments.empty()) 4670 return; 4671 bool ShouldPushCommentsInCurrentLine = true; 4672 bool HasTrailAlignedWithNextToken = false; 4673 unsigned StartOfTrailAlignedWithNextToken = 0; 4674 if (NextTok) { 4675 // We are skipping the first element intentionally. 
4676 for (unsigned i = Comments.size() - 1; i > 0; --i) { 4677 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 4678 HasTrailAlignedWithNextToken = true; 4679 StartOfTrailAlignedWithNextToken = i; 4680 } 4681 } 4682 } 4683 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 4684 FormatToken *FormatTok = Comments[i]; 4685 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 4686 FormatTok->ContinuesLineCommentSection = false; 4687 } else { 4688 FormatTok->ContinuesLineCommentSection = 4689 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 4690 } 4691 if (!FormatTok->ContinuesLineCommentSection && 4692 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 4693 ShouldPushCommentsInCurrentLine = false; 4694 } 4695 if (ShouldPushCommentsInCurrentLine) 4696 pushToken(FormatTok); 4697 else 4698 CommentsBeforeNextToken.push_back(FormatTok); 4699 } 4700 } 4701 4702 void UnwrappedLineParser::readToken(int LevelDifference) { 4703 SmallVector<FormatToken *, 1> Comments; 4704 bool PreviousWasComment = false; 4705 bool FirstNonCommentOnLine = false; 4706 do { 4707 FormatTok = Tokens->getNextToken(); 4708 assert(FormatTok); 4709 while (FormatTok->getType() == TT_ConflictStart || 4710 FormatTok->getType() == TT_ConflictEnd || 4711 FormatTok->getType() == TT_ConflictAlternative) { 4712 if (FormatTok->getType() == TT_ConflictStart) 4713 conditionalCompilationStart(/*Unreachable=*/false); 4714 else if (FormatTok->getType() == TT_ConflictAlternative) 4715 conditionalCompilationAlternative(); 4716 else if (FormatTok->getType() == TT_ConflictEnd) 4717 conditionalCompilationEnd(); 4718 FormatTok = Tokens->getNextToken(); 4719 FormatTok->MustBreakBefore = true; 4720 FormatTok->MustBreakBeforeFinalized = true; 4721 } 4722 4723 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine, 4724 const FormatToken &Tok, 4725 bool PreviousWasComment) { 4726 auto IsFirstOnLine = [](const FormatToken &Tok) { 4727 return 
Tok.HasUnescapedNewline || Tok.IsFirst; 4728 }; 4729 4730 // Consider preprocessor directives preceded by block comments as first 4731 // on line. 4732 if (PreviousWasComment) 4733 return FirstNonCommentOnLine || IsFirstOnLine(Tok); 4734 return IsFirstOnLine(Tok); 4735 }; 4736 4737 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4738 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4739 PreviousWasComment = FormatTok->is(tok::comment); 4740 4741 while (!Line->InPPDirective && FormatTok->is(tok::hash) && 4742 (!Style.isVerilog() || 4743 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) && 4744 FirstNonCommentOnLine) { 4745 distributeComments(Comments, FormatTok); 4746 Comments.clear(); 4747 // If there is an unfinished unwrapped line, we flush the preprocessor 4748 // directives only after that unwrapped line was finished later. 4749 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 4750 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 4751 assert((LevelDifference >= 0 || 4752 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 4753 "LevelDifference makes Line->Level negative"); 4754 Line->Level += LevelDifference; 4755 // Comments stored before the preprocessor directive need to be output 4756 // before the preprocessor directive, at the same level as the 4757 // preprocessor directive, as we consider them to apply to the directive. 
4758 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 4759 PPBranchLevel > 0) { 4760 Line->Level += PPBranchLevel; 4761 } 4762 flushComments(isOnNewLine(*FormatTok)); 4763 parsePPDirective(); 4764 PreviousWasComment = FormatTok->is(tok::comment); 4765 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4766 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4767 } 4768 4769 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 4770 !Line->InPPDirective) { 4771 continue; 4772 } 4773 4774 if (FormatTok->is(tok::identifier) && 4775 Macros.defined(FormatTok->TokenText) && 4776 // FIXME: Allow expanding macros in preprocessor directives. 4777 !Line->InPPDirective) { 4778 FormatToken *ID = FormatTok; 4779 unsigned Position = Tokens->getPosition(); 4780 4781 // To correctly parse the code, we need to replace the tokens of the macro 4782 // call with its expansion. 4783 auto PreCall = std::move(Line); 4784 Line.reset(new UnwrappedLine); 4785 bool OldInExpansion = InExpansion; 4786 InExpansion = true; 4787 // We parse the macro call into a new line. 4788 auto Args = parseMacroCall(); 4789 InExpansion = OldInExpansion; 4790 assert(Line->Tokens.front().Tok == ID); 4791 // And remember the unexpanded macro call tokens. 4792 auto UnexpandedLine = std::move(Line); 4793 // Reset to the old line. 4794 Line = std::move(PreCall); 4795 4796 LLVM_DEBUG({ 4797 llvm::dbgs() << "Macro call: " << ID->TokenText << "("; 4798 if (Args) { 4799 llvm::dbgs() << "("; 4800 for (const auto &Arg : Args.value()) 4801 for (const auto &T : Arg) 4802 llvm::dbgs() << T->TokenText << " "; 4803 llvm::dbgs() << ")"; 4804 } 4805 llvm::dbgs() << "\n"; 4806 }); 4807 if (Macros.objectLike(ID->TokenText) && Args && 4808 !Macros.hasArity(ID->TokenText, Args->size())) { 4809 // The macro is either 4810 // - object-like, but we got argumnets, or 4811 // - overloaded to be both object-like and function-like, but none of 4812 // the function-like arities match the number of arguments. 
4813 // Thus, expand as object-like macro. 4814 LLVM_DEBUG(llvm::dbgs() 4815 << "Macro \"" << ID->TokenText 4816 << "\" not overloaded for arity " << Args->size() 4817 << "or not function-like, using object-like overload."); 4818 Args.reset(); 4819 UnexpandedLine->Tokens.resize(1); 4820 Tokens->setPosition(Position); 4821 nextToken(); 4822 assert(!Args && Macros.objectLike(ID->TokenText)); 4823 } 4824 if ((!Args && Macros.objectLike(ID->TokenText)) || 4825 (Args && Macros.hasArity(ID->TokenText, Args->size()))) { 4826 // Next, we insert the expanded tokens in the token stream at the 4827 // current position, and continue parsing. 4828 Unexpanded[ID] = std::move(UnexpandedLine); 4829 SmallVector<FormatToken *, 8> Expansion = 4830 Macros.expand(ID, std::move(Args)); 4831 if (!Expansion.empty()) 4832 FormatTok = Tokens->insertTokens(Expansion); 4833 4834 LLVM_DEBUG({ 4835 llvm::dbgs() << "Expanded: "; 4836 for (const auto &T : Expansion) 4837 llvm::dbgs() << T->TokenText << " "; 4838 llvm::dbgs() << "\n"; 4839 }); 4840 } else { 4841 LLVM_DEBUG({ 4842 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText 4843 << "\", because it was used "; 4844 if (Args) 4845 llvm::dbgs() << "with " << Args->size(); 4846 else 4847 llvm::dbgs() << "without"; 4848 llvm::dbgs() << " arguments, which doesn't match any definition.\n"; 4849 }); 4850 Tokens->setPosition(Position); 4851 FormatTok = ID; 4852 } 4853 } 4854 4855 if (FormatTok->isNot(tok::comment)) { 4856 distributeComments(Comments, FormatTok); 4857 Comments.clear(); 4858 return; 4859 } 4860 4861 Comments.push_back(FormatTok); 4862 } while (!eof()); 4863 4864 distributeComments(Comments, nullptr); 4865 Comments.clear(); 4866 } 4867 4868 namespace { 4869 template <typename Iterator> 4870 void pushTokens(Iterator Begin, Iterator End, 4871 llvm::SmallVectorImpl<FormatToken *> &Into) { 4872 for (auto I = Begin; I != End; ++I) { 4873 Into.push_back(I->Tok); 4874 for (const auto &Child : I->Children) 4875 
pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into); 4876 } 4877 } 4878 } // namespace 4879 4880 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> 4881 UnwrappedLineParser::parseMacroCall() { 4882 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args; 4883 assert(Line->Tokens.empty()); 4884 nextToken(); 4885 if (FormatTok->isNot(tok::l_paren)) 4886 return Args; 4887 unsigned Position = Tokens->getPosition(); 4888 FormatToken *Tok = FormatTok; 4889 nextToken(); 4890 Args.emplace(); 4891 auto ArgStart = std::prev(Line->Tokens.end()); 4892 4893 int Parens = 0; 4894 do { 4895 switch (FormatTok->Tok.getKind()) { 4896 case tok::l_paren: 4897 ++Parens; 4898 nextToken(); 4899 break; 4900 case tok::r_paren: { 4901 if (Parens > 0) { 4902 --Parens; 4903 nextToken(); 4904 break; 4905 } 4906 Args->push_back({}); 4907 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); 4908 nextToken(); 4909 return Args; 4910 } 4911 case tok::comma: { 4912 if (Parens > 0) { 4913 nextToken(); 4914 break; 4915 } 4916 Args->push_back({}); 4917 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); 4918 nextToken(); 4919 ArgStart = std::prev(Line->Tokens.end()); 4920 break; 4921 } 4922 default: 4923 nextToken(); 4924 break; 4925 } 4926 } while (!eof()); 4927 Line->Tokens.resize(1); 4928 Tokens->setPosition(Position); 4929 FormatTok = Tok; 4930 return {}; 4931 } 4932 4933 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 4934 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 4935 if (MustBreakBeforeNextToken) { 4936 Line->Tokens.back().Tok->MustBreakBefore = true; 4937 Line->Tokens.back().Tok->MustBreakBeforeFinalized = true; 4938 MustBreakBeforeNextToken = false; 4939 } 4940 } 4941 4942 } // end namespace format 4943 } // end namespace clang 4944