1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "FormatToken.h" 17 #include "FormatTokenLexer.h" 18 #include "FormatTokenSource.h" 19 #include "Macros.h" 20 #include "TokenAnnotator.h" 21 #include "clang/Basic/TokenKinds.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/StringRef.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/raw_os_ostream.h" 26 #include "llvm/Support/raw_ostream.h" 27 28 #include <algorithm> 29 #include <utility> 30 31 #define DEBUG_TYPE "format-parser" 32 33 namespace clang { 34 namespace format { 35 36 namespace { 37 38 void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line, 39 StringRef Prefix = "", bool PrintText = false) { 40 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn 41 << ")" << (Line.InPPDirective ? 
" MACRO" : "") << ": "; 42 bool NewLine = false; 43 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 44 E = Line.Tokens.end(); 45 I != E; ++I) { 46 if (NewLine) { 47 OS << Prefix; 48 NewLine = false; 49 } 50 OS << I->Tok->Tok.getName() << "[" 51 << "T=" << (unsigned)I->Tok->getType() 52 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText 53 << "\"] "; 54 for (SmallVectorImpl<UnwrappedLine>::const_iterator 55 CI = I->Children.begin(), 56 CE = I->Children.end(); 57 CI != CE; ++CI) { 58 OS << "\n"; 59 printLine(OS, *CI, (Prefix + " ").str()); 60 NewLine = true; 61 } 62 } 63 if (!NewLine) 64 OS << "\n"; 65 } 66 67 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) { 68 printLine(llvm::dbgs(), Line); 69 } 70 71 class ScopedDeclarationState { 72 public: 73 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack, 74 bool MustBeDeclaration) 75 : Line(Line), Stack(Stack) { 76 Line.MustBeDeclaration = MustBeDeclaration; 77 Stack.push_back(MustBeDeclaration); 78 } 79 ~ScopedDeclarationState() { 80 Stack.pop_back(); 81 if (!Stack.empty()) 82 Line.MustBeDeclaration = Stack.back(); 83 else 84 Line.MustBeDeclaration = true; 85 } 86 87 private: 88 UnwrappedLine &Line; 89 llvm::BitVector &Stack; 90 }; 91 92 } // end anonymous namespace 93 94 std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line) { 95 llvm::raw_os_ostream OS(Stream); 96 printLine(OS, Line); 97 return Stream; 98 } 99 100 class ScopedLineState { 101 public: 102 ScopedLineState(UnwrappedLineParser &Parser, 103 bool SwitchToPreprocessorLines = false) 104 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 105 if (SwitchToPreprocessorLines) 106 Parser.CurrentLines = &Parser.PreprocessorDirectives; 107 else if (!Parser.Line->Tokens.empty()) 108 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 109 PreBlockLine = std::move(Parser.Line); 110 Parser.Line = std::make_unique<UnwrappedLine>(); 111 Parser.Line->Level = 
PreBlockLine->Level; 112 Parser.Line->PPLevel = PreBlockLine->PPLevel; 113 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 114 Parser.Line->InMacroBody = PreBlockLine->InMacroBody; 115 Parser.Line->UnbracedBodyLevel = PreBlockLine->UnbracedBodyLevel; 116 } 117 118 ~ScopedLineState() { 119 if (!Parser.Line->Tokens.empty()) 120 Parser.addUnwrappedLine(); 121 assert(Parser.Line->Tokens.empty()); 122 Parser.Line = std::move(PreBlockLine); 123 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 124 Parser.MustBreakBeforeNextToken = true; 125 Parser.CurrentLines = OriginalLines; 126 } 127 128 private: 129 UnwrappedLineParser &Parser; 130 131 std::unique_ptr<UnwrappedLine> PreBlockLine; 132 SmallVectorImpl<UnwrappedLine> *OriginalLines; 133 }; 134 135 class CompoundStatementIndenter { 136 public: 137 CompoundStatementIndenter(UnwrappedLineParser *Parser, 138 const FormatStyle &Style, unsigned &LineLevel) 139 : CompoundStatementIndenter(Parser, LineLevel, 140 Style.BraceWrapping.AfterControlStatement, 141 Style.BraceWrapping.IndentBraces) {} 142 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, 143 bool WrapBrace, bool IndentBrace) 144 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 145 if (WrapBrace) 146 Parser->addUnwrappedLine(); 147 if (IndentBrace) 148 ++LineLevel; 149 } 150 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 151 152 private: 153 unsigned &LineLevel; 154 unsigned OldLineLevel; 155 }; 156 157 UnwrappedLineParser::UnwrappedLineParser( 158 SourceManager &SourceMgr, const FormatStyle &Style, 159 const AdditionalKeywords &Keywords, unsigned FirstStartColumn, 160 ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback, 161 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator, 162 IdentifierTable &IdentTable) 163 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 164 CurrentLines(&Lines), Style(Style), IsCpp(Style.isCpp()), 165 LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords), 
166 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 167 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 168 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 169 ? IG_Rejected 170 : IG_Inited), 171 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn), 172 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) { 173 assert(IsCpp == LangOpts.CXXOperatorNames); 174 } 175 176 void UnwrappedLineParser::reset() { 177 PPBranchLevel = -1; 178 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 179 ? IG_Rejected 180 : IG_Inited; 181 IncludeGuardToken = nullptr; 182 Line.reset(new UnwrappedLine); 183 CommentsBeforeNextToken.clear(); 184 FormatTok = nullptr; 185 MustBreakBeforeNextToken = false; 186 IsDecltypeAutoFunction = false; 187 PreprocessorDirectives.clear(); 188 CurrentLines = &Lines; 189 DeclarationScopeStack.clear(); 190 NestedTooDeep.clear(); 191 NestedLambdas.clear(); 192 PPStack.clear(); 193 Line->FirstStartColumn = FirstStartColumn; 194 195 if (!Unexpanded.empty()) 196 for (FormatToken *Token : AllTokens) 197 Token->MacroCtx.reset(); 198 CurrentExpandedLines.clear(); 199 ExpandedLines.clear(); 200 Unexpanded.clear(); 201 InExpansion = false; 202 Reconstruct.reset(); 203 } 204 205 void UnwrappedLineParser::parse() { 206 IndexedTokenSource TokenSource(AllTokens); 207 Line->FirstStartColumn = FirstStartColumn; 208 do { 209 LLVM_DEBUG(llvm::dbgs() << "----\n"); 210 reset(); 211 Tokens = &TokenSource; 212 TokenSource.reset(); 213 214 readToken(); 215 parseFile(); 216 217 // If we found an include guard then all preprocessor directives (other than 218 // the guard) are over-indented by one. 219 if (IncludeGuard == IG_Found) { 220 for (auto &Line : Lines) 221 if (Line.InPPDirective && Line.Level > 0) 222 --Line.Level; 223 } 224 225 // Create line with eof token. 
226 assert(eof()); 227 pushToken(FormatTok); 228 addUnwrappedLine(); 229 230 // In a first run, format everything with the lines containing macro calls 231 // replaced by the expansion. 232 if (!ExpandedLines.empty()) { 233 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n"); 234 for (const auto &Line : Lines) { 235 if (!Line.Tokens.empty()) { 236 auto it = ExpandedLines.find(Line.Tokens.begin()->Tok); 237 if (it != ExpandedLines.end()) { 238 for (const auto &Expanded : it->second) { 239 LLVM_DEBUG(printDebugInfo(Expanded)); 240 Callback.consumeUnwrappedLine(Expanded); 241 } 242 continue; 243 } 244 } 245 LLVM_DEBUG(printDebugInfo(Line)); 246 Callback.consumeUnwrappedLine(Line); 247 } 248 Callback.finishRun(); 249 } 250 251 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n"); 252 for (const UnwrappedLine &Line : Lines) { 253 LLVM_DEBUG(printDebugInfo(Line)); 254 Callback.consumeUnwrappedLine(Line); 255 } 256 Callback.finishRun(); 257 Lines.clear(); 258 while (!PPLevelBranchIndex.empty() && 259 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 260 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 261 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 262 } 263 if (!PPLevelBranchIndex.empty()) { 264 ++PPLevelBranchIndex.back(); 265 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 266 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 267 } 268 } while (!PPLevelBranchIndex.empty()); 269 } 270 271 void UnwrappedLineParser::parseFile() { 272 // The top-level context in a file always has declarations, except for pre- 273 // processor directives and JavaScript files. 274 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript(); 275 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 276 MustBeDeclaration); 277 if (Style.Language == FormatStyle::LK_TextProto) 278 parseBracedList(); 279 else 280 parseLevel(); 281 // Make sure to format the remaining tokens. 
282 // 283 // LK_TextProto is special since its top-level is parsed as the body of a 284 // braced list, which does not necessarily have natural line separators such 285 // as a semicolon. Comments after the last entry that have been determined to 286 // not belong to that line, as in: 287 // key: value 288 // // endfile comment 289 // do not have a chance to be put on a line of their own until this point. 290 // Here we add this newline before end-of-file comments. 291 if (Style.Language == FormatStyle::LK_TextProto && 292 !CommentsBeforeNextToken.empty()) { 293 addUnwrappedLine(); 294 } 295 flushComments(true); 296 addUnwrappedLine(); 297 } 298 299 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() { 300 do { 301 switch (FormatTok->Tok.getKind()) { 302 case tok::l_brace: 303 return; 304 default: 305 if (FormatTok->is(Keywords.kw_where)) { 306 addUnwrappedLine(); 307 nextToken(); 308 parseCSharpGenericTypeConstraint(); 309 break; 310 } 311 nextToken(); 312 break; 313 } 314 } while (!eof()); 315 } 316 317 void UnwrappedLineParser::parseCSharpAttribute() { 318 int UnpairedSquareBrackets = 1; 319 do { 320 switch (FormatTok->Tok.getKind()) { 321 case tok::r_square: 322 nextToken(); 323 --UnpairedSquareBrackets; 324 if (UnpairedSquareBrackets == 0) { 325 addUnwrappedLine(); 326 return; 327 } 328 break; 329 case tok::l_square: 330 ++UnpairedSquareBrackets; 331 nextToken(); 332 break; 333 default: 334 nextToken(); 335 break; 336 } 337 } while (!eof()); 338 } 339 340 bool UnwrappedLineParser::precededByCommentOrPPDirective() const { 341 if (!Lines.empty() && Lines.back().InPPDirective) 342 return true; 343 344 const FormatToken *Previous = Tokens->getPreviousToken(); 345 return Previous && Previous->is(tok::comment) && 346 (Previous->IsMultiline || Previous->NewlinesBefore > 0); 347 } 348 349 /// \brief Parses a level, that is ???. 350 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level. 
351 /// \param IfKind The \p if statement kind in the level. 352 /// \param IfLeftBrace The left brace of the \p if block in the level. 353 /// \returns true if a simple block of if/else/for/while, or false otherwise. 354 /// (A simple block has a single statement.) 355 bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace, 356 IfStmtKind *IfKind, 357 FormatToken **IfLeftBrace) { 358 const bool InRequiresExpression = 359 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace); 360 const bool IsPrecededByCommentOrPPDirective = 361 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective(); 362 FormatToken *IfLBrace = nullptr; 363 bool HasDoWhile = false; 364 bool HasLabel = false; 365 unsigned StatementCount = 0; 366 bool SwitchLabelEncountered = false; 367 368 do { 369 if (FormatTok->isAttribute()) { 370 nextToken(); 371 if (FormatTok->is(tok::l_paren)) 372 parseParens(); 373 continue; 374 } 375 tok::TokenKind Kind = FormatTok->Tok.getKind(); 376 if (FormatTok->is(TT_MacroBlockBegin)) 377 Kind = tok::l_brace; 378 else if (FormatTok->is(TT_MacroBlockEnd)) 379 Kind = tok::r_brace; 380 381 auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile, 382 &HasLabel, &StatementCount] { 383 parseStructuralElement(OpeningBrace, IfKind, &IfLBrace, 384 HasDoWhile ? nullptr : &HasDoWhile, 385 HasLabel ? nullptr : &HasLabel); 386 ++StatementCount; 387 assert(StatementCount > 0 && "StatementCount overflow!"); 388 }; 389 390 switch (Kind) { 391 case tok::comment: 392 nextToken(); 393 addUnwrappedLine(); 394 break; 395 case tok::l_brace: 396 if (InRequiresExpression) { 397 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace); 398 } else if (FormatTok->Previous && 399 FormatTok->Previous->ClosesRequiresClause) { 400 // We need the 'default' case here to correctly parse a function 401 // l_brace. 
402 ParseDefault(); 403 continue; 404 } 405 if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin)) { 406 if (tryToParseBracedList()) 407 continue; 408 FormatTok->setFinalizedType(TT_BlockLBrace); 409 } 410 parseBlock(); 411 ++StatementCount; 412 assert(StatementCount > 0 && "StatementCount overflow!"); 413 addUnwrappedLine(); 414 break; 415 case tok::r_brace: 416 if (OpeningBrace) { 417 if (!Style.RemoveBracesLLVM || Line->InPPDirective || 418 !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) { 419 return false; 420 } 421 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel || 422 HasDoWhile || IsPrecededByCommentOrPPDirective || 423 precededByCommentOrPPDirective()) { 424 return false; 425 } 426 const FormatToken *Next = Tokens->peekNextToken(); 427 if (Next->is(tok::comment) && Next->NewlinesBefore == 0) 428 return false; 429 if (IfLeftBrace) 430 *IfLeftBrace = IfLBrace; 431 return true; 432 } 433 nextToken(); 434 addUnwrappedLine(); 435 break; 436 case tok::kw_default: { 437 unsigned StoredPosition = Tokens->getPosition(); 438 auto *Next = Tokens->getNextNonComment(); 439 FormatTok = Tokens->setPosition(StoredPosition); 440 if (!Next->isOneOf(tok::colon, tok::arrow)) { 441 // default not followed by `:` or `->` is not a case label; treat it 442 // like an identifier. 443 parseStructuralElement(); 444 break; 445 } 446 // Else, if it is 'default:', fall through to the case handling. 447 [[fallthrough]]; 448 } 449 case tok::kw_case: 450 if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() || 451 (Style.isJavaScript() && Line->MustBeDeclaration)) { 452 // Proto: there are no switch/case statements 453 // Verilog: Case labels don't have this word. We handle case 454 // labels including default in TokenAnnotator. 455 // JavaScript: A 'case: string' style field declaration. 
456 ParseDefault(); 457 break; 458 } 459 if (!SwitchLabelEncountered && 460 (Style.IndentCaseLabels || 461 (OpeningBrace && OpeningBrace->is(TT_SwitchExpressionLBrace)) || 462 (Line->InPPDirective && Line->Level == 1))) { 463 ++Line->Level; 464 } 465 SwitchLabelEncountered = true; 466 parseStructuralElement(); 467 break; 468 case tok::l_square: 469 if (Style.isCSharp()) { 470 nextToken(); 471 parseCSharpAttribute(); 472 break; 473 } 474 if (handleCppAttributes()) 475 break; 476 [[fallthrough]]; 477 default: 478 ParseDefault(); 479 break; 480 } 481 } while (!eof()); 482 483 return false; 484 } 485 486 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 487 // We'll parse forward through the tokens until we hit 488 // a closing brace or eof - note that getNextToken() will 489 // parse macros, so this will magically work inside macro 490 // definitions, too. 491 unsigned StoredPosition = Tokens->getPosition(); 492 FormatToken *Tok = FormatTok; 493 const FormatToken *PrevTok = Tok->Previous; 494 // Keep a stack of positions of lbrace tokens. We will 495 // update information about whether an lbrace starts a 496 // braced init list or a different block during the loop. 497 struct StackEntry { 498 FormatToken *Tok; 499 const FormatToken *PrevTok; 500 }; 501 SmallVector<StackEntry, 8> LBraceStack; 502 assert(Tok->is(tok::l_brace)); 503 504 do { 505 auto *NextTok = Tokens->getNextNonComment(); 506 507 if (!Line->InMacroBody && !Style.isTableGen()) { 508 // Skip PPDirective lines and comments. 
509 while (NextTok->is(tok::hash)) { 510 NextTok = Tokens->getNextToken(); 511 if (NextTok->is(tok::pp_not_keyword)) 512 break; 513 do { 514 NextTok = Tokens->getNextToken(); 515 } while (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof)); 516 517 while (NextTok->is(tok::comment)) 518 NextTok = Tokens->getNextToken(); 519 } 520 } 521 522 switch (Tok->Tok.getKind()) { 523 case tok::l_brace: 524 if (Style.isJavaScript() && PrevTok) { 525 if (PrevTok->isOneOf(tok::colon, tok::less)) { 526 // A ':' indicates this code is in a type, or a braced list 527 // following a label in an object literal ({a: {b: 1}}). 528 // A '<' could be an object used in a comparison, but that is nonsense 529 // code (can never return true), so more likely it is a generic type 530 // argument (`X<{a: string; b: number}>`). 531 // The code below could be confused by semicolons between the 532 // individual members in a type member list, which would normally 533 // trigger BK_Block. In both cases, this must be parsed as an inline 534 // braced init. 535 Tok->setBlockKind(BK_BracedInit); 536 } else if (PrevTok->is(tok::r_paren)) { 537 // `) { }` can only occur in function or method declarations in JS. 538 Tok->setBlockKind(BK_Block); 539 } 540 } else { 541 Tok->setBlockKind(BK_Unknown); 542 } 543 LBraceStack.push_back({Tok, PrevTok}); 544 break; 545 case tok::r_brace: 546 if (LBraceStack.empty()) 547 break; 548 if (auto *LBrace = LBraceStack.back().Tok; LBrace->is(BK_Unknown)) { 549 bool ProbablyBracedList = false; 550 if (Style.Language == FormatStyle::LK_Proto) { 551 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 552 } else if (LBrace->isNot(TT_EnumLBrace)) { 553 // Using OriginalColumn to distinguish between ObjC methods and 554 // binary operators is a bit hacky. 555 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 556 NextTok->OriginalColumn == 0; 557 558 // Try to detect a braced list. 
Note that regardless how we mark inner 559 // braces here, we will overwrite the BlockKind later if we parse a 560 // braced list (where all blocks inside are by default braced lists), 561 // or when we explicitly detect blocks (for example while parsing 562 // lambdas). 563 564 // If we already marked the opening brace as braced list, the closing 565 // must also be part of it. 566 ProbablyBracedList = LBrace->is(TT_BracedListLBrace); 567 568 ProbablyBracedList = ProbablyBracedList || 569 (Style.isJavaScript() && 570 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 571 Keywords.kw_as)); 572 ProbablyBracedList = 573 ProbablyBracedList || (IsCpp && (PrevTok->Tok.isLiteral() || 574 NextTok->is(tok::l_paren))); 575 576 // If there is a comma, semicolon or right paren after the closing 577 // brace, we assume this is a braced initializer list. 578 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 579 // braced list in JS. 580 ProbablyBracedList = 581 ProbablyBracedList || 582 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 583 tok::r_paren, tok::r_square, tok::ellipsis); 584 585 // Distinguish between braced list in a constructor initializer list 586 // followed by constructor body, or just adjacent blocks. 
587 ProbablyBracedList = 588 ProbablyBracedList || 589 (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok && 590 LBraceStack.back().PrevTok->isOneOf(tok::identifier, 591 tok::greater)); 592 593 ProbablyBracedList = 594 ProbablyBracedList || 595 (NextTok->is(tok::identifier) && 596 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)); 597 598 ProbablyBracedList = ProbablyBracedList || 599 (NextTok->is(tok::semi) && 600 (!ExpectClassBody || LBraceStack.size() != 1)); 601 602 ProbablyBracedList = 603 ProbablyBracedList || 604 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 605 606 if (!Style.isCSharp() && NextTok->is(tok::l_square)) { 607 // We can have an array subscript after a braced init 608 // list, but C++11 attributes are expected after blocks. 609 NextTok = Tokens->getNextToken(); 610 ProbablyBracedList = NextTok->isNot(tok::l_square); 611 } 612 613 // Cpp macro definition body that is a nonempty braced list or block: 614 if (IsCpp && Line->InMacroBody && PrevTok != FormatTok && 615 !FormatTok->Previous && NextTok->is(tok::eof) && 616 // A statement can end with only `;` (simple statement), a block 617 // closing brace (compound statement), or `:` (label statement). 618 // If PrevTok is a block opening brace, Tok ends an empty block. 619 !PrevTok->isOneOf(tok::semi, BK_Block, tok::colon)) { 620 ProbablyBracedList = true; 621 } 622 } 623 const auto BlockKind = ProbablyBracedList ? 
BK_BracedInit : BK_Block; 624 Tok->setBlockKind(BlockKind); 625 LBrace->setBlockKind(BlockKind); 626 } 627 LBraceStack.pop_back(); 628 break; 629 case tok::identifier: 630 if (Tok->isNot(TT_StatementMacro)) 631 break; 632 [[fallthrough]]; 633 case tok::at: 634 case tok::semi: 635 case tok::kw_if: 636 case tok::kw_while: 637 case tok::kw_for: 638 case tok::kw_switch: 639 case tok::kw_try: 640 case tok::kw___try: 641 if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown)) 642 LBraceStack.back().Tok->setBlockKind(BK_Block); 643 break; 644 default: 645 break; 646 } 647 648 PrevTok = Tok; 649 Tok = NextTok; 650 } while (Tok->isNot(tok::eof) && !LBraceStack.empty()); 651 652 // Assume other blocks for all unclosed opening braces. 653 for (const auto &Entry : LBraceStack) 654 if (Entry.Tok->is(BK_Unknown)) 655 Entry.Tok->setBlockKind(BK_Block); 656 657 FormatTok = Tokens->setPosition(StoredPosition); 658 } 659 660 // Sets the token type of the directly previous right brace. 661 void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) { 662 if (auto Prev = FormatTok->getPreviousNonComment(); 663 Prev && Prev->is(tok::r_brace)) { 664 Prev->setFinalizedType(Type); 665 } 666 } 667 668 template <class T> 669 static inline void hash_combine(std::size_t &seed, const T &v) { 670 std::hash<T> hasher; 671 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 672 } 673 674 size_t UnwrappedLineParser::computePPHash() const { 675 size_t h = 0; 676 for (const auto &i : PPStack) { 677 hash_combine(h, size_t(i.Kind)); 678 hash_combine(h, i.Line); 679 } 680 return h; 681 } 682 683 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace 684 // is not null, subtracts its length (plus the preceding space) when computing 685 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before 686 // running the token annotator on it so that we can restore them afterward. 
687 bool UnwrappedLineParser::mightFitOnOneLine( 688 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const { 689 const auto ColumnLimit = Style.ColumnLimit; 690 if (ColumnLimit == 0) 691 return true; 692 693 auto &Tokens = ParsedLine.Tokens; 694 assert(!Tokens.empty()); 695 696 const auto *LastToken = Tokens.back().Tok; 697 assert(LastToken); 698 699 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size()); 700 701 int Index = 0; 702 for (const auto &Token : Tokens) { 703 assert(Token.Tok); 704 auto &SavedToken = SavedTokens[Index++]; 705 SavedToken.Tok = new FormatToken; 706 SavedToken.Tok->copyFrom(*Token.Tok); 707 SavedToken.Children = std::move(Token.Children); 708 } 709 710 AnnotatedLine Line(ParsedLine); 711 assert(Line.Last == LastToken); 712 713 TokenAnnotator Annotator(Style, Keywords); 714 Annotator.annotate(Line); 715 Annotator.calculateFormattingInformation(Line); 716 717 auto Length = LastToken->TotalLength; 718 if (OpeningBrace) { 719 assert(OpeningBrace != Tokens.front().Tok); 720 if (auto Prev = OpeningBrace->Previous; 721 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) { 722 Length -= ColumnLimit; 723 } 724 Length -= OpeningBrace->TokenText.size() + 1; 725 } 726 727 if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) { 728 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace)); 729 Length -= FirstToken->TokenText.size() + 1; 730 } 731 732 Index = 0; 733 for (auto &Token : Tokens) { 734 const auto &SavedToken = SavedTokens[Index++]; 735 Token.Tok->copyFrom(*SavedToken.Tok); 736 Token.Children = std::move(SavedToken.Children); 737 delete SavedToken.Tok; 738 } 739 740 // If these change PPLevel needs to be used for get correct indentation. 
741 assert(!Line.InMacroBody); 742 assert(!Line.InPPDirective); 743 return Line.Level * Style.IndentWidth + Length <= ColumnLimit; 744 } 745 746 FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration, 747 unsigned AddLevels, bool MunchSemi, 748 bool KeepBraces, 749 IfStmtKind *IfKind, 750 bool UnindentWhitesmithsBraces) { 751 auto HandleVerilogBlockLabel = [this]() { 752 // ":" name 753 if (Style.isVerilog() && FormatTok->is(tok::colon)) { 754 nextToken(); 755 if (Keywords.isVerilogIdentifier(*FormatTok)) 756 nextToken(); 757 } 758 }; 759 760 // Whether this is a Verilog-specific block that has a special header like a 761 // module. 762 const bool VerilogHierarchy = 763 Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok); 764 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) || 765 (Style.isVerilog() && 766 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) && 767 "'{' or macro block token expected"); 768 FormatToken *Tok = FormatTok; 769 const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment); 770 auto Index = CurrentLines->size(); 771 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 772 FormatTok->setBlockKind(BK_Block); 773 774 // For Whitesmiths mode, jump to the next level prior to skipping over the 775 // braces. 776 if (!VerilogHierarchy && AddLevels > 0 && 777 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 778 ++Line->Level; 779 } 780 781 size_t PPStartHash = computePPHash(); 782 783 const unsigned InitialLevel = Line->Level; 784 if (VerilogHierarchy) { 785 AddLevels += parseVerilogHierarchyHeader(); 786 } else { 787 nextToken(/*LevelDifference=*/AddLevels); 788 HandleVerilogBlockLabel(); 789 } 790 791 // Bail out if there are too many levels. Otherwise, the stack might overflow. 792 if (Line->Level > 300) 793 return nullptr; 794 795 if (MacroBlock && FormatTok->is(tok::l_paren)) 796 parseParens(); 797 798 size_t NbPreprocessorDirectives = 799 !parsingPPDirective() ? 
PreprocessorDirectives.size() : 0; 800 addUnwrappedLine(); 801 size_t OpeningLineIndex = 802 CurrentLines->empty() 803 ? (UnwrappedLine::kInvalidIndex) 804 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 805 806 // Whitesmiths is weird here. The brace needs to be indented for the namespace 807 // block, but the block itself may not be indented depending on the style 808 // settings. This allows the format to back up one level in those cases. 809 if (UnindentWhitesmithsBraces) 810 --Line->Level; 811 812 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 813 MustBeDeclaration); 814 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths) 815 Line->Level += AddLevels; 816 817 FormatToken *IfLBrace = nullptr; 818 const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace); 819 820 if (eof()) 821 return IfLBrace; 822 823 if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd) 824 : FormatTok->isNot(tok::r_brace)) { 825 Line->Level = InitialLevel; 826 FormatTok->setBlockKind(BK_Block); 827 return IfLBrace; 828 } 829 830 if (FormatTok->is(tok::r_brace)) { 831 FormatTok->setBlockKind(BK_Block); 832 if (Tok->is(TT_NamespaceLBrace)) 833 FormatTok->setFinalizedType(TT_NamespaceRBrace); 834 } 835 836 const bool IsFunctionRBrace = 837 FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace); 838 839 auto RemoveBraces = [=]() mutable { 840 if (!SimpleBlock) 841 return false; 842 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)); 843 assert(FormatTok->is(tok::r_brace)); 844 const bool WrappedOpeningBrace = !Tok->Previous; 845 if (WrappedOpeningBrace && FollowedByComment) 846 return false; 847 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional; 848 if (KeepBraces && !HasRequiredIfBraces) 849 return false; 850 if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) { 851 const FormatToken *Previous = Tokens->getPreviousToken(); 852 assert(Previous); 853 if (Previous->is(tok::r_brace) && !Previous->Optional) 854 
return false; 855 } 856 assert(!CurrentLines->empty()); 857 auto &LastLine = CurrentLines->back(); 858 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine)) 859 return false; 860 if (Tok->is(TT_ElseLBrace)) 861 return true; 862 if (WrappedOpeningBrace) { 863 assert(Index > 0); 864 --Index; // The line above the wrapped l_brace. 865 Tok = nullptr; 866 } 867 return mightFitOnOneLine((*CurrentLines)[Index], Tok); 868 }; 869 if (RemoveBraces()) { 870 Tok->MatchingParen = FormatTok; 871 FormatTok->MatchingParen = Tok; 872 } 873 874 size_t PPEndHash = computePPHash(); 875 876 // Munch the closing brace. 877 nextToken(/*LevelDifference=*/-AddLevels); 878 879 // When this is a function block and there is an unnecessary semicolon 880 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of 881 // it later). 882 if (Style.RemoveSemicolon && IsFunctionRBrace) { 883 while (FormatTok->is(tok::semi)) { 884 FormatTok->Optional = true; 885 nextToken(); 886 } 887 } 888 889 HandleVerilogBlockLabel(); 890 891 if (MacroBlock && FormatTok->is(tok::l_paren)) 892 parseParens(); 893 894 Line->Level = InitialLevel; 895 896 if (FormatTok->is(tok::kw_noexcept)) { 897 // A noexcept in a requires expression. 898 nextToken(); 899 } 900 901 if (FormatTok->is(tok::arrow)) { 902 // Following the } or noexcept we can find a trailing return type arrow 903 // as part of an implicit conversion constraint. 
904 nextToken(); 905 parseStructuralElement(); 906 } 907 908 if (MunchSemi && FormatTok->is(tok::semi)) 909 nextToken(); 910 911 if (PPStartHash == PPEndHash) { 912 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 913 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 914 // Update the opening line to add the forward reference as well 915 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 916 CurrentLines->size() - 1; 917 } 918 } 919 920 return IfLBrace; 921 } 922 923 static bool isGoogScope(const UnwrappedLine &Line) { 924 // FIXME: Closure-library specific stuff should not be hard-coded but be 925 // configurable. 926 if (Line.Tokens.size() < 4) 927 return false; 928 auto I = Line.Tokens.begin(); 929 if (I->Tok->TokenText != "goog") 930 return false; 931 ++I; 932 if (I->Tok->isNot(tok::period)) 933 return false; 934 ++I; 935 if (I->Tok->TokenText != "scope") 936 return false; 937 ++I; 938 return I->Tok->is(tok::l_paren); 939 } 940 941 static bool isIIFE(const UnwrappedLine &Line, 942 const AdditionalKeywords &Keywords) { 943 // Look for the start of an immediately invoked anonymous function. 944 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 945 // This is commonly done in JavaScript to create a new, anonymous scope. 946 // Example: (function() { ... 
})() 947 if (Line.Tokens.size() < 3) 948 return false; 949 auto I = Line.Tokens.begin(); 950 if (I->Tok->isNot(tok::l_paren)) 951 return false; 952 ++I; 953 if (I->Tok->isNot(Keywords.kw_function)) 954 return false; 955 ++I; 956 return I->Tok->is(tok::l_paren); 957 } 958 959 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 960 const FormatToken &InitialToken) { 961 tok::TokenKind Kind = InitialToken.Tok.getKind(); 962 if (InitialToken.is(TT_NamespaceMacro)) 963 Kind = tok::kw_namespace; 964 965 switch (Kind) { 966 case tok::kw_namespace: 967 return Style.BraceWrapping.AfterNamespace; 968 case tok::kw_class: 969 return Style.BraceWrapping.AfterClass; 970 case tok::kw_union: 971 return Style.BraceWrapping.AfterUnion; 972 case tok::kw_struct: 973 return Style.BraceWrapping.AfterStruct; 974 case tok::kw_enum: 975 return Style.BraceWrapping.AfterEnum; 976 default: 977 return false; 978 } 979 } 980 981 void UnwrappedLineParser::parseChildBlock() { 982 assert(FormatTok->is(tok::l_brace)); 983 FormatTok->setBlockKind(BK_Block); 984 const FormatToken *OpeningBrace = FormatTok; 985 nextToken(); 986 { 987 bool SkipIndent = (Style.isJavaScript() && 988 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 989 ScopedLineState LineState(*this); 990 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 991 /*MustBeDeclaration=*/false); 992 Line->Level += SkipIndent ? 0 : 1; 993 parseLevel(OpeningBrace); 994 flushComments(isOnNewLine(*FormatTok)); 995 Line->Level -= SkipIndent ? 
0 : 1; 996 } 997 nextToken(); 998 } 999 1000 void UnwrappedLineParser::parsePPDirective() { 1001 assert(FormatTok->is(tok::hash) && "'#' expected"); 1002 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 1003 1004 nextToken(); 1005 1006 if (!FormatTok->Tok.getIdentifierInfo()) { 1007 parsePPUnknown(); 1008 return; 1009 } 1010 1011 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 1012 case tok::pp_define: 1013 parsePPDefine(); 1014 return; 1015 case tok::pp_if: 1016 parsePPIf(/*IfDef=*/false); 1017 break; 1018 case tok::pp_ifdef: 1019 case tok::pp_ifndef: 1020 parsePPIf(/*IfDef=*/true); 1021 break; 1022 case tok::pp_else: 1023 case tok::pp_elifdef: 1024 case tok::pp_elifndef: 1025 case tok::pp_elif: 1026 parsePPElse(); 1027 break; 1028 case tok::pp_endif: 1029 parsePPEndIf(); 1030 break; 1031 case tok::pp_pragma: 1032 parsePPPragma(); 1033 break; 1034 default: 1035 parsePPUnknown(); 1036 break; 1037 } 1038 } 1039 1040 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 1041 size_t Line = CurrentLines->size(); 1042 if (CurrentLines == &PreprocessorDirectives) 1043 Line += Lines.size(); 1044 1045 if (Unreachable || 1046 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) { 1047 PPStack.push_back({PP_Unreachable, Line}); 1048 } else { 1049 PPStack.push_back({PP_Conditional, Line}); 1050 } 1051 } 1052 1053 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 1054 ++PPBranchLevel; 1055 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 1056 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 1057 PPLevelBranchIndex.push_back(0); 1058 PPLevelBranchCount.push_back(0); 1059 } 1060 PPChainBranchIndex.push(Unreachable ? 
-1 : 0); 1061 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 1062 conditionalCompilationCondition(Unreachable || Skip); 1063 } 1064 1065 void UnwrappedLineParser::conditionalCompilationAlternative() { 1066 if (!PPStack.empty()) 1067 PPStack.pop_back(); 1068 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 1069 if (!PPChainBranchIndex.empty()) 1070 ++PPChainBranchIndex.top(); 1071 conditionalCompilationCondition( 1072 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 1073 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 1074 } 1075 1076 void UnwrappedLineParser::conditionalCompilationEnd() { 1077 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 1078 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 1079 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) 1080 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 1081 } 1082 // Guard against #endif's without #if. 1083 if (PPBranchLevel > -1) 1084 --PPBranchLevel; 1085 if (!PPChainBranchIndex.empty()) 1086 PPChainBranchIndex.pop(); 1087 if (!PPStack.empty()) 1088 PPStack.pop_back(); 1089 } 1090 1091 void UnwrappedLineParser::parsePPIf(bool IfDef) { 1092 bool IfNDef = FormatTok->is(tok::pp_ifndef); 1093 nextToken(); 1094 bool Unreachable = false; 1095 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 1096 Unreachable = true; 1097 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 1098 Unreachable = true; 1099 conditionalCompilationStart(Unreachable); 1100 FormatToken *IfCondition = FormatTok; 1101 // If there's a #ifndef on the first line, and the only lines before it are 1102 // comments, it could be an include guard. 
1103 bool MaybeIncludeGuard = IfNDef; 1104 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1105 for (auto &Line : Lines) { 1106 if (Line.Tokens.front().Tok->isNot(tok::comment)) { 1107 MaybeIncludeGuard = false; 1108 IncludeGuard = IG_Rejected; 1109 break; 1110 } 1111 } 1112 } 1113 --PPBranchLevel; 1114 parsePPUnknown(); 1115 ++PPBranchLevel; 1116 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1117 IncludeGuard = IG_IfNdefed; 1118 IncludeGuardToken = IfCondition; 1119 } 1120 } 1121 1122 void UnwrappedLineParser::parsePPElse() { 1123 // If a potential include guard has an #else, it's not an include guard. 1124 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 1125 IncludeGuard = IG_Rejected; 1126 // Don't crash when there is an #else without an #if. 1127 assert(PPBranchLevel >= -1); 1128 if (PPBranchLevel == -1) 1129 conditionalCompilationStart(/*Unreachable=*/true); 1130 conditionalCompilationAlternative(); 1131 --PPBranchLevel; 1132 parsePPUnknown(); 1133 ++PPBranchLevel; 1134 } 1135 1136 void UnwrappedLineParser::parsePPEndIf() { 1137 conditionalCompilationEnd(); 1138 parsePPUnknown(); 1139 // If the #endif of a potential include guard is the last thing in the file, 1140 // then we found an include guard. 
if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
      Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
    IncludeGuard = IG_Found;
  }
}

/// Parses a #define directive: the macro name, an optional parameter list
/// and the macro body. Also advances the include-guard state machine when the
/// defined name matches the token remembered from a preceding #ifndef.
void UnwrappedLineParser::parsePPDefine() {
  nextToken();

  // A #define that is not followed by an identifier cannot be part of an
  // include guard; the rest of the directive is consumed as unknown.
  if (!FormatTok->Tok.getIdentifierInfo()) {
    IncludeGuard = IG_Rejected;
    IncludeGuardToken = nullptr;
    parsePPUnknown();
    return;
  }

  if (IncludeGuard == IG_IfNdefed &&
      IncludeGuardToken->TokenText == FormatTok->TokenText) {
    IncludeGuard = IG_Defined;
    IncludeGuardToken = nullptr;
    // Reject the candidate if any line collected so far starts with
    // something other than a comment or a '#'.
    for (auto &Line : Lines) {
      if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
        IncludeGuard = IG_Rejected;
        break;
      }
    }
  }

  // In the context of a define, even keywords should be treated as normal
  // identifiers. Setting the kind to identifier is not enough, because we need
  // to treat additional keywords like __except as well, which are already
  // identifiers. Setting the identifier info to null interferes with include
  // guard processing above, and changes preprocessing nesting.
  FormatTok->Tok.setKind(tok::identifier);
  FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
  nextToken();
  // Only a '(' that directly follows the macro name (no whitespace) is parsed
  // as part of the macro header.
  if (FormatTok->Tok.getKind() == tok::l_paren &&
      !FormatTok->hasWhitespaceBefore()) {
    parseParens();
  }
  if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
    Line->Level += PPBranchLevel + 1;
  addUnwrappedLine();
  ++Line->Level;

  // While a potential include guard is active (IG_Defined), no extra
  // preprocessor level is added for it.
  Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ?
0 : 1); 1187 assert((int)Line->PPLevel >= 0); 1188 Line->InMacroBody = true; 1189 1190 if (Style.SkipMacroDefinitionBody) { 1191 while (!eof()) { 1192 FormatTok->Finalized = true; 1193 FormatTok = Tokens->getNextToken(); 1194 } 1195 addUnwrappedLine(); 1196 return; 1197 } 1198 1199 // Errors during a preprocessor directive can only affect the layout of the 1200 // preprocessor directive, and thus we ignore them. An alternative approach 1201 // would be to use the same approach we use on the file level (no 1202 // re-indentation if there was a structural error) within the macro 1203 // definition. 1204 parseFile(); 1205 } 1206 1207 void UnwrappedLineParser::parsePPPragma() { 1208 Line->InPragmaDirective = true; 1209 parsePPUnknown(); 1210 } 1211 1212 void UnwrappedLineParser::parsePPUnknown() { 1213 do { 1214 nextToken(); 1215 } while (!eof()); 1216 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1217 Line->Level += PPBranchLevel + 1; 1218 addUnwrappedLine(); 1219 } 1220 1221 // Here we exclude certain tokens that are not usually the first token in an 1222 // unwrapped line. This is used in attempt to distinguish macro calls without 1223 // trailing semicolons from other constructs split to several lines. 1224 static bool tokenCanStartNewLine(const FormatToken &Tok) { 1225 // Semicolon can be a null-statement, l_square can be a start of a macro or 1226 // a C++11 attribute, but this doesn't seem to be common. 1227 return !Tok.isOneOf(tok::semi, tok::l_brace, 1228 // Tokens that can only be used as binary operators and a 1229 // part of overloaded operator names. 
1230 tok::period, tok::periodstar, tok::arrow, tok::arrowstar, 1231 tok::less, tok::greater, tok::slash, tok::percent, 1232 tok::lessless, tok::greatergreater, tok::equal, 1233 tok::plusequal, tok::minusequal, tok::starequal, 1234 tok::slashequal, tok::percentequal, tok::ampequal, 1235 tok::pipeequal, tok::caretequal, tok::greatergreaterequal, 1236 tok::lesslessequal, 1237 // Colon is used in labels, base class lists, initializer 1238 // lists, range-based for loops, ternary operator, but 1239 // should never be the first token in an unwrapped line. 1240 tok::colon, 1241 // 'noexcept' is a trailing annotation. 1242 tok::kw_noexcept); 1243 } 1244 1245 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 1246 const FormatToken *FormatTok) { 1247 // FIXME: This returns true for C/C++ keywords like 'struct'. 1248 return FormatTok->is(tok::identifier) && 1249 (!FormatTok->Tok.getIdentifierInfo() || 1250 !FormatTok->isOneOf( 1251 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 1252 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 1253 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 1254 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 1255 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 1256 Keywords.kw_instanceof, Keywords.kw_interface, 1257 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from)); 1258 } 1259 1260 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 1261 const FormatToken *FormatTok) { 1262 return FormatTok->Tok.isLiteral() || 1263 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 1264 mustBeJSIdent(Keywords, FormatTok); 1265 } 1266 1267 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 1268 // when encountered after a value (see mustBeJSIdentOrValue). 
1269 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 1270 const FormatToken *FormatTok) { 1271 return FormatTok->isOneOf( 1272 tok::kw_return, Keywords.kw_yield, 1273 // conditionals 1274 tok::kw_if, tok::kw_else, 1275 // loops 1276 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 1277 // switch/case 1278 tok::kw_switch, tok::kw_case, 1279 // exceptions 1280 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 1281 // declaration 1282 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 1283 Keywords.kw_async, Keywords.kw_function, 1284 // import/export 1285 Keywords.kw_import, tok::kw_export); 1286 } 1287 1288 // Checks whether a token is a type in K&R C (aka C78). 1289 static bool isC78Type(const FormatToken &Tok) { 1290 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long, 1291 tok::kw_unsigned, tok::kw_float, tok::kw_double, 1292 tok::identifier); 1293 } 1294 1295 // This function checks whether a token starts the first parameter declaration 1296 // in a K&R C (aka C78) function definition, e.g.: 1297 // int f(a, b) 1298 // short a, b; 1299 // { 1300 // return a + b; 1301 // } 1302 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, 1303 const FormatToken *FuncName) { 1304 assert(Tok); 1305 assert(Next); 1306 assert(FuncName); 1307 1308 if (FuncName->isNot(tok::identifier)) 1309 return false; 1310 1311 const FormatToken *Prev = FuncName->Previous; 1312 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev))) 1313 return false; 1314 1315 if (!isC78Type(*Tok) && 1316 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) { 1317 return false; 1318 } 1319 1320 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo()) 1321 return false; 1322 1323 Tok = Tok->Previous; 1324 if (!Tok || Tok->isNot(tok::r_paren)) 1325 return false; 1326 1327 Tok = Tok->Previous; 1328 if (!Tok || Tok->isNot(tok::identifier)) 1329 return false; 1330 1331 return 
Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma); 1332 } 1333 1334 bool UnwrappedLineParser::parseModuleImport() { 1335 assert(FormatTok->is(Keywords.kw_import) && "'import' expected"); 1336 1337 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true); 1338 !Token->Tok.getIdentifierInfo() && 1339 !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) { 1340 return false; 1341 } 1342 1343 nextToken(); 1344 while (!eof()) { 1345 if (FormatTok->is(tok::colon)) { 1346 FormatTok->setFinalizedType(TT_ModulePartitionColon); 1347 } 1348 // Handle import <foo/bar.h> as we would an include statement. 1349 else if (FormatTok->is(tok::less)) { 1350 nextToken(); 1351 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) { 1352 // Mark tokens up to the trailing line comments as implicit string 1353 // literals. 1354 if (FormatTok->isNot(tok::comment) && 1355 !FormatTok->TokenText.starts_with("//")) { 1356 FormatTok->setFinalizedType(TT_ImplicitStringLiteral); 1357 } 1358 nextToken(); 1359 } 1360 } 1361 if (FormatTok->is(tok::semi)) { 1362 nextToken(); 1363 break; 1364 } 1365 nextToken(); 1366 } 1367 1368 addUnwrappedLine(); 1369 return true; 1370 } 1371 1372 // readTokenWithJavaScriptASI reads the next token and terminates the current 1373 // line if JavaScript Automatic Semicolon Insertion must 1374 // happen between the current token and the next token. 1375 // 1376 // This method is conservative - it cannot cover all edge cases of JavaScript, 1377 // but only aims to correctly handle certain well known cases. It *must not* 1378 // return true in speculative cases. 1379 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 1380 FormatToken *Previous = FormatTok; 1381 readToken(); 1382 FormatToken *Next = FormatTok; 1383 1384 bool IsOnSameLine = 1385 CommentsBeforeNextToken.empty() 1386 ? 
Next->NewlinesBefore == 0 1387 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 1388 if (IsOnSameLine) 1389 return; 1390 1391 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 1392 bool PreviousStartsTemplateExpr = 1393 Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${"); 1394 if (PreviousMustBeValue || Previous->is(tok::r_paren)) { 1395 // If the line contains an '@' sign, the previous token might be an 1396 // annotation, which can precede another identifier/value. 1397 bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) { 1398 return LineNode.Tok->is(tok::at); 1399 }); 1400 if (HasAt) 1401 return; 1402 } 1403 if (Next->is(tok::exclaim) && PreviousMustBeValue) 1404 return addUnwrappedLine(); 1405 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); 1406 bool NextEndsTemplateExpr = 1407 Next->is(TT_TemplateString) && Next->TokenText.starts_with("}"); 1408 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && 1409 (PreviousMustBeValue || 1410 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, 1411 tok::minusminus))) { 1412 return addUnwrappedLine(); 1413 } 1414 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) && 1415 isJSDeclOrStmt(Keywords, Next)) { 1416 return addUnwrappedLine(); 1417 } 1418 } 1419 1420 void UnwrappedLineParser::parseStructuralElement( 1421 const FormatToken *OpeningBrace, IfStmtKind *IfKind, 1422 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) { 1423 if (Style.Language == FormatStyle::LK_TableGen && 1424 FormatTok->is(tok::pp_include)) { 1425 nextToken(); 1426 if (FormatTok->is(tok::string_literal)) 1427 nextToken(); 1428 addUnwrappedLine(); 1429 return; 1430 } 1431 1432 if (IsCpp) { 1433 while (FormatTok->is(tok::l_square) && handleCppAttributes()) { 1434 } 1435 } else if (Style.isVerilog()) { 1436 if (Keywords.isVerilogStructuredProcedure(*FormatTok)) { 1437 parseForOrWhileLoop(/*HasParens=*/false); 1438 return; 1439 
} 1440 if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) { 1441 parseForOrWhileLoop(); 1442 return; 1443 } 1444 if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert, 1445 Keywords.kw_assume, Keywords.kw_cover)) { 1446 parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true); 1447 return; 1448 } 1449 1450 // Skip things that can exist before keywords like 'if' and 'case'. 1451 while (true) { 1452 if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique, 1453 Keywords.kw_unique0)) { 1454 nextToken(); 1455 } else if (FormatTok->is(tok::l_paren) && 1456 Tokens->peekNextToken()->is(tok::star)) { 1457 parseParens(); 1458 } else { 1459 break; 1460 } 1461 } 1462 } 1463 1464 // Tokens that only make sense at the beginning of a line. 1465 if (FormatTok->isAccessSpecifierKeyword()) { 1466 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 1467 Style.isCSharp()) { 1468 nextToken(); 1469 } else { 1470 parseAccessSpecifier(); 1471 } 1472 return; 1473 } 1474 switch (FormatTok->Tok.getKind()) { 1475 case tok::kw_asm: 1476 nextToken(); 1477 if (FormatTok->is(tok::l_brace)) { 1478 FormatTok->setFinalizedType(TT_InlineASMBrace); 1479 nextToken(); 1480 while (FormatTok && !eof()) { 1481 if (FormatTok->is(tok::r_brace)) { 1482 FormatTok->setFinalizedType(TT_InlineASMBrace); 1483 nextToken(); 1484 addUnwrappedLine(); 1485 break; 1486 } 1487 FormatTok->Finalized = true; 1488 nextToken(); 1489 } 1490 } 1491 break; 1492 case tok::kw_namespace: 1493 parseNamespace(); 1494 return; 1495 case tok::kw_if: { 1496 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1497 // field/method declaration. 1498 break; 1499 } 1500 FormatToken *Tok = parseIfThenElse(IfKind); 1501 if (IfLeftBrace) 1502 *IfLeftBrace = Tok; 1503 return; 1504 } 1505 case tok::kw_for: 1506 case tok::kw_while: 1507 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1508 // field/method declaration. 
1509 break; 1510 } 1511 parseForOrWhileLoop(); 1512 return; 1513 case tok::kw_do: 1514 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1515 // field/method declaration. 1516 break; 1517 } 1518 parseDoWhile(); 1519 if (HasDoWhile) 1520 *HasDoWhile = true; 1521 return; 1522 case tok::kw_switch: 1523 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1524 // 'switch: string' field declaration. 1525 break; 1526 } 1527 parseSwitch(/*IsExpr=*/false); 1528 return; 1529 case tok::kw_default: { 1530 // In Verilog default along with other labels are handled in the next loop. 1531 if (Style.isVerilog()) 1532 break; 1533 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1534 // 'default: string' field declaration. 1535 break; 1536 } 1537 auto *Default = FormatTok; 1538 nextToken(); 1539 if (FormatTok->is(tok::colon)) { 1540 FormatTok->setFinalizedType(TT_CaseLabelColon); 1541 parseLabel(); 1542 return; 1543 } 1544 if (FormatTok->is(tok::arrow)) { 1545 FormatTok->setFinalizedType(TT_CaseLabelArrow); 1546 Default->setFinalizedType(TT_SwitchExpressionLabel); 1547 parseLabel(); 1548 return; 1549 } 1550 // e.g. "default void f() {}" in a Java interface. 1551 break; 1552 } 1553 case tok::kw_case: 1554 // Proto: there are no switch/case statements. 1555 if (Style.Language == FormatStyle::LK_Proto) { 1556 nextToken(); 1557 return; 1558 } 1559 if (Style.isVerilog()) { 1560 parseBlock(); 1561 addUnwrappedLine(); 1562 return; 1563 } 1564 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1565 // 'case: string' field declaration. 1566 nextToken(); 1567 break; 1568 } 1569 parseCaseLabel(); 1570 return; 1571 case tok::kw_try: 1572 case tok::kw___try: 1573 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1574 // field/method declaration. 1575 break; 1576 } 1577 parseTryCatch(); 1578 return; 1579 case tok::kw_extern: 1580 nextToken(); 1581 if (Style.isVerilog()) { 1582 // In Verilog and extern module declaration looks like a start of module. 
1583 // But there is no body and endmodule. So we handle it separately. 1584 if (Keywords.isVerilogHierarchy(*FormatTok)) { 1585 parseVerilogHierarchyHeader(); 1586 return; 1587 } 1588 } else if (FormatTok->is(tok::string_literal)) { 1589 nextToken(); 1590 if (FormatTok->is(tok::l_brace)) { 1591 if (Style.BraceWrapping.AfterExternBlock) 1592 addUnwrappedLine(); 1593 // Either we indent or for backwards compatibility we follow the 1594 // AfterExternBlock style. 1595 unsigned AddLevels = 1596 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) || 1597 (Style.BraceWrapping.AfterExternBlock && 1598 Style.IndentExternBlock == 1599 FormatStyle::IEBS_AfterExternBlock) 1600 ? 1u 1601 : 0u; 1602 parseBlock(/*MustBeDeclaration=*/true, AddLevels); 1603 addUnwrappedLine(); 1604 return; 1605 } 1606 } 1607 break; 1608 case tok::kw_export: 1609 if (Style.isJavaScript()) { 1610 parseJavaScriptEs6ImportExport(); 1611 return; 1612 } 1613 if (IsCpp) { 1614 nextToken(); 1615 if (FormatTok->is(tok::kw_namespace)) { 1616 parseNamespace(); 1617 return; 1618 } 1619 if (FormatTok->is(Keywords.kw_import) && parseModuleImport()) 1620 return; 1621 } 1622 break; 1623 case tok::kw_inline: 1624 nextToken(); 1625 if (FormatTok->is(tok::kw_namespace)) { 1626 parseNamespace(); 1627 return; 1628 } 1629 break; 1630 case tok::identifier: 1631 if (FormatTok->is(TT_ForEachMacro)) { 1632 parseForOrWhileLoop(); 1633 return; 1634 } 1635 if (FormatTok->is(TT_MacroBlockBegin)) { 1636 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 1637 /*MunchSemi=*/false); 1638 return; 1639 } 1640 if (FormatTok->is(Keywords.kw_import)) { 1641 if (Style.isJavaScript()) { 1642 parseJavaScriptEs6ImportExport(); 1643 return; 1644 } 1645 if (Style.Language == FormatStyle::LK_Proto) { 1646 nextToken(); 1647 if (FormatTok->is(tok::kw_public)) 1648 nextToken(); 1649 if (FormatTok->isNot(tok::string_literal)) 1650 return; 1651 nextToken(); 1652 if (FormatTok->is(tok::semi)) 1653 nextToken(); 1654 addUnwrappedLine(); 
1655 return; 1656 } 1657 if (IsCpp && parseModuleImport()) 1658 return; 1659 } 1660 if (IsCpp && FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, 1661 Keywords.kw_slots, Keywords.kw_qslots)) { 1662 nextToken(); 1663 if (FormatTok->is(tok::colon)) { 1664 nextToken(); 1665 addUnwrappedLine(); 1666 return; 1667 } 1668 } 1669 if (IsCpp && FormatTok->is(TT_StatementMacro)) { 1670 parseStatementMacro(); 1671 return; 1672 } 1673 if (IsCpp && FormatTok->is(TT_NamespaceMacro)) { 1674 parseNamespace(); 1675 return; 1676 } 1677 // In Verilog labels can be any expression, so we don't do them here. 1678 // JS doesn't have macros, and within classes colons indicate fields, not 1679 // labels. 1680 // TableGen doesn't have labels. 1681 if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() && 1682 Tokens->peekNextToken()->is(tok::colon) && !Line->MustBeDeclaration) { 1683 nextToken(); 1684 if (!Line->InMacroBody || CurrentLines->size() > 1) 1685 Line->Tokens.begin()->Tok->MustBreakBefore = true; 1686 FormatTok->setFinalizedType(TT_GotoLabelColon); 1687 parseLabel(!Style.IndentGotoLabels); 1688 if (HasLabel) 1689 *HasLabel = true; 1690 return; 1691 } 1692 // In all other cases, parse the declaration. 
1693 break; 1694 default: 1695 break; 1696 } 1697 1698 for (const bool InRequiresExpression = 1699 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace); 1700 !eof();) { 1701 if (IsCpp && FormatTok->isCppAlternativeOperatorKeyword()) { 1702 if (auto *Next = Tokens->peekNextToken(/*SkipComment=*/true); 1703 Next && Next->isBinaryOperator()) { 1704 FormatTok->Tok.setKind(tok::identifier); 1705 } 1706 } 1707 const FormatToken *Previous = FormatTok->Previous; 1708 switch (FormatTok->Tok.getKind()) { 1709 case tok::at: 1710 nextToken(); 1711 if (FormatTok->is(tok::l_brace)) { 1712 nextToken(); 1713 parseBracedList(); 1714 break; 1715 } else if (Style.Language == FormatStyle::LK_Java && 1716 FormatTok->is(Keywords.kw_interface)) { 1717 nextToken(); 1718 break; 1719 } 1720 switch (FormatTok->Tok.getObjCKeywordID()) { 1721 case tok::objc_public: 1722 case tok::objc_protected: 1723 case tok::objc_package: 1724 case tok::objc_private: 1725 return parseAccessSpecifier(); 1726 case tok::objc_interface: 1727 case tok::objc_implementation: 1728 return parseObjCInterfaceOrImplementation(); 1729 case tok::objc_protocol: 1730 if (parseObjCProtocol()) 1731 return; 1732 break; 1733 case tok::objc_end: 1734 return; // Handled by the caller. 
1735 case tok::objc_optional: 1736 case tok::objc_required: 1737 nextToken(); 1738 addUnwrappedLine(); 1739 return; 1740 case tok::objc_autoreleasepool: 1741 nextToken(); 1742 if (FormatTok->is(tok::l_brace)) { 1743 if (Style.BraceWrapping.AfterControlStatement == 1744 FormatStyle::BWACS_Always) { 1745 addUnwrappedLine(); 1746 } 1747 parseBlock(); 1748 } 1749 addUnwrappedLine(); 1750 return; 1751 case tok::objc_synchronized: 1752 nextToken(); 1753 if (FormatTok->is(tok::l_paren)) { 1754 // Skip synchronization object 1755 parseParens(); 1756 } 1757 if (FormatTok->is(tok::l_brace)) { 1758 if (Style.BraceWrapping.AfterControlStatement == 1759 FormatStyle::BWACS_Always) { 1760 addUnwrappedLine(); 1761 } 1762 parseBlock(); 1763 } 1764 addUnwrappedLine(); 1765 return; 1766 case tok::objc_try: 1767 // This branch isn't strictly necessary (the kw_try case below would 1768 // do this too after the tok::at is parsed above). But be explicit. 1769 parseTryCatch(); 1770 return; 1771 default: 1772 break; 1773 } 1774 break; 1775 case tok::kw_requires: { 1776 if (IsCpp) { 1777 bool ParsedClause = parseRequires(); 1778 if (ParsedClause) 1779 return; 1780 } else { 1781 nextToken(); 1782 } 1783 break; 1784 } 1785 case tok::kw_enum: 1786 // Ignore if this is part of "template <enum ..." or "... -> enum" or 1787 // "template <..., enum ...>". 1788 if (Previous && Previous->isOneOf(tok::less, tok::arrow, tok::comma)) { 1789 nextToken(); 1790 break; 1791 } 1792 1793 // parseEnum falls through and does not yet add an unwrapped line as an 1794 // enum definition can start a structural element. 1795 if (!parseEnum()) 1796 break; 1797 // This only applies to C++ and Verilog. 
1798 if (!IsCpp && !Style.isVerilog()) { 1799 addUnwrappedLine(); 1800 return; 1801 } 1802 break; 1803 case tok::kw_typedef: 1804 nextToken(); 1805 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1806 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS, 1807 Keywords.kw_CF_CLOSED_ENUM, 1808 Keywords.kw_NS_CLOSED_ENUM)) { 1809 parseEnum(); 1810 } 1811 break; 1812 case tok::kw_class: 1813 if (Style.isVerilog()) { 1814 parseBlock(); 1815 addUnwrappedLine(); 1816 return; 1817 } 1818 if (Style.isTableGen()) { 1819 // Do nothing special. In this case the l_brace becomes FunctionLBrace. 1820 // This is same as def and so on. 1821 nextToken(); 1822 break; 1823 } 1824 [[fallthrough]]; 1825 case tok::kw_struct: 1826 case tok::kw_union: 1827 if (parseStructLike()) 1828 return; 1829 break; 1830 case tok::kw_decltype: 1831 nextToken(); 1832 if (FormatTok->is(tok::l_paren)) { 1833 parseParens(); 1834 assert(FormatTok->Previous); 1835 if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto, 1836 tok::l_paren)) { 1837 Line->SeenDecltypeAuto = true; 1838 } 1839 } 1840 break; 1841 case tok::period: 1842 nextToken(); 1843 // In Java, classes have an implicit static member "class". 1844 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1845 FormatTok->is(tok::kw_class)) { 1846 nextToken(); 1847 } 1848 if (Style.isJavaScript() && FormatTok && 1849 FormatTok->Tok.getIdentifierInfo()) { 1850 // JavaScript only has pseudo keywords, all keywords are allowed to 1851 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 1852 nextToken(); 1853 } 1854 break; 1855 case tok::semi: 1856 nextToken(); 1857 addUnwrappedLine(); 1858 return; 1859 case tok::r_brace: 1860 addUnwrappedLine(); 1861 return; 1862 case tok::l_paren: { 1863 parseParens(); 1864 // Break the unwrapped line if a K&R C function definition has a parameter 1865 // declaration. 
1866 if (OpeningBrace || !IsCpp || !Previous || eof()) 1867 break; 1868 if (isC78ParameterDecl(FormatTok, 1869 Tokens->peekNextToken(/*SkipComment=*/true), 1870 Previous)) { 1871 addUnwrappedLine(); 1872 return; 1873 } 1874 break; 1875 } 1876 case tok::kw_operator: 1877 nextToken(); 1878 if (FormatTok->isBinaryOperator()) 1879 nextToken(); 1880 break; 1881 case tok::caret: 1882 nextToken(); 1883 // Block return type. 1884 if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName(LangOpts)) { 1885 nextToken(); 1886 // Return types: pointers are ok too. 1887 while (FormatTok->is(tok::star)) 1888 nextToken(); 1889 } 1890 // Block argument list. 1891 if (FormatTok->is(tok::l_paren)) 1892 parseParens(); 1893 // Block body. 1894 if (FormatTok->is(tok::l_brace)) 1895 parseChildBlock(); 1896 break; 1897 case tok::l_brace: 1898 if (InRequiresExpression) 1899 FormatTok->setFinalizedType(TT_BracedListLBrace); 1900 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) { 1901 IsDecltypeAutoFunction = Line->SeenDecltypeAuto; 1902 // A block outside of parentheses must be the last part of a 1903 // structural element. 1904 // FIXME: Figure out cases where this is not true, and add projections 1905 // for them (the one we know is missing are lambdas). 1906 if (Style.Language == FormatStyle::LK_Java && 1907 Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) { 1908 // If necessary, we could set the type to something different than 1909 // TT_FunctionLBrace. 1910 if (Style.BraceWrapping.AfterControlStatement == 1911 FormatStyle::BWACS_Always) { 1912 addUnwrappedLine(); 1913 } 1914 } else if (Style.BraceWrapping.AfterFunction) { 1915 addUnwrappedLine(); 1916 } 1917 if (!Previous || Previous->isNot(TT_TypeDeclarationParen)) 1918 FormatTok->setFinalizedType(TT_FunctionLBrace); 1919 parseBlock(); 1920 IsDecltypeAutoFunction = false; 1921 addUnwrappedLine(); 1922 return; 1923 } 1924 // Otherwise this was a braced init list, and the structural 1925 // element continues. 
1926 break; 1927 case tok::kw_try: 1928 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1929 // field/method declaration. 1930 nextToken(); 1931 break; 1932 } 1933 // We arrive here when parsing function-try blocks. 1934 if (Style.BraceWrapping.AfterFunction) 1935 addUnwrappedLine(); 1936 parseTryCatch(); 1937 return; 1938 case tok::identifier: { 1939 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) && 1940 Line->MustBeDeclaration) { 1941 addUnwrappedLine(); 1942 parseCSharpGenericTypeConstraint(); 1943 break; 1944 } 1945 if (FormatTok->is(TT_MacroBlockEnd)) { 1946 addUnwrappedLine(); 1947 return; 1948 } 1949 1950 // Function declarations (as opposed to function expressions) are parsed 1951 // on their own unwrapped line by continuing this loop. Function 1952 // expressions (functions that are not on their own line) must not create 1953 // a new unwrapped line, so they are special cased below. 1954 size_t TokenCount = Line->Tokens.size(); 1955 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) && 1956 (TokenCount > 1 || 1957 (TokenCount == 1 && 1958 Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) { 1959 tryToParseJSFunction(); 1960 break; 1961 } 1962 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) && 1963 FormatTok->is(Keywords.kw_interface)) { 1964 if (Style.isJavaScript()) { 1965 // In JavaScript/TypeScript, "interface" can be used as a standalone 1966 // identifier, e.g. in `var interface = 1;`. If "interface" is 1967 // followed by another identifier, it is very like to be an actual 1968 // interface declaration. 
1969 unsigned StoredPosition = Tokens->getPosition(); 1970 FormatToken *Next = Tokens->getNextToken(); 1971 FormatTok = Tokens->setPosition(StoredPosition); 1972 if (!mustBeJSIdent(Keywords, Next)) { 1973 nextToken(); 1974 break; 1975 } 1976 } 1977 parseRecord(); 1978 addUnwrappedLine(); 1979 return; 1980 } 1981 1982 if (Style.isVerilog()) { 1983 if (FormatTok->is(Keywords.kw_table)) { 1984 parseVerilogTable(); 1985 return; 1986 } 1987 if (Keywords.isVerilogBegin(*FormatTok) || 1988 Keywords.isVerilogHierarchy(*FormatTok)) { 1989 parseBlock(); 1990 addUnwrappedLine(); 1991 return; 1992 } 1993 } 1994 1995 if (!IsCpp && FormatTok->is(Keywords.kw_interface)) { 1996 if (parseStructLike()) 1997 return; 1998 break; 1999 } 2000 2001 if (IsCpp && FormatTok->is(TT_StatementMacro)) { 2002 parseStatementMacro(); 2003 return; 2004 } 2005 2006 // See if the following token should start a new unwrapped line. 2007 StringRef Text = FormatTok->TokenText; 2008 2009 FormatToken *PreviousToken = FormatTok; 2010 nextToken(); 2011 2012 // JS doesn't have macros, and within classes colons indicate fields, not 2013 // labels. 2014 if (Style.isJavaScript()) 2015 break; 2016 2017 auto OneTokenSoFar = [&]() { 2018 auto I = Line->Tokens.begin(), E = Line->Tokens.end(); 2019 while (I != E && I->Tok->is(tok::comment)) 2020 ++I; 2021 if (Style.isVerilog()) 2022 while (I != E && I->Tok->is(tok::hash)) 2023 ++I; 2024 return I != E && (++I == E); 2025 }; 2026 if (OneTokenSoFar()) { 2027 // Recognize function-like macro usages without trailing semicolon as 2028 // well as free-standing macros like Q_OBJECT. 2029 bool FunctionLike = FormatTok->is(tok::l_paren); 2030 if (FunctionLike) 2031 parseParens(); 2032 2033 bool FollowedByNewline = 2034 CommentsBeforeNextToken.empty() 2035 ? 
FormatTok->NewlinesBefore > 0 2036 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 2037 2038 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 2039 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) { 2040 if (PreviousToken->isNot(TT_UntouchableMacroFunc)) 2041 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro); 2042 addUnwrappedLine(); 2043 return; 2044 } 2045 } 2046 break; 2047 } 2048 case tok::equal: 2049 if ((Style.isJavaScript() || Style.isCSharp()) && 2050 FormatTok->is(TT_FatArrow)) { 2051 tryToParseChildBlock(); 2052 break; 2053 } 2054 2055 nextToken(); 2056 if (FormatTok->is(tok::l_brace)) { 2057 // Block kind should probably be set to BK_BracedInit for any language. 2058 // C# needs this change to ensure that array initialisers and object 2059 // initialisers are indented the same way. 2060 if (Style.isCSharp()) 2061 FormatTok->setBlockKind(BK_BracedInit); 2062 // TableGen's defset statement has syntax of the form, 2063 // `defset <type> <name> = { <statement>... }` 2064 if (Style.isTableGen() && 2065 Line->Tokens.begin()->Tok->is(Keywords.kw_defset)) { 2066 FormatTok->setFinalizedType(TT_FunctionLBrace); 2067 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2068 /*MunchSemi=*/false); 2069 addUnwrappedLine(); 2070 break; 2071 } 2072 nextToken(); 2073 parseBracedList(); 2074 } else if (Style.Language == FormatStyle::LK_Proto && 2075 FormatTok->is(tok::less)) { 2076 nextToken(); 2077 parseBracedList(/*IsAngleBracket=*/true); 2078 } 2079 break; 2080 case tok::l_square: 2081 parseSquare(); 2082 break; 2083 case tok::kw_new: 2084 parseNew(); 2085 break; 2086 case tok::kw_switch: 2087 if (Style.Language == FormatStyle::LK_Java) 2088 parseSwitch(/*IsExpr=*/true); 2089 nextToken(); 2090 break; 2091 case tok::kw_case: 2092 // Proto: there are no switch/case statements. 2093 if (Style.Language == FormatStyle::LK_Proto) { 2094 nextToken(); 2095 return; 2096 } 2097 // In Verilog switch is called case. 
/// Tries to parse a C# property accessor block starting at the current `{`.
///
/// Returns false without advancing the current token when the style is not
/// C#, when the token before the brace is not an identifier, or when the
/// look-ahead finds no `get`/`init`/`set` keyword (the token source is rewound
/// to the position recorded before the look-ahead). Otherwise the accessor
/// block is consumed, unwrapped lines are emitted for it, and true is
/// returned.
bool UnwrappedLineParser::tryToParsePropertyAccessor() {
  assert(FormatTok->is(tok::l_brace));
  if (!Style.isCSharp())
    return false;
  // See if it's a property accessor.
  if (FormatTok->Previous->isNot(tok::identifier))
    return false;

  // See if we are inside a property accessor.
  //
  // Record the current tokenPosition so that we can advance and
  // reset the current token. `Next` is not set yet so we need
  // another way to advance along the token stream.
  unsigned int StoredPosition = Tokens->getPosition();
  FormatToken *Tok = Tokens->getNextToken();

  // A trivial property accessor is of the form:
  // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
  // Track these as they do not require line breaks to be introduced.
  bool HasSpecialAccessor = false;
  bool IsTrivialPropertyAccessor = true;
  // Look-ahead only: `Tok` walks the token source while FormatTok stays on the
  // opening brace.
  while (!eof()) {
    if (Tok->isAccessSpecifierKeyword() ||
        Tok->isOneOf(tok::semi, Keywords.kw_internal, Keywords.kw_get,
                     Keywords.kw_init, Keywords.kw_set)) {
      if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
        HasSpecialAccessor = true;
      Tok = Tokens->getNextToken();
      continue;
    }
    // The first token that is not an access specifier, `;` or accessor keyword
    // ends the scan; unless it is the closing brace the accessor is not of the
    // trivial form described above.
    if (Tok->isNot(tok::r_brace))
      IsTrivialPropertyAccessor = false;
    break;
  }

  // No get/init/set seen: not a property accessor. Undo the look-ahead.
  if (!HasSpecialAccessor) {
    Tokens->setPosition(StoredPosition);
    return false;
  }

  // Try to parse the property accessor:
  // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
  Tokens->setPosition(StoredPosition);
  if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
    addUnwrappedLine();
  // Consume the opening brace.
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::r_brace:
      // End of the accessor block. If a `=` follows, skip through the next
      // `;` so that it stays on the same unwrapped line.
      nextToken();
      if (FormatTok->is(tok::equal)) {
        while (!eof() && FormatTok->isNot(tok::semi))
          nextToken();
        nextToken();
      }
      addUnwrappedLine();
      return true;
    case tok::l_brace:
      // A braced accessor body is parsed one level deeper.
      ++Line->Level;
      parseBlock(/*MustBeDeclaration=*/true);
      addUnwrappedLine();
      --Line->Level;
      break;
    case tok::equal:
      if (FormatTok->is(TT_FatArrow)) {
        // `=> ...;` body: consume through the `;` and emit it as its own
        // unwrapped line, one level deeper.
        ++Line->Level;
        do {
          nextToken();
        } while (!eof() && FormatTok->isNot(tok::semi));
        nextToken();
        addUnwrappedLine();
        --Line->Level;
        break;
      }
      nextToken();
      break;
    default:
      if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
                             Keywords.kw_set) &&
          !IsTrivialPropertyAccessor) {
        // Non-trivial get/set needs to be on its own line.
        addUnwrappedLine();
      }
      nextToken();
    }
  } while (!eof());

  // Unreachable for well-formed code (paired '{' and '}').
  return true;
}
We use 2298 // this as an heuristic to distinguish between Objective-C expressions 2299 // followed by an `a->b` expression, such as: 2300 // ([obj func:arg] + a->b) 2301 // Otherwise the code below would parse as a lambda. 2302 case tok::plus: 2303 case tok::minus: 2304 case tok::exclaim: 2305 case tok::tilde: 2306 case tok::slash: 2307 case tok::percent: 2308 case tok::lessless: 2309 case tok::pipe: 2310 case tok::pipepipe: 2311 case tok::ampamp: 2312 case tok::caret: 2313 case tok::equalequal: 2314 case tok::exclaimequal: 2315 case tok::greaterequal: 2316 case tok::lessequal: 2317 case tok::question: 2318 case tok::colon: 2319 case tok::ellipsis: 2320 case tok::kw_true: 2321 case tok::kw_false: 2322 if (SeenArrow || InTemplateParameterList) { 2323 nextToken(); 2324 break; 2325 } 2326 return true; 2327 case tok::arrow: 2328 // This might or might not actually be a lambda arrow (this could be an 2329 // ObjC method invocation followed by a dereferencing arrow). We might 2330 // reset this back to TT_Unknown in TokenAnnotator. 
2331 FormatTok->setFinalizedType(TT_LambdaArrow); 2332 SeenArrow = true; 2333 nextToken(); 2334 break; 2335 case tok::kw_requires: { 2336 auto *RequiresToken = FormatTok; 2337 nextToken(); 2338 parseRequiresClause(RequiresToken); 2339 break; 2340 } 2341 case tok::equal: 2342 if (!InTemplateParameterList) 2343 return true; 2344 nextToken(); 2345 break; 2346 default: 2347 return true; 2348 } 2349 } 2350 2351 FormatTok->setFinalizedType(TT_LambdaLBrace); 2352 LSquare.setFinalizedType(TT_LambdaLSquare); 2353 2354 NestedLambdas.push_back(Line->SeenDecltypeAuto); 2355 parseChildBlock(); 2356 assert(!NestedLambdas.empty()); 2357 NestedLambdas.pop_back(); 2358 2359 return true; 2360 } 2361 2362 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 2363 const FormatToken *Previous = FormatTok->Previous; 2364 const FormatToken *LeftSquare = FormatTok; 2365 nextToken(); 2366 if ((Previous && ((Previous->Tok.getIdentifierInfo() && 2367 !Previous->isOneOf(tok::kw_return, tok::kw_co_await, 2368 tok::kw_co_yield, tok::kw_co_return)) || 2369 Previous->closesScope())) || 2370 LeftSquare->isCppStructuredBinding(IsCpp)) { 2371 return false; 2372 } 2373 if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind())) 2374 return false; 2375 if (FormatTok->is(tok::r_square)) { 2376 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true); 2377 if (Next->is(tok::greater)) 2378 return false; 2379 } 2380 parseSquare(/*LambdaIntroducer=*/true); 2381 return true; 2382 } 2383 2384 void UnwrappedLineParser::tryToParseJSFunction() { 2385 assert(FormatTok->is(Keywords.kw_function)); 2386 if (FormatTok->is(Keywords.kw_async)) 2387 nextToken(); 2388 // Consume "function". 2389 nextToken(); 2390 2391 // Consume * (generator function). Treat it like C++'s overloaded operators. 2392 if (FormatTok->is(tok::star)) { 2393 FormatTok->setFinalizedType(TT_OverloadedOperator); 2394 nextToken(); 2395 } 2396 2397 // Consume function name. 
2398 if (FormatTok->is(tok::identifier)) 2399 nextToken(); 2400 2401 if (FormatTok->isNot(tok::l_paren)) 2402 return; 2403 2404 // Parse formal parameter list. 2405 parseParens(); 2406 2407 if (FormatTok->is(tok::colon)) { 2408 // Parse a type definition. 2409 nextToken(); 2410 2411 // Eat the type declaration. For braced inline object types, balance braces, 2412 // otherwise just parse until finding an l_brace for the function body. 2413 if (FormatTok->is(tok::l_brace)) 2414 tryToParseBracedList(); 2415 else 2416 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 2417 nextToken(); 2418 } 2419 2420 if (FormatTok->is(tok::semi)) 2421 return; 2422 2423 parseChildBlock(); 2424 } 2425 2426 bool UnwrappedLineParser::tryToParseBracedList() { 2427 if (FormatTok->is(BK_Unknown)) 2428 calculateBraceTypes(); 2429 assert(FormatTok->isNot(BK_Unknown)); 2430 if (FormatTok->is(BK_Block)) 2431 return false; 2432 nextToken(); 2433 parseBracedList(); 2434 return true; 2435 } 2436 2437 bool UnwrappedLineParser::tryToParseChildBlock() { 2438 assert(Style.isJavaScript() || Style.isCSharp()); 2439 assert(FormatTok->is(TT_FatArrow)); 2440 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow. 2441 // They always start an expression or a child block if followed by a curly 2442 // brace. 2443 nextToken(); 2444 if (FormatTok->isNot(tok::l_brace)) 2445 return false; 2446 parseChildBlock(); 2447 return true; 2448 } 2449 2450 bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) { 2451 assert(!IsAngleBracket || !IsEnum); 2452 bool HasError = false; 2453 2454 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 2455 // replace this by using parseAssignmentExpression() inside. 
2456 do { 2457 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) && 2458 tryToParseChildBlock()) { 2459 continue; 2460 } 2461 if (Style.isJavaScript()) { 2462 if (FormatTok->is(Keywords.kw_function)) { 2463 tryToParseJSFunction(); 2464 continue; 2465 } 2466 if (FormatTok->is(tok::l_brace)) { 2467 // Could be a method inside of a braced list `{a() { return 1; }}`. 2468 if (tryToParseBracedList()) 2469 continue; 2470 parseChildBlock(); 2471 } 2472 } 2473 if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) { 2474 if (IsEnum) { 2475 FormatTok->setBlockKind(BK_Block); 2476 if (!Style.AllowShortEnumsOnASingleLine) 2477 addUnwrappedLine(); 2478 } 2479 nextToken(); 2480 return !HasError; 2481 } 2482 switch (FormatTok->Tok.getKind()) { 2483 case tok::l_square: 2484 if (Style.isCSharp()) 2485 parseSquare(); 2486 else 2487 tryToParseLambda(); 2488 break; 2489 case tok::l_paren: 2490 parseParens(); 2491 // JavaScript can just have free standing methods and getters/setters in 2492 // object literals. Detect them by a "{" following ")". 2493 if (Style.isJavaScript()) { 2494 if (FormatTok->is(tok::l_brace)) 2495 parseChildBlock(); 2496 break; 2497 } 2498 break; 2499 case tok::l_brace: 2500 // Assume there are no blocks inside a braced init list apart 2501 // from the ones we explicitly parse out (like lambdas). 2502 FormatTok->setBlockKind(BK_BracedInit); 2503 if (!IsAngleBracket) { 2504 auto *Prev = FormatTok->Previous; 2505 if (Prev && Prev->is(tok::greater)) 2506 Prev->setFinalizedType(TT_TemplateCloser); 2507 } 2508 nextToken(); 2509 parseBracedList(); 2510 break; 2511 case tok::less: 2512 nextToken(); 2513 if (IsAngleBracket) 2514 parseBracedList(/*IsAngleBracket=*/true); 2515 break; 2516 case tok::semi: 2517 // JavaScript (or more precisely TypeScript) can have semicolons in braced 2518 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 2519 // used for error recovery if we have otherwise determined that this is 2520 // a braced list. 
2521 if (Style.isJavaScript()) { 2522 nextToken(); 2523 break; 2524 } 2525 HasError = true; 2526 if (!IsEnum) 2527 return false; 2528 nextToken(); 2529 break; 2530 case tok::comma: 2531 nextToken(); 2532 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2533 addUnwrappedLine(); 2534 break; 2535 default: 2536 nextToken(); 2537 break; 2538 } 2539 } while (!eof()); 2540 return false; 2541 } 2542 2543 /// \brief Parses a pair of parentheses (and everything between them). 2544 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all 2545 /// double ampersands. This applies for all nested scopes as well. 2546 /// 2547 /// Returns whether there is a `=` token between the parentheses. 2548 bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { 2549 assert(FormatTok->is(tok::l_paren) && "'(' expected."); 2550 auto *LeftParen = FormatTok; 2551 bool SeenEqual = false; 2552 bool MightBeFoldExpr = false; 2553 const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace); 2554 nextToken(); 2555 do { 2556 switch (FormatTok->Tok.getKind()) { 2557 case tok::l_paren: 2558 if (parseParens(AmpAmpTokenType)) 2559 SeenEqual = true; 2560 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 2561 parseChildBlock(); 2562 break; 2563 case tok::r_paren: { 2564 auto *Prev = LeftParen->Previous; 2565 if (!MightBeStmtExpr && !MightBeFoldExpr && !Line->InMacroBody && 2566 Style.RemoveParentheses > FormatStyle::RPS_Leave) { 2567 const auto *Next = Tokens->peekNextToken(); 2568 const bool DoubleParens = 2569 Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren); 2570 const auto *PrevPrev = Prev ? 
Prev->getPreviousNonComment() : nullptr; 2571 const bool Blacklisted = 2572 PrevPrev && 2573 (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) || 2574 (SeenEqual && 2575 (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) || 2576 PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if)))); 2577 const bool ReturnParens = 2578 Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement && 2579 ((NestedLambdas.empty() && !IsDecltypeAutoFunction) || 2580 (!NestedLambdas.empty() && !NestedLambdas.back())) && 2581 Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next && 2582 Next->is(tok::semi); 2583 if ((DoubleParens && !Blacklisted) || ReturnParens) { 2584 LeftParen->Optional = true; 2585 FormatTok->Optional = true; 2586 } 2587 } 2588 if (Prev) { 2589 if (Prev->is(TT_TypenameMacro)) { 2590 LeftParen->setFinalizedType(TT_TypeDeclarationParen); 2591 FormatTok->setFinalizedType(TT_TypeDeclarationParen); 2592 } else if (Prev->is(tok::greater) && FormatTok->Previous == LeftParen) { 2593 Prev->setFinalizedType(TT_TemplateCloser); 2594 } 2595 } 2596 nextToken(); 2597 return SeenEqual; 2598 } 2599 case tok::r_brace: 2600 // A "}" inside parenthesis is an error if there wasn't a matching "{". 
2601 return SeenEqual; 2602 case tok::l_square: 2603 tryToParseLambda(); 2604 break; 2605 case tok::l_brace: 2606 if (!tryToParseBracedList()) 2607 parseChildBlock(); 2608 break; 2609 case tok::at: 2610 nextToken(); 2611 if (FormatTok->is(tok::l_brace)) { 2612 nextToken(); 2613 parseBracedList(); 2614 } 2615 break; 2616 case tok::ellipsis: 2617 MightBeFoldExpr = true; 2618 nextToken(); 2619 break; 2620 case tok::equal: 2621 SeenEqual = true; 2622 if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) 2623 tryToParseChildBlock(); 2624 else 2625 nextToken(); 2626 break; 2627 case tok::kw_class: 2628 if (Style.isJavaScript()) 2629 parseRecord(/*ParseAsExpr=*/true); 2630 else 2631 nextToken(); 2632 break; 2633 case tok::identifier: 2634 if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function))) 2635 tryToParseJSFunction(); 2636 else 2637 nextToken(); 2638 break; 2639 case tok::kw_switch: 2640 parseSwitch(/*IsExpr=*/true); 2641 break; 2642 case tok::kw_requires: { 2643 auto RequiresToken = FormatTok; 2644 nextToken(); 2645 parseRequiresExpression(RequiresToken); 2646 break; 2647 } 2648 case tok::ampamp: 2649 if (AmpAmpTokenType != TT_Unknown) 2650 FormatTok->setFinalizedType(AmpAmpTokenType); 2651 [[fallthrough]]; 2652 default: 2653 nextToken(); 2654 break; 2655 } 2656 } while (!eof()); 2657 return SeenEqual; 2658 } 2659 2660 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 2661 if (!LambdaIntroducer) { 2662 assert(FormatTok->is(tok::l_square) && "'[' expected."); 2663 if (tryToParseLambda()) 2664 return; 2665 } 2666 do { 2667 switch (FormatTok->Tok.getKind()) { 2668 case tok::l_paren: 2669 parseParens(); 2670 break; 2671 case tok::r_square: 2672 nextToken(); 2673 return; 2674 case tok::r_brace: 2675 // A "}" inside parenthesis is an error if there wasn't a matching "{". 
2676 return; 2677 case tok::l_square: 2678 parseSquare(); 2679 break; 2680 case tok::l_brace: { 2681 if (!tryToParseBracedList()) 2682 parseChildBlock(); 2683 break; 2684 } 2685 case tok::at: 2686 case tok::colon: 2687 nextToken(); 2688 if (FormatTok->is(tok::l_brace)) { 2689 nextToken(); 2690 parseBracedList(); 2691 } 2692 break; 2693 default: 2694 nextToken(); 2695 break; 2696 } 2697 } while (!eof()); 2698 } 2699 2700 void UnwrappedLineParser::keepAncestorBraces() { 2701 if (!Style.RemoveBracesLLVM) 2702 return; 2703 2704 const int MaxNestingLevels = 2; 2705 const int Size = NestedTooDeep.size(); 2706 if (Size >= MaxNestingLevels) 2707 NestedTooDeep[Size - MaxNestingLevels] = true; 2708 NestedTooDeep.push_back(false); 2709 } 2710 2711 static FormatToken *getLastNonComment(const UnwrappedLine &Line) { 2712 for (const auto &Token : llvm::reverse(Line.Tokens)) 2713 if (Token.Tok->isNot(tok::comment)) 2714 return Token.Tok; 2715 2716 return nullptr; 2717 } 2718 2719 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) { 2720 FormatToken *Tok = nullptr; 2721 2722 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() && 2723 PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) { 2724 Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never 2725 ? 
getLastNonComment(*Line) 2726 : Line->Tokens.back().Tok; 2727 assert(Tok); 2728 if (Tok->BraceCount < 0) { 2729 assert(Tok->BraceCount == -1); 2730 Tok = nullptr; 2731 } else { 2732 Tok->BraceCount = -1; 2733 } 2734 } 2735 2736 addUnwrappedLine(); 2737 ++Line->Level; 2738 ++Line->UnbracedBodyLevel; 2739 parseStructuralElement(); 2740 --Line->UnbracedBodyLevel; 2741 2742 if (Tok) { 2743 assert(!Line->InPPDirective); 2744 Tok = nullptr; 2745 for (const auto &L : llvm::reverse(*CurrentLines)) { 2746 if (!L.InPPDirective && getLastNonComment(L)) { 2747 Tok = L.Tokens.back().Tok; 2748 break; 2749 } 2750 } 2751 assert(Tok); 2752 ++Tok->BraceCount; 2753 } 2754 2755 if (CheckEOF && eof()) 2756 addUnwrappedLine(); 2757 2758 --Line->Level; 2759 } 2760 2761 static void markOptionalBraces(FormatToken *LeftBrace) { 2762 if (!LeftBrace) 2763 return; 2764 2765 assert(LeftBrace->is(tok::l_brace)); 2766 2767 FormatToken *RightBrace = LeftBrace->MatchingParen; 2768 if (!RightBrace) { 2769 assert(!LeftBrace->Optional); 2770 return; 2771 } 2772 2773 assert(RightBrace->is(tok::r_brace)); 2774 assert(RightBrace->MatchingParen == LeftBrace); 2775 assert(LeftBrace->Optional == RightBrace->Optional); 2776 2777 LeftBrace->Optional = true; 2778 RightBrace->Optional = true; 2779 } 2780 2781 void UnwrappedLineParser::handleAttributes() { 2782 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`. 2783 if (FormatTok->isAttribute()) 2784 nextToken(); 2785 else if (FormatTok->is(tok::l_square)) 2786 handleCppAttributes(); 2787 } 2788 2789 bool UnwrappedLineParser::handleCppAttributes() { 2790 // Handle [[likely]] / [[unlikely]] attributes. 2791 assert(FormatTok->is(tok::l_square)); 2792 if (!tryToParseSimpleAttribute()) 2793 return false; 2794 parseSquare(); 2795 return true; 2796 } 2797 2798 /// Returns whether \c Tok begins a block. 
bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
  // For Verilog the block opener is whatever Keywords.isVerilogBegin()
  // accepts; for every other language it is `{`.
  // FIXME: rename the function or make
  // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
  return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
                           : Tok.is(tok::l_brace);
}

/// Parses an `if` statement (or, for Verilog, a statement introduced by
/// `restrict`/`assert`/`assume`/`cover`) together with its optional `else`
/// branch, including chained `else if`.
///
/// \param IfKind If non-null and Style.RemoveBracesLLVM is enabled, receives
/// IfOnly, IfElse or IfElseIf depending on what follows the `if` body.
/// \param KeepBraces When Style.RemoveBracesLLVM is enabled, forces the braces
/// of this statement to be kept.
/// \param IsVerilogAssert Enables the Verilog assert-specific handling
/// (`#0`/`final`/`property`/`sequence` after the keyword, optional action).
///
/// \returns The left brace of the `if` body when Style.RemoveBracesLLVM is
/// enabled and the body is a block; nullptr otherwise.
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
                                                  bool KeepBraces,
                                                  bool IsVerilogAssert) {
  assert((FormatTok->is(tok::kw_if) ||
          (Style.isVerilog() &&
           FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
                              Keywords.kw_assume, Keywords.kw_cover))) &&
         "'if' expected");
  nextToken();

  if (IsVerilogAssert) {
    // Handle `assert #0` and `assert final`.
    if (FormatTok->is(Keywords.kw_verilogHash)) {
      nextToken();
      if (FormatTok->is(tok::numeric_constant))
        nextToken();
    } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
                                  Keywords.kw_sequence)) {
      nextToken();
    }
  }

  // TableGen's if statement has the form of `if <cond> then { ... }`.
  if (Style.isTableGen()) {
    while (!eof() && FormatTok->isNot(Keywords.kw_then)) {
      // Simply skip until then. This range only contains a value.
      nextToken();
    }
  }

  // Handle `if !consteval`.
  if (FormatTok->is(tok::exclaim))
    nextToken();

  // Braces of an `if consteval` are always kept.
  bool KeepIfBraces = true;
  if (FormatTok->is(tok::kw_consteval)) {
    nextToken();
  } else {
    KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
    // Skip `constexpr` or a single identifier before the condition
    // (NOTE(review): the identifier is presumably a macro - confirm).
    if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
      nextToken();
    if (FormatTok->is(tok::l_paren)) {
      FormatTok->setFinalizedType(TT_ConditionLParen);
      parseParens();
    }
  }
  handleAttributes();
  // The then action is optional in Verilog assert statements.
  if (IsVerilogAssert && FormatTok->is(tok::semi)) {
    nextToken();
    addUnwrappedLine();
    return nullptr;
  }

  bool NeedsUnwrappedLine = false;
  // With RemoveBracesLLVM this pushes the NestedTooDeep entry that is popped
  // near the end of this function.
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind IfBlockKind = IfStmtKind::NotIf;

  if (isBlockBegin(*FormatTok)) {
    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
               /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
    setPreviousRBraceType(TT_ControlStatementRBrace);
    // Either break before a following `else` right away, or remember that a
    // line break is still owed if no `else` follows.
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
    addUnwrappedLine();
  } else {
    parseUnbracedBody();
  }

  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    KeepIfBraces = KeepIfBraces ||
                   (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
                   NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
                   IfBlockKind == IfStmtKind::IfElseIf;
  }

  bool KeepElseBraces = KeepIfBraces;
  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind Kind = IfStmtKind::IfOnly;

  if (FormatTok->is(tok::kw_else)) {
    if (Style.RemoveBracesLLVM) {
      // Start the "too deep" tracking afresh for the else branch.
      NestedTooDeep.back() = false;
      Kind = IfStmtKind::IfElse;
    }
    nextToken();
    handleAttributes();
    if (isBlockBegin(*FormatTok)) {
      const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
      FormatTok->setFinalizedType(TT_ElseLBrace);
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
      FormatToken *IfLBrace =
          parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                     /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
      setPreviousRBraceType(TT_ElseRBrace);
      if (FormatTok->is(tok::kw_else)) {
        KeepElseBraces = KeepElseBraces ||
                         ElseBlockKind == IfStmtKind::IfOnly ||
                         ElseBlockKind == IfStmtKind::IfElseIf;
      } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
        // `else { if ... }` whose inner `if` keeps its braces: the else braces
        // are marked optional right here and excluded from the final marking.
        KeepElseBraces = true;
        assert(ElseLeftBrace->MatchingParen);
        markOptionalBraces(ElseLeftBrace);
      }
      addUnwrappedLine();
    } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      // A comment between `else` and `if` puts the `if` on its own line, one
      // level deeper.
      const bool IsPrecededByComment = Previous->is(tok::comment);
      if (IsPrecededByComment) {
        addUnwrappedLine();
        ++Line->Level;
      }
      bool TooDeep = true;
      if (Style.RemoveBracesLLVM) {
        Kind = IfStmtKind::IfElseIf;
        // Take this statement's entry off the stack while the chained `if` is
        // parsed, and put it back afterwards.
        TooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(TooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      parseUnbracedBody(/*CheckEOF=*/true);
    }
  } else {
    KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
    if (NeedsUnwrappedLine)
      addUnwrappedLine();
  }

  // Everything below is brace-removal bookkeeping.
  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  KeepElseBraces = KeepElseBraces ||
                   (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
                   NestedTooDeep.back();

  NestedTooDeep.pop_back();

  if (!KeepIfBraces && !KeepElseBraces) {
    markOptionalBraces(IfLeftBrace);
    markOptionalBraces(ElseLeftBrace);
  } else if (IfLeftBrace) {
    // The braces are kept: clear the MatchingParen links on both braces so
    // that a later markOptionalBraces() call on this left brace is a no-op.
    FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
    if (IfRightBrace) {
      assert(IfRightBrace->MatchingParen == IfLeftBrace);
      assert(!IfLeftBrace->Optional);
      assert(!IfRightBrace->Optional);
      IfLeftBrace->MatchingParen = nullptr;
      IfRightBrace->MatchingParen = nullptr;
    }
  }

  if (IfKind)
    *IfKind = Kind;

  return IfLeftBrace;
}
addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }
  while (true) {
    if (FormatTok->is(tok::at))
      nextToken();
    if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
                             tok::kw___finally) ||
          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
           FormatTok->is(Keywords.kw_finally)) ||
          (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
           FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
      break;
    }
    nextToken();
    while (FormatTok->isNot(tok::l_brace)) {
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
        continue;
      }
      if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
        if (Style.RemoveBracesLLVM)
          NestedTooDeep.pop_back();
        return;
      }
      nextToken();
    }
    NeedsUnwrappedLine = false;
    Line->MustBeDeclaration = false;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock();
    if (Style.BraceWrapping.BeforeCatch)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  }

  if (Style.RemoveBracesLLVM)
    NestedTooDeep.pop_back();

  if (NeedsUnwrappedLine)
    addUnwrappedLine();
}

/// Parses a namespace, starting at the 'namespace' keyword or at a token typed
/// as TT_NamespaceMacro.
///
/// For a namespace macro the parenthesized arguments are parsed; otherwise the
/// name tokens (identifiers, '::', 'inline', '.', bracketed and parenthesized
/// groups) are skipped. If a '{' follows, it is typed as TT_NamespaceLBrace and
/// the body is parsed as a declaration block; the number of added indent
/// levels is derived from Style.NamespaceIndentation. Nothing is done if no
/// '{' follows (see the FIXME at the end).
void UnwrappedLineParser::parseNamespace() {
  assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
         "'namespace' expected");

  const FormatToken &InitialToken = *FormatTok;
  nextToken();
  if (InitialToken.is(TT_NamespaceMacro)) {
    parseParens();
  } else {
    while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
                              tok::l_square, tok::period, tok::l_paren) ||
           (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
      if (FormatTok->is(tok::l_square))
        parseSquare();
      else if (FormatTok->is(tok::l_paren))
        parseParens();
      else
        nextToken();
    }
  }
  if (FormatTok->is(tok::l_brace)) {
    FormatTok->setFinalizedType(TT_NamespaceLBrace);

    if (ShouldBreakBeforeBrace(Style, InitialToken))
      addUnwrappedLine();

    // Indent the body for NI_All, or for NI_Inner when more than one
    // declaration scope is currently open.
    unsigned AddLevels =
        Style.NamespaceIndentation == FormatStyle::NI_All ||
                (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
                 DeclarationScopeStack.size() > 1)
            ? 1u
            : 0u;
    bool ManageWhitesmithsBraces =
        AddLevels == 0u &&
        Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;

    // If we're in Whitesmiths mode, indent the brace if we're not indenting
    // the whole block.
    if (ManageWhitesmithsBraces)
      ++Line->Level;

    // Munch the semicolon after a namespace. This is more common than one would
    // think. Putting the semicolon into its own line is very ugly.
    parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
               /*KeepBraces=*/true, /*IfKind=*/nullptr,
               ManageWhitesmithsBraces);

    addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);

    // Undo the Whitesmiths brace indentation applied above.
    if (ManageWhitesmithsBraces)
      --Line->Level;
  }
  // FIXME: Add error handling.
}

/// Handles the tokens following a 'new' keyword.
///
/// - C#: repeatedly parses argument parens and braced initializer lists until
///   a ';' or ',' is the current token (which is left unconsumed) or the end
///   of input is reached.
/// - Java: consumes tokens up to and including the argument parens, plus the
///   body of an anonymous class if one follows; stops early at ';', '{' or '}'.
/// - Any other language: only the 'new' keyword itself is consumed.
void UnwrappedLineParser::parseNew() {
  assert(FormatTok->is(tok::kw_new) && "'new' expected");
  nextToken();

  if (Style.isCSharp()) {
    do {
      // Handle constructor invocation, e.g. `new(field: value)`.
      if (FormatTok->is(tok::l_paren))
        parseParens();

      // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
      if (FormatTok->is(tok::l_brace))
        parseBracedList();

      if (FormatTok->isOneOf(tok::semi, tok::comma))
        return;

      nextToken();
    } while (!eof());
  }

  if (Style.Language != FormatStyle::LK_Java)
    return;

  // In Java, we can parse everything up to the parens, which aren't optional.
  do {
    // There should not be a ;, { or } before the new's open paren.
    if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
      return;

    // Consume the parens.
    if (FormatTok->is(tok::l_paren)) {
      parseParens();

      // If there is a class body of an anonymous class, consume that as child.
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      return;
    }
    nextToken();
  } while (!eof());
}

/// Parses the body of a loop, either a braced block or an unbraced statement.
///
/// \param KeepBraces If false, the braces of a braced body may be marked as
///        optional (when the top of NestedTooDeep is false), and the top entry
///        of NestedTooDeep is popped before returning.
/// \param WrapRightBrace If true, an unwrapped line is added after a braced
///        body.
///
/// NOTE(review): the pop at the end presumably balances a push performed by
/// keepAncestorBraces(), which is defined outside this view -- confirm.
void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
  keepAncestorBraces();

  if (isBlockBegin(*FormatTok)) {
    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
    FormatToken *LeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
               /*MunchSemi=*/true, KeepBraces);
    setPreviousRBraceType(TT_ControlStatementRBrace);
    if (!KeepBraces) {
      assert(!NestedTooDeep.empty());
      if (!NestedTooDeep.back())
        markOptionalBraces(LeftBrace);
    }
    if (WrapRightBrace)
      addUnwrappedLine();
  } else {
    parseUnbracedBody();
  }

  if (!KeepBraces)
    NestedTooDeep.pop_back();
}

/// Parses a 'for' or 'while' loop, a foreach macro, or (in Verilog) one of the
/// procedural keywords listed in the assertion below.
///
/// \param HasParens Whether a parenthesized header may follow the keyword; it
///        is parsed only if the next token actually is '('.
///
/// Braces are kept unless Style.RemoveBracesLLVM is set and the keyword is
/// 'for' or 'while'.
void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
  assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
          (Style.isVerilog() &&
           FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
                              Keywords.kw_always_ff, Keywords.kw_always_latch,
                              Keywords.kw_final, Keywords.kw_initial,
                              Keywords.kw_foreach, Keywords.kw_forever,
                              Keywords.kw_repeat))) &&
         "'for', 'while' or foreach macro expected");
  const bool KeepBraces = !Style.RemoveBracesLLVM ||
                          !FormatTok->isOneOf(tok::kw_for, tok::kw_while);

  nextToken();
  // JS' for await ( ...
  if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
    nextToken();
  if (IsCpp && FormatTok->is(tok::kw_co_await))
    nextToken();
  if (HasParens && FormatTok->is(tok::l_paren)) {
    // The type is only set for Verilog basically because we were afraid to
    // change the existing behavior for loops. See the discussion on D121756 for
    // details.
    if (Style.isVerilog())
      FormatTok->setFinalizedType(TT_ConditionLParen);
    parseParens();
  }

  if (Style.isVerilog()) {
    // Event control.
    parseVerilogSensitivityList();
  } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) &&
             Tokens->getPreviousToken()->is(tok::r_paren)) {
    // Loop with an empty body, e.g. `while (cond);`: the ';' ends the line.
    nextToken();
    addUnwrappedLine();
    return;
  }

  handleAttributes();
  parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
}

/// Parses a do-while statement, starting at the 'do' keyword.
///
/// The body is parsed with braces kept. If no 'while' follows the body, the
/// current line is ended and parsing stops. Otherwise the 'while' is typed as
/// TT_DoWhile and the rest is parsed as a structural element.
void UnwrappedLineParser::parseDoWhile() {
  assert(FormatTok->is(tok::kw_do) && "'do' expected");
  nextToken();

  parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);

  // FIXME: Add error handling.
  if (FormatTok->isNot(tok::kw_while)) {
    addUnwrappedLine();
    return;
  }

  FormatTok->setFinalizedType(TT_DoWhile);

  // If in Whitesmiths mode, the line with the while() needs to be indented
  // to the same level as the block.
  if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
    ++Line->Level;

  nextToken();
  parseStructuralElement();
}

/// Finishes parsing a label; the current token is consumed first.
///
/// \param LeftAlignLabel If true, the label line is placed at level 0.
///        Otherwise its level is reduced by one, but never below 1 inside a
///        preprocessor directive and never below 0 elsewhere.
///
/// The original line level is restored after the label line (and an optional
/// directly following braced block) has been emitted. If the next token is not
/// a '{', the following structural element is parsed as well.
void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
  nextToken();
  unsigned OldLineLevel = Line->Level;

  if (LeftAlignLabel)
    Line->Level = 0;
  else if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
    --Line->Level;

  if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
      FormatTok->is(tok::l_brace)) {

    CompoundStatementIndenter Indenter(this, Line->Level,
                                       Style.BraceWrapping.AfterCaseLabel,
                                       Style.BraceWrapping.IndentBraces);
    parseBlock();
    // A 'break' directly after the block is parsed here as well.
    if (FormatTok->is(tok::kw_break)) {
      if (Style.BraceWrapping.AfterControlStatement ==
          FormatStyle::BWACS_Always) {
        addUnwrappedLine();
        if (!Style.IndentCaseBlocks &&
            Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
          ++Line->Level;
        }
      }
      parseStructuralElement();
    }
    addUnwrappedLine();
  } else {
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }
  Line->Level = OldLineLevel;
  if (FormatTok->isNot(tok::l_brace)) {
    parseStructuralElement();
    addUnwrappedLine();
  }
}

/// Parses a 'case' label, starting at the 'case' keyword.
///
/// Tokens are consumed until a ':' (typed TT_CaseLabelColon) or, in Java, an
/// '->' (typed TT_CaseLabelArrow, with the 'case' token typed
/// TT_SwitchExpressionLabel) is found, or the end of input is reached. The
/// remainder is handled by parseLabel().
void UnwrappedLineParser::parseCaseLabel() {
  assert(FormatTok->is(tok::kw_case) && "'case' expected");
  auto *Case = FormatTok;

  // FIXME: fix handling of complex expressions here.
  do {
    nextToken();
    if (FormatTok->is(tok::colon)) {
      FormatTok->setFinalizedType(TT_CaseLabelColon);
      break;
    }
    if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::arrow)) {
      FormatTok->setFinalizedType(TT_CaseLabelArrow);
      Case->setFinalizedType(TT_SwitchExpressionLabel);
      break;
    }
  } while (!eof());
  parseLabel();
}

/// Parses a 'switch', starting at the 'switch' keyword.
///
/// \param IsExpr If true, a braced body is typed TT_SwitchExpressionLBrace and
///        parsed as a child block without ending the line afterwards;
///        otherwise it is typed TT_ControlStatementLBrace, parsed as a regular
///        block, and the line is ended after it.
///
/// Without a '{', the following structural element is parsed one level
/// deeper.
///
/// NOTE(review): the final pop presumably balances a push performed by
/// keepAncestorBraces(), which is defined outside this view -- confirm.
void UnwrappedLineParser::parseSwitch(bool IsExpr) {
  assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
  nextToken();
  if (FormatTok->is(tok::l_paren))
    parseParens();

  keepAncestorBraces();

  if (FormatTok->is(tok::l_brace)) {
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    FormatTok->setFinalizedType(IsExpr ? TT_SwitchExpressionLBrace
                                       : TT_ControlStatementLBrace);
    if (IsExpr)
      parseChildBlock();
    else
      parseBlock();
    setPreviousRBraceType(TT_ControlStatementRBrace);
    if (!IsExpr)
      addUnwrappedLine();
  } else {
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }

  if (Style.RemoveBracesLLVM)
    NestedTooDeep.pop_back();
}

// Operators that can follow a C variable.
// Returns true if Kind is one of the token kinds listed below, false for every
// other kind. parseAccessSpecifier() uses this to decide whether a would-be
// access specifier is actually being used like a variable name.
static bool isCOperatorFollowingVar(tok::TokenKind Kind) {
  switch (Kind) {
  case tok::ampamp:
  case tok::ampequal:
  case tok::arrow:
  case tok::caret:
  case tok::caretequal:
  case tok::comma:
  case tok::ellipsis:
  case tok::equal:
  case tok::equalequal:
  case tok::exclaim:
  case tok::exclaimequal:
  case tok::greater:
  case tok::greaterequal:
  case tok::greatergreater:
  case tok::greatergreaterequal:
  case tok::l_paren:
  case tok::l_square:
  case tok::less:
  case tok::lessequal:
  case tok::lessless:
  case tok::lesslessequal:
  case tok::minus:
  case tok::minusequal:
  case tok::minusminus:
  case tok::percent:
  case tok::percentequal:
  case tok::period:
  case tok::pipe:
  case tok::pipeequal:
  case tok::pipepipe:
  case tok::plus:
  case tok::plusequal:
  case tok::plusplus:
  case tok::question:
  case tok::r_brace:
  case tok::r_paren:
  case tok::r_square:
  case tok::semi:
  case tok::slash:
  case tok::slashequal:
  case tok::star:
  case tok::starequal:
    return true;
  default:
    return false;
  }
}

/// Parses a (possible) access specifier, starting at its keyword.
///
/// After the keyword and an optional Qt 'slots' keyword:
/// - a ':' is consumed and ends the line;
/// - a token that is neither '::' nor accepted by isCOperatorFollowingVar()
///   ends the line without being consumed;
/// - otherwise the keyword is assumed to be used as an ordinary name and its
///   token kind is changed to tok::identifier.
void UnwrappedLineParser::parseAccessSpecifier() {
  FormatToken *AccessSpecifierCandidate = FormatTok;
  nextToken();
  // Understand Qt's slots.
  if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
    nextToken();
  // Otherwise, we don't know what it is, and we'd better keep the next token.
  if (FormatTok->is(tok::colon)) {
    nextToken();
    addUnwrappedLine();
  } else if (FormatTok->isNot(tok::coloncolon) &&
             !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
    // Not a variable name nor namespace name.
    addUnwrappedLine();
  } else if (AccessSpecifierCandidate) {
    // Consider the access specifier to be a C identifier.
    AccessSpecifierCandidate->Tok.setKind(tok::identifier);
  }
}

/// \brief Parses a requires, decides if it is a clause or an expression.
/// \pre The current token has to be the requires keyword.
/// \returns true if it parsed a clause.
bool UnwrappedLineParser::parseRequires() {
  assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
  auto RequiresToken = FormatTok;

  // We try to guess if it is a requires clause, or a requires expression. For
  // that we first consume the keyword and check the next token.
  nextToken();

  switch (FormatTok->Tok.getKind()) {
  case tok::l_brace:
    // This can only be an expression, never a clause.
    parseRequiresExpression(RequiresToken);
    return false;
  case tok::l_paren:
    // Clauses and expression can start with a paren, it's unclear what we have.
    break;
  default:
    // All other tokens can only be a clause.
    parseRequiresClause(RequiresToken);
    return true;
  }

  // Looking forward we would have to decide if there are function declaration
  // like arguments to the requires expression:
  // requires (T t) {
  // Or there is a constraint expression for the requires clause:
  // requires (C<T> && ...

  // But first let's look behind.
  auto *PreviousNonComment = RequiresToken->getPreviousNonComment();

  if (!PreviousNonComment ||
      PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
    // If there is no token, or an expression left brace, we are a requires
    // clause within a requires expression.
    parseRequiresClause(RequiresToken);
    return true;
  }

  switch (PreviousNonComment->Tok.getKind()) {
  case tok::greater:
  case tok::r_paren:
  case tok::kw_noexcept:
  case tok::kw_const:
    // This is a requires clause.
    parseRequiresClause(RequiresToken);
    return true;
  case tok::amp:
  case tok::ampamp: {
    // This can be either:
    // if (... && requires (T t) ...)
    // Or
    // void member(...) && requires (C<T> ...
    // We check the one token before that for a const:
    // void member(...) const && requires (C<T> ...
    auto PrevPrev = PreviousNonComment->getPreviousNonComment();
    if (PrevPrev && PrevPrev->is(tok::kw_const)) {
      parseRequiresClause(RequiresToken);
      return true;
    }
    break;
  }
  default:
    if (PreviousNonComment->isTypeOrIdentifier(LangOpts)) {
      // This is a requires clause.
      parseRequiresClause(RequiresToken);
      return true;
    }
    // It's an expression.
    parseRequiresExpression(RequiresToken);
    return false;
  }

  // Now we look forward and try to check if the paren content is a parameter
  // list. The parameters can be cv-qualified and contain references or
  // pointers.
  // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
  // of stuff: typename, const, *, &, &&, ::, identifiers.

  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *NextToken = Tokens->getNextToken();
  int Lookahead = 0;
  auto PeekNext = [&Lookahead, &NextToken, this] {
    ++Lookahead;
    NextToken = Tokens->getNextToken();
  };

  bool FoundType = false;
  bool LastWasColonColon = false;
  int OpenAngles = 0;

  // Peek at most 50 tokens ahead. Every exit path below first rewinds the
  // token source to StoredPosition.
  for (; Lookahead < 50; PeekNext()) {
    switch (NextToken->Tok.getKind()) {
    case tok::kw_volatile:
    case tok::kw_const:
    case tok::comma:
      if (OpenAngles == 0) {
        FormatTok = Tokens->setPosition(StoredPosition);
        parseRequiresExpression(RequiresToken);
        return false;
      }
      break;
    case tok::eof:
      // Break out of the loop.
      Lookahead = 50;
      break;
    case tok::coloncolon:
      LastWasColonColon = true;
      break;
    case tok::kw_decltype:
    case tok::identifier:
      if (FoundType && !LastWasColonColon && OpenAngles == 0) {
        FormatTok = Tokens->setPosition(StoredPosition);
        parseRequiresExpression(RequiresToken);
        return false;
      }
      FoundType = true;
      LastWasColonColon = false;
      break;
    case tok::less:
      ++OpenAngles;
      break;
    case tok::greater:
      --OpenAngles;
      break;
    default:
      if (NextToken->isTypeName(LangOpts)) {
        FormatTok = Tokens->setPosition(StoredPosition);
        parseRequiresExpression(RequiresToken);
        return false;
      }
      break;
    }
  }
  // This seems to be a complicated expression, just assume it's a clause.
  FormatTok = Tokens->setPosition(StoredPosition);
  parseRequiresClause(RequiresToken);
  return true;
}

/// \brief Parses a requires clause.
/// \param RequiresToken The requires keyword token, which starts this clause.
/// \pre We need to be on the next token after the requires keyword.
/// \sa parseRequiresExpression
///
/// Returns if it either has finished parsing the clause, or it detects, that
/// the clause is incorrect.
void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
  assert(FormatTok->getPreviousNonComment() == RequiresToken);
  assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");

  // If there is no previous token, we are within a requires expression,
  // otherwise we will always have the template or function declaration in front
  // of it.
  bool InRequiresExpression =
      !RequiresToken->Previous ||
      RequiresToken->Previous->is(TT_RequiresExpressionLBrace);

  RequiresToken->setFinalizedType(InRequiresExpression
                                      ? TT_RequiresClauseInARequiresExpression
                                      : TT_RequiresClause);

  // NOTE: parseConstraintExpression is only ever called from this function.
  // It could be inlined into here.
  parseConstraintExpression();

  // Flag the last token of the clause, i.e. the one before the current token.
  if (!InRequiresExpression)
    FormatTok->Previous->ClosesRequiresClause = true;
}

/// \brief Parses a requires expression.
/// \param RequiresToken The requires keyword token, which starts this clause.
/// \pre We need to be on the next token after the requires keyword.
/// \sa parseRequiresClause
///
/// Returns if it either has finished parsing the expression, or it detects,
/// that the expression is incorrect.
void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
  assert(FormatTok->getPreviousNonComment() == RequiresToken);
  assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");

  RequiresToken->setFinalizedType(TT_RequiresExpression);

  if (FormatTok->is(tok::l_paren)) {
    FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
    parseParens();
  }

  if (FormatTok->is(tok::l_brace)) {
    FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
    parseChildBlock();
  }
}

/// \brief Parses a constraint expression.
///
/// This is the body of a requires clause. It returns, when the parsing is
/// complete, or the expression is incorrect.
void UnwrappedLineParser::parseConstraintExpression() {
  // The special handling for lambdas is needed since tryToParseLambda() eats a
  // token and if a requires expression is the last part of a requires clause
  // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
  // not set on the correct token. Thus we need to be aware if we even expect a
  // lambda to be possible.
  // template <typename T> requires requires { ... } [[nodiscard]] ...;
  bool LambdaNextTimeAllowed = true;

  // Within lambda declarations, it is permitted to put a requires clause after
  // its template parameter list, which would place the requires clause right
  // before the parentheses of the parameters of the lambda declaration. Thus,
  // we track if we expect to see grouping parentheses at all.
  // Without this check, `requires foo<T> (T t)` in the below example would be
  // seen as the whole requires clause, accidentally eating the parameters of
  // the lambda.
  // [&]<typename T> requires foo<T> (T t) { ... };
  bool TopLevelParensAllowed = true;

  do {
    // Read the flag for this iteration and reset it; the cases below re-arm it
    // where a lambda may follow.
    bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);

    switch (FormatTok->Tok.getKind()) {
    case tok::kw_requires: {
      auto RequiresToken = FormatTok;
      nextToken();
      parseRequiresExpression(RequiresToken);
      break;
    }

    case tok::l_paren:
      if (!TopLevelParensAllowed)
        return;
      parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
      TopLevelParensAllowed = false;
      break;

    case tok::l_square:
      if (!LambdaThisTimeAllowed || !tryToParseLambda())
        return;
      break;

    case tok::kw_const:
    case tok::semi:
    case tok::kw_class:
    case tok::kw_struct:
    case tok::kw_union:
      return;

    case tok::l_brace:
      // Potential function body.
      return;

    case tok::ampamp:
    case tok::pipepipe:
      FormatTok->setFinalizedType(TT_BinaryOperator);
      nextToken();
      LambdaNextTimeAllowed = true;
      TopLevelParensAllowed = true;
      break;

    case tok::comma:
    case tok::comment:
      // Carry the lambda permission over to the next token unchanged.
      LambdaNextTimeAllowed = LambdaThisTimeAllowed;
      nextToken();
      break;

    case tok::kw_sizeof:
    case tok::greater:
    case tok::greaterequal:
    case tok::greatergreater:
    case tok::less:
    case tok::lessequal:
    case tok::lessless:
    case tok::equalequal:
    case tok::exclaim:
    case tok::exclaimequal:
    case tok::plus:
    case tok::minus:
    case tok::star:
    case tok::slash:
      LambdaNextTimeAllowed = true;
      TopLevelParensAllowed = true;
      // Just eat them.
      nextToken();
      break;

    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_true:
    case tok::kw_false:
      TopLevelParensAllowed = false;
      // Just eat them.
      nextToken();
      break;

    case tok::kw_static_cast:
    case tok::kw_const_cast:
    case tok::kw_reinterpret_cast:
    case tok::kw_dynamic_cast:
      nextToken();
      if (FormatTok->isNot(tok::less))
        return;

      nextToken();
      parseBracedList(/*IsAngleBracket=*/true);
      break;

    default:
      if (!FormatTok->Tok.getIdentifierInfo()) {
        // Identifiers are part of the default case, we check for more than
        // tok::identifier to handle builtin type traits.
        return;
      }

      // We need to differentiate identifiers for a template deduction guide,
      // variables, or function return types (the constraint expression has
      // ended before that), and basically all other cases. But it's easier to
      // check the other way around.
      assert(FormatTok->Previous);
      switch (FormatTok->Previous->Tok.getKind()) {
      case tok::coloncolon:  // Nested identifier.
      case tok::ampamp:      // Start of a function or variable for the
      case tok::pipepipe:    // constraint expression. (binary)
      case tok::exclaim:     // The same as above, but unary.
      case tok::kw_requires: // Initial identifier of a requires clause.
      case tok::equal:       // Initial identifier of a concept declaration.
        break;
      default:
        return;
      }

      // Read identifier with optional template declaration.
      nextToken();
      if (FormatTok->is(tok::less)) {
        nextToken();
        parseBracedList(/*IsAngleBracket=*/true);
      }
      TopLevelParensAllowed = false;
      break;
    }
  } while (!eof());
}

/// Parses an enum, starting at 'enum' (or at the first token of an NS_ENUM
/// style macro, in which case no 'enum' keyword is consumed).
///
/// \returns false if the tokens turn out not to introduce an enum: a ':' or
/// '?' follows in JavaScript, an '=' follows in protobuf, parsing a C++
/// attribute fails, or two identifiers appear in a row in C++. Returns true in
/// all other cases, including a declaration without a body.
bool UnwrappedLineParser::parseEnum() {
  const FormatToken &InitialToken = *FormatTok;

  // Won't be 'enum' for NS_ENUMs.
  if (FormatTok->is(tok::kw_enum))
    nextToken();

  // In TypeScript, "enum" can also be used as property name, e.g. in interface
  // declarations. An "enum" keyword followed by a colon would be a syntax
  // error and thus assume it is just an identifier.
  if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
    return false;

  // In protobuf, "enum" can be used as a field name.
  if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
    return false;

  if (IsCpp) {
    // Eat up enum class ...
    if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
      nextToken();
    while (FormatTok->is(tok::l_square))
      if (!handleCppAttributes())
        return false;
  }

  while (FormatTok->Tok.getIdentifierInfo() ||
         FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
                            tok::greater, tok::comma, tok::question,
                            tok::l_square)) {
    if (Style.isVerilog()) {
      FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
      nextToken();
      // In Verilog the base type can have dimensions.
      while (FormatTok->is(tok::l_square))
        parseSquare();
    } else {
      nextToken();
    }
    // We can have macros or attributes in between 'enum' and the enum name.
    if (FormatTok->is(tok::l_paren))
      parseParens();
    if (FormatTok->is(tok::identifier)) {
      nextToken();
      // If there are two identifiers in a row, this is likely an elaborate
      // return type. In Java, this can be "implements", etc.
      if (IsCpp && FormatTok->is(tok::identifier))
        return false;
    }
  }

  // Just a declaration or something is wrong.
  if (FormatTok->isNot(tok::l_brace))
    return true;
  FormatTok->setFinalizedType(TT_EnumLBrace);
  FormatTok->setBlockKind(BK_Block);

  if (Style.Language == FormatStyle::LK_Java) {
    // Java enums are different.
    parseJavaEnumBody();
    return true;
  }
  if (Style.Language == FormatStyle::LK_Proto) {
    parseBlock(/*MustBeDeclaration=*/true);
    return true;
  }

  if (!Style.AllowShortEnumsOnASingleLine &&
      ShouldBreakBeforeBrace(Style, InitialToken)) {
    addUnwrappedLine();
  }
  // Parse enum body.
  nextToken();
  if (!Style.AllowShortEnumsOnASingleLine) {
    addUnwrappedLine();
    Line->Level += 1;
  }
  bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
  if (!Style.AllowShortEnumsOnASingleLine)
    Line->Level -= 1;
  if (HasError) {
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }
  setPreviousRBraceType(TT_EnumRBrace);
  return true;

  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "enum A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}

/// Parses a struct-like record via parseRecord().
///
/// \returns true for Java, JavaScript and C#, where an optional ';' is
/// consumed and the line is ended here; false for all other languages, where
/// the line is left open.
bool UnwrappedLineParser::parseStructLike() {
  // parseRecord falls through and does not yet add an unwrapped line as a
  // record declaration or definition can start a structural element.
  parseRecord();
  // This does not apply to Java, JavaScript and C#.
  if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
      Style.isCSharp()) {
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
    return true;
  }
  return false;
}

namespace {
// A class used to set and restore the Token position when peeking
// ahead in the token source.
class ScopedTokenPosition {
  // Position of the token source at construction time.
  unsigned StoredPosition;
  // The token source to rewind; asserted to be non-null in the constructor.
  FormatTokenSource *Tokens;

public:
  // Records the current position of Tokens.
  ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
    assert(Tokens && "Tokens expected to not be null");
    StoredPosition = Tokens->getPosition();
  }

  // Rewinds the token source to the recorded position.
  ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
};
} // namespace

// Look to see if we have [[ by looking ahead, if
// it's not then rewind to the original position.
//
// The look-ahead requires a second '[', then some later ']' that is directly
// followed by another ']', and rejects the case where the token after that is
// a ';'. The token position is restored on every return path, because
// AutoPosition rewinds it in its destructor.
bool UnwrappedLineParser::tryToParseSimpleAttribute() {
  ScopedTokenPosition AutoPosition(Tokens);
  FormatToken *Tok = Tokens->getNextToken();
  // We already read the first [ check for the second.
  if (Tok->isNot(tok::l_square))
    return false;
  // Double check that the attribute is just something
  // fairly simple.
  while (Tok->isNot(tok::eof)) {
    if (Tok->is(tok::r_square))
      break;
    Tok = Tokens->getNextToken();
  }
  if (Tok->is(tok::eof))
    return false;
  Tok = Tokens->getNextToken();
  if (Tok->isNot(tok::r_square))
    return false;
  Tok = Tokens->getNextToken();
  if (Tok->is(tok::semi))
    return false;
  return true;
}

/// Parses the body of a Java enum, starting at its '{'.
///
/// A look-ahead (rewound afterwards) classifies the enum as "simple" when no
/// '{' or ';' occurs before the closing '}'. Simple enums are parsed as a
/// braced list. Otherwise each constant gets its own line and whatever follows
/// the constants is parsed with parseLevel().
void UnwrappedLineParser::parseJavaEnumBody() {
  assert(FormatTok->is(tok::l_brace));
  const FormatToken *OpeningBrace = FormatTok;

  // Determine whether the enum is simple, i.e. does not have a semicolon or
  // constants with class bodies. Simple enums can be formatted like braced
  // lists, contracted to a single line, etc.
  unsigned StoredPosition = Tokens->getPosition();
  bool IsSimple = true;
  FormatToken *Tok = Tokens->getNextToken();
  while (Tok->isNot(tok::eof)) {
    if (Tok->is(tok::r_brace))
      break;
    if (Tok->isOneOf(tok::l_brace, tok::semi)) {
      IsSimple = false;
      break;
    }
    // FIXME: This will also mark enums with braces in the arguments to enum
    // constants as "not simple". This is probably fine in practice, though.
    Tok = Tokens->getNextToken();
  }
  FormatTok = Tokens->setPosition(StoredPosition);

  if (IsSimple) {
    nextToken();
    parseBracedList();
    addUnwrappedLine();
    return;
  }

  // Parse the body of a more complex enum.
  // First add a line for everything up to the "{".
  nextToken();
  addUnwrappedLine();
  ++Line->Level;

  // Parse the enum constants.
  while (!eof()) {
    if (FormatTok->is(tok::l_brace)) {
      // Parse the constant's class body.
      parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
    } else if (FormatTok->is(tok::l_paren)) {
      parseParens();
    } else if (FormatTok->is(tok::comma)) {
      nextToken();
      addUnwrappedLine();
    } else if (FormatTok->is(tok::semi)) {
      nextToken();
      addUnwrappedLine();
      break;
    } else if (FormatTok->is(tok::r_brace)) {
      addUnwrappedLine();
      break;
    } else {
      nextToken();
    }
  }

  // Parse the class body after the enum's ";" if any.
  parseLevel(OpeningBrace);
  nextToken();
  --Line->Level;
  addUnwrappedLine();
}

/// Parses a record, starting at its introducing keyword. 'class', 'struct'
/// and 'union' get dedicated brace types; any other introducing token gets the
/// generic record brace types.
///
/// \param ParseAsExpr If true, the record body is parsed as a child block
///        instead of a regular block.
///
/// The function skips the name (including attributes, macros and nested name
/// specifiers), then an optional base-clause / template-argument section, and
/// finally parses the body if a '{' follows. It returns early, without parsing
/// a body, when a ';' is found or when the '{' is judged to start a list
/// initialization. No unwrapped line is added at the end (see the comment at
/// the bottom).
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  const FormatToken *ClassName = nullptr;
  bool IsDerived = false;
  // An identifier whose text is not entirely upper case is not treated as a
  // macro name.
  auto IsNonMacroIdentifier = [](const FormatToken *Tok) {
    return Tok->is(tok::identifier) && Tok->TokenText != Tok->TokenText.upper();
  };
  // JavaScript/TypeScript supports anonymous classes like:
  // a = class extends foo { }
  bool JSPastExtendsOrImplements = false;
  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw_alignas, tok::l_square) ||
         FormatTok->isAttribute() ||
         ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.isJavaScript() &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      JSPastExtendsOrImplements = true;
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    if (FormatTok->is(tok::l_square) && handleCppAttributes())
      continue;
    const auto *Previous = FormatTok;
    nextToken();
    switch (FormatTok->Tok.getKind()) {
    case tok::l_paren:
      // We can have macros in between 'class' and the class name.
      if (!IsNonMacroIdentifier(Previous) ||
          // e.g. `struct macro(a) S { int i; };`
          Previous->Previous == &InitialToken) {
        parseParens();
      }
      break;
    case tok::coloncolon:
    case tok::hashhash:
      break;
    default:
      // The first plain identifier that is not an attribute macro is taken as
      // the class name.
      if (!JSPastExtendsOrImplements && !ClassName &&
          Previous->is(tok::identifier) && Previous->isNot(TT_AttributeMacro)) {
        ClassName = Previous;
      }
    }
  }

  // Must be called with FormatTok on a '{'. Returns true only if a class name
  // was seen, no base clause was seen, the token before the '{' is an
  // identifier other than the class name and 'final', and the braces parse as
  // a braced list.
  auto IsListInitialization = [&] {
    if (!ClassName || IsDerived)
      return false;
    assert(FormatTok->is(tok::l_brace));
    const auto *Prev = FormatTok->getPreviousNonComment();
    assert(Prev);
    return Prev != ClassName && Prev->is(tok::identifier) &&
           Prev->isNot(Keywords.kw_final) && tryToParseBracedList();
  };

  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    int AngleNestingLevel = 0;
    do {
      if (FormatTok->is(tok::less))
        ++AngleNestingLevel;
      else if (FormatTok->is(tok::greater))
        --AngleNestingLevel;

      if (AngleNestingLevel == 0) {
        if (FormatTok->is(tok::colon)) {
          IsDerived = true;
        } else if (FormatTok->is(tok::identifier) &&
                   FormatTok->Previous->is(tok::coloncolon)) {
          ClassName = FormatTok;
        } else if (FormatTok->is(tok::l_paren) &&
                   IsNonMacroIdentifier(FormatTok->Previous)) {
          break;
        }
      }
      if (FormatTok->is(tok::l_brace)) {
        if (AngleNestingLevel == 0 && IsListInitialization())
          return;
        calculateBraceTypes(/*ExpectClassBody=*/true);
        if (!tryToParseBracedList())
          break;
      }
      if (FormatTok->is(tok::l_square)) {
        FormatToken *Previous = FormatTok->Previous;
        if (!Previous || (Previous->isNot(tok::r_paren) &&
                          !Previous->isTypeOrIdentifier(LangOpts))) {
          // Don't try parsing a lambda if we had a closing parenthesis before,
          // it was probably a pointer to an array: int (*)[].
          if (!tryToParseLambda())
            continue;
        } else {
          parseSquare();
          continue;
        }
      }
      if (FormatTok->is(tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    } while (!eof());
  }

  // Maps the record keyword to the token types used for its braces.
  auto GetBraceTypes =
      [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
    switch (RecordTok.Tok.getKind()) {
    case tok::kw_class:
      return {TT_ClassLBrace, TT_ClassRBrace};
    case tok::kw_struct:
      return {TT_StructLBrace, TT_StructRBrace};
    case tok::kw_union:
      return {TT_UnionLBrace, TT_UnionRBrace};
    default:
      // Useful for e.g. interface.
      return {TT_RecordLBrace, TT_RecordRBrace};
    }
  };
  if (FormatTok->is(tok::l_brace)) {
    if (IsListInitialization())
      return;
    auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
    FormatTok->setFinalizedType(OpenBraceType);
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
      parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
    }
    setPreviousRBraceType(ClosingBraceType);
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}

/// Parses an Objective-C method declaration or definition.
///
/// Tokens are consumed until a ';' (declaration) or a '{' (definition, whose
/// body is then parsed as a block) is found; in both cases the line is ended.
/// The loop also stops at the end of input.
void UnwrappedLineParser::parseObjCMethod() {
  assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
         "'(' or identifier expected.");
  do {
    if (FormatTok->is(tok::semi)) {
      nextToken();
      addUnwrappedLine();
      return;
    } else if (FormatTok->is(tok::l_brace)) {
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseBlock();
      addUnwrappedLine();
      return;
    } else {
      nextToken();
    }
  } while (!eof());
}

/// Skips an Objective-C protocol list, starting at its '<'.
///
/// Consumes tokens through the closing '>'. If a ';', '{' or @end is
/// encountered first, returns immediately and leaves that token unconsumed.
void UnwrappedLineParser::parseObjCProtocolList() {
  assert(FormatTok->is(tok::less) && "'<' expected.");
  do {
    nextToken();
    // Early exit in case someone forgot a close angle.
    if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
        FormatTok->isObjCAtKeyword(tok::objc_end)) {
      return;
    }
  } while (!eof() && FormatTok->isNot(tok::greater));
  nextToken(); // Skip '>'.
}

/// Parses the contents of an Objective-C container until @end (which is
/// consumed and ends the line) or the end of input.
///
/// Braced blocks, stray '}' tokens and '-'/'+' method declarations are handled
/// here; everything else is delegated to parseStructuralElement().
void UnwrappedLineParser::parseObjCUntilAtEnd() {
  do {
    if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
      nextToken();
      addUnwrappedLine();
      break;
    }
    if (FormatTok->is(tok::l_brace)) {
      parseBlock();
      // In ObjC interfaces, nothing should be following the "}".
      addUnwrappedLine();
    } else if (FormatTok->is(tok::r_brace)) {
      // Ignore stray "}". parseStructuralElement doesn't consume them.
      nextToken();
      addUnwrappedLine();
    } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
      nextToken();
      parseObjCMethod();
    } else {
      parseStructuralElement();
    }
  } while (!eof());
}

/// Parses an @interface or @implementation, starting at that keyword.
///
/// Handles, in order: the name, optional lightweight generics, either a base
/// class (optionally with generics) or a category, an optional protocol list
/// and an optional instance-variable block. The header line is then ended and
/// the remainder is parsed up to @end.
void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
         FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
  nextToken();
  nextToken(); // interface name

  // @interface can be followed by a lightweight generic
  // specialization list, then either a base class or a category.
  if (FormatTok->is(tok::less))
    parseObjCLightweightGenerics();
  if (FormatTok->is(tok::colon)) {
    nextToken();
    nextToken(); // base class name
    // The base class can also have lightweight generics applied to it.
    if (FormatTok->is(tok::less))
      parseObjCLightweightGenerics();
  } else if (FormatTok->is(tok::l_paren)) {
    // Skip category, if present.
    parseParens();
  }

  if (FormatTok->is(tok::less))
    parseObjCProtocolList();

  if (FormatTok->is(tok::l_brace)) {
    if (Style.BraceWrapping.AfterObjCDeclaration)
      addUnwrappedLine();
    parseBlock(/*MustBeDeclaration=*/true);
  }

  // With instance variables, this puts '}' on its own line. Without instance
  // variables, this ends the @interface line.
  addUnwrappedLine();

  parseObjCUntilAtEnd();
}

/// Skips an Objective-C lightweight generics list, starting at its '<'.
///
/// Nested angle brackets are counted so that the matching '>' is found.
///
/// NOTE(review): on the early-exit path below, the `break` falls through to
/// the trailing nextToken(), so the ';', '{' or @end token is consumed as
/// well. parseObjCProtocolList() returns without consuming it in the same
/// situation -- confirm which behavior is intended.
void UnwrappedLineParser::parseObjCLightweightGenerics() {
  assert(FormatTok->is(tok::less));
  // Unlike protocol lists, generic parameterizations support
  // nested angles:
  //
  // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
  //     NSObject <NSCopying, NSSecureCoding>
  //
  // so we need to count how many open angles we have left.
  unsigned NumOpenAngles = 1;
  do {
    nextToken();
    // Early exit in case someone forgot a close angle.
    if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
        FormatTok->isObjCAtKeyword(tok::objc_end)) {
      break;
    }
    if (FormatTok->is(tok::less)) {
      ++NumOpenAngles;
    } else if (FormatTok->is(tok::greater)) {
      assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
      --NumOpenAngles;
    }
  } while (!eof() && NumOpenAngles != 0);
  nextToken(); // Skip '>'.
}

// Returns true for the declaration/definition form of @protocol,
// false for the expression form.
//
// In the expression form only the @protocol keyword is consumed; the '(' is
// left as the current token.
bool UnwrappedLineParser::parseObjCProtocol() {
  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
  nextToken();

  if (FormatTok->is(tok::l_paren)) {
    // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
    return false;
  }

  // The definition/declaration form,
  // @protocol Foo
  // - (int)someMethod;
  // @end

  nextToken(); // protocol name

  if (FormatTok->is(tok::less))
    parseObjCProtocolList();

  // Check for protocol declaration.
  if (FormatTok->is(tok::semi)) {
    nextToken();
    addUnwrappedLine();
    return true;
  }

  addUnwrappedLine();
  parseObjCUntilAtEnd();
  return true;
}

void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
  bool IsImport = FormatTok->is(Keywords.kw_import);
  assert(IsImport || FormatTok->is(tok::kw_export));
  nextToken();

  // Consume the "default" in "export default class/function".
  if (FormatTok->is(tok::kw_default))
    nextToken();

  // Consume "async function", "function" and "default function", so that these
  // get parsed as free-standing JS functions, i.e. do not require a trailing
  // semicolon.
4306 if (FormatTok->is(Keywords.kw_async)) 4307 nextToken(); 4308 if (FormatTok->is(Keywords.kw_function)) { 4309 nextToken(); 4310 return; 4311 } 4312 4313 // For imports, `export *`, `export {...}`, consume the rest of the line up 4314 // to the terminating `;`. For everything else, just return and continue 4315 // parsing the structural element, i.e. the declaration or expression for 4316 // `export default`. 4317 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 4318 !FormatTok->isStringLiteral() && 4319 !(FormatTok->is(Keywords.kw_type) && 4320 Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) { 4321 return; 4322 } 4323 4324 while (!eof()) { 4325 if (FormatTok->is(tok::semi)) 4326 return; 4327 if (Line->Tokens.empty()) { 4328 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 4329 // import statement should terminate. 4330 return; 4331 } 4332 if (FormatTok->is(tok::l_brace)) { 4333 FormatTok->setBlockKind(BK_Block); 4334 nextToken(); 4335 parseBracedList(); 4336 } else { 4337 nextToken(); 4338 } 4339 } 4340 } 4341 4342 void UnwrappedLineParser::parseStatementMacro() { 4343 nextToken(); 4344 if (FormatTok->is(tok::l_paren)) 4345 parseParens(); 4346 if (FormatTok->is(tok::semi)) 4347 nextToken(); 4348 addUnwrappedLine(); 4349 } 4350 4351 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() { 4352 // consume things like a::`b.c[d:e] or a::* 4353 while (true) { 4354 if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar, 4355 tok::coloncolon, tok::hash) || 4356 Keywords.isVerilogIdentifier(*FormatTok)) { 4357 nextToken(); 4358 } else if (FormatTok->is(tok::l_square)) { 4359 parseSquare(); 4360 } else { 4361 break; 4362 } 4363 } 4364 } 4365 4366 void UnwrappedLineParser::parseVerilogSensitivityList() { 4367 if (FormatTok->isNot(tok::at)) 4368 return; 4369 nextToken(); 4370 // A block event expression has 2 at signs. 
4371 if (FormatTok->is(tok::at)) 4372 nextToken(); 4373 switch (FormatTok->Tok.getKind()) { 4374 case tok::star: 4375 nextToken(); 4376 break; 4377 case tok::l_paren: 4378 parseParens(); 4379 break; 4380 default: 4381 parseVerilogHierarchyIdentifier(); 4382 break; 4383 } 4384 } 4385 4386 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() { 4387 unsigned AddLevels = 0; 4388 4389 if (FormatTok->is(Keywords.kw_clocking)) { 4390 nextToken(); 4391 if (Keywords.isVerilogIdentifier(*FormatTok)) 4392 nextToken(); 4393 parseVerilogSensitivityList(); 4394 if (FormatTok->is(tok::semi)) 4395 nextToken(); 4396 } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex, 4397 Keywords.kw_casez, Keywords.kw_randcase, 4398 Keywords.kw_randsequence)) { 4399 if (Style.IndentCaseLabels) 4400 AddLevels++; 4401 nextToken(); 4402 if (FormatTok->is(tok::l_paren)) { 4403 FormatTok->setFinalizedType(TT_ConditionLParen); 4404 parseParens(); 4405 } 4406 if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches)) 4407 nextToken(); 4408 // The case header has no semicolon. 4409 } else { 4410 // "module" etc. 
4411 nextToken(); 4412 // all the words like the name of the module and specifiers like 4413 // "automatic" and the width of function return type 4414 while (true) { 4415 if (FormatTok->is(tok::l_square)) { 4416 auto Prev = FormatTok->getPreviousNonComment(); 4417 if (Prev && Keywords.isVerilogIdentifier(*Prev)) 4418 Prev->setFinalizedType(TT_VerilogDimensionedTypeName); 4419 parseSquare(); 4420 } else if (Keywords.isVerilogIdentifier(*FormatTok) || 4421 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) { 4422 nextToken(); 4423 } else { 4424 break; 4425 } 4426 } 4427 4428 auto NewLine = [this]() { 4429 addUnwrappedLine(); 4430 Line->IsContinuation = true; 4431 }; 4432 4433 // package imports 4434 while (FormatTok->is(Keywords.kw_import)) { 4435 NewLine(); 4436 nextToken(); 4437 parseVerilogHierarchyIdentifier(); 4438 if (FormatTok->is(tok::semi)) 4439 nextToken(); 4440 } 4441 4442 // parameters and ports 4443 if (FormatTok->is(Keywords.kw_verilogHash)) { 4444 NewLine(); 4445 nextToken(); 4446 if (FormatTok->is(tok::l_paren)) { 4447 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen); 4448 parseParens(); 4449 } 4450 } 4451 if (FormatTok->is(tok::l_paren)) { 4452 NewLine(); 4453 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen); 4454 parseParens(); 4455 } 4456 4457 // extends and implements 4458 if (FormatTok->is(Keywords.kw_extends)) { 4459 NewLine(); 4460 nextToken(); 4461 parseVerilogHierarchyIdentifier(); 4462 if (FormatTok->is(tok::l_paren)) 4463 parseParens(); 4464 } 4465 if (FormatTok->is(Keywords.kw_implements)) { 4466 NewLine(); 4467 do { 4468 nextToken(); 4469 parseVerilogHierarchyIdentifier(); 4470 } while (FormatTok->is(tok::comma)); 4471 } 4472 4473 // Coverage event for cover groups. 
4474 if (FormatTok->is(tok::at)) { 4475 NewLine(); 4476 parseVerilogSensitivityList(); 4477 } 4478 4479 if (FormatTok->is(tok::semi)) 4480 nextToken(/*LevelDifference=*/1); 4481 addUnwrappedLine(); 4482 } 4483 4484 return AddLevels; 4485 } 4486 4487 void UnwrappedLineParser::parseVerilogTable() { 4488 assert(FormatTok->is(Keywords.kw_table)); 4489 nextToken(/*LevelDifference=*/1); 4490 addUnwrappedLine(); 4491 4492 auto InitialLevel = Line->Level++; 4493 while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) { 4494 FormatToken *Tok = FormatTok; 4495 nextToken(); 4496 if (Tok->is(tok::semi)) 4497 addUnwrappedLine(); 4498 else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus)) 4499 Tok->setFinalizedType(TT_VerilogTableItem); 4500 } 4501 Line->Level = InitialLevel; 4502 nextToken(/*LevelDifference=*/-1); 4503 addUnwrappedLine(); 4504 } 4505 4506 void UnwrappedLineParser::parseVerilogCaseLabel() { 4507 // The label will get unindented in AnnotatingParser. If there are no leading 4508 // spaces, indent the rest here so that things inside the block will be 4509 // indented relative to things outside. We don't use parseLabel because we 4510 // don't know whether this colon is a label or a ternary expression at this 4511 // point. 4512 auto OrigLevel = Line->Level; 4513 auto FirstLine = CurrentLines->size(); 4514 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1)) 4515 ++Line->Level; 4516 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok)) 4517 --Line->Level; 4518 parseStructuralElement(); 4519 // Restore the indentation in both the new line and the line that has the 4520 // label. 
4521 if (CurrentLines->size() > FirstLine) 4522 (*CurrentLines)[FirstLine].Level = OrigLevel; 4523 Line->Level = OrigLevel; 4524 } 4525 4526 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const { 4527 for (const auto &N : Line.Tokens) { 4528 if (N.Tok->MacroCtx) 4529 return true; 4530 for (const UnwrappedLine &Child : N.Children) 4531 if (containsExpansion(Child)) 4532 return true; 4533 } 4534 return false; 4535 } 4536 4537 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { 4538 if (Line->Tokens.empty()) 4539 return; 4540 LLVM_DEBUG({ 4541 if (!parsingPPDirective()) { 4542 llvm::dbgs() << "Adding unwrapped line:\n"; 4543 printDebugInfo(*Line); 4544 } 4545 }); 4546 4547 // If this line closes a block when in Whitesmiths mode, remember that 4548 // information so that the level can be decreased after the line is added. 4549 // This has to happen after the addition of the line since the line itself 4550 // needs to be indented. 4551 bool ClosesWhitesmithsBlock = 4552 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && 4553 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 4554 4555 // If the current line was expanded from a macro call, we use it to 4556 // reconstruct an unwrapped line from the structure of the expanded unwrapped 4557 // line and the unexpanded token stream. 4558 if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) { 4559 if (!Reconstruct) 4560 Reconstruct.emplace(Line->Level, Unexpanded); 4561 Reconstruct->addLine(*Line); 4562 4563 // While the reconstructed unexpanded lines are stored in the normal 4564 // flow of lines, the expanded lines are stored on the side to be analyzed 4565 // in an extra step. 
4566 CurrentExpandedLines.push_back(std::move(*Line)); 4567 4568 if (Reconstruct->finished()) { 4569 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult(); 4570 assert(!Reconstructed.Tokens.empty() && 4571 "Reconstructed must at least contain the macro identifier."); 4572 assert(!parsingPPDirective()); 4573 LLVM_DEBUG({ 4574 llvm::dbgs() << "Adding unexpanded line:\n"; 4575 printDebugInfo(Reconstructed); 4576 }); 4577 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines; 4578 Lines.push_back(std::move(Reconstructed)); 4579 CurrentExpandedLines.clear(); 4580 Reconstruct.reset(); 4581 } 4582 } else { 4583 // At the top level we only get here when no unexpansion is going on, or 4584 // when conditional formatting led to unfinished macro reconstructions. 4585 assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0); 4586 CurrentLines->push_back(std::move(*Line)); 4587 } 4588 Line->Tokens.clear(); 4589 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 4590 Line->FirstStartColumn = 0; 4591 Line->IsContinuation = false; 4592 Line->SeenDecltypeAuto = false; 4593 4594 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) 4595 --Line->Level; 4596 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) { 4597 CurrentLines->append( 4598 std::make_move_iterator(PreprocessorDirectives.begin()), 4599 std::make_move_iterator(PreprocessorDirectives.end())); 4600 PreprocessorDirectives.clear(); 4601 } 4602 // Disconnect the current token from the last token on the previous line. 4603 FormatTok->Previous = nullptr; 4604 } 4605 4606 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); } 4607 4608 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 4609 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 4610 FormatTok.NewlinesBefore > 0; 4611 } 4612 4613 // Checks if \p FormatTok is a line comment that continues the line comment 4614 // section on \p Line. 
4615 static bool 4616 continuesLineCommentSection(const FormatToken &FormatTok, 4617 const UnwrappedLine &Line, 4618 const llvm::Regex &CommentPragmasRegex) { 4619 if (Line.Tokens.empty()) 4620 return false; 4621 4622 StringRef IndentContent = FormatTok.TokenText; 4623 if (FormatTok.TokenText.starts_with("//") || 4624 FormatTok.TokenText.starts_with("/*")) { 4625 IndentContent = FormatTok.TokenText.substr(2); 4626 } 4627 if (CommentPragmasRegex.match(IndentContent)) 4628 return false; 4629 4630 // If Line starts with a line comment, then FormatTok continues the comment 4631 // section if its original column is greater or equal to the original start 4632 // column of the line. 4633 // 4634 // Define the min column token of a line as follows: if a line ends in '{' or 4635 // contains a '{' followed by a line comment, then the min column token is 4636 // that '{'. Otherwise, the min column token of the line is the first token of 4637 // the line. 4638 // 4639 // If Line starts with a token other than a line comment, then FormatTok 4640 // continues the comment section if its original column is greater than the 4641 // original start column of the min column token of the line. 
4642 // 4643 // For example, the second line comment continues the first in these cases: 4644 // 4645 // // first line 4646 // // second line 4647 // 4648 // and: 4649 // 4650 // // first line 4651 // // second line 4652 // 4653 // and: 4654 // 4655 // int i; // first line 4656 // // second line 4657 // 4658 // and: 4659 // 4660 // do { // first line 4661 // // second line 4662 // int i; 4663 // } while (true); 4664 // 4665 // and: 4666 // 4667 // enum { 4668 // a, // first line 4669 // // second line 4670 // b 4671 // }; 4672 // 4673 // The second line comment doesn't continue the first in these cases: 4674 // 4675 // // first line 4676 // // second line 4677 // 4678 // and: 4679 // 4680 // int i; // first line 4681 // // second line 4682 // 4683 // and: 4684 // 4685 // do { // first line 4686 // // second line 4687 // int i; 4688 // } while (true); 4689 // 4690 // and: 4691 // 4692 // enum { 4693 // a, // first line 4694 // // second line 4695 // }; 4696 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 4697 4698 // Scan for '{//'. If found, use the column of '{' as a min column for line 4699 // comment section continuation. 4700 const FormatToken *PreviousToken = nullptr; 4701 for (const UnwrappedLineNode &Node : Line.Tokens) { 4702 if (PreviousToken && PreviousToken->is(tok::l_brace) && 4703 isLineComment(*Node.Tok)) { 4704 MinColumnToken = PreviousToken; 4705 break; 4706 } 4707 PreviousToken = Node.Tok; 4708 4709 // Grab the last newline preceding a token in this unwrapped line. 
4710 if (Node.Tok->NewlinesBefore > 0) 4711 MinColumnToken = Node.Tok; 4712 } 4713 if (PreviousToken && PreviousToken->is(tok::l_brace)) 4714 MinColumnToken = PreviousToken; 4715 4716 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 4717 MinColumnToken); 4718 } 4719 4720 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 4721 bool JustComments = Line->Tokens.empty(); 4722 for (FormatToken *Tok : CommentsBeforeNextToken) { 4723 // Line comments that belong to the same line comment section are put on the 4724 // same line since later we might want to reflow content between them. 4725 // Additional fine-grained breaking of line comment sections is controlled 4726 // by the class BreakableLineCommentSection in case it is desirable to keep 4727 // several line comment sections in the same unwrapped line. 4728 // 4729 // FIXME: Consider putting separate line comment sections as children to the 4730 // unwrapped line instead. 4731 Tok->ContinuesLineCommentSection = 4732 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex); 4733 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection) 4734 addUnwrappedLine(); 4735 pushToken(Tok); 4736 } 4737 if (NewlineBeforeNext && JustComments) 4738 addUnwrappedLine(); 4739 CommentsBeforeNextToken.clear(); 4740 } 4741 4742 void UnwrappedLineParser::nextToken(int LevelDifference) { 4743 if (eof()) 4744 return; 4745 flushComments(isOnNewLine(*FormatTok)); 4746 pushToken(FormatTok); 4747 FormatToken *Previous = FormatTok; 4748 if (!Style.isJavaScript()) 4749 readToken(LevelDifference); 4750 else 4751 readTokenWithJavaScriptASI(); 4752 FormatTok->Previous = Previous; 4753 if (Style.isVerilog()) { 4754 // Blocks in Verilog can have `begin` and `end` instead of braces. For 4755 // keywords like `begin`, we can't treat them the same as left braces 4756 // because some contexts require one of them. 
For example structs use 4757 // braces and if blocks use keywords, and a left brace can occur in an if 4758 // statement, but it is not a block. For keywords like `end`, we simply 4759 // treat them the same as right braces. 4760 if (Keywords.isVerilogEnd(*FormatTok)) 4761 FormatTok->Tok.setKind(tok::r_brace); 4762 } 4763 } 4764 4765 void UnwrappedLineParser::distributeComments( 4766 const SmallVectorImpl<FormatToken *> &Comments, 4767 const FormatToken *NextTok) { 4768 // Whether or not a line comment token continues a line is controlled by 4769 // the method continuesLineCommentSection, with the following caveat: 4770 // 4771 // Define a trail of Comments to be a nonempty proper postfix of Comments such 4772 // that each comment line from the trail is aligned with the next token, if 4773 // the next token exists. If a trail exists, the beginning of the maximal 4774 // trail is marked as a start of a new comment section. 4775 // 4776 // For example in this code: 4777 // 4778 // int a; // line about a 4779 // // line 1 about b 4780 // // line 2 about b 4781 // int b; 4782 // 4783 // the two lines about b form a maximal trail, so there are two sections, the 4784 // first one consisting of the single comment "// line about a" and the 4785 // second one consisting of the next two comments. 4786 if (Comments.empty()) 4787 return; 4788 bool ShouldPushCommentsInCurrentLine = true; 4789 bool HasTrailAlignedWithNextToken = false; 4790 unsigned StartOfTrailAlignedWithNextToken = 0; 4791 if (NextTok) { 4792 // We are skipping the first element intentionally. 
4793 for (unsigned i = Comments.size() - 1; i > 0; --i) { 4794 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 4795 HasTrailAlignedWithNextToken = true; 4796 StartOfTrailAlignedWithNextToken = i; 4797 } 4798 } 4799 } 4800 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 4801 FormatToken *FormatTok = Comments[i]; 4802 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 4803 FormatTok->ContinuesLineCommentSection = false; 4804 } else { 4805 FormatTok->ContinuesLineCommentSection = 4806 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 4807 } 4808 if (!FormatTok->ContinuesLineCommentSection && 4809 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 4810 ShouldPushCommentsInCurrentLine = false; 4811 } 4812 if (ShouldPushCommentsInCurrentLine) 4813 pushToken(FormatTok); 4814 else 4815 CommentsBeforeNextToken.push_back(FormatTok); 4816 } 4817 } 4818 4819 void UnwrappedLineParser::readToken(int LevelDifference) { 4820 SmallVector<FormatToken *, 1> Comments; 4821 bool PreviousWasComment = false; 4822 bool FirstNonCommentOnLine = false; 4823 do { 4824 FormatTok = Tokens->getNextToken(); 4825 assert(FormatTok); 4826 while (FormatTok->isOneOf(TT_ConflictStart, TT_ConflictEnd, 4827 TT_ConflictAlternative)) { 4828 if (FormatTok->is(TT_ConflictStart)) 4829 conditionalCompilationStart(/*Unreachable=*/false); 4830 else if (FormatTok->is(TT_ConflictAlternative)) 4831 conditionalCompilationAlternative(); 4832 else if (FormatTok->is(TT_ConflictEnd)) 4833 conditionalCompilationEnd(); 4834 FormatTok = Tokens->getNextToken(); 4835 FormatTok->MustBreakBefore = true; 4836 FormatTok->MustBreakBeforeFinalized = true; 4837 } 4838 4839 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine, 4840 const FormatToken &Tok, 4841 bool PreviousWasComment) { 4842 auto IsFirstOnLine = [](const FormatToken &Tok) { 4843 return Tok.HasUnescapedNewline || Tok.IsFirst; 4844 }; 4845 4846 // Consider preprocessor directives preceded by 
block comments as first 4847 // on line. 4848 if (PreviousWasComment) 4849 return FirstNonCommentOnLine || IsFirstOnLine(Tok); 4850 return IsFirstOnLine(Tok); 4851 }; 4852 4853 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4854 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4855 PreviousWasComment = FormatTok->is(tok::comment); 4856 4857 while (!Line->InPPDirective && FormatTok->is(tok::hash) && 4858 (!Style.isVerilog() || 4859 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) && 4860 FirstNonCommentOnLine) { 4861 distributeComments(Comments, FormatTok); 4862 Comments.clear(); 4863 // If there is an unfinished unwrapped line, we flush the preprocessor 4864 // directives only after that unwrapped line was finished later. 4865 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 4866 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 4867 assert((LevelDifference >= 0 || 4868 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 4869 "LevelDifference makes Line->Level negative"); 4870 Line->Level += LevelDifference; 4871 // Comments stored before the preprocessor directive need to be output 4872 // before the preprocessor directive, at the same level as the 4873 // preprocessor directive, as we consider them to apply to the directive. 
4874 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 4875 PPBranchLevel > 0) { 4876 Line->Level += PPBranchLevel; 4877 } 4878 assert(Line->Level >= Line->UnbracedBodyLevel); 4879 Line->Level -= Line->UnbracedBodyLevel; 4880 flushComments(isOnNewLine(*FormatTok)); 4881 parsePPDirective(); 4882 PreviousWasComment = FormatTok->is(tok::comment); 4883 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4884 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4885 } 4886 4887 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 4888 !Line->InPPDirective) { 4889 continue; 4890 } 4891 4892 if (FormatTok->is(tok::identifier) && 4893 Macros.defined(FormatTok->TokenText) && 4894 // FIXME: Allow expanding macros in preprocessor directives. 4895 !Line->InPPDirective) { 4896 FormatToken *ID = FormatTok; 4897 unsigned Position = Tokens->getPosition(); 4898 4899 // To correctly parse the code, we need to replace the tokens of the macro 4900 // call with its expansion. 4901 auto PreCall = std::move(Line); 4902 Line.reset(new UnwrappedLine); 4903 bool OldInExpansion = InExpansion; 4904 InExpansion = true; 4905 // We parse the macro call into a new line. 4906 auto Args = parseMacroCall(); 4907 InExpansion = OldInExpansion; 4908 assert(Line->Tokens.front().Tok == ID); 4909 // And remember the unexpanded macro call tokens. 4910 auto UnexpandedLine = std::move(Line); 4911 // Reset to the old line. 
4912 Line = std::move(PreCall); 4913 4914 LLVM_DEBUG({ 4915 llvm::dbgs() << "Macro call: " << ID->TokenText << "("; 4916 if (Args) { 4917 llvm::dbgs() << "("; 4918 for (const auto &Arg : Args.value()) 4919 for (const auto &T : Arg) 4920 llvm::dbgs() << T->TokenText << " "; 4921 llvm::dbgs() << ")"; 4922 } 4923 llvm::dbgs() << "\n"; 4924 }); 4925 if (Macros.objectLike(ID->TokenText) && Args && 4926 !Macros.hasArity(ID->TokenText, Args->size())) { 4927 // The macro is either 4928 // - object-like, but we got argumnets, or 4929 // - overloaded to be both object-like and function-like, but none of 4930 // the function-like arities match the number of arguments. 4931 // Thus, expand as object-like macro. 4932 LLVM_DEBUG(llvm::dbgs() 4933 << "Macro \"" << ID->TokenText 4934 << "\" not overloaded for arity " << Args->size() 4935 << "or not function-like, using object-like overload."); 4936 Args.reset(); 4937 UnexpandedLine->Tokens.resize(1); 4938 Tokens->setPosition(Position); 4939 nextToken(); 4940 assert(!Args && Macros.objectLike(ID->TokenText)); 4941 } 4942 if ((!Args && Macros.objectLike(ID->TokenText)) || 4943 (Args && Macros.hasArity(ID->TokenText, Args->size()))) { 4944 // Next, we insert the expanded tokens in the token stream at the 4945 // current position, and continue parsing. 
4946 Unexpanded[ID] = std::move(UnexpandedLine); 4947 SmallVector<FormatToken *, 8> Expansion = 4948 Macros.expand(ID, std::move(Args)); 4949 if (!Expansion.empty()) 4950 FormatTok = Tokens->insertTokens(Expansion); 4951 4952 LLVM_DEBUG({ 4953 llvm::dbgs() << "Expanded: "; 4954 for (const auto &T : Expansion) 4955 llvm::dbgs() << T->TokenText << " "; 4956 llvm::dbgs() << "\n"; 4957 }); 4958 } else { 4959 LLVM_DEBUG({ 4960 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText 4961 << "\", because it was used "; 4962 if (Args) 4963 llvm::dbgs() << "with " << Args->size(); 4964 else 4965 llvm::dbgs() << "without"; 4966 llvm::dbgs() << " arguments, which doesn't match any definition.\n"; 4967 }); 4968 Tokens->setPosition(Position); 4969 FormatTok = ID; 4970 } 4971 } 4972 4973 if (FormatTok->isNot(tok::comment)) { 4974 distributeComments(Comments, FormatTok); 4975 Comments.clear(); 4976 return; 4977 } 4978 4979 Comments.push_back(FormatTok); 4980 } while (!eof()); 4981 4982 distributeComments(Comments, nullptr); 4983 Comments.clear(); 4984 } 4985 4986 namespace { 4987 template <typename Iterator> 4988 void pushTokens(Iterator Begin, Iterator End, 4989 llvm::SmallVectorImpl<FormatToken *> &Into) { 4990 for (auto I = Begin; I != End; ++I) { 4991 Into.push_back(I->Tok); 4992 for (const auto &Child : I->Children) 4993 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into); 4994 } 4995 } 4996 } // namespace 4997 4998 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> 4999 UnwrappedLineParser::parseMacroCall() { 5000 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args; 5001 assert(Line->Tokens.empty()); 5002 nextToken(); 5003 if (FormatTok->isNot(tok::l_paren)) 5004 return Args; 5005 unsigned Position = Tokens->getPosition(); 5006 FormatToken *Tok = FormatTok; 5007 nextToken(); 5008 Args.emplace(); 5009 auto ArgStart = std::prev(Line->Tokens.end()); 5010 5011 int Parens = 0; 5012 do { 5013 switch 
(FormatTok->Tok.getKind()) { 5014 case tok::l_paren: 5015 ++Parens; 5016 nextToken(); 5017 break; 5018 case tok::r_paren: { 5019 if (Parens > 0) { 5020 --Parens; 5021 nextToken(); 5022 break; 5023 } 5024 Args->push_back({}); 5025 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); 5026 nextToken(); 5027 return Args; 5028 } 5029 case tok::comma: { 5030 if (Parens > 0) { 5031 nextToken(); 5032 break; 5033 } 5034 Args->push_back({}); 5035 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); 5036 nextToken(); 5037 ArgStart = std::prev(Line->Tokens.end()); 5038 break; 5039 } 5040 default: 5041 nextToken(); 5042 break; 5043 } 5044 } while (!eof()); 5045 Line->Tokens.resize(1); 5046 Tokens->setPosition(Position); 5047 FormatTok = Tok; 5048 return {}; 5049 } 5050 5051 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 5052 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 5053 if (MustBreakBeforeNextToken) { 5054 Line->Tokens.back().Tok->MustBreakBefore = true; 5055 Line->Tokens.back().Tok->MustBreakBeforeFinalized = true; 5056 MustBreakBeforeNextToken = false; 5057 } 5058 } 5059 5060 } // end namespace format 5061 } // end namespace clang 5062