1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "FormatToken.h" 17 #include "FormatTokenLexer.h" 18 #include "FormatTokenSource.h" 19 #include "Macros.h" 20 #include "TokenAnnotator.h" 21 #include "clang/Basic/TokenKinds.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/StringRef.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/raw_os_ostream.h" 26 #include "llvm/Support/raw_ostream.h" 27 28 #include <algorithm> 29 #include <utility> 30 31 #define DEBUG_TYPE "format-parser" 32 33 namespace clang { 34 namespace format { 35 36 namespace { 37 38 void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line, 39 StringRef Prefix = "", bool PrintText = false) { 40 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn 41 << ")" << (Line.InPPDirective ? 
" MACRO" : "") << ": "; 42 bool NewLine = false; 43 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 44 E = Line.Tokens.end(); 45 I != E; ++I) { 46 if (NewLine) { 47 OS << Prefix; 48 NewLine = false; 49 } 50 OS << I->Tok->Tok.getName() << "[" 51 << "T=" << (unsigned)I->Tok->getType() 52 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText 53 << "\"] "; 54 for (SmallVectorImpl<UnwrappedLine>::const_iterator 55 CI = I->Children.begin(), 56 CE = I->Children.end(); 57 CI != CE; ++CI) { 58 OS << "\n"; 59 printLine(OS, *CI, (Prefix + " ").str()); 60 NewLine = true; 61 } 62 } 63 if (!NewLine) 64 OS << "\n"; 65 } 66 67 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) { 68 printLine(llvm::dbgs(), Line); 69 } 70 71 class ScopedDeclarationState { 72 public: 73 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack, 74 bool MustBeDeclaration) 75 : Line(Line), Stack(Stack) { 76 Line.MustBeDeclaration = MustBeDeclaration; 77 Stack.push_back(MustBeDeclaration); 78 } 79 ~ScopedDeclarationState() { 80 Stack.pop_back(); 81 if (!Stack.empty()) 82 Line.MustBeDeclaration = Stack.back(); 83 else 84 Line.MustBeDeclaration = true; 85 } 86 87 private: 88 UnwrappedLine &Line; 89 llvm::BitVector &Stack; 90 }; 91 92 } // end anonymous namespace 93 94 std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line) { 95 llvm::raw_os_ostream OS(Stream); 96 printLine(OS, Line); 97 return Stream; 98 } 99 100 class ScopedLineState { 101 public: 102 ScopedLineState(UnwrappedLineParser &Parser, 103 bool SwitchToPreprocessorLines = false) 104 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 105 if (SwitchToPreprocessorLines) 106 Parser.CurrentLines = &Parser.PreprocessorDirectives; 107 else if (!Parser.Line->Tokens.empty()) 108 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 109 PreBlockLine = std::move(Parser.Line); 110 Parser.Line = std::make_unique<UnwrappedLine>(); 111 Parser.Line->Level = 
PreBlockLine->Level; 112 Parser.Line->PPLevel = PreBlockLine->PPLevel; 113 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 114 Parser.Line->InMacroBody = PreBlockLine->InMacroBody; 115 Parser.Line->UnbracedBodyLevel = PreBlockLine->UnbracedBodyLevel; 116 } 117 118 ~ScopedLineState() { 119 if (!Parser.Line->Tokens.empty()) 120 Parser.addUnwrappedLine(); 121 assert(Parser.Line->Tokens.empty()); 122 Parser.Line = std::move(PreBlockLine); 123 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 124 Parser.MustBreakBeforeNextToken = true; 125 Parser.CurrentLines = OriginalLines; 126 } 127 128 private: 129 UnwrappedLineParser &Parser; 130 131 std::unique_ptr<UnwrappedLine> PreBlockLine; 132 SmallVectorImpl<UnwrappedLine> *OriginalLines; 133 }; 134 135 class CompoundStatementIndenter { 136 public: 137 CompoundStatementIndenter(UnwrappedLineParser *Parser, 138 const FormatStyle &Style, unsigned &LineLevel) 139 : CompoundStatementIndenter(Parser, LineLevel, 140 Style.BraceWrapping.AfterControlStatement, 141 Style.BraceWrapping.IndentBraces) {} 142 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, 143 bool WrapBrace, bool IndentBrace) 144 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 145 if (WrapBrace) 146 Parser->addUnwrappedLine(); 147 if (IndentBrace) 148 ++LineLevel; 149 } 150 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 151 152 private: 153 unsigned &LineLevel; 154 unsigned OldLineLevel; 155 }; 156 157 UnwrappedLineParser::UnwrappedLineParser( 158 SourceManager &SourceMgr, const FormatStyle &Style, 159 const AdditionalKeywords &Keywords, unsigned FirstStartColumn, 160 ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback, 161 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator, 162 IdentifierTable &IdentTable) 163 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 164 CurrentLines(&Lines), Style(Style), IsCpp(Style.isCpp()), 165 LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords), 
166 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 167 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 168 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 169 ? IG_Rejected 170 : IG_Inited), 171 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn), 172 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) { 173 assert(IsCpp == LangOpts.CXXOperatorNames); 174 } 175 176 void UnwrappedLineParser::reset() { 177 PPBranchLevel = -1; 178 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 179 ? IG_Rejected 180 : IG_Inited; 181 IncludeGuardToken = nullptr; 182 Line.reset(new UnwrappedLine); 183 CommentsBeforeNextToken.clear(); 184 FormatTok = nullptr; 185 MustBreakBeforeNextToken = false; 186 IsDecltypeAutoFunction = false; 187 PreprocessorDirectives.clear(); 188 CurrentLines = &Lines; 189 DeclarationScopeStack.clear(); 190 NestedTooDeep.clear(); 191 NestedLambdas.clear(); 192 PPStack.clear(); 193 Line->FirstStartColumn = FirstStartColumn; 194 195 if (!Unexpanded.empty()) 196 for (FormatToken *Token : AllTokens) 197 Token->MacroCtx.reset(); 198 CurrentExpandedLines.clear(); 199 ExpandedLines.clear(); 200 Unexpanded.clear(); 201 InExpansion = false; 202 Reconstruct.reset(); 203 } 204 205 void UnwrappedLineParser::parse() { 206 IndexedTokenSource TokenSource(AllTokens); 207 Line->FirstStartColumn = FirstStartColumn; 208 do { 209 LLVM_DEBUG(llvm::dbgs() << "----\n"); 210 reset(); 211 Tokens = &TokenSource; 212 TokenSource.reset(); 213 214 readToken(); 215 parseFile(); 216 217 // If we found an include guard then all preprocessor directives (other than 218 // the guard) are over-indented by one. 219 if (IncludeGuard == IG_Found) { 220 for (auto &Line : Lines) 221 if (Line.InPPDirective && Line.Level > 0) 222 --Line.Level; 223 } 224 225 // Create line with eof token. 
226 assert(eof()); 227 pushToken(FormatTok); 228 addUnwrappedLine(); 229 230 // In a first run, format everything with the lines containing macro calls 231 // replaced by the expansion. 232 if (!ExpandedLines.empty()) { 233 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n"); 234 for (const auto &Line : Lines) { 235 if (!Line.Tokens.empty()) { 236 auto it = ExpandedLines.find(Line.Tokens.begin()->Tok); 237 if (it != ExpandedLines.end()) { 238 for (const auto &Expanded : it->second) { 239 LLVM_DEBUG(printDebugInfo(Expanded)); 240 Callback.consumeUnwrappedLine(Expanded); 241 } 242 continue; 243 } 244 } 245 LLVM_DEBUG(printDebugInfo(Line)); 246 Callback.consumeUnwrappedLine(Line); 247 } 248 Callback.finishRun(); 249 } 250 251 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n"); 252 for (const UnwrappedLine &Line : Lines) { 253 LLVM_DEBUG(printDebugInfo(Line)); 254 Callback.consumeUnwrappedLine(Line); 255 } 256 Callback.finishRun(); 257 Lines.clear(); 258 while (!PPLevelBranchIndex.empty() && 259 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 260 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 261 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 262 } 263 if (!PPLevelBranchIndex.empty()) { 264 ++PPLevelBranchIndex.back(); 265 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 266 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 267 } 268 } while (!PPLevelBranchIndex.empty()); 269 } 270 271 void UnwrappedLineParser::parseFile() { 272 // The top-level context in a file always has declarations, except for pre- 273 // processor directives and JavaScript files. 274 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript(); 275 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 276 MustBeDeclaration); 277 if (Style.Language == FormatStyle::LK_TextProto) 278 parseBracedList(); 279 else 280 parseLevel(); 281 // Make sure to format the remaining tokens. 
282 // 283 // LK_TextProto is special since its top-level is parsed as the body of a 284 // braced list, which does not necessarily have natural line separators such 285 // as a semicolon. Comments after the last entry that have been determined to 286 // not belong to that line, as in: 287 // key: value 288 // // endfile comment 289 // do not have a chance to be put on a line of their own until this point. 290 // Here we add this newline before end-of-file comments. 291 if (Style.Language == FormatStyle::LK_TextProto && 292 !CommentsBeforeNextToken.empty()) { 293 addUnwrappedLine(); 294 } 295 flushComments(true); 296 addUnwrappedLine(); 297 } 298 299 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() { 300 do { 301 switch (FormatTok->Tok.getKind()) { 302 case tok::l_brace: 303 return; 304 default: 305 if (FormatTok->is(Keywords.kw_where)) { 306 addUnwrappedLine(); 307 nextToken(); 308 parseCSharpGenericTypeConstraint(); 309 break; 310 } 311 nextToken(); 312 break; 313 } 314 } while (!eof()); 315 } 316 317 void UnwrappedLineParser::parseCSharpAttribute() { 318 int UnpairedSquareBrackets = 1; 319 do { 320 switch (FormatTok->Tok.getKind()) { 321 case tok::r_square: 322 nextToken(); 323 --UnpairedSquareBrackets; 324 if (UnpairedSquareBrackets == 0) { 325 addUnwrappedLine(); 326 return; 327 } 328 break; 329 case tok::l_square: 330 ++UnpairedSquareBrackets; 331 nextToken(); 332 break; 333 default: 334 nextToken(); 335 break; 336 } 337 } while (!eof()); 338 } 339 340 bool UnwrappedLineParser::precededByCommentOrPPDirective() const { 341 if (!Lines.empty() && Lines.back().InPPDirective) 342 return true; 343 344 const FormatToken *Previous = Tokens->getPreviousToken(); 345 return Previous && Previous->is(tok::comment) && 346 (Previous->IsMultiline || Previous->NewlinesBefore > 0); 347 } 348 349 /// \brief Parses a level, that is ???. 350 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level. 
351 /// \param IfKind The \p if statement kind in the level. 352 /// \param IfLeftBrace The left brace of the \p if block in the level. 353 /// \returns true if a simple block of if/else/for/while, or false otherwise. 354 /// (A simple block has a single statement.) 355 bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace, 356 IfStmtKind *IfKind, 357 FormatToken **IfLeftBrace) { 358 const bool InRequiresExpression = 359 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace); 360 const bool IsPrecededByCommentOrPPDirective = 361 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective(); 362 FormatToken *IfLBrace = nullptr; 363 bool HasDoWhile = false; 364 bool HasLabel = false; 365 unsigned StatementCount = 0; 366 bool SwitchLabelEncountered = false; 367 368 do { 369 if (FormatTok->isAttribute()) { 370 nextToken(); 371 if (FormatTok->is(tok::l_paren)) 372 parseParens(); 373 continue; 374 } 375 tok::TokenKind Kind = FormatTok->Tok.getKind(); 376 if (FormatTok->is(TT_MacroBlockBegin)) 377 Kind = tok::l_brace; 378 else if (FormatTok->is(TT_MacroBlockEnd)) 379 Kind = tok::r_brace; 380 381 auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile, 382 &HasLabel, &StatementCount] { 383 parseStructuralElement(OpeningBrace, IfKind, &IfLBrace, 384 HasDoWhile ? nullptr : &HasDoWhile, 385 HasLabel ? nullptr : &HasLabel); 386 ++StatementCount; 387 assert(StatementCount > 0 && "StatementCount overflow!"); 388 }; 389 390 switch (Kind) { 391 case tok::comment: 392 nextToken(); 393 addUnwrappedLine(); 394 break; 395 case tok::l_brace: 396 if (InRequiresExpression) { 397 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace); 398 } else if (FormatTok->Previous && 399 FormatTok->Previous->ClosesRequiresClause) { 400 // We need the 'default' case here to correctly parse a function 401 // l_brace. 
402 ParseDefault(); 403 continue; 404 } 405 if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin)) { 406 if (tryToParseBracedList()) 407 continue; 408 FormatTok->setFinalizedType(TT_BlockLBrace); 409 } 410 parseBlock(); 411 ++StatementCount; 412 assert(StatementCount > 0 && "StatementCount overflow!"); 413 addUnwrappedLine(); 414 break; 415 case tok::r_brace: 416 if (OpeningBrace) { 417 if (!Style.RemoveBracesLLVM || Line->InPPDirective || 418 !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) { 419 return false; 420 } 421 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel || 422 HasDoWhile || IsPrecededByCommentOrPPDirective || 423 precededByCommentOrPPDirective()) { 424 return false; 425 } 426 const FormatToken *Next = Tokens->peekNextToken(); 427 if (Next->is(tok::comment) && Next->NewlinesBefore == 0) 428 return false; 429 if (IfLeftBrace) 430 *IfLeftBrace = IfLBrace; 431 return true; 432 } 433 nextToken(); 434 addUnwrappedLine(); 435 break; 436 case tok::kw_default: { 437 unsigned StoredPosition = Tokens->getPosition(); 438 auto *Next = Tokens->getNextNonComment(); 439 FormatTok = Tokens->setPosition(StoredPosition); 440 if (!Next->isOneOf(tok::colon, tok::arrow)) { 441 // default not followed by `:` or `->` is not a case label; treat it 442 // like an identifier. 443 parseStructuralElement(); 444 break; 445 } 446 // Else, if it is 'default:', fall through to the case handling. 447 [[fallthrough]]; 448 } 449 case tok::kw_case: 450 if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() || 451 (Style.isJavaScript() && Line->MustBeDeclaration)) { 452 // Proto: there are no switch/case statements 453 // Verilog: Case labels don't have this word. We handle case 454 // labels including default in TokenAnnotator. 455 // JavaScript: A 'case: string' style field declaration. 
456 ParseDefault(); 457 break; 458 } 459 if (!SwitchLabelEncountered && 460 (Style.IndentCaseLabels || 461 (OpeningBrace && OpeningBrace->is(TT_SwitchExpressionLBrace)) || 462 (Line->InPPDirective && Line->Level == 1))) { 463 ++Line->Level; 464 } 465 SwitchLabelEncountered = true; 466 parseStructuralElement(); 467 break; 468 case tok::l_square: 469 if (Style.isCSharp()) { 470 nextToken(); 471 parseCSharpAttribute(); 472 break; 473 } 474 if (handleCppAttributes()) 475 break; 476 [[fallthrough]]; 477 default: 478 ParseDefault(); 479 break; 480 } 481 } while (!eof()); 482 483 return false; 484 } 485 486 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 487 // We'll parse forward through the tokens until we hit 488 // a closing brace or eof - note that getNextToken() will 489 // parse macros, so this will magically work inside macro 490 // definitions, too. 491 unsigned StoredPosition = Tokens->getPosition(); 492 FormatToken *Tok = FormatTok; 493 const FormatToken *PrevTok = Tok->Previous; 494 // Keep a stack of positions of lbrace tokens. We will 495 // update information about whether an lbrace starts a 496 // braced init list or a different block during the loop. 497 struct StackEntry { 498 FormatToken *Tok; 499 const FormatToken *PrevTok; 500 }; 501 SmallVector<StackEntry, 8> LBraceStack; 502 assert(Tok->is(tok::l_brace)); 503 504 do { 505 auto *NextTok = Tokens->getNextNonComment(); 506 507 if (!Line->InMacroBody && !Style.isTableGen()) { 508 // Skip PPDirective lines and comments. 
509 while (NextTok->is(tok::hash)) { 510 NextTok = Tokens->getNextToken(); 511 if (NextTok->is(tok::pp_not_keyword)) 512 break; 513 do { 514 NextTok = Tokens->getNextToken(); 515 } while (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof)); 516 517 while (NextTok->is(tok::comment)) 518 NextTok = Tokens->getNextToken(); 519 } 520 } 521 522 switch (Tok->Tok.getKind()) { 523 case tok::l_brace: 524 if (Style.isJavaScript() && PrevTok) { 525 if (PrevTok->isOneOf(tok::colon, tok::less)) { 526 // A ':' indicates this code is in a type, or a braced list 527 // following a label in an object literal ({a: {b: 1}}). 528 // A '<' could be an object used in a comparison, but that is nonsense 529 // code (can never return true), so more likely it is a generic type 530 // argument (`X<{a: string; b: number}>`). 531 // The code below could be confused by semicolons between the 532 // individual members in a type member list, which would normally 533 // trigger BK_Block. In both cases, this must be parsed as an inline 534 // braced init. 535 Tok->setBlockKind(BK_BracedInit); 536 } else if (PrevTok->is(tok::r_paren)) { 537 // `) { }` can only occur in function or method declarations in JS. 538 Tok->setBlockKind(BK_Block); 539 } 540 } else { 541 Tok->setBlockKind(BK_Unknown); 542 } 543 LBraceStack.push_back({Tok, PrevTok}); 544 break; 545 case tok::r_brace: 546 if (LBraceStack.empty()) 547 break; 548 if (auto *LBrace = LBraceStack.back().Tok; LBrace->is(BK_Unknown)) { 549 bool ProbablyBracedList = false; 550 if (Style.Language == FormatStyle::LK_Proto) { 551 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 552 } else if (LBrace->isNot(TT_EnumLBrace)) { 553 // Using OriginalColumn to distinguish between ObjC methods and 554 // binary operators is a bit hacky. 555 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 556 NextTok->OriginalColumn == 0; 557 558 // Try to detect a braced list. 
Note that regardless how we mark inner 559 // braces here, we will overwrite the BlockKind later if we parse a 560 // braced list (where all blocks inside are by default braced lists), 561 // or when we explicitly detect blocks (for example while parsing 562 // lambdas). 563 564 // If we already marked the opening brace as braced list, the closing 565 // must also be part of it. 566 ProbablyBracedList = LBrace->is(TT_BracedListLBrace); 567 568 ProbablyBracedList = ProbablyBracedList || 569 (Style.isJavaScript() && 570 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 571 Keywords.kw_as)); 572 ProbablyBracedList = 573 ProbablyBracedList || (IsCpp && (PrevTok->Tok.isLiteral() || 574 NextTok->is(tok::l_paren))); 575 576 // If there is a comma, semicolon or right paren after the closing 577 // brace, we assume this is a braced initializer list. 578 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 579 // braced list in JS. 580 ProbablyBracedList = 581 ProbablyBracedList || 582 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 583 tok::r_paren, tok::r_square, tok::ellipsis); 584 585 // Distinguish between braced list in a constructor initializer list 586 // followed by constructor body, or just adjacent blocks. 
587 ProbablyBracedList = 588 ProbablyBracedList || 589 (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok && 590 LBraceStack.back().PrevTok->isOneOf(tok::identifier, 591 tok::greater)); 592 593 ProbablyBracedList = 594 ProbablyBracedList || 595 (NextTok->is(tok::identifier) && 596 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)); 597 598 ProbablyBracedList = ProbablyBracedList || 599 (NextTok->is(tok::semi) && 600 (!ExpectClassBody || LBraceStack.size() != 1)); 601 602 ProbablyBracedList = 603 ProbablyBracedList || 604 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 605 606 if (!Style.isCSharp() && NextTok->is(tok::l_square)) { 607 // We can have an array subscript after a braced init 608 // list, but C++11 attributes are expected after blocks. 609 NextTok = Tokens->getNextToken(); 610 ProbablyBracedList = NextTok->isNot(tok::l_square); 611 } 612 613 // Cpp macro definition body that is a nonempty braced list or block: 614 if (IsCpp && Line->InMacroBody && PrevTok != FormatTok && 615 !FormatTok->Previous && NextTok->is(tok::eof) && 616 // A statement can end with only `;` (simple statement), a block 617 // closing brace (compound statement), or `:` (label statement). 618 // If PrevTok is a block opening brace, Tok ends an empty block. 619 !PrevTok->isOneOf(tok::semi, BK_Block, tok::colon)) { 620 ProbablyBracedList = true; 621 } 622 } 623 const auto BlockKind = ProbablyBracedList ? 
BK_BracedInit : BK_Block; 624 Tok->setBlockKind(BlockKind); 625 LBrace->setBlockKind(BlockKind); 626 } 627 LBraceStack.pop_back(); 628 break; 629 case tok::identifier: 630 if (Tok->isNot(TT_StatementMacro)) 631 break; 632 [[fallthrough]]; 633 case tok::at: 634 case tok::semi: 635 case tok::kw_if: 636 case tok::kw_while: 637 case tok::kw_for: 638 case tok::kw_switch: 639 case tok::kw_try: 640 case tok::kw___try: 641 if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown)) 642 LBraceStack.back().Tok->setBlockKind(BK_Block); 643 break; 644 default: 645 break; 646 } 647 648 PrevTok = Tok; 649 Tok = NextTok; 650 } while (Tok->isNot(tok::eof) && !LBraceStack.empty()); 651 652 // Assume other blocks for all unclosed opening braces. 653 for (const auto &Entry : LBraceStack) 654 if (Entry.Tok->is(BK_Unknown)) 655 Entry.Tok->setBlockKind(BK_Block); 656 657 FormatTok = Tokens->setPosition(StoredPosition); 658 } 659 660 // Sets the token type of the directly previous right brace. 661 void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) { 662 if (auto Prev = FormatTok->getPreviousNonComment(); 663 Prev && Prev->is(tok::r_brace)) { 664 Prev->setFinalizedType(Type); 665 } 666 } 667 668 template <class T> 669 static inline void hash_combine(std::size_t &seed, const T &v) { 670 std::hash<T> hasher; 671 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 672 } 673 674 size_t UnwrappedLineParser::computePPHash() const { 675 size_t h = 0; 676 for (const auto &i : PPStack) { 677 hash_combine(h, size_t(i.Kind)); 678 hash_combine(h, i.Line); 679 } 680 return h; 681 } 682 683 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace 684 // is not null, subtracts its length (plus the preceding space) when computing 685 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before 686 // running the token annotator on it so that we can restore them afterward. 
687 bool UnwrappedLineParser::mightFitOnOneLine( 688 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const { 689 const auto ColumnLimit = Style.ColumnLimit; 690 if (ColumnLimit == 0) 691 return true; 692 693 auto &Tokens = ParsedLine.Tokens; 694 assert(!Tokens.empty()); 695 696 const auto *LastToken = Tokens.back().Tok; 697 assert(LastToken); 698 699 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size()); 700 701 int Index = 0; 702 for (const auto &Token : Tokens) { 703 assert(Token.Tok); 704 auto &SavedToken = SavedTokens[Index++]; 705 SavedToken.Tok = new FormatToken; 706 SavedToken.Tok->copyFrom(*Token.Tok); 707 SavedToken.Children = std::move(Token.Children); 708 } 709 710 AnnotatedLine Line(ParsedLine); 711 assert(Line.Last == LastToken); 712 713 TokenAnnotator Annotator(Style, Keywords); 714 Annotator.annotate(Line); 715 Annotator.calculateFormattingInformation(Line); 716 717 auto Length = LastToken->TotalLength; 718 if (OpeningBrace) { 719 assert(OpeningBrace != Tokens.front().Tok); 720 if (auto Prev = OpeningBrace->Previous; 721 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) { 722 Length -= ColumnLimit; 723 } 724 Length -= OpeningBrace->TokenText.size() + 1; 725 } 726 727 if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) { 728 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace)); 729 Length -= FirstToken->TokenText.size() + 1; 730 } 731 732 Index = 0; 733 for (auto &Token : Tokens) { 734 const auto &SavedToken = SavedTokens[Index++]; 735 Token.Tok->copyFrom(*SavedToken.Tok); 736 Token.Children = std::move(SavedToken.Children); 737 delete SavedToken.Tok; 738 } 739 740 // If these change PPLevel needs to be used for get correct indentation. 
741 assert(!Line.InMacroBody); 742 assert(!Line.InPPDirective); 743 return Line.Level * Style.IndentWidth + Length <= ColumnLimit; 744 } 745 746 FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration, 747 unsigned AddLevels, bool MunchSemi, 748 bool KeepBraces, 749 IfStmtKind *IfKind, 750 bool UnindentWhitesmithsBraces) { 751 auto HandleVerilogBlockLabel = [this]() { 752 // ":" name 753 if (Style.isVerilog() && FormatTok->is(tok::colon)) { 754 nextToken(); 755 if (Keywords.isVerilogIdentifier(*FormatTok)) 756 nextToken(); 757 } 758 }; 759 760 // Whether this is a Verilog-specific block that has a special header like a 761 // module. 762 const bool VerilogHierarchy = 763 Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok); 764 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) || 765 (Style.isVerilog() && 766 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) && 767 "'{' or macro block token expected"); 768 FormatToken *Tok = FormatTok; 769 const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment); 770 auto Index = CurrentLines->size(); 771 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 772 FormatTok->setBlockKind(BK_Block); 773 774 // For Whitesmiths mode, jump to the next level prior to skipping over the 775 // braces. 776 if (!VerilogHierarchy && AddLevels > 0 && 777 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 778 ++Line->Level; 779 } 780 781 size_t PPStartHash = computePPHash(); 782 783 const unsigned InitialLevel = Line->Level; 784 if (VerilogHierarchy) { 785 AddLevels += parseVerilogHierarchyHeader(); 786 } else { 787 nextToken(/*LevelDifference=*/AddLevels); 788 HandleVerilogBlockLabel(); 789 } 790 791 // Bail out if there are too many levels. Otherwise, the stack might overflow. 792 if (Line->Level > 300) 793 return nullptr; 794 795 if (MacroBlock && FormatTok->is(tok::l_paren)) 796 parseParens(); 797 798 size_t NbPreprocessorDirectives = 799 !parsingPPDirective() ? 
PreprocessorDirectives.size() : 0; 800 addUnwrappedLine(); 801 size_t OpeningLineIndex = 802 CurrentLines->empty() 803 ? (UnwrappedLine::kInvalidIndex) 804 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 805 806 // Whitesmiths is weird here. The brace needs to be indented for the namespace 807 // block, but the block itself may not be indented depending on the style 808 // settings. This allows the format to back up one level in those cases. 809 if (UnindentWhitesmithsBraces) 810 --Line->Level; 811 812 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 813 MustBeDeclaration); 814 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths) 815 Line->Level += AddLevels; 816 817 FormatToken *IfLBrace = nullptr; 818 const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace); 819 820 if (eof()) 821 return IfLBrace; 822 823 if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd) 824 : FormatTok->isNot(tok::r_brace)) { 825 Line->Level = InitialLevel; 826 FormatTok->setBlockKind(BK_Block); 827 return IfLBrace; 828 } 829 830 if (FormatTok->is(tok::r_brace)) { 831 FormatTok->setBlockKind(BK_Block); 832 if (Tok->is(TT_NamespaceLBrace)) 833 FormatTok->setFinalizedType(TT_NamespaceRBrace); 834 } 835 836 const bool IsFunctionRBrace = 837 FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace); 838 839 auto RemoveBraces = [=]() mutable { 840 if (!SimpleBlock) 841 return false; 842 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)); 843 assert(FormatTok->is(tok::r_brace)); 844 const bool WrappedOpeningBrace = !Tok->Previous; 845 if (WrappedOpeningBrace && FollowedByComment) 846 return false; 847 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional; 848 if (KeepBraces && !HasRequiredIfBraces) 849 return false; 850 if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) { 851 const FormatToken *Previous = Tokens->getPreviousToken(); 852 assert(Previous); 853 if (Previous->is(tok::r_brace) && !Previous->Optional) 854 
return false; 855 } 856 assert(!CurrentLines->empty()); 857 auto &LastLine = CurrentLines->back(); 858 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine)) 859 return false; 860 if (Tok->is(TT_ElseLBrace)) 861 return true; 862 if (WrappedOpeningBrace) { 863 assert(Index > 0); 864 --Index; // The line above the wrapped l_brace. 865 Tok = nullptr; 866 } 867 return mightFitOnOneLine((*CurrentLines)[Index], Tok); 868 }; 869 if (RemoveBraces()) { 870 Tok->MatchingParen = FormatTok; 871 FormatTok->MatchingParen = Tok; 872 } 873 874 size_t PPEndHash = computePPHash(); 875 876 // Munch the closing brace. 877 nextToken(/*LevelDifference=*/-AddLevels); 878 879 // When this is a function block and there is an unnecessary semicolon 880 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of 881 // it later). 882 if (Style.RemoveSemicolon && IsFunctionRBrace) { 883 while (FormatTok->is(tok::semi)) { 884 FormatTok->Optional = true; 885 nextToken(); 886 } 887 } 888 889 HandleVerilogBlockLabel(); 890 891 if (MacroBlock && FormatTok->is(tok::l_paren)) 892 parseParens(); 893 894 Line->Level = InitialLevel; 895 896 if (FormatTok->is(tok::kw_noexcept)) { 897 // A noexcept in a requires expression. 898 nextToken(); 899 } 900 901 if (FormatTok->is(tok::arrow)) { 902 // Following the } or noexcept we can find a trailing return type arrow 903 // as part of an implicit conversion constraint. 
904 nextToken(); 905 parseStructuralElement(); 906 } 907 908 if (MunchSemi && FormatTok->is(tok::semi)) 909 nextToken(); 910 911 if (PPStartHash == PPEndHash) { 912 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 913 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 914 // Update the opening line to add the forward reference as well 915 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 916 CurrentLines->size() - 1; 917 } 918 } 919 920 return IfLBrace; 921 } 922 923 static bool isGoogScope(const UnwrappedLine &Line) { 924 // FIXME: Closure-library specific stuff should not be hard-coded but be 925 // configurable. 926 if (Line.Tokens.size() < 4) 927 return false; 928 auto I = Line.Tokens.begin(); 929 if (I->Tok->TokenText != "goog") 930 return false; 931 ++I; 932 if (I->Tok->isNot(tok::period)) 933 return false; 934 ++I; 935 if (I->Tok->TokenText != "scope") 936 return false; 937 ++I; 938 return I->Tok->is(tok::l_paren); 939 } 940 941 static bool isIIFE(const UnwrappedLine &Line, 942 const AdditionalKeywords &Keywords) { 943 // Look for the start of an immediately invoked anonymous function. 944 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 945 // This is commonly done in JavaScript to create a new, anonymous scope. 946 // Example: (function() { ... 
})() 947 if (Line.Tokens.size() < 3) 948 return false; 949 auto I = Line.Tokens.begin(); 950 if (I->Tok->isNot(tok::l_paren)) 951 return false; 952 ++I; 953 if (I->Tok->isNot(Keywords.kw_function)) 954 return false; 955 ++I; 956 return I->Tok->is(tok::l_paren); 957 } 958 959 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 960 const FormatToken &InitialToken) { 961 tok::TokenKind Kind = InitialToken.Tok.getKind(); 962 if (InitialToken.is(TT_NamespaceMacro)) 963 Kind = tok::kw_namespace; 964 965 switch (Kind) { 966 case tok::kw_namespace: 967 return Style.BraceWrapping.AfterNamespace; 968 case tok::kw_class: 969 return Style.BraceWrapping.AfterClass; 970 case tok::kw_union: 971 return Style.BraceWrapping.AfterUnion; 972 case tok::kw_struct: 973 return Style.BraceWrapping.AfterStruct; 974 case tok::kw_enum: 975 return Style.BraceWrapping.AfterEnum; 976 default: 977 return false; 978 } 979 } 980 981 void UnwrappedLineParser::parseChildBlock() { 982 assert(FormatTok->is(tok::l_brace)); 983 FormatTok->setBlockKind(BK_Block); 984 const FormatToken *OpeningBrace = FormatTok; 985 nextToken(); 986 { 987 bool SkipIndent = (Style.isJavaScript() && 988 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 989 ScopedLineState LineState(*this); 990 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 991 /*MustBeDeclaration=*/false); 992 Line->Level += SkipIndent ? 0 : 1; 993 parseLevel(OpeningBrace); 994 flushComments(isOnNewLine(*FormatTok)); 995 Line->Level -= SkipIndent ? 
0 : 1; 996 } 997 nextToken(); 998 } 999 1000 void UnwrappedLineParser::parsePPDirective() { 1001 assert(FormatTok->is(tok::hash) && "'#' expected"); 1002 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 1003 1004 nextToken(); 1005 1006 if (!FormatTok->Tok.getIdentifierInfo()) { 1007 parsePPUnknown(); 1008 return; 1009 } 1010 1011 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 1012 case tok::pp_define: 1013 parsePPDefine(); 1014 return; 1015 case tok::pp_if: 1016 parsePPIf(/*IfDef=*/false); 1017 break; 1018 case tok::pp_ifdef: 1019 case tok::pp_ifndef: 1020 parsePPIf(/*IfDef=*/true); 1021 break; 1022 case tok::pp_else: 1023 case tok::pp_elifdef: 1024 case tok::pp_elifndef: 1025 case tok::pp_elif: 1026 parsePPElse(); 1027 break; 1028 case tok::pp_endif: 1029 parsePPEndIf(); 1030 break; 1031 case tok::pp_pragma: 1032 parsePPPragma(); 1033 break; 1034 default: 1035 parsePPUnknown(); 1036 break; 1037 } 1038 } 1039 1040 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 1041 size_t Line = CurrentLines->size(); 1042 if (CurrentLines == &PreprocessorDirectives) 1043 Line += Lines.size(); 1044 1045 if (Unreachable || 1046 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) { 1047 PPStack.push_back({PP_Unreachable, Line}); 1048 } else { 1049 PPStack.push_back({PP_Conditional, Line}); 1050 } 1051 } 1052 1053 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 1054 ++PPBranchLevel; 1055 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 1056 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 1057 PPLevelBranchIndex.push_back(0); 1058 PPLevelBranchCount.push_back(0); 1059 } 1060 PPChainBranchIndex.push(Unreachable ? 
-1 : 0); 1061 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 1062 conditionalCompilationCondition(Unreachable || Skip); 1063 } 1064 1065 void UnwrappedLineParser::conditionalCompilationAlternative() { 1066 if (!PPStack.empty()) 1067 PPStack.pop_back(); 1068 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 1069 if (!PPChainBranchIndex.empty()) 1070 ++PPChainBranchIndex.top(); 1071 conditionalCompilationCondition( 1072 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 1073 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 1074 } 1075 1076 void UnwrappedLineParser::conditionalCompilationEnd() { 1077 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 1078 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 1079 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) 1080 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 1081 } 1082 // Guard against #endif's without #if. 1083 if (PPBranchLevel > -1) 1084 --PPBranchLevel; 1085 if (!PPChainBranchIndex.empty()) 1086 PPChainBranchIndex.pop(); 1087 if (!PPStack.empty()) 1088 PPStack.pop_back(); 1089 } 1090 1091 void UnwrappedLineParser::parsePPIf(bool IfDef) { 1092 bool IfNDef = FormatTok->is(tok::pp_ifndef); 1093 nextToken(); 1094 bool Unreachable = false; 1095 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 1096 Unreachable = true; 1097 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 1098 Unreachable = true; 1099 conditionalCompilationStart(Unreachable); 1100 FormatToken *IfCondition = FormatTok; 1101 // If there's a #ifndef on the first line, and the only lines before it are 1102 // comments, it could be an include guard. 
1103 bool MaybeIncludeGuard = IfNDef; 1104 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1105 for (auto &Line : Lines) { 1106 if (Line.Tokens.front().Tok->isNot(tok::comment)) { 1107 MaybeIncludeGuard = false; 1108 IncludeGuard = IG_Rejected; 1109 break; 1110 } 1111 } 1112 } 1113 --PPBranchLevel; 1114 parsePPUnknown(); 1115 ++PPBranchLevel; 1116 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1117 IncludeGuard = IG_IfNdefed; 1118 IncludeGuardToken = IfCondition; 1119 } 1120 } 1121 1122 void UnwrappedLineParser::parsePPElse() { 1123 // If a potential include guard has an #else, it's not an include guard. 1124 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 1125 IncludeGuard = IG_Rejected; 1126 // Don't crash when there is an #else without an #if. 1127 assert(PPBranchLevel >= -1); 1128 if (PPBranchLevel == -1) 1129 conditionalCompilationStart(/*Unreachable=*/true); 1130 conditionalCompilationAlternative(); 1131 --PPBranchLevel; 1132 parsePPUnknown(); 1133 ++PPBranchLevel; 1134 } 1135 1136 void UnwrappedLineParser::parsePPEndIf() { 1137 conditionalCompilationEnd(); 1138 parsePPUnknown(); 1139 // If the #endif of a potential include guard is the last thing in the file, 1140 // then we found an include guard. 
1141 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && 1142 Style.IndentPPDirectives != FormatStyle::PPDIS_None) { 1143 IncludeGuard = IG_Found; 1144 } 1145 } 1146 1147 void UnwrappedLineParser::parsePPDefine() { 1148 nextToken(); 1149 1150 if (!FormatTok->Tok.getIdentifierInfo()) { 1151 IncludeGuard = IG_Rejected; 1152 IncludeGuardToken = nullptr; 1153 parsePPUnknown(); 1154 return; 1155 } 1156 1157 if (IncludeGuard == IG_IfNdefed && 1158 IncludeGuardToken->TokenText == FormatTok->TokenText) { 1159 IncludeGuard = IG_Defined; 1160 IncludeGuardToken = nullptr; 1161 for (auto &Line : Lines) { 1162 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 1163 IncludeGuard = IG_Rejected; 1164 break; 1165 } 1166 } 1167 } 1168 1169 // In the context of a define, even keywords should be treated as normal 1170 // identifiers. Setting the kind to identifier is not enough, because we need 1171 // to treat additional keywords like __except as well, which are already 1172 // identifiers. Setting the identifier info to null interferes with include 1173 // guard processing above, and changes preprocessing nesting. 1174 FormatTok->Tok.setKind(tok::identifier); 1175 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define); 1176 nextToken(); 1177 if (FormatTok->Tok.getKind() == tok::l_paren && 1178 !FormatTok->hasWhitespaceBefore()) { 1179 parseParens(); 1180 } 1181 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1182 Line->Level += PPBranchLevel + 1; 1183 addUnwrappedLine(); 1184 ++Line->Level; 1185 1186 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 
0 : 1); 1187 assert((int)Line->PPLevel >= 0); 1188 Line->InMacroBody = true; 1189 1190 if (Style.SkipMacroDefinitionBody) { 1191 while (!eof()) { 1192 FormatTok->Finalized = true; 1193 FormatTok = Tokens->getNextToken(); 1194 } 1195 addUnwrappedLine(); 1196 return; 1197 } 1198 1199 // Errors during a preprocessor directive can only affect the layout of the 1200 // preprocessor directive, and thus we ignore them. An alternative approach 1201 // would be to use the same approach we use on the file level (no 1202 // re-indentation if there was a structural error) within the macro 1203 // definition. 1204 parseFile(); 1205 } 1206 1207 void UnwrappedLineParser::parsePPPragma() { 1208 Line->InPragmaDirective = true; 1209 parsePPUnknown(); 1210 } 1211 1212 void UnwrappedLineParser::parsePPUnknown() { 1213 do { 1214 nextToken(); 1215 } while (!eof()); 1216 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1217 Line->Level += PPBranchLevel + 1; 1218 addUnwrappedLine(); 1219 } 1220 1221 // Here we exclude certain tokens that are not usually the first token in an 1222 // unwrapped line. This is used in attempt to distinguish macro calls without 1223 // trailing semicolons from other constructs split to several lines. 1224 static bool tokenCanStartNewLine(const FormatToken &Tok) { 1225 // Semicolon can be a null-statement, l_square can be a start of a macro or 1226 // a C++11 attribute, but this doesn't seem to be common. 1227 return !Tok.isOneOf(tok::semi, tok::l_brace, 1228 // Tokens that can only be used as binary operators and a 1229 // part of overloaded operator names. 
1230 tok::period, tok::periodstar, tok::arrow, tok::arrowstar, 1231 tok::less, tok::greater, tok::slash, tok::percent, 1232 tok::lessless, tok::greatergreater, tok::equal, 1233 tok::plusequal, tok::minusequal, tok::starequal, 1234 tok::slashequal, tok::percentequal, tok::ampequal, 1235 tok::pipeequal, tok::caretequal, tok::greatergreaterequal, 1236 tok::lesslessequal, 1237 // Colon is used in labels, base class lists, initializer 1238 // lists, range-based for loops, ternary operator, but 1239 // should never be the first token in an unwrapped line. 1240 tok::colon, 1241 // 'noexcept' is a trailing annotation. 1242 tok::kw_noexcept); 1243 } 1244 1245 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 1246 const FormatToken *FormatTok) { 1247 // FIXME: This returns true for C/C++ keywords like 'struct'. 1248 return FormatTok->is(tok::identifier) && 1249 (!FormatTok->Tok.getIdentifierInfo() || 1250 !FormatTok->isOneOf( 1251 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 1252 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 1253 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 1254 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 1255 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 1256 Keywords.kw_instanceof, Keywords.kw_interface, 1257 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from)); 1258 } 1259 1260 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 1261 const FormatToken *FormatTok) { 1262 return FormatTok->Tok.isLiteral() || 1263 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 1264 mustBeJSIdent(Keywords, FormatTok); 1265 } 1266 1267 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 1268 // when encountered after a value (see mustBeJSIdentOrValue). 
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
                           const FormatToken *FormatTok) {
  return FormatTok->isOneOf(
      tok::kw_return, Keywords.kw_yield,
      // conditionals
      tok::kw_if, tok::kw_else,
      // loops
      tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
      // switch/case
      tok::kw_switch, tok::kw_case,
      // exceptions
      tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
      // declaration
      tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
      Keywords.kw_async, Keywords.kw_function,
      // import/export
      Keywords.kw_import, tok::kw_export);
}

// Checks whether a token is a type in K&R C (aka C78).
// Note that any identifier is accepted as a potential type name.
static bool isC78Type(const FormatToken &Tok) {
  return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
                     tok::kw_unsigned, tok::kw_float, tok::kw_double,
                     tok::identifier);
}

// This function checks whether a token starts the first parameter declaration
// in a K&R C (aka C78) function definition, e.g.:
//   int f(a, b)
//   short a, b;
//   {
//      return a + b;
//   }
//
// At the call site in parseStructuralElement(), Tok is the token following the
// parsed parameter list, Next is the token after Tok (comments skipped), and
// FuncName is the token that preceded the opening parenthesis. All three must
// be non-null.
static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
                               const FormatToken *FuncName) {
  assert(Tok);
  assert(Next);
  assert(FuncName);

  if (FuncName->isNot(tok::identifier))
    return false;

  // The function name must be preceded by a '*' or a K&R type.
  const FormatToken *Prev = FuncName->Previous;
  if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
    return false;

  // The candidate declaration must start with a type or one of `register`,
  // `struct`, `union`.
  if (!isC78Type(*Tok) &&
      !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
    return false;
  }

  // ... and be followed by a '*' or a token with identifier info.
  if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
    return false;

  // Walk backwards from Tok: the preceding tokens must be `)`, then an
  // identifier, then `(` or `,`.
  Tok = Tok->Previous;
  if (!Tok || Tok->isNot(tok::r_paren))
    return false;

  Tok = Tok->Previous;
  if (!Tok || Tok->isNot(tok::identifier))
    return false;

  return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
}

// Parses an `import` line. Expects the current token to be `import`.
//
// Returns false without consuming anything when the token after `import`
// (comments skipped) has no identifier info and is not ':', '<' or a string
// literal. Otherwise consumes tokens up to and including the next ';' (or
// until eof()), adds an unwrapped line, and returns true.
bool UnwrappedLineParser::parseModuleImport() {
  assert(FormatTok->is(Keywords.kw_import) && "'import' expected");

  if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
      !Token->Tok.getIdentifierInfo() &&
      !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
    return false;
  }

  nextToken();
  while (!eof()) {
    if (FormatTok->is(tok::colon)) {
      FormatTok->setFinalizedType(TT_ModulePartitionColon);
    }
    // Handle import <foo/bar.h> as we would an include statement.
    else if (FormatTok->is(tok::less)) {
      nextToken();
      while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
        // Mark tokens up to the trailing line comments as implicit string
        // literals.
        if (FormatTok->isNot(tok::comment) &&
            !FormatTok->TokenText.starts_with("//")) {
          FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
        }
        nextToken();
      }
    }
    // The ';' ends the import; consume it and stop.
    if (FormatTok->is(tok::semi)) {
      nextToken();
      break;
    }
    nextToken();
  }

  addUnwrappedLine();
  return true;
}

// readTokenWithJavaScriptASI reads the next token and terminates the current
// line if JavaScript Automatic Semicolon Insertion must
// happen between the current token and the next token.
//
// This method is conservative - it cannot cover all edge cases of JavaScript,
// but only aims to correctly handle certain well known cases. It *must not*
// return true in speculative cases.
void UnwrappedLineParser::readTokenWithJavaScriptASI() {
  FormatToken *Previous = FormatTok;
  readToken();
  FormatToken *Next = FormatTok;

  // When comments precede the next token, the first comment decides whether
  // there was a line break.
  bool IsOnSameLine =
      CommentsBeforeNextToken.empty()
          ? Next->NewlinesBefore == 0
          : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
  if (IsOnSameLine)
    return;

  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
  bool PreviousStartsTemplateExpr =
      Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${");
  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
    // If the line contains an '@' sign, the previous token might be an
    // annotation, which can precede another identifier/value.
    bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
      return LineNode.Tok->is(tok::at);
    });
    if (HasAt)
      return;
  }
  // A value followed by '!' on a new line: end the line before the '!'.
  if (Next->is(tok::exclaim) && PreviousMustBeValue)
    return addUnwrappedLine();
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  bool NextEndsTemplateExpr =
      Next->is(TT_TemplateString) && Next->TokenText.starts_with("}");
  // Two values (or `]`, `)`, `++`, `--` followed by a value) separated by a
  // line break, outside of a template string expression.
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
      (PreviousMustBeValue ||
       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
                         tok::minusminus))) {
    return addUnwrappedLine();
  }
  // A value or ')' followed by a keyword that starts a declaration/statement.
  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
      isJSDeclOrStmt(Keywords, Next)) {
    return addUnwrappedLine();
  }
}

// Parses one structural element starting at the current token, adding
// unwrapped lines as needed.
//
// The function works in three stages: language-specific prefixes (C++
// attributes, Verilog qualifiers), a first switch on tokens that are only
// special at the start of an element, and then a loop that consumes tokens
// until the element is finished or eof() is reached.
//
// OpeningBrace: when non-null and of type TT_RequiresExpressionLBrace, braces
//               in the loop are marked as braced-list braces; when non-null
//               the K&R parameter-declaration check is skipped.
// IfKind:       forwarded to parseIfThenElse().
// IfLeftBrace:  if non-null, receives the token returned by parseIfThenElse()
//               for an `if` statement.
// HasDoWhile:   if non-null, set to true after a do-while loop is parsed.
// HasLabel:     if non-null, set to true after a goto label is parsed.
void UnwrappedLineParser::parseStructuralElement(
    const FormatToken *OpeningBrace, IfStmtKind *IfKind,
    FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
  // TableGen `include "file"` directive.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }

  if (IsCpp) {
    // Consume leading C++ attributes for as long as handleCppAttributes()
    // reports success.
    while (FormatTok->is(tok::l_square) && handleCppAttributes()) {
    }
  } else if (Style.isVerilog()) {
    if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
      parseForOrWhileLoop(/*HasParens=*/false);
      return;
    }
    if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
                           Keywords.kw_assume, Keywords.kw_cover)) {
      parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
      return;
    }

    // Skip things that can exist before keywords like 'if' and 'case'.
    while (true) {
      if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
                             Keywords.kw_unique0)) {
        nextToken();
      } else if (FormatTok->is(tok::l_paren) &&
                 Tokens->peekNextToken()->is(tok::star)) {
        parseParens();
      } else {
        break;
      }
    }
  }

  // Tokens that only make sense at the beginning of a line.
  if (FormatTok->isAccessSpecifierKeyword()) {
    if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
        Style.isCSharp()) {
      nextToken();
    } else {
      parseAccessSpecifier();
    }
    return;
  }
  // First switch: a `return` means the element was fully handled here, a
  // `break` continues with the token loop below.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setFinalizedType(TT_InlineASMBrace);
      nextToken();
      // Finalize every token of the asm block up to the closing brace.
      while (FormatTok && !eof()) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setFinalizedType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_if: {
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    FormatToken *Tok = parseIfThenElse(IfKind);
    if (IfLeftBrace)
      *IfLeftBrace = Tok;
    return;
  }
  case tok::kw_for:
  case tok::kw_while:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    parseDoWhile();
    if (HasDoWhile)
      *HasDoWhile = true;
    return;
  case tok::kw_switch:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'switch: string' field declaration.
      break;
    }
    parseSwitch(/*IsExpr=*/false);
    return;
  case tok::kw_default: {
    // In Verilog default along with other labels are handled in the next loop.
    if (Style.isVerilog())
      break;
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'default: string' field declaration.
      break;
    }
    auto *Default = FormatTok;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      FormatTok->setFinalizedType(TT_CaseLabelColon);
      parseLabel();
      return;
    }
    if (FormatTok->is(tok::arrow)) {
      FormatTok->setFinalizedType(TT_CaseLabelArrow);
      Default->setFinalizedType(TT_SwitchExpressionLabel);
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  }
  case tok::kw_case:
    // Proto: there are no switch/case statements.
    if (Style.Language == FormatStyle::LK_Proto) {
      nextToken();
      return;
    }
    if (Style.isVerilog()) {
      parseBlock();
      addUnwrappedLine();
      return;
    }
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'case: string' field declaration.
      nextToken();
      break;
    }
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    if (Style.isVerilog()) {
      // In Verilog an extern module declaration looks like a start of module.
      // But there is no body and endmodule. So we handle it separately.
      if (Keywords.isVerilogHierarchy(*FormatTok)) {
        parseVerilogHierarchyHeader();
        return;
      }
    } else if (FormatTok->is(tok::string_literal)) {
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock)
          addUnwrappedLine();
        // Either we indent or for backwards compatibility we follow the
        // AfterExternBlock style.
        unsigned AddLevels =
            (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
                    (Style.BraceWrapping.AfterExternBlock &&
                     Style.IndentExternBlock ==
                         FormatStyle::IEBS_AfterExternBlock)
                ? 1u
                : 0u;
        parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.isJavaScript()) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (IsCpp) {
      nextToken();
      if (FormatTok->is(tok::kw_namespace)) {
        parseNamespace();
        return;
      }
      if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
        return;
    }
    break;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.isJavaScript()) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        // Proto: `import [public] "file";`
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (FormatTok->isNot(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      if (IsCpp && parseModuleImport())
        return;
    }
    // `signals:` / `slots:` style section labels (and their Q_ variants).
    if (IsCpp && FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                                    Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (IsCpp && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (IsCpp && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In Verilog labels can be any expression, so we don't do them here.
    // JS doesn't have macros, and within classes colons indicate fields, not
    // labels.
    // TableGen doesn't have labels.
    if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() &&
        Tokens->peekNextToken()->is(tok::colon) && !Line->MustBeDeclaration) {
      nextToken();
      if (!Line->InMacroBody || CurrentLines->size() > 1)
        Line->Tokens.begin()->Tok->MustBreakBefore = true;
      FormatTok->setFinalizedType(TT_GotoLabelColon);
      parseLabel(!Style.IndentGotoLabels);
      if (HasLabel)
        *HasLabel = true;
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }

  // Main loop: consume tokens until a case returns or eof() is reached.
  for (const bool InRequiresExpression =
           OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
       !eof();) {
    // A C++ alternative operator keyword that is directly followed by a binary
    // operator is re-tagged as a plain identifier.
    if (IsCpp && FormatTok->isCppAlternativeOperatorKeyword()) {
      if (auto *Next = Tokens->peekNextToken(/*SkipComment=*/true);
          Next && Next->isBinaryOperator()) {
        FormatTok->Tok.setKind(tok::identifier);
      }
    }
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always) {
            addUnwrappedLine();
          }
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->is(tok::l_paren)) {
          // Skip synchronization object
          parseParens();
        }
        if (FormatTok->is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always) {
            addUnwrappedLine();
          }
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_requires: {
      if (IsCpp) {
        bool ParsedClause = parseRequires();
        if (ParsedClause)
          return;
      } else {
        nextToken();
      }
      break;
    }
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ..." or "... -> enum" or
      // "template <..., enum ...>".
      if (Previous && Previous->isOneOf(tok::less, tok::arrow, tok::comma)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies to C++ and Verilog.
      if (!IsCpp && !Style.isVerilog()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM)) {
        parseEnum();
      }
      break;
    case tok::kw_class:
      if (Style.isVerilog()) {
        parseBlock();
        addUnwrappedLine();
        return;
      }
      if (Style.isTableGen()) {
        // Do nothing special. In this case the l_brace becomes FunctionLBrace.
        // This is same as def and so on.
        nextToken();
        break;
      }
      [[fallthrough]];
    case tok::kw_struct:
    case tok::kw_union:
      if (parseStructLike())
        return;
      break;
    case tok::kw_decltype:
      nextToken();
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
        assert(FormatTok->Previous);
        // Remember `decltype(auto)` for the l_brace case below.
        if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto,
                                              tok::l_paren)) {
          Line->SeenDecltypeAuto = true;
        }
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class)) {
        nextToken();
      }
      if (Style.isJavaScript() && FormatTok &&
          FormatTok->Tok.getIdentifierInfo()) {
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      }
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The closing brace itself is not consumed here.
      addUnwrappedLine();
      return;
    case tok::l_paren: {
      parseParens();
      // Break the unwrapped line if a K&R C function definition has a parameter
      // declaration.
      if (OpeningBrace || !IsCpp || !Previous || eof())
        break;
      if (isC78ParameterDecl(FormatTok,
                             Tokens->peekNextToken(/*SkipComment=*/true),
                             Previous)) {
        addUnwrappedLine();
        return;
      }
      break;
    }
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      nextToken();
      // Block return type.
      if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName(LangOpts)) {
        nextToken();
        // Return types: pointers are ok too.
        while (FormatTok->is(tok::star))
          nextToken();
      }
      // Block argument list.
      if (FormatTok->is(tok::l_paren))
        parseParens();
      // Block body.
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (InRequiresExpression)
        FormatTok->setFinalizedType(TT_BracedListLBrace);
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.Language == FormatStyle::LK_Java &&
            Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
          // If necessary, we could set the type to something different than
          // TT_FunctionLBrace.
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always) {
            addUnwrappedLine();
          }
        } else if (Style.BraceWrapping.AfterFunction) {
          addUnwrappedLine();
        }
        if (!Previous || Previous->isNot(TT_TypeDeclarationParen))
          FormatTok->setFinalizedType(TT_FunctionLBrace);
        parseBlock();
        IsDecltypeAutoFunction = false;
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 ||
           (TokenCount == 1 &&
            Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.isJavaScript()) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the next token, then rewind the token source.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (!mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (Style.isVerilog()) {
        if (FormatTok->is(Keywords.kw_table)) {
          parseVerilogTable();
          return;
        }
        if (Keywords.isVerilogBegin(*FormatTok) ||
            Keywords.isVerilogHierarchy(*FormatTok)) {
          parseBlock();
          addUnwrappedLine();
          return;
        }
      }

      if (!IsCpp && FormatTok->is(Keywords.kw_interface)) {
        if (parseStructLike())
          return;
        break;
      }

      if (IsCpp && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;

      FormatToken *PreviousToken = FormatTok;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.isJavaScript())
        break;

      // True if the line holds exactly one token once leading comments (and,
      // for Verilog, leading '#' tokens) are skipped.
      auto OneTokenSoFar = [&]() {
        auto I = Line->Tokens.begin(), E = Line->Tokens.end();
        while (I != E && I->Tok->is(tok::comment))
          ++I;
        if (Style.isVerilog())
          while (I != E && I->Tok->is(tok::hash))
            ++I;
        return I != E && (++I == E);
      };
      if (OneTokenSoFar()) {
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Only all-uppercase names qualify; names shorter than five characters
        // must additionally be function-like.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          if (PreviousToken->isNot(TT_UntouchableMacroFunc))
            PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      if ((Style.isJavaScript() || Style.isCSharp()) &&
          FormatTok->is(TT_FatArrow)) {
        tryToParseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->setBlockKind(BK_BracedInit);
        // TableGen's defset statement has syntax of the form,
        // `defset <type> <name> = { <statement>... }`
        if (Style.isTableGen() &&
            Line->Tokens.begin()->Tok->is(Keywords.kw_defset)) {
          FormatTok->setFinalizedType(TT_FunctionLBrace);
          parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                     /*MunchSemi=*/false);
          addUnwrappedLine();
          break;
        }
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->is(tok::less)) {
        nextToken();
        parseBracedList(/*IsAngleBracket=*/true);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    case tok::kw_switch:
      // NOTE(review): nextToken() also runs after parseSwitch() returns, so the
      // token following a Java switch expression is consumed here without
      // being dispatched through this loop - confirm this is intended.
      if (Style.Language == FormatStyle::LK_Java)
        parseSwitch(/*IsExpr=*/true);
      nextToken();
      break;
    case tok::kw_case:
      // Proto: there are no switch/case statements.
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        return;
      }
      // In Verilog switch is called case.
      if (Style.isVerilog()) {
        parseBlock();
        addUnwrappedLine();
        return;
      }
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // 'case: string' field declaration.
        nextToken();
        break;
      }
      parseCaseLabel();
      break;
    case tok::kw_default:
      nextToken();
      if (Style.isVerilog()) {
        if (FormatTok->is(tok::colon)) {
          // The label will be handled in the next iteration.
          break;
        }
        if (FormatTok->is(Keywords.kw_clocking)) {
          // A default clocking block.
          parseBlock();
          addUnwrappedLine();
          return;
        }
        parseVerilogCaseLabel();
        return;
      }
      break;
    case tok::colon:
      nextToken();
      if (Style.isVerilog()) {
        parseVerilogCaseLabel();
        return;
      }
      break;
    case tok::greater:
      nextToken();
      // A '>' directly followed by '{' is marked as a template closer.
      if (FormatTok->is(tok::l_brace))
        FormatTok->Previous->setFinalizedType(TT_TemplateCloser);
      break;
    default:
      nextToken();
      break;
    }
  }
}

bool UnwrappedLineParser::tryToParsePropertyAccessor() {
  assert(FormatTok->is(tok::l_brace));
  if (!Style.isCSharp())
    return false;
  // See if it's a property accessor.
  if (FormatTok->Previous->isNot(tok::identifier))
    return false;

  // See if we are inside a property accessor.
  //
  // Record the current tokenPosition so that we can advance and
  // reset the current token. `Next` is not set yet so we need
  // another way to advance along the token stream.
  unsigned int StoredPosition = Tokens->getPosition();
  FormatToken *Tok = Tokens->getNextToken();

  // A trivial property accessor is of the form:
  // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
  // Track these as they do not require line breaks to be introduced.
2165 bool HasSpecialAccessor = false; 2166 bool IsTrivialPropertyAccessor = true; 2167 while (!eof()) { 2168 if (Tok->isAccessSpecifierKeyword() || 2169 Tok->isOneOf(tok::semi, Keywords.kw_internal, Keywords.kw_get, 2170 Keywords.kw_init, Keywords.kw_set)) { 2171 if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set)) 2172 HasSpecialAccessor = true; 2173 Tok = Tokens->getNextToken(); 2174 continue; 2175 } 2176 if (Tok->isNot(tok::r_brace)) 2177 IsTrivialPropertyAccessor = false; 2178 break; 2179 } 2180 2181 if (!HasSpecialAccessor) { 2182 Tokens->setPosition(StoredPosition); 2183 return false; 2184 } 2185 2186 // Try to parse the property accessor: 2187 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties 2188 Tokens->setPosition(StoredPosition); 2189 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction) 2190 addUnwrappedLine(); 2191 nextToken(); 2192 do { 2193 switch (FormatTok->Tok.getKind()) { 2194 case tok::r_brace: 2195 nextToken(); 2196 if (FormatTok->is(tok::equal)) { 2197 while (!eof() && FormatTok->isNot(tok::semi)) 2198 nextToken(); 2199 nextToken(); 2200 } 2201 addUnwrappedLine(); 2202 return true; 2203 case tok::l_brace: 2204 ++Line->Level; 2205 parseBlock(/*MustBeDeclaration=*/true); 2206 addUnwrappedLine(); 2207 --Line->Level; 2208 break; 2209 case tok::equal: 2210 if (FormatTok->is(TT_FatArrow)) { 2211 ++Line->Level; 2212 do { 2213 nextToken(); 2214 } while (!eof() && FormatTok->isNot(tok::semi)); 2215 nextToken(); 2216 addUnwrappedLine(); 2217 --Line->Level; 2218 break; 2219 } 2220 nextToken(); 2221 break; 2222 default: 2223 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init, 2224 Keywords.kw_set) && 2225 !IsTrivialPropertyAccessor) { 2226 // Non-trivial get/set needs to be on its own line. 2227 addUnwrappedLine(); 2228 } 2229 nextToken(); 2230 } 2231 } while (!eof()); 2232 2233 // Unreachable for well-formed code (paired '{' and '}'). 
2234 return true; 2235 } 2236 2237 bool UnwrappedLineParser::tryToParseLambda() { 2238 assert(FormatTok->is(tok::l_square)); 2239 if (!IsCpp) { 2240 nextToken(); 2241 return false; 2242 } 2243 FormatToken &LSquare = *FormatTok; 2244 if (!tryToParseLambdaIntroducer()) 2245 return false; 2246 2247 bool SeenArrow = false; 2248 bool InTemplateParameterList = false; 2249 2250 while (FormatTok->isNot(tok::l_brace)) { 2251 if (FormatTok->isTypeName(LangOpts) || FormatTok->isAttribute()) { 2252 nextToken(); 2253 continue; 2254 } 2255 switch (FormatTok->Tok.getKind()) { 2256 case tok::l_brace: 2257 break; 2258 case tok::l_paren: 2259 parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference); 2260 break; 2261 case tok::l_square: 2262 parseSquare(); 2263 break; 2264 case tok::less: 2265 assert(FormatTok->Previous); 2266 if (FormatTok->Previous->is(tok::r_square)) 2267 InTemplateParameterList = true; 2268 nextToken(); 2269 break; 2270 case tok::kw_auto: 2271 case tok::kw_class: 2272 case tok::kw_struct: 2273 case tok::kw_union: 2274 case tok::kw_template: 2275 case tok::kw_typename: 2276 case tok::amp: 2277 case tok::star: 2278 case tok::kw_const: 2279 case tok::kw_constexpr: 2280 case tok::kw_consteval: 2281 case tok::comma: 2282 case tok::greater: 2283 case tok::identifier: 2284 case tok::numeric_constant: 2285 case tok::coloncolon: 2286 case tok::kw_mutable: 2287 case tok::kw_noexcept: 2288 case tok::kw_static: 2289 nextToken(); 2290 break; 2291 // Specialization of a template with an integer parameter can contain 2292 // arithmetic, logical, comparison and ternary operators. 2293 // 2294 // FIXME: This also accepts sequences of operators that are not in the scope 2295 // of a template argument list. 2296 // 2297 // In a C++ lambda a template type can only occur after an arrow. 
We use 2298 // this as an heuristic to distinguish between Objective-C expressions 2299 // followed by an `a->b` expression, such as: 2300 // ([obj func:arg] + a->b) 2301 // Otherwise the code below would parse as a lambda. 2302 case tok::plus: 2303 case tok::minus: 2304 case tok::exclaim: 2305 case tok::tilde: 2306 case tok::slash: 2307 case tok::percent: 2308 case tok::lessless: 2309 case tok::pipe: 2310 case tok::pipepipe: 2311 case tok::ampamp: 2312 case tok::caret: 2313 case tok::equalequal: 2314 case tok::exclaimequal: 2315 case tok::greaterequal: 2316 case tok::lessequal: 2317 case tok::question: 2318 case tok::colon: 2319 case tok::ellipsis: 2320 case tok::kw_true: 2321 case tok::kw_false: 2322 if (SeenArrow || InTemplateParameterList) { 2323 nextToken(); 2324 break; 2325 } 2326 return true; 2327 case tok::arrow: 2328 // This might or might not actually be a lambda arrow (this could be an 2329 // ObjC method invocation followed by a dereferencing arrow). We might 2330 // reset this back to TT_Unknown in TokenAnnotator. 
2331 FormatTok->setFinalizedType(TT_LambdaArrow); 2332 SeenArrow = true; 2333 nextToken(); 2334 break; 2335 case tok::kw_requires: { 2336 auto *RequiresToken = FormatTok; 2337 nextToken(); 2338 parseRequiresClause(RequiresToken); 2339 break; 2340 } 2341 case tok::equal: 2342 if (!InTemplateParameterList) 2343 return true; 2344 nextToken(); 2345 break; 2346 default: 2347 return true; 2348 } 2349 } 2350 2351 FormatTok->setFinalizedType(TT_LambdaLBrace); 2352 LSquare.setFinalizedType(TT_LambdaLSquare); 2353 2354 NestedLambdas.push_back(Line->SeenDecltypeAuto); 2355 parseChildBlock(); 2356 assert(!NestedLambdas.empty()); 2357 NestedLambdas.pop_back(); 2358 2359 return true; 2360 } 2361 2362 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 2363 const FormatToken *Previous = FormatTok->Previous; 2364 const FormatToken *LeftSquare = FormatTok; 2365 nextToken(); 2366 if ((Previous && ((Previous->Tok.getIdentifierInfo() && 2367 !Previous->isOneOf(tok::kw_return, tok::kw_co_await, 2368 tok::kw_co_yield, tok::kw_co_return)) || 2369 Previous->closesScope())) || 2370 LeftSquare->isCppStructuredBinding(IsCpp)) { 2371 return false; 2372 } 2373 if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind())) 2374 return false; 2375 if (FormatTok->is(tok::r_square)) { 2376 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true); 2377 if (Next->is(tok::greater)) 2378 return false; 2379 } 2380 parseSquare(/*LambdaIntroducer=*/true); 2381 return true; 2382 } 2383 2384 void UnwrappedLineParser::tryToParseJSFunction() { 2385 assert(FormatTok->is(Keywords.kw_function)); 2386 if (FormatTok->is(Keywords.kw_async)) 2387 nextToken(); 2388 // Consume "function". 2389 nextToken(); 2390 2391 // Consume * (generator function). Treat it like C++'s overloaded operators. 2392 if (FormatTok->is(tok::star)) { 2393 FormatTok->setFinalizedType(TT_OverloadedOperator); 2394 nextToken(); 2395 } 2396 2397 // Consume function name. 
2398 if (FormatTok->is(tok::identifier)) 2399 nextToken(); 2400 2401 if (FormatTok->isNot(tok::l_paren)) 2402 return; 2403 2404 // Parse formal parameter list. 2405 parseParens(); 2406 2407 if (FormatTok->is(tok::colon)) { 2408 // Parse a type definition. 2409 nextToken(); 2410 2411 // Eat the type declaration. For braced inline object types, balance braces, 2412 // otherwise just parse until finding an l_brace for the function body. 2413 if (FormatTok->is(tok::l_brace)) 2414 tryToParseBracedList(); 2415 else 2416 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 2417 nextToken(); 2418 } 2419 2420 if (FormatTok->is(tok::semi)) 2421 return; 2422 2423 parseChildBlock(); 2424 } 2425 2426 bool UnwrappedLineParser::tryToParseBracedList() { 2427 if (FormatTok->is(BK_Unknown)) 2428 calculateBraceTypes(); 2429 assert(FormatTok->isNot(BK_Unknown)); 2430 if (FormatTok->is(BK_Block)) 2431 return false; 2432 nextToken(); 2433 parseBracedList(); 2434 return true; 2435 } 2436 2437 bool UnwrappedLineParser::tryToParseChildBlock() { 2438 assert(Style.isJavaScript() || Style.isCSharp()); 2439 assert(FormatTok->is(TT_FatArrow)); 2440 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow. 2441 // They always start an expression or a child block if followed by a curly 2442 // brace. 2443 nextToken(); 2444 if (FormatTok->isNot(tok::l_brace)) 2445 return false; 2446 parseChildBlock(); 2447 return true; 2448 } 2449 2450 bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) { 2451 assert(!IsAngleBracket || !IsEnum); 2452 bool HasError = false; 2453 2454 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 2455 // replace this by using parseAssignmentExpression() inside. 
2456 do { 2457 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) && 2458 tryToParseChildBlock()) { 2459 continue; 2460 } 2461 if (Style.isJavaScript()) { 2462 if (FormatTok->is(Keywords.kw_function)) { 2463 tryToParseJSFunction(); 2464 continue; 2465 } 2466 if (FormatTok->is(tok::l_brace)) { 2467 // Could be a method inside of a braced list `{a() { return 1; }}`. 2468 if (tryToParseBracedList()) 2469 continue; 2470 parseChildBlock(); 2471 } 2472 } 2473 if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) { 2474 if (IsEnum) { 2475 FormatTok->setBlockKind(BK_Block); 2476 if (!Style.AllowShortEnumsOnASingleLine) 2477 addUnwrappedLine(); 2478 } 2479 nextToken(); 2480 return !HasError; 2481 } 2482 switch (FormatTok->Tok.getKind()) { 2483 case tok::l_square: 2484 if (Style.isCSharp()) 2485 parseSquare(); 2486 else 2487 tryToParseLambda(); 2488 break; 2489 case tok::l_paren: 2490 parseParens(); 2491 // JavaScript can just have free standing methods and getters/setters in 2492 // object literals. Detect them by a "{" following ")". 2493 if (Style.isJavaScript()) { 2494 if (FormatTok->is(tok::l_brace)) 2495 parseChildBlock(); 2496 break; 2497 } 2498 break; 2499 case tok::l_brace: 2500 // Assume there are no blocks inside a braced init list apart 2501 // from the ones we explicitly parse out (like lambdas). 2502 FormatTok->setBlockKind(BK_BracedInit); 2503 nextToken(); 2504 parseBracedList(); 2505 break; 2506 case tok::less: 2507 nextToken(); 2508 if (IsAngleBracket) 2509 parseBracedList(/*IsAngleBracket=*/true); 2510 break; 2511 case tok::semi: 2512 // JavaScript (or more precisely TypeScript) can have semicolons in braced 2513 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 2514 // used for error recovery if we have otherwise determined that this is 2515 // a braced list. 
2516 if (Style.isJavaScript()) { 2517 nextToken(); 2518 break; 2519 } 2520 HasError = true; 2521 if (!IsEnum) 2522 return false; 2523 nextToken(); 2524 break; 2525 case tok::comma: 2526 nextToken(); 2527 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2528 addUnwrappedLine(); 2529 break; 2530 default: 2531 nextToken(); 2532 break; 2533 } 2534 } while (!eof()); 2535 return false; 2536 } 2537 2538 /// \brief Parses a pair of parentheses (and everything between them). 2539 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all 2540 /// double ampersands. This applies for all nested scopes as well. 2541 /// 2542 /// Returns whether there is a `=` token between the parentheses. 2543 bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { 2544 assert(FormatTok->is(tok::l_paren) && "'(' expected."); 2545 auto *LeftParen = FormatTok; 2546 bool SeenEqual = false; 2547 bool MightBeFoldExpr = false; 2548 const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace); 2549 nextToken(); 2550 do { 2551 switch (FormatTok->Tok.getKind()) { 2552 case tok::l_paren: 2553 if (parseParens(AmpAmpTokenType)) 2554 SeenEqual = true; 2555 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 2556 parseChildBlock(); 2557 break; 2558 case tok::r_paren: { 2559 auto *Prev = LeftParen->Previous; 2560 if (!MightBeStmtExpr && !MightBeFoldExpr && !Line->InMacroBody && 2561 Style.RemoveParentheses > FormatStyle::RPS_Leave) { 2562 const auto *Next = Tokens->peekNextToken(); 2563 const bool DoubleParens = 2564 Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren); 2565 const auto *PrevPrev = Prev ? 
Prev->getPreviousNonComment() : nullptr; 2566 const bool Blacklisted = 2567 PrevPrev && 2568 (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) || 2569 (SeenEqual && 2570 (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) || 2571 PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if)))); 2572 const bool ReturnParens = 2573 Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement && 2574 ((NestedLambdas.empty() && !IsDecltypeAutoFunction) || 2575 (!NestedLambdas.empty() && !NestedLambdas.back())) && 2576 Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next && 2577 Next->is(tok::semi); 2578 if ((DoubleParens && !Blacklisted) || ReturnParens) { 2579 LeftParen->Optional = true; 2580 FormatTok->Optional = true; 2581 } 2582 } 2583 if (Prev) { 2584 if (Prev->is(TT_TypenameMacro)) { 2585 LeftParen->setFinalizedType(TT_TypeDeclarationParen); 2586 FormatTok->setFinalizedType(TT_TypeDeclarationParen); 2587 } else if (Prev->is(tok::greater) && FormatTok->Previous == LeftParen) { 2588 Prev->setFinalizedType(TT_TemplateCloser); 2589 } 2590 } 2591 nextToken(); 2592 return SeenEqual; 2593 } 2594 case tok::r_brace: 2595 // A "}" inside parenthesis is an error if there wasn't a matching "{". 
2596 return SeenEqual; 2597 case tok::l_square: 2598 tryToParseLambda(); 2599 break; 2600 case tok::l_brace: 2601 if (!tryToParseBracedList()) 2602 parseChildBlock(); 2603 break; 2604 case tok::at: 2605 nextToken(); 2606 if (FormatTok->is(tok::l_brace)) { 2607 nextToken(); 2608 parseBracedList(); 2609 } 2610 break; 2611 case tok::ellipsis: 2612 MightBeFoldExpr = true; 2613 nextToken(); 2614 break; 2615 case tok::equal: 2616 SeenEqual = true; 2617 if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) 2618 tryToParseChildBlock(); 2619 else 2620 nextToken(); 2621 break; 2622 case tok::kw_class: 2623 if (Style.isJavaScript()) 2624 parseRecord(/*ParseAsExpr=*/true); 2625 else 2626 nextToken(); 2627 break; 2628 case tok::identifier: 2629 if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function))) 2630 tryToParseJSFunction(); 2631 else 2632 nextToken(); 2633 break; 2634 case tok::kw_switch: 2635 parseSwitch(/*IsExpr=*/true); 2636 break; 2637 case tok::kw_requires: { 2638 auto RequiresToken = FormatTok; 2639 nextToken(); 2640 parseRequiresExpression(RequiresToken); 2641 break; 2642 } 2643 case tok::ampamp: 2644 if (AmpAmpTokenType != TT_Unknown) 2645 FormatTok->setFinalizedType(AmpAmpTokenType); 2646 [[fallthrough]]; 2647 default: 2648 nextToken(); 2649 break; 2650 } 2651 } while (!eof()); 2652 return SeenEqual; 2653 } 2654 2655 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 2656 if (!LambdaIntroducer) { 2657 assert(FormatTok->is(tok::l_square) && "'[' expected."); 2658 if (tryToParseLambda()) 2659 return; 2660 } 2661 do { 2662 switch (FormatTok->Tok.getKind()) { 2663 case tok::l_paren: 2664 parseParens(); 2665 break; 2666 case tok::r_square: 2667 nextToken(); 2668 return; 2669 case tok::r_brace: 2670 // A "}" inside parenthesis is an error if there wasn't a matching "{". 
2671 return; 2672 case tok::l_square: 2673 parseSquare(); 2674 break; 2675 case tok::l_brace: { 2676 if (!tryToParseBracedList()) 2677 parseChildBlock(); 2678 break; 2679 } 2680 case tok::at: 2681 case tok::colon: 2682 nextToken(); 2683 if (FormatTok->is(tok::l_brace)) { 2684 nextToken(); 2685 parseBracedList(); 2686 } 2687 break; 2688 default: 2689 nextToken(); 2690 break; 2691 } 2692 } while (!eof()); 2693 } 2694 2695 void UnwrappedLineParser::keepAncestorBraces() { 2696 if (!Style.RemoveBracesLLVM) 2697 return; 2698 2699 const int MaxNestingLevels = 2; 2700 const int Size = NestedTooDeep.size(); 2701 if (Size >= MaxNestingLevels) 2702 NestedTooDeep[Size - MaxNestingLevels] = true; 2703 NestedTooDeep.push_back(false); 2704 } 2705 2706 static FormatToken *getLastNonComment(const UnwrappedLine &Line) { 2707 for (const auto &Token : llvm::reverse(Line.Tokens)) 2708 if (Token.Tok->isNot(tok::comment)) 2709 return Token.Tok; 2710 2711 return nullptr; 2712 } 2713 2714 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) { 2715 FormatToken *Tok = nullptr; 2716 2717 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() && 2718 PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) { 2719 Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never 2720 ? 
getLastNonComment(*Line) 2721 : Line->Tokens.back().Tok; 2722 assert(Tok); 2723 if (Tok->BraceCount < 0) { 2724 assert(Tok->BraceCount == -1); 2725 Tok = nullptr; 2726 } else { 2727 Tok->BraceCount = -1; 2728 } 2729 } 2730 2731 addUnwrappedLine(); 2732 ++Line->Level; 2733 ++Line->UnbracedBodyLevel; 2734 parseStructuralElement(); 2735 --Line->UnbracedBodyLevel; 2736 2737 if (Tok) { 2738 assert(!Line->InPPDirective); 2739 Tok = nullptr; 2740 for (const auto &L : llvm::reverse(*CurrentLines)) { 2741 if (!L.InPPDirective && getLastNonComment(L)) { 2742 Tok = L.Tokens.back().Tok; 2743 break; 2744 } 2745 } 2746 assert(Tok); 2747 ++Tok->BraceCount; 2748 } 2749 2750 if (CheckEOF && eof()) 2751 addUnwrappedLine(); 2752 2753 --Line->Level; 2754 } 2755 2756 static void markOptionalBraces(FormatToken *LeftBrace) { 2757 if (!LeftBrace) 2758 return; 2759 2760 assert(LeftBrace->is(tok::l_brace)); 2761 2762 FormatToken *RightBrace = LeftBrace->MatchingParen; 2763 if (!RightBrace) { 2764 assert(!LeftBrace->Optional); 2765 return; 2766 } 2767 2768 assert(RightBrace->is(tok::r_brace)); 2769 assert(RightBrace->MatchingParen == LeftBrace); 2770 assert(LeftBrace->Optional == RightBrace->Optional); 2771 2772 LeftBrace->Optional = true; 2773 RightBrace->Optional = true; 2774 } 2775 2776 void UnwrappedLineParser::handleAttributes() { 2777 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`. 2778 if (FormatTok->isAttribute()) 2779 nextToken(); 2780 else if (FormatTok->is(tok::l_square)) 2781 handleCppAttributes(); 2782 } 2783 2784 bool UnwrappedLineParser::handleCppAttributes() { 2785 // Handle [[likely]] / [[unlikely]] attributes. 2786 assert(FormatTok->is(tok::l_square)); 2787 if (!tryToParseSimpleAttribute()) 2788 return false; 2789 parseSquare(); 2790 return true; 2791 } 2792 2793 /// Returns whether \c Tok begins a block. 
bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
  // FIXME: rename the function or make
  // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
  // Verilog opens blocks with keywords rather than `{`.
  return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
                           : Tok.is(tok::l_brace);
}

/// Parses an `if` statement, including any `else` / `else if` chain. Also
/// used for Verilog `restrict`/`assert`/`assume`/`cover` statements.
///
/// \param IfKind If non-null and Style.RemoveBracesLLVM is set, receives the
/// kind of the parsed statement (IfOnly, IfElse or IfElseIf).
/// \param KeepBraces If true, the braces of the `if` body are never marked
/// optional.
/// \param IsVerilogAssert If true, the Verilog assertion forms (`#0`, `final`,
/// `property`, `sequence`, optional then-action) are accepted.
///
/// \returns the left brace of the `if` body, or nullptr if the body is
/// unbraced, Style.RemoveBracesLLVM is off, or a Verilog assertion without a
/// then-action was parsed.
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
                                                  bool KeepBraces,
                                                  bool IsVerilogAssert) {
  assert((FormatTok->is(tok::kw_if) ||
          (Style.isVerilog() &&
           FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
                              Keywords.kw_assume, Keywords.kw_cover))) &&
         "'if' expected");
  nextToken();

  if (IsVerilogAssert) {
    // Handle `assert #0` and `assert final`.
    if (FormatTok->is(Keywords.kw_verilogHash)) {
      nextToken();
      if (FormatTok->is(tok::numeric_constant))
        nextToken();
    } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
                                  Keywords.kw_sequence)) {
      nextToken();
    }
  }

  // TableGen's if statement has the form of `if <cond> then { ... }`.
  if (Style.isTableGen()) {
    while (!eof() && FormatTok->isNot(Keywords.kw_then)) {
      // Simply skip until then. This range only contains a value.
      nextToken();
    }
  }

  // Handle `if !consteval`.
  if (FormatTok->is(tok::exclaim))
    nextToken();

  // Braces of `if consteval` are always kept; otherwise they are kept unless
  // brace removal is enabled and the caller did not ask to keep them.
  bool KeepIfBraces = true;
  if (FormatTok->is(tok::kw_consteval)) {
    nextToken();
  } else {
    KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
    if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
      nextToken();
    if (FormatTok->is(tok::l_paren)) {
      FormatTok->setFinalizedType(TT_ConditionLParen);
      parseParens();
    }
  }
  handleAttributes();
  // The then action is optional in Verilog assert statements.
  if (IsVerilogAssert && FormatTok->is(tok::semi)) {
    nextToken();
    addUnwrappedLine();
    return nullptr;
  }

  bool NeedsUnwrappedLine = false;
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind IfBlockKind = IfStmtKind::NotIf;

  if (isBlockBegin(*FormatTok)) {
    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
               /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
    setPreviousRBraceType(TT_ControlStatementRBrace);
    // Without a break before `else`, ending the line is deferred until it is
    // known whether an `else` follows.
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
    addUnwrappedLine();
  } else {
    parseUnbracedBody();
  }

  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    // Keep the braces if the block has no recorded matching brace, if this
    // level was flagged as nested too deep, or if the body's kind is IfOnly
    // or IfElseIf.
    KeepIfBraces = KeepIfBraces ||
                   (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
                   NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
                   IfBlockKind == IfStmtKind::IfElseIf;
  }

  bool KeepElseBraces = KeepIfBraces;
  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind Kind = IfStmtKind::IfOnly;

  if (FormatTok->is(tok::kw_else)) {
    if (Style.RemoveBracesLLVM) {
      NestedTooDeep.back() = false;
      Kind = IfStmtKind::IfElse;
    }
    nextToken();
    handleAttributes();
    if (isBlockBegin(*FormatTok)) {
      const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
      FormatTok->setFinalizedType(TT_ElseLBrace);
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
      FormatToken *IfLBrace =
          parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                     /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
      setPreviousRBraceType(TT_ElseRBrace);
      if (FormatTok->is(tok::kw_else)) {
        KeepElseBraces = KeepElseBraces ||
                         ElseBlockKind == IfStmtKind::IfOnly ||
                         ElseBlockKind == IfStmtKind::IfElseIf;
      } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
        // The else block starts with an `if` whose braces are not optional:
        // mark the else braces themselves optional here and exclude them from
        // the final marking below.
        KeepElseBraces = true;
        assert(ElseLeftBrace->MatchingParen);
        markOptionalBraces(ElseLeftBrace);
      }
      addUnwrappedLine();
    } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      // A comment between `else` and `if` puts the `if` on its own line, one
      // level deeper.
      const bool IsPrecededByComment = Previous->is(tok::comment);
      if (IsPrecededByComment) {
        addUnwrappedLine();
        ++Line->Level;
      }
      bool TooDeep = true;
      if (Style.RemoveBracesLLVM) {
        Kind = IfStmtKind::IfElseIf;
        // Take this statement's entry off the stack while the chained `if`
        // is parsed; it is pushed back afterwards.
        TooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(TooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      parseUnbracedBody(/*CheckEOF=*/true);
    }
  } else {
    KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
    if (NeedsUnwrappedLine)
      addUnwrappedLine();
  }

  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  KeepElseBraces = KeepElseBraces ||
                   (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
                   NestedTooDeep.back();

  // Drop the entry pushed by keepAncestorBraces() above.
  NestedTooDeep.pop_back();

  if (!KeepIfBraces && !KeepElseBraces) {
    markOptionalBraces(IfLeftBrace);
    markOptionalBraces(ElseLeftBrace);
  } else if (IfLeftBrace) {
    // The braces are kept: unlink the pair so that it is no longer treated
    // as a removable candidate.
    FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
    if (IfRightBrace) {
      assert(IfRightBrace->MatchingParen == IfLeftBrace);
      assert(!IfLeftBrace->Optional);
      assert(!IfRightBrace->Optional);
      IfLeftBrace->MatchingParen = nullptr;
      IfRightBrace->MatchingParen = nullptr;
    }
  }

  if (IfKind)
    *IfKind = Kind;

  return IfLeftBrace;
}

/// Parses a `try` / `__try` statement together with all handlers that follow
/// it (`catch`, `__except`, `__finally`, `finally` for Java/JavaScript, and
/// the Objective-C `@catch` / `@finally` forms).
///
/// Also handles function-try-blocks with a constructor initializer list and
/// Java try-with-resources.
void UnwrappedLineParser::parseTryCatch() {
  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
  nextToken();
  bool NeedsUnwrappedLine = false;
  bool HasCtorInitializer = false;
  if (FormatTok->is(tok::colon)) {
    auto *Colon = FormatTok;
    // We are in a function try block, what comes is an initializer list.
    nextToken();
    if (FormatTok->is(tok::identifier)) {
      HasCtorInitializer = true;
      Colon->setFinalizedType(TT_CtorInitializerColon);
    }

    // In case identifiers were removed by clang-tidy, what might follow is
    // multiple commas in sequence - before the first identifier.
    while (FormatTok->is(tok::comma))
      nextToken();

    while (FormatTok->is(tok::identifier)) {
      nextToken();
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
      } else if (FormatTok->is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }

      // In case identifiers were removed by clang-tidy, what might follow is
      // multiple commas in sequence - after the first identifier.
      while (FormatTok->is(tok::comma))
        nextToken();
    }
  }
  // Parse try with resource.
  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
    parseParens();

  keepAncestorBraces();

  if (FormatTok->is(tok::l_brace)) {
    if (HasCtorInitializer)
      FormatTok->setFinalizedType(TT_FunctionLBrace);
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock();
    if (Style.BraceWrapping.BeforeCatch)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else if (FormatTok->isNot(tok::kw_catch)) {
    // The C++ standard requires a compound-statement after a try.
    // If there's none, we try to assume there's a structuralElement
    // and try to continue.
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }
  // Parse every handler that follows the try block.
  while (true) {
    if (FormatTok->is(tok::at))
      nextToken();
    if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
                             tok::kw___finally) ||
          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
           FormatTok->is(Keywords.kw_finally)) ||
          (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
           FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
      break;
    }
    nextToken();
    // Skip the handler's header up to its `{`.
    while (FormatTok->isNot(tok::l_brace)) {
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
        continue;
      }
      // No block follows the handler keyword: give up, but still drop the
      // entry pushed by keepAncestorBraces() above.
      if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
        if (Style.RemoveBracesLLVM)
          NestedTooDeep.pop_back();
        return;
      }
      nextToken();
    }
    NeedsUnwrappedLine = false;
    Line->MustBeDeclaration = false;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock();
    if (Style.BraceWrapping.BeforeCatch)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  }

  if (Style.RemoveBracesLLVM)
    NestedTooDeep.pop_back();

  if (NeedsUnwrappedLine)
    addUnwrappedLine();
}

/// Parses a namespace declaration (or a namespace macro invocation) and its
/// body, if there is one.
///
/// The body is indented according to Style.NamespaceIndentation. A semicolon
/// after the closing brace is consumed together with the block.
void UnwrappedLineParser::parseNamespace() {
  assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
         "'namespace' expected");

  const FormatToken &InitialToken = *FormatTok;
  nextToken();
  if (InitialToken.is(TT_NamespaceMacro)) {
    parseParens();
  } else {
    // Consume the tokens between `namespace` and the opening brace.
    while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
                              tok::l_square, tok::period, tok::l_paren) ||
           (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
      if (FormatTok->is(tok::l_square))
        parseSquare();
      else if (FormatTok->is(tok::l_paren))
        parseParens();
      else
        nextToken();
    }
  }
  if (FormatTok->is(tok::l_brace)) {
    FormatTok->setFinalizedType(TT_NamespaceLBrace);

    if (ShouldBreakBeforeBrace(Style, InitialToken))
      addUnwrappedLine();

    // Indent the body for NI_All, or for NI_Inner when more than one
    // declaration scope is on the stack.
    unsigned AddLevels =
        Style.NamespaceIndentation == FormatStyle::NI_All ||
                (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
                 DeclarationScopeStack.size() > 1)
            ? 1u
            : 0u;
    bool ManageWhitesmithsBraces =
        AddLevels == 0u &&
        Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;

    // If we're in Whitesmiths mode, indent the brace if we're not indenting
    // the whole block.
    if (ManageWhitesmithsBraces)
      ++Line->Level;

    // Munch the semicolon after a namespace. This is more common than one would
    // think. Putting the semicolon into its own line is very ugly.
    parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
               /*KeepBraces=*/true, /*IfKind=*/nullptr,
               ManageWhitesmithsBraces);

    addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);

    if (ManageWhitesmithsBraces)
      --Line->Level;
  }
  // FIXME: Add error handling.
}

/// Parses the tokens following a `new` keyword for C# and Java; for all other
/// languages only the `new` token itself is consumed.
void UnwrappedLineParser::parseNew() {
  assert(FormatTok->is(tok::kw_new) && "'new' expected");
  nextToken();

  if (Style.isCSharp()) {
    do {
      // Handle constructor invocation, e.g. `new(field: value)`.
      if (FormatTok->is(tok::l_paren))
        parseParens();

      // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
      if (FormatTok->is(tok::l_brace))
        parseBracedList();

      if (FormatTok->isOneOf(tok::semi, tok::comma))
        return;

      nextToken();
    } while (!eof());
  }

  if (Style.Language != FormatStyle::LK_Java)
    return;

  // In Java, we can parse everything up to the parens, which aren't optional.
  do {
    // There should not be a ;, { or } before the new's open paren.
    if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
      return;

    // Consume the parens.
    if (FormatTok->is(tok::l_paren)) {
      parseParens();

      // If there is a class body of an anonymous class, consume that as child.
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      return;
    }
    nextToken();
  } while (!eof());
}

/// Parses the body of a loop, which is either a block or a single unbraced
/// statement.
///
/// \param KeepBraces If false, the braces of a block body are marked optional
/// unless this nesting level was flagged as too deep.
/// \param WrapRightBrace If true, the current line is ended after a block
/// body.
void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
  keepAncestorBraces();

  if (isBlockBegin(*FormatTok)) {
    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
    FormatToken *LeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
               /*MunchSemi=*/true, KeepBraces);
    setPreviousRBraceType(TT_ControlStatementRBrace);
    if (!KeepBraces) {
      assert(!NestedTooDeep.empty());
      if (!NestedTooDeep.back())
        markOptionalBraces(LeftBrace);
    }
    if (WrapRightBrace)
      addUnwrappedLine();
  } else {
    parseUnbracedBody();
  }

  // NOTE(review): keepAncestorBraces() pushes whenever Style.RemoveBracesLLVM
  // is set, but this pop depends on !KeepBraces. Callers that pass
  // KeepBraces=true while RemoveBracesLLVM is on (e.g. parseDoWhile) leave
  // their entry on the stack - confirm this is intended.
  if (!KeepBraces)
    NestedTooDeep.pop_back();
}

/// Parses a `for` or `while` loop, a foreach macro, or one of the Verilog
/// constructs listed in the assertion below.
///
/// \param HasParens If true, a parenthesized header is parsed when the next
/// token is `(`.
void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
  assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
          (Style.isVerilog() &&
           FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
                              Keywords.kw_always_ff, Keywords.kw_always_latch,
                              Keywords.kw_final, Keywords.kw_initial,
                              Keywords.kw_foreach, Keywords.kw_forever,
                              Keywords.kw_repeat))) &&
         "'for', 'while' or foreach macro expected");
  // Braces are only candidates for removal on plain `for` and `while` loops.
  const bool KeepBraces = !Style.RemoveBracesLLVM ||
                          !FormatTok->isOneOf(tok::kw_for, tok::kw_while);

  nextToken();
  // JS' for await ( ...
  if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
    nextToken();
  if (IsCpp && FormatTok->is(tok::kw_co_await))
    nextToken();
  if (HasParens && FormatTok->is(tok::l_paren)) {
    // The type is only set for Verilog basically because we were afraid to
    // change the existing behavior for loops. See the discussion on D121756 for
    // details.
    if (Style.isVerilog())
      FormatTok->setFinalizedType(TT_ConditionLParen);
    parseParens();
  }

  if (Style.isVerilog()) {
    // Event control.
    parseVerilogSensitivityList();
  } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) &&
             Tokens->getPreviousToken()->is(tok::r_paren)) {
    // A loop with an empty body (`;` right after the `)`) stays on the same
    // line as its header.
    nextToken();
    addUnwrappedLine();
    return;
  }

  handleAttributes();
  parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
}

void UnwrappedLineParser::parseDoWhile() {
  assert(FormatTok->is(tok::kw_do) && "'do' expected");
  nextToken();

  parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);

  // FIXME: Add error handling.
  if (FormatTok->isNot(tok::kw_while)) {
    addUnwrappedLine();
    return;
  }

  FormatTok->setFinalizedType(TT_DoWhile);

  // If in Whitesmiths mode, the line with the while() needs to be indented
  // to the same level as the block.
3253 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 3254 ++Line->Level; 3255 3256 nextToken(); 3257 parseStructuralElement(); 3258 } 3259 3260 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { 3261 nextToken(); 3262 unsigned OldLineLevel = Line->Level; 3263 3264 if (LeftAlignLabel) 3265 Line->Level = 0; 3266 else if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 3267 --Line->Level; 3268 3269 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && 3270 FormatTok->is(tok::l_brace)) { 3271 3272 CompoundStatementIndenter Indenter(this, Line->Level, 3273 Style.BraceWrapping.AfterCaseLabel, 3274 Style.BraceWrapping.IndentBraces); 3275 parseBlock(); 3276 if (FormatTok->is(tok::kw_break)) { 3277 if (Style.BraceWrapping.AfterControlStatement == 3278 FormatStyle::BWACS_Always) { 3279 addUnwrappedLine(); 3280 if (!Style.IndentCaseBlocks && 3281 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 3282 ++Line->Level; 3283 } 3284 } 3285 parseStructuralElement(); 3286 } 3287 addUnwrappedLine(); 3288 } else { 3289 if (FormatTok->is(tok::semi)) 3290 nextToken(); 3291 addUnwrappedLine(); 3292 } 3293 Line->Level = OldLineLevel; 3294 if (FormatTok->isNot(tok::l_brace)) { 3295 parseStructuralElement(); 3296 addUnwrappedLine(); 3297 } 3298 } 3299 3300 void UnwrappedLineParser::parseCaseLabel() { 3301 assert(FormatTok->is(tok::kw_case) && "'case' expected"); 3302 auto *Case = FormatTok; 3303 3304 // FIXME: fix handling of complex expressions here. 
3305 do { 3306 nextToken(); 3307 if (FormatTok->is(tok::colon)) { 3308 FormatTok->setFinalizedType(TT_CaseLabelColon); 3309 break; 3310 } 3311 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::arrow)) { 3312 FormatTok->setFinalizedType(TT_CaseLabelArrow); 3313 Case->setFinalizedType(TT_SwitchExpressionLabel); 3314 break; 3315 } 3316 } while (!eof()); 3317 parseLabel(); 3318 } 3319 3320 void UnwrappedLineParser::parseSwitch(bool IsExpr) { 3321 assert(FormatTok->is(tok::kw_switch) && "'switch' expected"); 3322 nextToken(); 3323 if (FormatTok->is(tok::l_paren)) 3324 parseParens(); 3325 3326 keepAncestorBraces(); 3327 3328 if (FormatTok->is(tok::l_brace)) { 3329 CompoundStatementIndenter Indenter(this, Style, Line->Level); 3330 FormatTok->setFinalizedType(IsExpr ? TT_SwitchExpressionLBrace 3331 : TT_ControlStatementLBrace); 3332 if (IsExpr) 3333 parseChildBlock(); 3334 else 3335 parseBlock(); 3336 setPreviousRBraceType(TT_ControlStatementRBrace); 3337 if (!IsExpr) 3338 addUnwrappedLine(); 3339 } else { 3340 addUnwrappedLine(); 3341 ++Line->Level; 3342 parseStructuralElement(); 3343 --Line->Level; 3344 } 3345 3346 if (Style.RemoveBracesLLVM) 3347 NestedTooDeep.pop_back(); 3348 } 3349 3350 // Operators that can follow a C variable. 
3351 static bool isCOperatorFollowingVar(tok::TokenKind Kind) { 3352 switch (Kind) { 3353 case tok::ampamp: 3354 case tok::ampequal: 3355 case tok::arrow: 3356 case tok::caret: 3357 case tok::caretequal: 3358 case tok::comma: 3359 case tok::ellipsis: 3360 case tok::equal: 3361 case tok::equalequal: 3362 case tok::exclaim: 3363 case tok::exclaimequal: 3364 case tok::greater: 3365 case tok::greaterequal: 3366 case tok::greatergreater: 3367 case tok::greatergreaterequal: 3368 case tok::l_paren: 3369 case tok::l_square: 3370 case tok::less: 3371 case tok::lessequal: 3372 case tok::lessless: 3373 case tok::lesslessequal: 3374 case tok::minus: 3375 case tok::minusequal: 3376 case tok::minusminus: 3377 case tok::percent: 3378 case tok::percentequal: 3379 case tok::period: 3380 case tok::pipe: 3381 case tok::pipeequal: 3382 case tok::pipepipe: 3383 case tok::plus: 3384 case tok::plusequal: 3385 case tok::plusplus: 3386 case tok::question: 3387 case tok::r_brace: 3388 case tok::r_paren: 3389 case tok::r_square: 3390 case tok::semi: 3391 case tok::slash: 3392 case tok::slashequal: 3393 case tok::star: 3394 case tok::starequal: 3395 return true; 3396 default: 3397 return false; 3398 } 3399 } 3400 3401 void UnwrappedLineParser::parseAccessSpecifier() { 3402 FormatToken *AccessSpecifierCandidate = FormatTok; 3403 nextToken(); 3404 // Understand Qt's slots. 3405 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 3406 nextToken(); 3407 // Otherwise, we don't know what it is, and we'd better keep the next token. 3408 if (FormatTok->is(tok::colon)) { 3409 nextToken(); 3410 addUnwrappedLine(); 3411 } else if (FormatTok->isNot(tok::coloncolon) && 3412 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) { 3413 // Not a variable name nor namespace name. 3414 addUnwrappedLine(); 3415 } else if (AccessSpecifierCandidate) { 3416 // Consider the access specifier to be a C identifier. 
3417 AccessSpecifierCandidate->Tok.setKind(tok::identifier); 3418 } 3419 } 3420 3421 /// \brief Parses a requires, decides if it is a clause or an expression. 3422 /// \pre The current token has to be the requires keyword. 3423 /// \returns true if it parsed a clause. 3424 bool UnwrappedLineParser::parseRequires() { 3425 assert(FormatTok->is(tok::kw_requires) && "'requires' expected"); 3426 auto RequiresToken = FormatTok; 3427 3428 // We try to guess if it is a requires clause, or a requires expression. For 3429 // that we first consume the keyword and check the next token. 3430 nextToken(); 3431 3432 switch (FormatTok->Tok.getKind()) { 3433 case tok::l_brace: 3434 // This can only be an expression, never a clause. 3435 parseRequiresExpression(RequiresToken); 3436 return false; 3437 case tok::l_paren: 3438 // Clauses and expression can start with a paren, it's unclear what we have. 3439 break; 3440 default: 3441 // All other tokens can only be a clause. 3442 parseRequiresClause(RequiresToken); 3443 return true; 3444 } 3445 3446 // Looking forward we would have to decide if there are function declaration 3447 // like arguments to the requires expression: 3448 // requires (T t) { 3449 // Or there is a constraint expression for the requires clause: 3450 // requires (C<T> && ... 3451 3452 // But first let's look behind. 3453 auto *PreviousNonComment = RequiresToken->getPreviousNonComment(); 3454 3455 if (!PreviousNonComment || 3456 PreviousNonComment->is(TT_RequiresExpressionLBrace)) { 3457 // If there is no token, or an expression left brace, we are a requires 3458 // clause within a requires expression. 3459 parseRequiresClause(RequiresToken); 3460 return true; 3461 } 3462 3463 switch (PreviousNonComment->Tok.getKind()) { 3464 case tok::greater: 3465 case tok::r_paren: 3466 case tok::kw_noexcept: 3467 case tok::kw_const: 3468 // This is a requires clause. 
3469 parseRequiresClause(RequiresToken); 3470 return true; 3471 case tok::amp: 3472 case tok::ampamp: { 3473 // This can be either: 3474 // if (... && requires (T t) ...) 3475 // Or 3476 // void member(...) && requires (C<T> ... 3477 // We check the one token before that for a const: 3478 // void member(...) const && requires (C<T> ... 3479 auto PrevPrev = PreviousNonComment->getPreviousNonComment(); 3480 if (PrevPrev && PrevPrev->is(tok::kw_const)) { 3481 parseRequiresClause(RequiresToken); 3482 return true; 3483 } 3484 break; 3485 } 3486 default: 3487 if (PreviousNonComment->isTypeOrIdentifier(LangOpts)) { 3488 // This is a requires clause. 3489 parseRequiresClause(RequiresToken); 3490 return true; 3491 } 3492 // It's an expression. 3493 parseRequiresExpression(RequiresToken); 3494 return false; 3495 } 3496 3497 // Now we look forward and try to check if the paren content is a parameter 3498 // list. The parameters can be cv-qualified and contain references or 3499 // pointers. 3500 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds 3501 // of stuff: typename, const, *, &, &&, ::, identifiers. 3502 3503 unsigned StoredPosition = Tokens->getPosition(); 3504 FormatToken *NextToken = Tokens->getNextToken(); 3505 int Lookahead = 0; 3506 auto PeekNext = [&Lookahead, &NextToken, this] { 3507 ++Lookahead; 3508 NextToken = Tokens->getNextToken(); 3509 }; 3510 3511 bool FoundType = false; 3512 bool LastWasColonColon = false; 3513 int OpenAngles = 0; 3514 3515 for (; Lookahead < 50; PeekNext()) { 3516 switch (NextToken->Tok.getKind()) { 3517 case tok::kw_volatile: 3518 case tok::kw_const: 3519 case tok::comma: 3520 if (OpenAngles == 0) { 3521 FormatTok = Tokens->setPosition(StoredPosition); 3522 parseRequiresExpression(RequiresToken); 3523 return false; 3524 } 3525 break; 3526 case tok::eof: 3527 // Break out of the loop. 
3528 Lookahead = 50; 3529 break; 3530 case tok::coloncolon: 3531 LastWasColonColon = true; 3532 break; 3533 case tok::kw_decltype: 3534 case tok::identifier: 3535 if (FoundType && !LastWasColonColon && OpenAngles == 0) { 3536 FormatTok = Tokens->setPosition(StoredPosition); 3537 parseRequiresExpression(RequiresToken); 3538 return false; 3539 } 3540 FoundType = true; 3541 LastWasColonColon = false; 3542 break; 3543 case tok::less: 3544 ++OpenAngles; 3545 break; 3546 case tok::greater: 3547 --OpenAngles; 3548 break; 3549 default: 3550 if (NextToken->isTypeName(LangOpts)) { 3551 FormatTok = Tokens->setPosition(StoredPosition); 3552 parseRequiresExpression(RequiresToken); 3553 return false; 3554 } 3555 break; 3556 } 3557 } 3558 // This seems to be a complicated expression, just assume it's a clause. 3559 FormatTok = Tokens->setPosition(StoredPosition); 3560 parseRequiresClause(RequiresToken); 3561 return true; 3562 } 3563 3564 /// \brief Parses a requires clause. 3565 /// \param RequiresToken The requires keyword token, which starts this clause. 3566 /// \pre We need to be on the next token after the requires keyword. 3567 /// \sa parseRequiresExpression 3568 /// 3569 /// Returns if it either has finished parsing the clause, or it detects, that 3570 /// the clause is incorrect. 3571 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) { 3572 assert(FormatTok->getPreviousNonComment() == RequiresToken); 3573 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); 3574 3575 // If there is no previous token, we are within a requires expression, 3576 // otherwise we will always have the template or function declaration in front 3577 // of it. 3578 bool InRequiresExpression = 3579 !RequiresToken->Previous || 3580 RequiresToken->Previous->is(TT_RequiresExpressionLBrace); 3581 3582 RequiresToken->setFinalizedType(InRequiresExpression 3583 ? 
TT_RequiresClauseInARequiresExpression 3584 : TT_RequiresClause); 3585 3586 // NOTE: parseConstraintExpression is only ever called from this function. 3587 // It could be inlined into here. 3588 parseConstraintExpression(); 3589 3590 if (!InRequiresExpression) 3591 FormatTok->Previous->ClosesRequiresClause = true; 3592 } 3593 3594 /// \brief Parses a requires expression. 3595 /// \param RequiresToken The requires keyword token, which starts this clause. 3596 /// \pre We need to be on the next token after the requires keyword. 3597 /// \sa parseRequiresClause 3598 /// 3599 /// Returns if it either has finished parsing the expression, or it detects, 3600 /// that the expression is incorrect. 3601 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) { 3602 assert(FormatTok->getPreviousNonComment() == RequiresToken); 3603 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); 3604 3605 RequiresToken->setFinalizedType(TT_RequiresExpression); 3606 3607 if (FormatTok->is(tok::l_paren)) { 3608 FormatTok->setFinalizedType(TT_RequiresExpressionLParen); 3609 parseParens(); 3610 } 3611 3612 if (FormatTok->is(tok::l_brace)) { 3613 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace); 3614 parseChildBlock(); 3615 } 3616 } 3617 3618 /// \brief Parses a constraint expression. 3619 /// 3620 /// This is the body of a requires clause. It returns, when the parsing is 3621 /// complete, or the expression is incorrect. 3622 void UnwrappedLineParser::parseConstraintExpression() { 3623 // The special handling for lambdas is needed since tryToParseLambda() eats a 3624 // token and if a requires expression is the last part of a requires clause 3625 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is 3626 // not set on the correct token. Thus we need to be aware if we even expect a 3627 // lambda to be possible. 3628 // template <typename T> requires requires { ... 
} [[nodiscard]] ...; 3629 bool LambdaNextTimeAllowed = true; 3630 3631 // Within lambda declarations, it is permitted to put a requires clause after 3632 // its template parameter list, which would place the requires clause right 3633 // before the parentheses of the parameters of the lambda declaration. Thus, 3634 // we track if we expect to see grouping parentheses at all. 3635 // Without this check, `requires foo<T> (T t)` in the below example would be 3636 // seen as the whole requires clause, accidentally eating the parameters of 3637 // the lambda. 3638 // [&]<typename T> requires foo<T> (T t) { ... }; 3639 bool TopLevelParensAllowed = true; 3640 3641 do { 3642 bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false); 3643 3644 switch (FormatTok->Tok.getKind()) { 3645 case tok::kw_requires: { 3646 auto RequiresToken = FormatTok; 3647 nextToken(); 3648 parseRequiresExpression(RequiresToken); 3649 break; 3650 } 3651 3652 case tok::l_paren: 3653 if (!TopLevelParensAllowed) 3654 return; 3655 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator); 3656 TopLevelParensAllowed = false; 3657 break; 3658 3659 case tok::l_square: 3660 if (!LambdaThisTimeAllowed || !tryToParseLambda()) 3661 return; 3662 break; 3663 3664 case tok::kw_const: 3665 case tok::semi: 3666 case tok::kw_class: 3667 case tok::kw_struct: 3668 case tok::kw_union: 3669 return; 3670 3671 case tok::l_brace: 3672 // Potential function body. 
3673 return; 3674 3675 case tok::ampamp: 3676 case tok::pipepipe: 3677 FormatTok->setFinalizedType(TT_BinaryOperator); 3678 nextToken(); 3679 LambdaNextTimeAllowed = true; 3680 TopLevelParensAllowed = true; 3681 break; 3682 3683 case tok::comma: 3684 case tok::comment: 3685 LambdaNextTimeAllowed = LambdaThisTimeAllowed; 3686 nextToken(); 3687 break; 3688 3689 case tok::kw_sizeof: 3690 case tok::greater: 3691 case tok::greaterequal: 3692 case tok::greatergreater: 3693 case tok::less: 3694 case tok::lessequal: 3695 case tok::lessless: 3696 case tok::equalequal: 3697 case tok::exclaim: 3698 case tok::exclaimequal: 3699 case tok::plus: 3700 case tok::minus: 3701 case tok::star: 3702 case tok::slash: 3703 LambdaNextTimeAllowed = true; 3704 TopLevelParensAllowed = true; 3705 // Just eat them. 3706 nextToken(); 3707 break; 3708 3709 case tok::numeric_constant: 3710 case tok::coloncolon: 3711 case tok::kw_true: 3712 case tok::kw_false: 3713 TopLevelParensAllowed = false; 3714 // Just eat them. 3715 nextToken(); 3716 break; 3717 3718 case tok::kw_static_cast: 3719 case tok::kw_const_cast: 3720 case tok::kw_reinterpret_cast: 3721 case tok::kw_dynamic_cast: 3722 nextToken(); 3723 if (FormatTok->isNot(tok::less)) 3724 return; 3725 3726 nextToken(); 3727 parseBracedList(/*IsAngleBracket=*/true); 3728 break; 3729 3730 default: 3731 if (!FormatTok->Tok.getIdentifierInfo()) { 3732 // Identifiers are part of the default case, we check for more then 3733 // tok::identifier to handle builtin type traits. 3734 return; 3735 } 3736 3737 // We need to differentiate identifiers for a template deduction guide, 3738 // variables, or function return types (the constraint expression has 3739 // ended before that), and basically all other cases. But it's easier to 3740 // check the other way around. 3741 assert(FormatTok->Previous); 3742 switch (FormatTok->Previous->Tok.getKind()) { 3743 case tok::coloncolon: // Nested identifier. 
3744 case tok::ampamp: // Start of a function or variable for the 3745 case tok::pipepipe: // constraint expression. (binary) 3746 case tok::exclaim: // The same as above, but unary. 3747 case tok::kw_requires: // Initial identifier of a requires clause. 3748 case tok::equal: // Initial identifier of a concept declaration. 3749 break; 3750 default: 3751 return; 3752 } 3753 3754 // Read identifier with optional template declaration. 3755 nextToken(); 3756 if (FormatTok->is(tok::less)) { 3757 nextToken(); 3758 parseBracedList(/*IsAngleBracket=*/true); 3759 } 3760 TopLevelParensAllowed = false; 3761 break; 3762 } 3763 } while (!eof()); 3764 } 3765 3766 bool UnwrappedLineParser::parseEnum() { 3767 const FormatToken &InitialToken = *FormatTok; 3768 3769 // Won't be 'enum' for NS_ENUMs. 3770 if (FormatTok->is(tok::kw_enum)) 3771 nextToken(); 3772 3773 // In TypeScript, "enum" can also be used as property name, e.g. in interface 3774 // declarations. An "enum" keyword followed by a colon would be a syntax 3775 // error and thus assume it is just an identifier. 3776 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question)) 3777 return false; 3778 3779 // In protobuf, "enum" can be used as a field name. 3780 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) 3781 return false; 3782 3783 if (IsCpp) { 3784 // Eat up enum class ... 3785 if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct)) 3786 nextToken(); 3787 while (FormatTok->is(tok::l_square)) 3788 if (!handleCppAttributes()) 3789 return false; 3790 } 3791 3792 while (FormatTok->Tok.getIdentifierInfo() || 3793 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 3794 tok::greater, tok::comma, tok::question, 3795 tok::l_square)) { 3796 if (Style.isVerilog()) { 3797 FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName); 3798 nextToken(); 3799 // In Verilog the base type can have dimensions. 
3800 while (FormatTok->is(tok::l_square)) 3801 parseSquare(); 3802 } else { 3803 nextToken(); 3804 } 3805 // We can have macros or attributes in between 'enum' and the enum name. 3806 if (FormatTok->is(tok::l_paren)) 3807 parseParens(); 3808 if (FormatTok->is(tok::identifier)) { 3809 nextToken(); 3810 // If there are two identifiers in a row, this is likely an elaborate 3811 // return type. In Java, this can be "implements", etc. 3812 if (IsCpp && FormatTok->is(tok::identifier)) 3813 return false; 3814 } 3815 } 3816 3817 // Just a declaration or something is wrong. 3818 if (FormatTok->isNot(tok::l_brace)) 3819 return true; 3820 FormatTok->setFinalizedType(TT_EnumLBrace); 3821 FormatTok->setBlockKind(BK_Block); 3822 3823 if (Style.Language == FormatStyle::LK_Java) { 3824 // Java enums are different. 3825 parseJavaEnumBody(); 3826 return true; 3827 } 3828 if (Style.Language == FormatStyle::LK_Proto) { 3829 parseBlock(/*MustBeDeclaration=*/true); 3830 return true; 3831 } 3832 3833 if (!Style.AllowShortEnumsOnASingleLine && 3834 ShouldBreakBeforeBrace(Style, InitialToken)) { 3835 addUnwrappedLine(); 3836 } 3837 // Parse enum body. 3838 nextToken(); 3839 if (!Style.AllowShortEnumsOnASingleLine) { 3840 addUnwrappedLine(); 3841 Line->Level += 1; 3842 } 3843 bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true); 3844 if (!Style.AllowShortEnumsOnASingleLine) 3845 Line->Level -= 1; 3846 if (HasError) { 3847 if (FormatTok->is(tok::semi)) 3848 nextToken(); 3849 addUnwrappedLine(); 3850 } 3851 setPreviousRBraceType(TT_EnumRBrace); 3852 return true; 3853 3854 // There is no addUnwrappedLine() here so that we fall through to parsing a 3855 // structural element afterwards. Thus, in "enum A {} n, m;", 3856 // "} n, m;" will end up in one unwrapped line. 
3857 } 3858 3859 bool UnwrappedLineParser::parseStructLike() { 3860 // parseRecord falls through and does not yet add an unwrapped line as a 3861 // record declaration or definition can start a structural element. 3862 parseRecord(); 3863 // This does not apply to Java, JavaScript and C#. 3864 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 3865 Style.isCSharp()) { 3866 if (FormatTok->is(tok::semi)) 3867 nextToken(); 3868 addUnwrappedLine(); 3869 return true; 3870 } 3871 return false; 3872 } 3873 3874 namespace { 3875 // A class used to set and restore the Token position when peeking 3876 // ahead in the token source. 3877 class ScopedTokenPosition { 3878 unsigned StoredPosition; 3879 FormatTokenSource *Tokens; 3880 3881 public: 3882 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) { 3883 assert(Tokens && "Tokens expected to not be null"); 3884 StoredPosition = Tokens->getPosition(); 3885 } 3886 3887 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); } 3888 }; 3889 } // namespace 3890 3891 // Look to see if we have [[ by looking ahead, if 3892 // its not then rewind to the original position. 3893 bool UnwrappedLineParser::tryToParseSimpleAttribute() { 3894 ScopedTokenPosition AutoPosition(Tokens); 3895 FormatToken *Tok = Tokens->getNextToken(); 3896 // We already read the first [ check for the second. 3897 if (Tok->isNot(tok::l_square)) 3898 return false; 3899 // Double check that the attribute is just something 3900 // fairly simple. 
3901 while (Tok->isNot(tok::eof)) { 3902 if (Tok->is(tok::r_square)) 3903 break; 3904 Tok = Tokens->getNextToken(); 3905 } 3906 if (Tok->is(tok::eof)) 3907 return false; 3908 Tok = Tokens->getNextToken(); 3909 if (Tok->isNot(tok::r_square)) 3910 return false; 3911 Tok = Tokens->getNextToken(); 3912 if (Tok->is(tok::semi)) 3913 return false; 3914 return true; 3915 } 3916 3917 void UnwrappedLineParser::parseJavaEnumBody() { 3918 assert(FormatTok->is(tok::l_brace)); 3919 const FormatToken *OpeningBrace = FormatTok; 3920 3921 // Determine whether the enum is simple, i.e. does not have a semicolon or 3922 // constants with class bodies. Simple enums can be formatted like braced 3923 // lists, contracted to a single line, etc. 3924 unsigned StoredPosition = Tokens->getPosition(); 3925 bool IsSimple = true; 3926 FormatToken *Tok = Tokens->getNextToken(); 3927 while (Tok->isNot(tok::eof)) { 3928 if (Tok->is(tok::r_brace)) 3929 break; 3930 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 3931 IsSimple = false; 3932 break; 3933 } 3934 // FIXME: This will also mark enums with braces in the arguments to enum 3935 // constants as "not simple". This is probably fine in practice, though. 3936 Tok = Tokens->getNextToken(); 3937 } 3938 FormatTok = Tokens->setPosition(StoredPosition); 3939 3940 if (IsSimple) { 3941 nextToken(); 3942 parseBracedList(); 3943 addUnwrappedLine(); 3944 return; 3945 } 3946 3947 // Parse the body of a more complex enum. 3948 // First add a line for everything up to the "{". 3949 nextToken(); 3950 addUnwrappedLine(); 3951 ++Line->Level; 3952 3953 // Parse the enum constants. 3954 while (!eof()) { 3955 if (FormatTok->is(tok::l_brace)) { 3956 // Parse the constant's class body. 
3957 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u, 3958 /*MunchSemi=*/false); 3959 } else if (FormatTok->is(tok::l_paren)) { 3960 parseParens(); 3961 } else if (FormatTok->is(tok::comma)) { 3962 nextToken(); 3963 addUnwrappedLine(); 3964 } else if (FormatTok->is(tok::semi)) { 3965 nextToken(); 3966 addUnwrappedLine(); 3967 break; 3968 } else if (FormatTok->is(tok::r_brace)) { 3969 addUnwrappedLine(); 3970 break; 3971 } else { 3972 nextToken(); 3973 } 3974 } 3975 3976 // Parse the class body after the enum's ";" if any. 3977 parseLevel(OpeningBrace); 3978 nextToken(); 3979 --Line->Level; 3980 addUnwrappedLine(); 3981 } 3982 3983 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 3984 const FormatToken &InitialToken = *FormatTok; 3985 nextToken(); 3986 3987 const FormatToken *ClassName = nullptr; 3988 bool IsDerived = false; 3989 auto IsNonMacroIdentifier = [](const FormatToken *Tok) { 3990 return Tok->is(tok::identifier) && Tok->TokenText != Tok->TokenText.upper(); 3991 }; 3992 // JavaScript/TypeScript supports anonymous classes like: 3993 // a = class extends foo { } 3994 bool JSPastExtendsOrImplements = false; 3995 // The actual identifier can be a nested name specifier, and in macros 3996 // it is often token-pasted. 3997 // An [[attribute]] can be before the identifier. 
3998 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 3999 tok::kw_alignas, tok::l_square) || 4000 FormatTok->isAttribute() || 4001 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 4002 FormatTok->isOneOf(tok::period, tok::comma))) { 4003 if (Style.isJavaScript() && 4004 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 4005 JSPastExtendsOrImplements = true; 4006 // JavaScript/TypeScript supports inline object types in 4007 // extends/implements positions: 4008 // class Foo implements {bar: number} { } 4009 nextToken(); 4010 if (FormatTok->is(tok::l_brace)) { 4011 tryToParseBracedList(); 4012 continue; 4013 } 4014 } 4015 if (FormatTok->is(tok::l_square) && handleCppAttributes()) 4016 continue; 4017 const auto *Previous = FormatTok; 4018 nextToken(); 4019 switch (FormatTok->Tok.getKind()) { 4020 case tok::l_paren: 4021 // We can have macros in between 'class' and the class name. 4022 if (!IsNonMacroIdentifier(Previous) || 4023 // e.g. 
`struct macro(a) S { int i; };` 4024 Previous->Previous == &InitialToken) { 4025 parseParens(); 4026 } 4027 break; 4028 case tok::coloncolon: 4029 case tok::hashhash: 4030 break; 4031 default: 4032 if (!JSPastExtendsOrImplements && !ClassName && 4033 Previous->is(tok::identifier) && Previous->isNot(TT_AttributeMacro)) { 4034 ClassName = Previous; 4035 } 4036 } 4037 } 4038 4039 auto IsListInitialization = [&] { 4040 if (!ClassName || IsDerived) 4041 return false; 4042 assert(FormatTok->is(tok::l_brace)); 4043 const auto *Prev = FormatTok->getPreviousNonComment(); 4044 assert(Prev); 4045 return Prev != ClassName && Prev->is(tok::identifier) && 4046 Prev->isNot(Keywords.kw_final) && tryToParseBracedList(); 4047 }; 4048 4049 if (FormatTok->isOneOf(tok::colon, tok::less)) { 4050 int AngleNestingLevel = 0; 4051 do { 4052 if (FormatTok->is(tok::less)) 4053 ++AngleNestingLevel; 4054 else if (FormatTok->is(tok::greater)) 4055 --AngleNestingLevel; 4056 4057 if (AngleNestingLevel == 0) { 4058 if (FormatTok->is(tok::colon)) { 4059 IsDerived = true; 4060 } else if (FormatTok->is(tok::identifier) && 4061 FormatTok->Previous->is(tok::coloncolon)) { 4062 ClassName = FormatTok; 4063 } else if (FormatTok->is(tok::l_paren) && 4064 IsNonMacroIdentifier(FormatTok->Previous)) { 4065 break; 4066 } 4067 } 4068 if (FormatTok->is(tok::l_brace)) { 4069 if (AngleNestingLevel == 0 && IsListInitialization()) 4070 return; 4071 calculateBraceTypes(/*ExpectClassBody=*/true); 4072 if (!tryToParseBracedList()) 4073 break; 4074 } 4075 if (FormatTok->is(tok::l_square)) { 4076 FormatToken *Previous = FormatTok->Previous; 4077 if (!Previous || (Previous->isNot(tok::r_paren) && 4078 !Previous->isTypeOrIdentifier(LangOpts))) { 4079 // Don't try parsing a lambda if we had a closing parenthesis before, 4080 // it was probably a pointer to an array: int (*)[]. 
4081 if (!tryToParseLambda()) 4082 continue; 4083 } else { 4084 parseSquare(); 4085 continue; 4086 } 4087 } 4088 if (FormatTok->is(tok::semi)) 4089 return; 4090 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) { 4091 addUnwrappedLine(); 4092 nextToken(); 4093 parseCSharpGenericTypeConstraint(); 4094 break; 4095 } 4096 nextToken(); 4097 } while (!eof()); 4098 } 4099 4100 auto GetBraceTypes = 4101 [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> { 4102 switch (RecordTok.Tok.getKind()) { 4103 case tok::kw_class: 4104 return {TT_ClassLBrace, TT_ClassRBrace}; 4105 case tok::kw_struct: 4106 return {TT_StructLBrace, TT_StructRBrace}; 4107 case tok::kw_union: 4108 return {TT_UnionLBrace, TT_UnionRBrace}; 4109 default: 4110 // Useful for e.g. interface. 4111 return {TT_RecordLBrace, TT_RecordRBrace}; 4112 } 4113 }; 4114 if (FormatTok->is(tok::l_brace)) { 4115 if (IsListInitialization()) 4116 return; 4117 auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken); 4118 FormatTok->setFinalizedType(OpenBraceType); 4119 if (ParseAsExpr) { 4120 parseChildBlock(); 4121 } else { 4122 if (ShouldBreakBeforeBrace(Style, InitialToken)) 4123 addUnwrappedLine(); 4124 4125 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u; 4126 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false); 4127 } 4128 setPreviousRBraceType(ClosingBraceType); 4129 } 4130 // There is no addUnwrappedLine() here so that we fall through to parsing a 4131 // structural element afterwards. Thus, in "class A {} n, m;", 4132 // "} n, m;" will end up in one unwrapped line. 
4133 } 4134 4135 void UnwrappedLineParser::parseObjCMethod() { 4136 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) && 4137 "'(' or identifier expected."); 4138 do { 4139 if (FormatTok->is(tok::semi)) { 4140 nextToken(); 4141 addUnwrappedLine(); 4142 return; 4143 } else if (FormatTok->is(tok::l_brace)) { 4144 if (Style.BraceWrapping.AfterFunction) 4145 addUnwrappedLine(); 4146 parseBlock(); 4147 addUnwrappedLine(); 4148 return; 4149 } else { 4150 nextToken(); 4151 } 4152 } while (!eof()); 4153 } 4154 4155 void UnwrappedLineParser::parseObjCProtocolList() { 4156 assert(FormatTok->is(tok::less) && "'<' expected."); 4157 do { 4158 nextToken(); 4159 // Early exit in case someone forgot a close angle. 4160 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 4161 FormatTok->isObjCAtKeyword(tok::objc_end)) { 4162 return; 4163 } 4164 } while (!eof() && FormatTok->isNot(tok::greater)); 4165 nextToken(); // Skip '>'. 4166 } 4167 4168 void UnwrappedLineParser::parseObjCUntilAtEnd() { 4169 do { 4170 if (FormatTok->isObjCAtKeyword(tok::objc_end)) { 4171 nextToken(); 4172 addUnwrappedLine(); 4173 break; 4174 } 4175 if (FormatTok->is(tok::l_brace)) { 4176 parseBlock(); 4177 // In ObjC interfaces, nothing should be following the "}". 4178 addUnwrappedLine(); 4179 } else if (FormatTok->is(tok::r_brace)) { 4180 // Ignore stray "}". parseStructuralElement doesn't consume them. 
4181 nextToken(); 4182 addUnwrappedLine(); 4183 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 4184 nextToken(); 4185 parseObjCMethod(); 4186 } else { 4187 parseStructuralElement(); 4188 } 4189 } while (!eof()); 4190 } 4191 4192 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 4193 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 4194 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 4195 nextToken(); 4196 nextToken(); // interface name 4197 4198 // @interface can be followed by a lightweight generic 4199 // specialization list, then either a base class or a category. 4200 if (FormatTok->is(tok::less)) 4201 parseObjCLightweightGenerics(); 4202 if (FormatTok->is(tok::colon)) { 4203 nextToken(); 4204 nextToken(); // base class name 4205 // The base class can also have lightweight generics applied to it. 4206 if (FormatTok->is(tok::less)) 4207 parseObjCLightweightGenerics(); 4208 } else if (FormatTok->is(tok::l_paren)) { 4209 // Skip category, if present. 4210 parseParens(); 4211 } 4212 4213 if (FormatTok->is(tok::less)) 4214 parseObjCProtocolList(); 4215 4216 if (FormatTok->is(tok::l_brace)) { 4217 if (Style.BraceWrapping.AfterObjCDeclaration) 4218 addUnwrappedLine(); 4219 parseBlock(/*MustBeDeclaration=*/true); 4220 } 4221 4222 // With instance variables, this puts '}' on its own line. Without instance 4223 // variables, this ends the @interface line. 4224 addUnwrappedLine(); 4225 4226 parseObjCUntilAtEnd(); 4227 } 4228 4229 void UnwrappedLineParser::parseObjCLightweightGenerics() { 4230 assert(FormatTok->is(tok::less)); 4231 // Unlike protocol lists, generic parameterizations support 4232 // nested angles: 4233 // 4234 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 4235 // NSObject <NSCopying, NSSecureCoding> 4236 // 4237 // so we need to count how many open angles we have left. 
4238 unsigned NumOpenAngles = 1; 4239 do { 4240 nextToken(); 4241 // Early exit in case someone forgot a close angle. 4242 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 4243 FormatTok->isObjCAtKeyword(tok::objc_end)) { 4244 break; 4245 } 4246 if (FormatTok->is(tok::less)) { 4247 ++NumOpenAngles; 4248 } else if (FormatTok->is(tok::greater)) { 4249 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 4250 --NumOpenAngles; 4251 } 4252 } while (!eof() && NumOpenAngles != 0); 4253 nextToken(); // Skip '>'. 4254 } 4255 4256 // Returns true for the declaration/definition form of @protocol, 4257 // false for the expression form. 4258 bool UnwrappedLineParser::parseObjCProtocol() { 4259 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 4260 nextToken(); 4261 4262 if (FormatTok->is(tok::l_paren)) { 4263 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 4264 return false; 4265 } 4266 4267 // The definition/declaration form, 4268 // @protocol Foo 4269 // - (int)someMethod; 4270 // @end 4271 4272 nextToken(); // protocol name 4273 4274 if (FormatTok->is(tok::less)) 4275 parseObjCProtocolList(); 4276 4277 // Check for protocol declaration. 4278 if (FormatTok->is(tok::semi)) { 4279 nextToken(); 4280 addUnwrappedLine(); 4281 return true; 4282 } 4283 4284 addUnwrappedLine(); 4285 parseObjCUntilAtEnd(); 4286 return true; 4287 } 4288 4289 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 4290 bool IsImport = FormatTok->is(Keywords.kw_import); 4291 assert(IsImport || FormatTok->is(tok::kw_export)); 4292 nextToken(); 4293 4294 // Consume the "default" in "export default class/function". 4295 if (FormatTok->is(tok::kw_default)) 4296 nextToken(); 4297 4298 // Consume "async function", "function" and "default function", so that these 4299 // get parsed as free-standing JS functions, i.e. do not require a trailing 4300 // semicolon. 
4301 if (FormatTok->is(Keywords.kw_async)) 4302 nextToken(); 4303 if (FormatTok->is(Keywords.kw_function)) { 4304 nextToken(); 4305 return; 4306 } 4307 4308 // For imports, `export *`, `export {...}`, consume the rest of the line up 4309 // to the terminating `;`. For everything else, just return and continue 4310 // parsing the structural element, i.e. the declaration or expression for 4311 // `export default`. 4312 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 4313 !FormatTok->isStringLiteral() && 4314 !(FormatTok->is(Keywords.kw_type) && 4315 Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) { 4316 return; 4317 } 4318 4319 while (!eof()) { 4320 if (FormatTok->is(tok::semi)) 4321 return; 4322 if (Line->Tokens.empty()) { 4323 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 4324 // import statement should terminate. 4325 return; 4326 } 4327 if (FormatTok->is(tok::l_brace)) { 4328 FormatTok->setBlockKind(BK_Block); 4329 nextToken(); 4330 parseBracedList(); 4331 } else { 4332 nextToken(); 4333 } 4334 } 4335 } 4336 4337 void UnwrappedLineParser::parseStatementMacro() { 4338 nextToken(); 4339 if (FormatTok->is(tok::l_paren)) 4340 parseParens(); 4341 if (FormatTok->is(tok::semi)) 4342 nextToken(); 4343 addUnwrappedLine(); 4344 } 4345 4346 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() { 4347 // consume things like a::`b.c[d:e] or a::* 4348 while (true) { 4349 if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar, 4350 tok::coloncolon, tok::hash) || 4351 Keywords.isVerilogIdentifier(*FormatTok)) { 4352 nextToken(); 4353 } else if (FormatTok->is(tok::l_square)) { 4354 parseSquare(); 4355 } else { 4356 break; 4357 } 4358 } 4359 } 4360 4361 void UnwrappedLineParser::parseVerilogSensitivityList() { 4362 if (FormatTok->isNot(tok::at)) 4363 return; 4364 nextToken(); 4365 // A block event expression has 2 at signs. 
4366 if (FormatTok->is(tok::at)) 4367 nextToken(); 4368 switch (FormatTok->Tok.getKind()) { 4369 case tok::star: 4370 nextToken(); 4371 break; 4372 case tok::l_paren: 4373 parseParens(); 4374 break; 4375 default: 4376 parseVerilogHierarchyIdentifier(); 4377 break; 4378 } 4379 } 4380 4381 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() { 4382 unsigned AddLevels = 0; 4383 4384 if (FormatTok->is(Keywords.kw_clocking)) { 4385 nextToken(); 4386 if (Keywords.isVerilogIdentifier(*FormatTok)) 4387 nextToken(); 4388 parseVerilogSensitivityList(); 4389 if (FormatTok->is(tok::semi)) 4390 nextToken(); 4391 } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex, 4392 Keywords.kw_casez, Keywords.kw_randcase, 4393 Keywords.kw_randsequence)) { 4394 if (Style.IndentCaseLabels) 4395 AddLevels++; 4396 nextToken(); 4397 if (FormatTok->is(tok::l_paren)) { 4398 FormatTok->setFinalizedType(TT_ConditionLParen); 4399 parseParens(); 4400 } 4401 if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches)) 4402 nextToken(); 4403 // The case header has no semicolon. 4404 } else { 4405 // "module" etc. 
4406 nextToken(); 4407 // all the words like the name of the module and specifiers like 4408 // "automatic" and the width of function return type 4409 while (true) { 4410 if (FormatTok->is(tok::l_square)) { 4411 auto Prev = FormatTok->getPreviousNonComment(); 4412 if (Prev && Keywords.isVerilogIdentifier(*Prev)) 4413 Prev->setFinalizedType(TT_VerilogDimensionedTypeName); 4414 parseSquare(); 4415 } else if (Keywords.isVerilogIdentifier(*FormatTok) || 4416 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) { 4417 nextToken(); 4418 } else { 4419 break; 4420 } 4421 } 4422 4423 auto NewLine = [this]() { 4424 addUnwrappedLine(); 4425 Line->IsContinuation = true; 4426 }; 4427 4428 // package imports 4429 while (FormatTok->is(Keywords.kw_import)) { 4430 NewLine(); 4431 nextToken(); 4432 parseVerilogHierarchyIdentifier(); 4433 if (FormatTok->is(tok::semi)) 4434 nextToken(); 4435 } 4436 4437 // parameters and ports 4438 if (FormatTok->is(Keywords.kw_verilogHash)) { 4439 NewLine(); 4440 nextToken(); 4441 if (FormatTok->is(tok::l_paren)) { 4442 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen); 4443 parseParens(); 4444 } 4445 } 4446 if (FormatTok->is(tok::l_paren)) { 4447 NewLine(); 4448 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen); 4449 parseParens(); 4450 } 4451 4452 // extends and implements 4453 if (FormatTok->is(Keywords.kw_extends)) { 4454 NewLine(); 4455 nextToken(); 4456 parseVerilogHierarchyIdentifier(); 4457 if (FormatTok->is(tok::l_paren)) 4458 parseParens(); 4459 } 4460 if (FormatTok->is(Keywords.kw_implements)) { 4461 NewLine(); 4462 do { 4463 nextToken(); 4464 parseVerilogHierarchyIdentifier(); 4465 } while (FormatTok->is(tok::comma)); 4466 } 4467 4468 // Coverage event for cover groups. 
4469 if (FormatTok->is(tok::at)) { 4470 NewLine(); 4471 parseVerilogSensitivityList(); 4472 } 4473 4474 if (FormatTok->is(tok::semi)) 4475 nextToken(/*LevelDifference=*/1); 4476 addUnwrappedLine(); 4477 } 4478 4479 return AddLevels; 4480 } 4481 4482 void UnwrappedLineParser::parseVerilogTable() { 4483 assert(FormatTok->is(Keywords.kw_table)); 4484 nextToken(/*LevelDifference=*/1); 4485 addUnwrappedLine(); 4486 4487 auto InitialLevel = Line->Level++; 4488 while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) { 4489 FormatToken *Tok = FormatTok; 4490 nextToken(); 4491 if (Tok->is(tok::semi)) 4492 addUnwrappedLine(); 4493 else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus)) 4494 Tok->setFinalizedType(TT_VerilogTableItem); 4495 } 4496 Line->Level = InitialLevel; 4497 nextToken(/*LevelDifference=*/-1); 4498 addUnwrappedLine(); 4499 } 4500 4501 void UnwrappedLineParser::parseVerilogCaseLabel() { 4502 // The label will get unindented in AnnotatingParser. If there are no leading 4503 // spaces, indent the rest here so that things inside the block will be 4504 // indented relative to things outside. We don't use parseLabel because we 4505 // don't know whether this colon is a label or a ternary expression at this 4506 // point. 4507 auto OrigLevel = Line->Level; 4508 auto FirstLine = CurrentLines->size(); 4509 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1)) 4510 ++Line->Level; 4511 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok)) 4512 --Line->Level; 4513 parseStructuralElement(); 4514 // Restore the indentation in both the new line and the line that has the 4515 // label. 
4516 if (CurrentLines->size() > FirstLine) 4517 (*CurrentLines)[FirstLine].Level = OrigLevel; 4518 Line->Level = OrigLevel; 4519 } 4520 4521 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const { 4522 for (const auto &N : Line.Tokens) { 4523 if (N.Tok->MacroCtx) 4524 return true; 4525 for (const UnwrappedLine &Child : N.Children) 4526 if (containsExpansion(Child)) 4527 return true; 4528 } 4529 return false; 4530 } 4531 4532 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { 4533 if (Line->Tokens.empty()) 4534 return; 4535 LLVM_DEBUG({ 4536 if (!parsingPPDirective()) { 4537 llvm::dbgs() << "Adding unwrapped line:\n"; 4538 printDebugInfo(*Line); 4539 } 4540 }); 4541 4542 // If this line closes a block when in Whitesmiths mode, remember that 4543 // information so that the level can be decreased after the line is added. 4544 // This has to happen after the addition of the line since the line itself 4545 // needs to be indented. 4546 bool ClosesWhitesmithsBlock = 4547 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && 4548 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 4549 4550 // If the current line was expanded from a macro call, we use it to 4551 // reconstruct an unwrapped line from the structure of the expanded unwrapped 4552 // line and the unexpanded token stream. 4553 if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) { 4554 if (!Reconstruct) 4555 Reconstruct.emplace(Line->Level, Unexpanded); 4556 Reconstruct->addLine(*Line); 4557 4558 // While the reconstructed unexpanded lines are stored in the normal 4559 // flow of lines, the expanded lines are stored on the side to be analyzed 4560 // in an extra step. 
4561 CurrentExpandedLines.push_back(std::move(*Line)); 4562 4563 if (Reconstruct->finished()) { 4564 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult(); 4565 assert(!Reconstructed.Tokens.empty() && 4566 "Reconstructed must at least contain the macro identifier."); 4567 assert(!parsingPPDirective()); 4568 LLVM_DEBUG({ 4569 llvm::dbgs() << "Adding unexpanded line:\n"; 4570 printDebugInfo(Reconstructed); 4571 }); 4572 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines; 4573 Lines.push_back(std::move(Reconstructed)); 4574 CurrentExpandedLines.clear(); 4575 Reconstruct.reset(); 4576 } 4577 } else { 4578 // At the top level we only get here when no unexpansion is going on, or 4579 // when conditional formatting led to unfinished macro reconstructions. 4580 assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0); 4581 CurrentLines->push_back(std::move(*Line)); 4582 } 4583 Line->Tokens.clear(); 4584 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 4585 Line->FirstStartColumn = 0; 4586 Line->IsContinuation = false; 4587 Line->SeenDecltypeAuto = false; 4588 4589 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) 4590 --Line->Level; 4591 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) { 4592 CurrentLines->append( 4593 std::make_move_iterator(PreprocessorDirectives.begin()), 4594 std::make_move_iterator(PreprocessorDirectives.end())); 4595 PreprocessorDirectives.clear(); 4596 } 4597 // Disconnect the current token from the last token on the previous line. 4598 FormatTok->Previous = nullptr; 4599 } 4600 4601 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); } 4602 4603 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 4604 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 4605 FormatTok.NewlinesBefore > 0; 4606 } 4607 4608 // Checks if \p FormatTok is a line comment that continues the line comment 4609 // section on \p Line. 
4610 static bool 4611 continuesLineCommentSection(const FormatToken &FormatTok, 4612 const UnwrappedLine &Line, 4613 const llvm::Regex &CommentPragmasRegex) { 4614 if (Line.Tokens.empty()) 4615 return false; 4616 4617 StringRef IndentContent = FormatTok.TokenText; 4618 if (FormatTok.TokenText.starts_with("//") || 4619 FormatTok.TokenText.starts_with("/*")) { 4620 IndentContent = FormatTok.TokenText.substr(2); 4621 } 4622 if (CommentPragmasRegex.match(IndentContent)) 4623 return false; 4624 4625 // If Line starts with a line comment, then FormatTok continues the comment 4626 // section if its original column is greater or equal to the original start 4627 // column of the line. 4628 // 4629 // Define the min column token of a line as follows: if a line ends in '{' or 4630 // contains a '{' followed by a line comment, then the min column token is 4631 // that '{'. Otherwise, the min column token of the line is the first token of 4632 // the line. 4633 // 4634 // If Line starts with a token other than a line comment, then FormatTok 4635 // continues the comment section if its original column is greater than the 4636 // original start column of the min column token of the line. 
4637 // 4638 // For example, the second line comment continues the first in these cases: 4639 // 4640 // // first line 4641 // // second line 4642 // 4643 // and: 4644 // 4645 // // first line 4646 // // second line 4647 // 4648 // and: 4649 // 4650 // int i; // first line 4651 // // second line 4652 // 4653 // and: 4654 // 4655 // do { // first line 4656 // // second line 4657 // int i; 4658 // } while (true); 4659 // 4660 // and: 4661 // 4662 // enum { 4663 // a, // first line 4664 // // second line 4665 // b 4666 // }; 4667 // 4668 // The second line comment doesn't continue the first in these cases: 4669 // 4670 // // first line 4671 // // second line 4672 // 4673 // and: 4674 // 4675 // int i; // first line 4676 // // second line 4677 // 4678 // and: 4679 // 4680 // do { // first line 4681 // // second line 4682 // int i; 4683 // } while (true); 4684 // 4685 // and: 4686 // 4687 // enum { 4688 // a, // first line 4689 // // second line 4690 // }; 4691 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 4692 4693 // Scan for '{//'. If found, use the column of '{' as a min column for line 4694 // comment section continuation. 4695 const FormatToken *PreviousToken = nullptr; 4696 for (const UnwrappedLineNode &Node : Line.Tokens) { 4697 if (PreviousToken && PreviousToken->is(tok::l_brace) && 4698 isLineComment(*Node.Tok)) { 4699 MinColumnToken = PreviousToken; 4700 break; 4701 } 4702 PreviousToken = Node.Tok; 4703 4704 // Grab the last newline preceding a token in this unwrapped line. 
4705 if (Node.Tok->NewlinesBefore > 0) 4706 MinColumnToken = Node.Tok; 4707 } 4708 if (PreviousToken && PreviousToken->is(tok::l_brace)) 4709 MinColumnToken = PreviousToken; 4710 4711 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 4712 MinColumnToken); 4713 } 4714 4715 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 4716 bool JustComments = Line->Tokens.empty(); 4717 for (FormatToken *Tok : CommentsBeforeNextToken) { 4718 // Line comments that belong to the same line comment section are put on the 4719 // same line since later we might want to reflow content between them. 4720 // Additional fine-grained breaking of line comment sections is controlled 4721 // by the class BreakableLineCommentSection in case it is desirable to keep 4722 // several line comment sections in the same unwrapped line. 4723 // 4724 // FIXME: Consider putting separate line comment sections as children to the 4725 // unwrapped line instead. 4726 Tok->ContinuesLineCommentSection = 4727 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex); 4728 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection) 4729 addUnwrappedLine(); 4730 pushToken(Tok); 4731 } 4732 if (NewlineBeforeNext && JustComments) 4733 addUnwrappedLine(); 4734 CommentsBeforeNextToken.clear(); 4735 } 4736 4737 void UnwrappedLineParser::nextToken(int LevelDifference) { 4738 if (eof()) 4739 return; 4740 flushComments(isOnNewLine(*FormatTok)); 4741 pushToken(FormatTok); 4742 FormatToken *Previous = FormatTok; 4743 if (!Style.isJavaScript()) 4744 readToken(LevelDifference); 4745 else 4746 readTokenWithJavaScriptASI(); 4747 FormatTok->Previous = Previous; 4748 if (Style.isVerilog()) { 4749 // Blocks in Verilog can have `begin` and `end` instead of braces. For 4750 // keywords like `begin`, we can't treat them the same as left braces 4751 // because some contexts require one of them. 
For example structs use 4752 // braces and if blocks use keywords, and a left brace can occur in an if 4753 // statement, but it is not a block. For keywords like `end`, we simply 4754 // treat them the same as right braces. 4755 if (Keywords.isVerilogEnd(*FormatTok)) 4756 FormatTok->Tok.setKind(tok::r_brace); 4757 } 4758 } 4759 4760 void UnwrappedLineParser::distributeComments( 4761 const SmallVectorImpl<FormatToken *> &Comments, 4762 const FormatToken *NextTok) { 4763 // Whether or not a line comment token continues a line is controlled by 4764 // the method continuesLineCommentSection, with the following caveat: 4765 // 4766 // Define a trail of Comments to be a nonempty proper postfix of Comments such 4767 // that each comment line from the trail is aligned with the next token, if 4768 // the next token exists. If a trail exists, the beginning of the maximal 4769 // trail is marked as a start of a new comment section. 4770 // 4771 // For example in this code: 4772 // 4773 // int a; // line about a 4774 // // line 1 about b 4775 // // line 2 about b 4776 // int b; 4777 // 4778 // the two lines about b form a maximal trail, so there are two sections, the 4779 // first one consisting of the single comment "// line about a" and the 4780 // second one consisting of the next two comments. 4781 if (Comments.empty()) 4782 return; 4783 bool ShouldPushCommentsInCurrentLine = true; 4784 bool HasTrailAlignedWithNextToken = false; 4785 unsigned StartOfTrailAlignedWithNextToken = 0; 4786 if (NextTok) { 4787 // We are skipping the first element intentionally. 
4788 for (unsigned i = Comments.size() - 1; i > 0; --i) { 4789 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 4790 HasTrailAlignedWithNextToken = true; 4791 StartOfTrailAlignedWithNextToken = i; 4792 } 4793 } 4794 } 4795 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 4796 FormatToken *FormatTok = Comments[i]; 4797 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 4798 FormatTok->ContinuesLineCommentSection = false; 4799 } else { 4800 FormatTok->ContinuesLineCommentSection = 4801 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 4802 } 4803 if (!FormatTok->ContinuesLineCommentSection && 4804 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 4805 ShouldPushCommentsInCurrentLine = false; 4806 } 4807 if (ShouldPushCommentsInCurrentLine) 4808 pushToken(FormatTok); 4809 else 4810 CommentsBeforeNextToken.push_back(FormatTok); 4811 } 4812 } 4813 4814 void UnwrappedLineParser::readToken(int LevelDifference) { 4815 SmallVector<FormatToken *, 1> Comments; 4816 bool PreviousWasComment = false; 4817 bool FirstNonCommentOnLine = false; 4818 do { 4819 FormatTok = Tokens->getNextToken(); 4820 assert(FormatTok); 4821 while (FormatTok->isOneOf(TT_ConflictStart, TT_ConflictEnd, 4822 TT_ConflictAlternative)) { 4823 if (FormatTok->is(TT_ConflictStart)) 4824 conditionalCompilationStart(/*Unreachable=*/false); 4825 else if (FormatTok->is(TT_ConflictAlternative)) 4826 conditionalCompilationAlternative(); 4827 else if (FormatTok->is(TT_ConflictEnd)) 4828 conditionalCompilationEnd(); 4829 FormatTok = Tokens->getNextToken(); 4830 FormatTok->MustBreakBefore = true; 4831 FormatTok->MustBreakBeforeFinalized = true; 4832 } 4833 4834 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine, 4835 const FormatToken &Tok, 4836 bool PreviousWasComment) { 4837 auto IsFirstOnLine = [](const FormatToken &Tok) { 4838 return Tok.HasUnescapedNewline || Tok.IsFirst; 4839 }; 4840 4841 // Consider preprocessor directives preceded by 
block comments as first 4842 // on line. 4843 if (PreviousWasComment) 4844 return FirstNonCommentOnLine || IsFirstOnLine(Tok); 4845 return IsFirstOnLine(Tok); 4846 }; 4847 4848 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4849 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4850 PreviousWasComment = FormatTok->is(tok::comment); 4851 4852 while (!Line->InPPDirective && FormatTok->is(tok::hash) && 4853 (!Style.isVerilog() || 4854 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) && 4855 FirstNonCommentOnLine) { 4856 distributeComments(Comments, FormatTok); 4857 Comments.clear(); 4858 // If there is an unfinished unwrapped line, we flush the preprocessor 4859 // directives only after that unwrapped line was finished later. 4860 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 4861 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 4862 assert((LevelDifference >= 0 || 4863 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 4864 "LevelDifference makes Line->Level negative"); 4865 Line->Level += LevelDifference; 4866 // Comments stored before the preprocessor directive need to be output 4867 // before the preprocessor directive, at the same level as the 4868 // preprocessor directive, as we consider them to apply to the directive. 
4869 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 4870 PPBranchLevel > 0) { 4871 Line->Level += PPBranchLevel; 4872 } 4873 assert(Line->Level >= Line->UnbracedBodyLevel); 4874 Line->Level -= Line->UnbracedBodyLevel; 4875 flushComments(isOnNewLine(*FormatTok)); 4876 parsePPDirective(); 4877 PreviousWasComment = FormatTok->is(tok::comment); 4878 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4879 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4880 } 4881 4882 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 4883 !Line->InPPDirective) { 4884 continue; 4885 } 4886 4887 if (FormatTok->is(tok::identifier) && 4888 Macros.defined(FormatTok->TokenText) && 4889 // FIXME: Allow expanding macros in preprocessor directives. 4890 !Line->InPPDirective) { 4891 FormatToken *ID = FormatTok; 4892 unsigned Position = Tokens->getPosition(); 4893 4894 // To correctly parse the code, we need to replace the tokens of the macro 4895 // call with its expansion. 4896 auto PreCall = std::move(Line); 4897 Line.reset(new UnwrappedLine); 4898 bool OldInExpansion = InExpansion; 4899 InExpansion = true; 4900 // We parse the macro call into a new line. 4901 auto Args = parseMacroCall(); 4902 InExpansion = OldInExpansion; 4903 assert(Line->Tokens.front().Tok == ID); 4904 // And remember the unexpanded macro call tokens. 4905 auto UnexpandedLine = std::move(Line); 4906 // Reset to the old line. 
4907 Line = std::move(PreCall); 4908 4909 LLVM_DEBUG({ 4910 llvm::dbgs() << "Macro call: " << ID->TokenText << "("; 4911 if (Args) { 4912 llvm::dbgs() << "("; 4913 for (const auto &Arg : Args.value()) 4914 for (const auto &T : Arg) 4915 llvm::dbgs() << T->TokenText << " "; 4916 llvm::dbgs() << ")"; 4917 } 4918 llvm::dbgs() << "\n"; 4919 }); 4920 if (Macros.objectLike(ID->TokenText) && Args && 4921 !Macros.hasArity(ID->TokenText, Args->size())) { 4922 // The macro is either 4923 // - object-like, but we got argumnets, or 4924 // - overloaded to be both object-like and function-like, but none of 4925 // the function-like arities match the number of arguments. 4926 // Thus, expand as object-like macro. 4927 LLVM_DEBUG(llvm::dbgs() 4928 << "Macro \"" << ID->TokenText 4929 << "\" not overloaded for arity " << Args->size() 4930 << "or not function-like, using object-like overload."); 4931 Args.reset(); 4932 UnexpandedLine->Tokens.resize(1); 4933 Tokens->setPosition(Position); 4934 nextToken(); 4935 assert(!Args && Macros.objectLike(ID->TokenText)); 4936 } 4937 if ((!Args && Macros.objectLike(ID->TokenText)) || 4938 (Args && Macros.hasArity(ID->TokenText, Args->size()))) { 4939 // Next, we insert the expanded tokens in the token stream at the 4940 // current position, and continue parsing. 
4941 Unexpanded[ID] = std::move(UnexpandedLine); 4942 SmallVector<FormatToken *, 8> Expansion = 4943 Macros.expand(ID, std::move(Args)); 4944 if (!Expansion.empty()) 4945 FormatTok = Tokens->insertTokens(Expansion); 4946 4947 LLVM_DEBUG({ 4948 llvm::dbgs() << "Expanded: "; 4949 for (const auto &T : Expansion) 4950 llvm::dbgs() << T->TokenText << " "; 4951 llvm::dbgs() << "\n"; 4952 }); 4953 } else { 4954 LLVM_DEBUG({ 4955 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText 4956 << "\", because it was used "; 4957 if (Args) 4958 llvm::dbgs() << "with " << Args->size(); 4959 else 4960 llvm::dbgs() << "without"; 4961 llvm::dbgs() << " arguments, which doesn't match any definition.\n"; 4962 }); 4963 Tokens->setPosition(Position); 4964 FormatTok = ID; 4965 } 4966 } 4967 4968 if (FormatTok->isNot(tok::comment)) { 4969 distributeComments(Comments, FormatTok); 4970 Comments.clear(); 4971 return; 4972 } 4973 4974 Comments.push_back(FormatTok); 4975 } while (!eof()); 4976 4977 distributeComments(Comments, nullptr); 4978 Comments.clear(); 4979 } 4980 4981 namespace { 4982 template <typename Iterator> 4983 void pushTokens(Iterator Begin, Iterator End, 4984 llvm::SmallVectorImpl<FormatToken *> &Into) { 4985 for (auto I = Begin; I != End; ++I) { 4986 Into.push_back(I->Tok); 4987 for (const auto &Child : I->Children) 4988 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into); 4989 } 4990 } 4991 } // namespace 4992 4993 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> 4994 UnwrappedLineParser::parseMacroCall() { 4995 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args; 4996 assert(Line->Tokens.empty()); 4997 nextToken(); 4998 if (FormatTok->isNot(tok::l_paren)) 4999 return Args; 5000 unsigned Position = Tokens->getPosition(); 5001 FormatToken *Tok = FormatTok; 5002 nextToken(); 5003 Args.emplace(); 5004 auto ArgStart = std::prev(Line->Tokens.end()); 5005 5006 int Parens = 0; 5007 do { 5008 switch 
(FormatTok->Tok.getKind()) { 5009 case tok::l_paren: 5010 ++Parens; 5011 nextToken(); 5012 break; 5013 case tok::r_paren: { 5014 if (Parens > 0) { 5015 --Parens; 5016 nextToken(); 5017 break; 5018 } 5019 Args->push_back({}); 5020 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); 5021 nextToken(); 5022 return Args; 5023 } 5024 case tok::comma: { 5025 if (Parens > 0) { 5026 nextToken(); 5027 break; 5028 } 5029 Args->push_back({}); 5030 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); 5031 nextToken(); 5032 ArgStart = std::prev(Line->Tokens.end()); 5033 break; 5034 } 5035 default: 5036 nextToken(); 5037 break; 5038 } 5039 } while (!eof()); 5040 Line->Tokens.resize(1); 5041 Tokens->setPosition(Position); 5042 FormatTok = Tok; 5043 return {}; 5044 } 5045 5046 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 5047 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 5048 if (MustBreakBeforeNextToken) { 5049 Line->Tokens.back().Tok->MustBreakBefore = true; 5050 Line->Tokens.back().Tok->MustBreakBeforeFinalized = true; 5051 MustBreakBeforeNextToken = false; 5052 } 5053 } 5054 5055 } // end namespace format 5056 } // end namespace clang 5057