//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the implementation of the UnwrappedLineParser,
/// which turns a stream of tokens into UnwrappedLines.
///
//===----------------------------------------------------------------------===//

#include "UnwrappedLineParser.h"
#include "FormatToken.h"
#include "FormatTokenLexer.h"
#include "FormatTokenSource.h"
#include "Macros.h"
#include "TokenAnnotator.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Support/raw_ostream.h"

#include <algorithm>
#include <utility>

#define DEBUG_TYPE "format-parser"

namespace clang {
namespace format {

namespace {

/// Writes a debug dump of \p Line to \p OS.
///
/// The dump starts with a header (\p Prefix, level, first start column and a
/// " MACRO" marker for lines inside a preprocessor directive), followed by one
/// entry per token giving its kind name, token type, original column and text.
/// Child lines attached to a token are dumped recursively, each on its own
/// output line and with an extended prefix.
///
/// \param PrintText Not read by this function.
void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
               StringRef Prefix = "", bool PrintText = false) {
  OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
     << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
  // Set after children have been printed: the next token then has to start
  // with the prefix again, and no trailing newline is needed at the end.
  bool NewLine = false;
  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
                                                    E = Line.Tokens.end();
       I != E; ++I) {
    if (NewLine) {
      OS << Prefix;
      NewLine = false;
    }
    OS << I->Tok->Tok.getName() << "["
       << "T=" << (unsigned)I->Tok->getType()
       << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
       << "\"] ";
    for (SmallVectorImpl<UnwrappedLine>::const_iterator
             CI = I->Children.begin(),
             CE = I->Children.end();
         CI != CE; ++CI) {
      OS << "\n";
      printLine(OS, *CI, (Prefix + " ").str());
      NewLine = true;
    }
  }
  if (!NewLine)
    OS << "\n";
}

/// Dumps \p Line to the LLVM debug stream; used from LLVM_DEBUG statements.
LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
  printLine(llvm::dbgs(), Line);
}

/// Scope guard for the "must be a declaration" state.
///
/// On construction, sets \c Line.MustBeDeclaration and pushes the same value
/// onto \c Stack. On destruction, pops the stack and restores
/// \c Line.MustBeDeclaration to the new top of the stack, or to \c true when
/// the stack has become empty.
class ScopedDeclarationState {
public:
  ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
                         bool MustBeDeclaration)
      : Line(Line), Stack(Stack) {
    Line.MustBeDeclaration = MustBeDeclaration;
    Stack.push_back(MustBeDeclaration);
  }
  ~ScopedDeclarationState() {
    Stack.pop_back();
    if (!Stack.empty())
      Line.MustBeDeclaration = Stack.back();
    else
      Line.MustBeDeclaration = true;
  }

private:
  UnwrappedLine &Line;
  llvm::BitVector &Stack;
};

} // end anonymous namespace

/// Streams the debug dump of \p Line (see printLine) to a std::ostream.
std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line) {
  llvm::raw_os_ostream OS(Stream);
  printLine(OS, Line);
  return Stream;
}

/// Scope guard that parks the parser's current line and redirects where
/// finished lines are collected.
///
/// On construction, lines are redirected either to the parser's
/// \c PreprocessorDirectives (when \p SwitchToPreprocessorLines is set) or,
/// if the current line already has tokens, to the children of its last token.
/// The current line is moved aside and replaced by a fresh line that inherits
/// its level and preprocessor/macro flags. On destruction, any pending tokens
/// are flushed as a line and the previous line and line sink are restored.
class ScopedLineState {
public:
  ScopedLineState(UnwrappedLineParser &Parser,
                  bool SwitchToPreprocessorLines = false)
      : Parser(Parser), OriginalLines(Parser.CurrentLines) {
    if (SwitchToPreprocessorLines)
      Parser.CurrentLines = &Parser.PreprocessorDirectives;
    else if (!Parser.Line->Tokens.empty())
      Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
    PreBlockLine = std::move(Parser.Line);
    Parser.Line = std::make_unique<UnwrappedLine>();
    Parser.Line->Level = PreBlockLine->Level;
    Parser.Line->PPLevel = PreBlockLine->PPLevel;
    Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
    Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
    Parser.Line->UnbracedBodyLevel = PreBlockLine->UnbracedBodyLevel;
  }

  ~ScopedLineState() {
    if (!Parser.Line->Tokens.empty())
      Parser.addUnwrappedLine();
    assert(Parser.Line->Tokens.empty());
    Parser.Line = std::move(PreBlockLine);
    // This has to be checked before CurrentLines is restored below.
    if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
      Parser.MustBreakBeforeNextToken = true;
    Parser.CurrentLines = OriginalLines;
  }

private:
  UnwrappedLineParser &Parser;

  // The line that was current when this guard was created.
  std::unique_ptr<UnwrappedLine> PreBlockLine;
  // The line sink that was current when this guard was created.
  SmallVectorImpl<UnwrappedLine> *OriginalLines;
};

/// Scope guard used around the body of a compound statement.
///
/// On construction, optionally finishes the current unwrapped line (so the
/// brace is wrapped) and optionally increases \c LineLevel by one (so the
/// brace is indented). On destruction, \c LineLevel is reset to the value it
/// had at construction.
class CompoundStatementIndenter {
public:
  CompoundStatementIndenter(UnwrappedLineParser *Parser,
                            const FormatStyle &Style, unsigned &LineLevel)
      : CompoundStatementIndenter(Parser, LineLevel,
                                  Style.BraceWrapping.AfterControlStatement,
                                  Style.BraceWrapping.IndentBraces) {}
  CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
                            bool WrapBrace, bool IndentBrace)
      : LineLevel(LineLevel), OldLineLevel(LineLevel) {
    if (WrapBrace)
      Parser->addUnwrappedLine();
    if (IndentBrace)
      ++LineLevel;
  }
  ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }

private:
  unsigned &LineLevel;
  unsigned OldLineLevel;
};

/// Creates a parser over \p Tokens that reports finished lines to \p Callback.
///
/// Include-guard detection starts out rejected when the style does not indent
/// preprocessor directives (\c PPDIS_None) and in its initial state otherwise.
UnwrappedLineParser::UnwrappedLineParser(
    SourceManager &SourceMgr, const FormatStyle &Style,
    const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
    ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
    llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
    IdentifierTable &IdentTable)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Style(Style), IsCpp(Style.isCpp()),
      LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords),
      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
      IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
                       ? IG_Rejected
                       : IG_Inited),
      IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
      Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {
  assert(IsCpp == LangOpts.CXXOperatorNames);
}

/// Puts the parser back into its initial per-run state.
///
/// Note that \c PPLevelBranchIndex and \c PPLevelBranchCount are not touched
/// here; parse() relies on them surviving from one run to the next.
void UnwrappedLineParser::reset() {
  PPBranchLevel = -1;
  IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
                     ? IG_Rejected
                     : IG_Inited;
  IncludeGuardToken = nullptr;
  Line.reset(new UnwrappedLine);
  CommentsBeforeNextToken.clear();
  FormatTok = nullptr;
  MustBreakBeforeNextToken = false;
  IsDecltypeAutoFunction = false;
  PreprocessorDirectives.clear();
  CurrentLines = &Lines;
  DeclarationScopeStack.clear();
  NestedTooDeep.clear();
  NestedLambdas.clear();
  PPStack.clear();
  Line->FirstStartColumn = FirstStartColumn;

  // The tokens' macro contexts are only cleared when the previous run
  // recorded unexpanded macro calls.
  if (!Unexpanded.empty())
    for (FormatToken *Token : AllTokens)
      Token->MacroCtx.reset();
  CurrentExpandedLines.clear();
  ExpandedLines.clear();
  Unexpanded.clear();
  InExpansion = false;
  Reconstruct.reset();
}

/// Parses the whole token stream and hands the resulting lines to the
/// callback.
///
/// The stream is parsed repeatedly: every iteration resets the parser, parses
/// the file from the first token, reports the lines via
/// \c Callback.consumeUnwrappedLine() and ends the run with
/// \c Callback.finishRun(). After each run the innermost entry of
/// \c PPLevelBranchIndex is advanced; iteration stops once no entries remain.
void UnwrappedLineParser::parse() {
  IndexedTokenSource TokenSource(AllTokens);
  Line->FirstStartColumn = FirstStartColumn;
  do {
    LLVM_DEBUG(llvm::dbgs() << "----\n");
    reset();
    Tokens = &TokenSource;
    TokenSource.reset();

    readToken();
    parseFile();

    // If we found an include guard then all preprocessor directives (other than
    // the guard) are over-indented by one.
    if (IncludeGuard == IG_Found) {
      for (auto &Line : Lines)
        if (Line.InPPDirective && Line.Level > 0)
          --Line.Level;
    }

    // Create line with eof token.
    assert(eof());
    pushToken(FormatTok);
    addUnwrappedLine();

    // In a first run, format everything with the lines containing macro calls
    // replaced by the expansion.
    if (!ExpandedLines.empty()) {
      LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
      for (const auto &Line : Lines) {
        if (!Line.Tokens.empty()) {
          // Expanded lines are looked up by the first token of the line.
          auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
          if (it != ExpandedLines.end()) {
            for (const auto &Expanded : it->second) {
              LLVM_DEBUG(printDebugInfo(Expanded));
              Callback.consumeUnwrappedLine(Expanded);
            }
            continue;
          }
        }
        LLVM_DEBUG(printDebugInfo(Line));
        Callback.consumeUnwrappedLine(Line);
      }
      Callback.finishRun();
    }

    LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
    for (const UnwrappedLine &Line : Lines) {
      LLVM_DEBUG(printDebugInfo(Line));
      Callback.consumeUnwrappedLine(Line);
    }
    Callback.finishRun();
    Lines.clear();
    // Drop the innermost levels whose branch index has reached the last
    // recorded branch, then advance the innermost remaining level.
    while (!PPLevelBranchIndex.empty() &&
           PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
      PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
      PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
    }
    if (!PPLevelBranchIndex.empty()) {
      ++PPLevelBranchIndex.back();
      assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
      assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
    }
  } while (!PPLevelBranchIndex.empty());
}

/// Parses the top level of a file and flushes whatever remains afterwards
/// (pending comments and the last, possibly unfinished, line).
void UnwrappedLineParser::parseFile() {
  // The top-level context in a file always has declarations, except for pre-
  // processor directives and JavaScript files.
  bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (Style.Language == FormatStyle::LK_TextProto)
    parseBracedList();
  else
    parseLevel();
  // Make sure to format the remaining tokens.
  //
  // LK_TextProto is special since its top-level is parsed as the body of a
  // braced list, which does not necessarily have natural line separators such
  // as a semicolon. Comments after the last entry that have been determined to
  // not belong to that line, as in:
  //   key: value
  //   // endfile comment
  // do not have a chance to be put on a line of their own until this point.
  // Here we add this newline before end-of-file comments.
  if (Style.Language == FormatStyle::LK_TextProto &&
      !CommentsBeforeNextToken.empty()) {
    addUnwrappedLine();
  }
  flushComments(true);
  addUnwrappedLine();
}

/// Consumes tokens up to (but not including) the next opening brace, or up to
/// the end of the file. Each \c where keyword encountered on the way first
/// finishes the current unwrapped line and is then handled by a recursive
/// call.
void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      return;
    default:
      if (FormatTok->is(Keywords.kw_where)) {
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
      break;
    }
  } while (!eof());
}

/// Consumes the remainder of a C# attribute and finishes the line after it.
///
/// The bracket counter starts at one, i.e. the opening \c [ is expected to
/// have been consumed already. Tokens are consumed, tracking nested square
/// brackets, until the matching \c ] has been consumed or the end of the file
/// is reached.
void UnwrappedLineParser::parseCSharpAttribute() {
  int UnpairedSquareBrackets = 1;
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::r_square:
      nextToken();
      --UnpairedSquareBrackets;
      if (UnpairedSquareBrackets == 0) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::l_square:
      ++UnpairedSquareBrackets;
      nextToken();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

/// \returns true if the most recently added line is part of a preprocessor
/// directive, or if the previous token is a comment that is multiline or
/// preceded by at least one newline.
bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
  if (!Lines.empty() && Lines.back().InPPDirective)
    return true;

  const FormatToken *Previous = Tokens->getPreviousToken();
  return Previous && Previous->is(tok::comment) &&
         (Previous->IsMultiline || Previous->NewlinesBefore > 0);
}

/// \brief Parses a level, that is, the sequence of structural elements up to
/// the closing brace that ends the current block or up to the end of the file.
/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
/// \param IfKind The \p if statement kind in the level.
/// \param IfLeftBrace The left brace of the \p if block in the level.
/// \returns true if a simple block of if/else/for/while, or false otherwise.
/// (A simple block has a single statement.)
bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
                                     IfStmtKind *IfKind,
                                     FormatToken **IfLeftBrace) {
  const bool InRequiresExpression =
      OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
  // Evaluated once on entry; always true when brace removal is disabled, which
  // makes the "simple block" check below fail.
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  FormatToken *IfLBrace = nullptr;
  bool HasDoWhile = false;
  bool HasLabel = false;
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;

  do {
    // Skip attributes, including a parenthesized argument list if present.
    if (FormatTok->isAttribute()) {
      nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      continue;
    }
    // Macro block begin/end tokens are dispatched like braces.
    tok::TokenKind Kind = FormatTok->Tok.getKind();
    if (FormatTok->is(TT_MacroBlockBegin))
      Kind = tok::l_brace;
    else if (FormatTok->is(TT_MacroBlockEnd))
      Kind = tok::r_brace;

    // Parses one structural element and counts it as a statement. Once
    // HasDoWhile/HasLabel has been set, nullptr is passed for it instead of
    // its address.
    auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
                         &HasLabel, &StatementCount] {
      parseStructuralElement(OpeningBrace, IfKind, &IfLBrace,
                             HasDoWhile ? nullptr : &HasDoWhile,
                             HasLabel ? nullptr : &HasLabel);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
    };

    switch (Kind) {
    case tok::comment:
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      if (InRequiresExpression) {
        FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
      } else if (FormatTok->Previous &&
                 FormatTok->Previous->ClosesRequiresClause) {
        // We need the 'default' case here to correctly parse a function
        // l_brace.
        ParseDefault();
        continue;
      }
      if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin)) {
        if (tryToParseBracedList())
          continue;
        FormatTok->setFinalizedType(TT_BlockLBrace);
      }
      parseBlock();
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (OpeningBrace) {
        // This brace ends the level. The closing token is left for the caller
        // to consume; only the "simple block" result is computed here.
        if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
            !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
          return false;
        }
        // FormatTok is not an r_brace when Kind was mapped from a
        // TT_MacroBlockEnd token above.
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
            HasDoWhile || IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective()) {
          return false;
        }
        // A comment on the same line right after the closing brace also
        // disqualifies the block.
        const FormatToken *Next = Tokens->peekNextToken();
        if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
          return false;
        if (IfLeftBrace)
          *IfLeftBrace = IfLBrace;
        return true;
      }
      // Closing brace without an opening brace for this level: consume it and
      // keep going.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Peek at the next non-comment token, then rewind.
      unsigned StoredPosition = Tokens->getPosition();
      auto *Next = Tokens->getNextNonComment();
      FormatTok = Tokens->setPosition(StoredPosition);
      if (!Next->isOneOf(tok::colon, tok::arrow)) {
        // default not followed by `:` or `->` is not a case label; treat it
        // like an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      [[fallthrough]];
    }
    case tok::kw_case:
      if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
          (Style.isJavaScript() && Line->MustBeDeclaration)) {
        // Proto: there are no switch/case statements
        // Verilog: Case labels don't have this word. We handle case
        // labels including default in TokenAnnotator.
        // JavaScript: A 'case: string' style field declaration.
        ParseDefault();
        break;
      }
      // Only the first label seen in this level may bump the level.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels ||
           (OpeningBrace && OpeningBrace->is(TT_SwitchExpressionLBrace)) ||
           (Line->InPPDirective && Line->Level == 1))) {
        ++Line->Level;
      }
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        nextToken();
        parseCSharpAttribute();
        break;
      }
      if (handleCppAttributes())
        break;
      [[fallthrough]];
    default:
      ParseDefault();
      break;
    }
  } while (!eof());

  return false;
}

/// Starting at the current token (which must be an opening brace), scans
/// forward and assigns a block kind (\c BK_Block or \c BK_BracedInit) to the
/// braces it encounters, until the brace stack is empty or the end of the file
/// is reached. The token position is restored before returning.
///
/// \param ExpectClassBody When set, a semicolon following the closing brace
/// of the outermost brace pair does not by itself mark it as a braced list.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  struct StackEntry {
    FormatToken *Tok;
    const FormatToken *PrevTok;
  };
  SmallVector<StackEntry, 8> LBraceStack;
  assert(Tok->is(tok::l_brace));

  do {
    auto *NextTok = Tokens->getNextNonComment();

    if (!Line->InMacroBody && !Style.isTableGen()) {
      // Skip PPDirective lines and comments.
      while (NextTok->is(tok::hash)) {
        NextTok = Tokens->getNextToken();
        if (NextTok->is(tok::pp_not_keyword))
          break;
        // Advance to the first token that starts a new line (or to eof).
        do {
          NextTok = Tokens->getNextToken();
        } while (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof));

        while (NextTok->is(tok::comment))
          NextTok = Tokens->getNextToken();
      }
    }

    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.isJavaScript() && PrevTok) {
        if (PrevTok->isOneOf(tok::colon, tok::less)) {
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->setBlockKind(BK_BracedInit);
        } else if (PrevTok->is(tok::r_paren)) {
          // `) { }` can only occur in function or method declarations in JS.
          Tok->setBlockKind(BK_Block);
        }
      } else {
        Tok->setBlockKind(BK_Unknown);
      }
      LBraceStack.push_back({Tok, PrevTok});
      break;
    case tok::r_brace:
      if (LBraceStack.empty())
        break;
      // Only decide the kind if the matching opening brace is still
      // undecided.
      if (auto *LBrace = LBraceStack.back().Tok; LBrace->is(BK_Unknown)) {
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else if (LBrace->isNot(TT_EnumLBrace)) {
          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // Try to detect a braced list. Note that regardless how we mark inner
          // braces here, we will overwrite the BlockKind later if we parse a
          // braced list (where all blocks inside are by default braced lists),
          // or when we explicitly detect blocks (for example while parsing
          // lambdas).

          // If we already marked the opening brace as braced list, the closing
          // must also be part of it.
          ProbablyBracedList = LBrace->is(TT_BracedListLBrace);

          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isJavaScript() &&
                                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                                 Keywords.kw_as));
          ProbablyBracedList =
              ProbablyBracedList || (IsCpp && (PrevTok->Tok.isLiteral() ||
                                               NextTok->is(tok::l_paren)));

          // If there is a comma, semicolon or right paren after the closing
          // brace, we assume this is a braced initializer list.
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              ProbablyBracedList ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::ellipsis);

          // Distinguish between braced list in a constructor initializer list
          // followed by constructor body, or just adjacent blocks.
          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
               LBraceStack.back().PrevTok->isOneOf(tok::identifier,
                                                   tok::greater));

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));

          ProbablyBracedList = ProbablyBracedList ||
                               (NextTok->is(tok::semi) &&
                                (!ExpectClassBody || LBraceStack.size() != 1));

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);

          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            NextTok = Tokens->getNextToken();
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }

          // Cpp macro definition body that is a nonempty braced list or block:
          if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
              !FormatTok->Previous && NextTok->is(tok::eof) &&
              // A statement can end with only `;` (simple statement), a block
              // closing brace (compound statement), or `:` (label statement).
              // If PrevTok is a block opening brace, Tok ends an empty block.
              !PrevTok->isOneOf(tok::semi, BK_Block, tok::colon)) {
            ProbablyBracedList = true;
          }
        }
        // Both braces of the pair get the same kind.
        const auto BlockKind = ProbablyBracedList ? BK_BracedInit : BK_Block;
        Tok->setBlockKind(BlockKind);
        LBrace->setBlockKind(BlockKind);
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      if (Tok->isNot(TT_StatementMacro))
        break;
      [[fallthrough]];
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      // Any of these tokens inside a still-undecided brace marks it as a
      // block.
      if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
        LBraceStack.back().Tok->setBlockKind(BK_Block);
      break;
    default:
      break;
    }

    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (const auto &Entry : LBraceStack)
    if (Entry.Tok->is(BK_Unknown))
      Entry.Tok->setBlockKind(BK_Block);

  FormatTok = Tokens->setPosition(StoredPosition);
}

// Sets the token type of the directly previous right brace.
// Does nothing if the previous non-comment token is not a right brace.
void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
  if (auto Prev = FormatTok->getPreviousNonComment();
      Prev && Prev->is(tok::r_brace)) {
    Prev->setFinalizedType(Type);
  }
}

// Mixes the std::hash value of \p v into \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  std::hash<T> hasher;
  seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}

// Computes a hash over the kind and line of every entry on the preprocessor
// stack. parseBlock() compares the values taken at the start and at the end of
// a block.
size_t UnwrappedLineParser::computePPHash() const {
  size_t h = 0;
  for (const auto &i : PPStack) {
    hash_combine(h, size_t(i.Kind));
    hash_combine(h, i.Line);
  }
  return h;
}

// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
// is not null, subtracts its length (plus the preceding space) when computing
// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
// running the token annotator on it so that we can restore them afterward.
bool UnwrappedLineParser::mightFitOnOneLine(
    UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
  // Without a column limit everything fits.
  const auto ColumnLimit = Style.ColumnLimit;
  if (ColumnLimit == 0)
    return true;

  auto &Tokens = ParsedLine.Tokens;
  assert(!Tokens.empty());

  const auto *LastToken = Tokens.back().Tok;
  assert(LastToken);

  SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());

  // Snapshot every token (and its children) so that the changes made by the
  // annotator below can be undone.
  int Index = 0;
  for (const auto &Token : Tokens) {
    assert(Token.Tok);
    auto &SavedToken = SavedTokens[Index++];
    SavedToken.Tok = new FormatToken;
    SavedToken.Tok->copyFrom(*Token.Tok);
    SavedToken.Children = std::move(Token.Children);
  }

  AnnotatedLine Line(ParsedLine);
  assert(Line.Last == LastToken);

  TokenAnnotator Annotator(Style, Keywords);
  Annotator.annotate(Line);
  Annotator.calculateFormattingInformation(Line);

  auto Length = LastToken->TotalLength;
  if (OpeningBrace) {
    assert(OpeningBrace != Tokens.front().Tok);
    // If the brace's total length is exactly ColumnLimit more than its
    // predecessor's, take that ColumnLimit back out.
    // NOTE(review): presumably the annotator adds ColumnLimit as a penalty for
    // a forced break before the brace - confirm against TokenAnnotator.
    if (auto Prev = OpeningBrace->Previous;
        Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
      Length -= ColumnLimit;
    }
    Length -= OpeningBrace->TokenText.size() + 1;
  }

  // A leading closing brace (plus one more column) is not counted either.
  if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
    assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
    Length -= FirstToken->TokenText.size() + 1;
  }

  // Restore the tokens from the snapshot and release the copies.
  Index = 0;
  for (auto &Token : Tokens) {
    const auto &SavedToken = SavedTokens[Index++];
    Token.Tok->copyFrom(*SavedToken.Tok);
    Token.Children = std::move(SavedToken.Children);
    delete SavedToken.Tok;
  }

  // If these change, PPLevel needs to be used to get the correct indentation.
  assert(!Line.InMacroBody);
  assert(!Line.InPPDirective);
  return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
}

/// Parses a block that starts at the current token: an opening brace, a macro
/// block begin token or, for Verilog, a begin/hierarchy keyword.
///
/// \param MustBeDeclaration Declaration state that is active inside the block.
/// \param AddLevels Number of levels the block's contents are indented by.
/// \param MunchSemi Whether a semicolon following the block is consumed.
/// \param KeepBraces If true, the braces are only considered removable when
/// the nested \c if left brace reported by parseLevel() is required.
/// \param IfKind Forwarded to parseLevel().
/// \param UnindentWhitesmithsBraces If true, the level is decreased by one
/// after the opening line has been added.
/// \returns the \c if left brace reported by parseLevel() (may be null), or
/// \c nullptr when parsing is abandoned because the nesting is too deep.
FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
                                             unsigned AddLevels, bool MunchSemi,
                                             bool KeepBraces,
                                             IfStmtKind *IfKind,
                                             bool UnindentWhitesmithsBraces) {
  auto HandleVerilogBlockLabel = [this]() {
    // ":" name
    if (Style.isVerilog() && FormatTok->is(tok::colon)) {
      nextToken();
      if (Keywords.isVerilogIdentifier(*FormatTok))
        nextToken();
    }
  };

  // Whether this is a Verilog-specific block that has a special header like a
  // module.
  const bool VerilogHierarchy =
      Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
  assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
          (Style.isVerilog() &&
           (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
         "'{' or macro block token expected");
  // Tok stays on the opening token; FormatTok moves on.
  FormatToken *Tok = FormatTok;
  const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
  // Index of the line that is still being built when the block starts; used
  // by RemoveBraces below.
  auto Index = CurrentLines->size();
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->setBlockKind(BK_Block);

  // For Whitesmiths mode, jump to the next level prior to skipping over the
  // braces.
  if (!VerilogHierarchy && AddLevels > 0 &&
      Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
    ++Line->Level;
  }

  size_t PPStartHash = computePPHash();

  const unsigned InitialLevel = Line->Level;
  if (VerilogHierarchy) {
    AddLevels += parseVerilogHierarchyHeader();
  } else {
    nextToken(/*LevelDifference=*/AddLevels);
    HandleVerilogBlockLabel();
  }

  // Bail out if there are too many levels. Otherwise, the stack might overflow.
  if (Line->Level > 300)
    return nullptr;

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  size_t NbPreprocessorDirectives =
      !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  // Whitesmiths is weird here. The brace needs to be indented for the namespace
  // block, but the block itself may not be indented depending on the style
  // settings. This allows the format to back up one level in those cases.
  if (UnindentWhitesmithsBraces)
    --Line->Level;

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
    Line->Level += AddLevels;

  FormatToken *IfLBrace = nullptr;
  const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace);

  if (eof())
    return IfLBrace;

  // The level did not end at the expected closing token: restore the level and
  // give up on this block.
  if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd)
                 : FormatTok->isNot(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->setBlockKind(BK_Block);
    return IfLBrace;
  }

  if (FormatTok->is(tok::r_brace)) {
    FormatTok->setBlockKind(BK_Block);
    if (Tok->is(TT_NamespaceLBrace))
      FormatTok->setFinalizedType(TT_NamespaceRBrace);
  }

  const bool IsFunctionRBrace =
      FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);

  // Decides whether the braces of this block can be removed. Everything is
  // captured by copy (the lambda is mutable), so the changes made to Index and
  // Tok inside do not affect the enclosing function.
  auto RemoveBraces = [=]() mutable {
    if (!SimpleBlock)
      return false;
    assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
    assert(FormatTok->is(tok::r_brace));
    const bool WrappedOpeningBrace = !Tok->Previous;
    if (WrappedOpeningBrace && FollowedByComment)
      return false;
    const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
    if (KeepBraces && !HasRequiredIfBraces)
      return false;
    if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      if (Previous->is(tok::r_brace) && !Previous->Optional)
        return false;
    }
    assert(!CurrentLines->empty());
    auto &LastLine = CurrentLines->back();
    if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
      return false;
    if (Tok->is(TT_ElseLBrace))
      return true;
    if (WrappedOpeningBrace) {
      assert(Index > 0);
      --Index; // The line above the wrapped l_brace.
      Tok = nullptr;
    }
    return mightFitOnOneLine((*CurrentLines)[Index], Tok);
  };
  // Removable braces are linked to each other through MatchingParen.
  if (RemoveBraces()) {
    Tok->MatchingParen = FormatTok;
    FormatTok->MatchingParen = Tok;
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  nextToken(/*LevelDifference=*/-AddLevels);

  // When this is a function block and there is an unnecessary semicolon
  // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
  // it later).
  if (Style.RemoveSemicolon && IsFunctionRBrace) {
    while (FormatTok->is(tok::semi)) {
      FormatTok->Optional = true;
      nextToken();
    }
  }

  HandleVerilogBlockLabel();

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  Line->Level = InitialLevel;

  if (FormatTok->is(tok::kw_noexcept)) {
    // A noexcept in a requires expression.
    nextToken();
  }

  if (FormatTok->is(tok::arrow)) {
    // Following the } or noexcept we can find a trailing return type arrow
    // as part of an implicit conversion constraint.
    nextToken();
    parseStructuralElement();
  }

  if (MunchSemi && FormatTok->is(tok::semi))
    nextToken();

  // Only link the opening and closing lines when the preprocessor stack is the
  // same at both ends of the block.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }

  return IfLBrace;
}

// Returns true if \p Line starts with the four tokens `goog`, `.`, `scope`
// and `(`.
static bool isGoogScope(const UnwrappedLine &Line) {
  // FIXME: Closure-library specific stuff should not be hard-coded but be
  // configurable.
  if (Line.Tokens.size() < 4)
    return false;
  auto I = Line.Tokens.begin();
  if (I->Tok->TokenText != "goog")
    return false;
  ++I;
  if (I->Tok->isNot(tok::period))
    return false;
  ++I;
  if (I->Tok->TokenText != "scope")
    return false;
  ++I;
  return I->Tok->is(tok::l_paren);
}

// Returns true if \p Line starts with the three tokens `(`, `function` and
// `(`.
static bool isIIFE(const UnwrappedLine &Line,
                   const AdditionalKeywords &Keywords) {
  // Look for the start of an immediately invoked anonymous function.
  // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
  // This is commonly done in JavaScript to create a new, anonymous scope.
  // Example: (function() { ... })()
  if (Line.Tokens.size() < 3)
    return false;
  auto I = Line.Tokens.begin();
  if (I->Tok->isNot(tok::l_paren))
    return false;
  ++I;
  if (I->Tok->isNot(Keywords.kw_function))
    return false;
  ++I;
  return I->Tok->is(tok::l_paren);
}

// Returns the brace wrapping option of \p Style that corresponds to the kind
// of \p InitialToken (namespace, class, union, struct or enum; a namespace
// macro counts as a namespace). Returns false for every other token kind.
static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
                                   const FormatToken &InitialToken) {
  tok::TokenKind Kind = InitialToken.Tok.getKind();
  if (InitialToken.is(TT_NamespaceMacro))
    Kind = tok::kw_namespace;

  switch (Kind) {
  case tok::kw_namespace:
    return Style.BraceWrapping.AfterNamespace;
  case tok::kw_class:
    return Style.BraceWrapping.AfterClass;
  case tok::kw_union:
    return Style.BraceWrapping.AfterUnion;
  case tok::kw_struct:
    return Style.BraceWrapping.AfterStruct;
  case tok::kw_enum:
    return Style.BraceWrapping.AfterEnum;
  default:
    return false;
  }
}

/// Parses a braced block whose lines become children of the current line
/// rather than lines of their own. The current token must be the opening
/// brace. In JavaScript, the contents are not indented when the current line
/// is a `goog.scope(` call or the start of an immediately invoked function.
void UnwrappedLineParser::parseChildBlock() {
  assert(FormatTok->is(tok::l_brace));
  FormatTok->setBlockKind(BK_Block);
  const FormatToken *OpeningBrace = FormatTok;
  nextToken();
  {
    // This has to be computed before LineState replaces the current line.
    bool SkipIndent = (Style.isJavaScript() &&
                       (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
    ScopedLineState LineState(*this);
    ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                            /*MustBeDeclaration=*/false);
    Line->Level += SkipIndent ? 0 : 1;
    parseLevel(OpeningBrace);
    flushComments(isOnNewLine(*FormatTok));
    Line->Level -= SkipIndent ? 0 : 1;
  }
  nextToken();
}

/// Parses a preprocessor directive. The current token must be the `#`; the
/// token after it selects the directive-specific parser. Anything that is not
/// handled explicitly goes to parsePPUnknown().
void UnwrappedLineParser::parsePPDirective() {
  assert(FormatTok->is(tok::hash) && "'#' expected");
  ScopedMacroState MacroState(*Line, Tokens, FormatTok);

  nextToken();

  if (!FormatTok->Tok.getIdentifierInfo()) {
    parsePPUnknown();
    return;
  }

  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
  case tok::pp_define:
    parsePPDefine();
    return;
  case tok::pp_if:
    parsePPIf(/*IfDef=*/false);
    break;
  case tok::pp_ifdef:
  case tok::pp_ifndef:
    parsePPIf(/*IfDef=*/true);
    break;
  case tok::pp_else:
  case tok::pp_elifdef:
  case tok::pp_elifndef:
  case tok::pp_elif:
    parsePPElse();
    break;
  case tok::pp_endif:
    parsePPEndIf();
    break;
  case tok::pp_pragma:
    parsePPPragma();
    break;
  default:
    parsePPUnknown();
    break;
  }
}

/// Pushes a new entry onto the preprocessor stack. The entry is marked
/// unreachable if \p Unreachable is set or if the enclosing entry is already
/// unreachable; otherwise it is a regular conditional. The entry records the
/// number of lines collected so far.
void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
  size_t Line = CurrentLines->size();
  if (CurrentLines == &PreprocessorDirectives)
    Line += Lines.size();

  if (Unreachable ||
      (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
    PPStack.push_back({PP_Unreachable, Line});
  } else {
    PPStack.push_back({PP_Conditional, Line});
  }
}

/// Enters a new conditional compilation level (the start of an #if chain).
///
/// The per-level bookkeeping vectors are grown when this nesting depth is
/// seen for the first time. The first branch of the chain is treated as
/// unreachable if \p Unreachable is set or if a later branch is selected for
/// this level in the current run.
void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
  ++PPBranchLevel;
  assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
  if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
    PPLevelBranchIndex.push_back(0);
    PPLevelBranchCount.push_back(0);
  }
  // The chain index starts at -1 instead of 0 for an unreachable first branch.
  PPChainBranchIndex.push(Unreachable ? -1 : 0);
  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
  conditionalCompilationCondition(Unreachable || Skip);
}

/// Switches to the next branch of the current conditional chain: replaces the
/// top of the preprocessor stack and advances the chain's branch index. The
/// new branch is unreachable unless its index is the one selected for this
/// level in the current run.
void UnwrappedLineParser::conditionalCompilationAlternative() {
  if (!PPStack.empty())
    PPStack.pop_back();
  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
  if (!PPChainBranchIndex.empty())
    ++PPChainBranchIndex.top();
  conditionalCompilationCondition(
      PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
      PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
}

/// Leaves the current conditional compilation level. The largest number of
/// branches seen at this level is recorded before the level, the chain index
/// and the preprocessor stack entry are popped.
void UnwrappedLineParser::conditionalCompilationEnd() {
  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
    if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
      PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
  }
  // Guard against #endif's without #if.
  if (PPBranchLevel > -1)
    --PPBranchLevel;
  if (!PPChainBranchIndex.empty())
    PPChainBranchIndex.pop();
  if (!PPStack.empty())
    PPStack.pop_back();
}

void UnwrappedLineParser::parsePPIf(bool IfDef) {
  bool IfNDef = FormatTok->is(tok::pp_ifndef);
  nextToken();
  bool Unreachable = false;
  if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
    Unreachable = true;
  if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
    Unreachable = true;
  conditionalCompilationStart(Unreachable);
  FormatToken *IfCondition = FormatTok;
  // If there's a #ifndef on the first line, and the only lines before it are
  // comments, it could be an include guard.
1103 bool MaybeIncludeGuard = IfNDef; 1104 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1105 for (auto &Line : Lines) { 1106 if (Line.Tokens.front().Tok->isNot(tok::comment)) { 1107 MaybeIncludeGuard = false; 1108 IncludeGuard = IG_Rejected; 1109 break; 1110 } 1111 } 1112 } 1113 --PPBranchLevel; 1114 parsePPUnknown(); 1115 ++PPBranchLevel; 1116 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1117 IncludeGuard = IG_IfNdefed; 1118 IncludeGuardToken = IfCondition; 1119 } 1120 } 1121 1122 void UnwrappedLineParser::parsePPElse() { 1123 // If a potential include guard has an #else, it's not an include guard. 1124 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 1125 IncludeGuard = IG_Rejected; 1126 // Don't crash when there is an #else without an #if. 1127 assert(PPBranchLevel >= -1); 1128 if (PPBranchLevel == -1) 1129 conditionalCompilationStart(/*Unreachable=*/true); 1130 conditionalCompilationAlternative(); 1131 --PPBranchLevel; 1132 parsePPUnknown(); 1133 ++PPBranchLevel; 1134 } 1135 1136 void UnwrappedLineParser::parsePPEndIf() { 1137 conditionalCompilationEnd(); 1138 parsePPUnknown(); 1139 // If the #endif of a potential include guard is the last thing in the file, 1140 // then we found an include guard. 
1141 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && 1142 Style.IndentPPDirectives != FormatStyle::PPDIS_None) { 1143 IncludeGuard = IG_Found; 1144 } 1145 } 1146 1147 void UnwrappedLineParser::parsePPDefine() { 1148 nextToken(); 1149 1150 if (!FormatTok->Tok.getIdentifierInfo()) { 1151 IncludeGuard = IG_Rejected; 1152 IncludeGuardToken = nullptr; 1153 parsePPUnknown(); 1154 return; 1155 } 1156 1157 if (IncludeGuard == IG_IfNdefed && 1158 IncludeGuardToken->TokenText == FormatTok->TokenText) { 1159 IncludeGuard = IG_Defined; 1160 IncludeGuardToken = nullptr; 1161 for (auto &Line : Lines) { 1162 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 1163 IncludeGuard = IG_Rejected; 1164 break; 1165 } 1166 } 1167 } 1168 1169 // In the context of a define, even keywords should be treated as normal 1170 // identifiers. Setting the kind to identifier is not enough, because we need 1171 // to treat additional keywords like __except as well, which are already 1172 // identifiers. Setting the identifier info to null interferes with include 1173 // guard processing above, and changes preprocessing nesting. 1174 FormatTok->Tok.setKind(tok::identifier); 1175 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define); 1176 nextToken(); 1177 if (FormatTok->Tok.getKind() == tok::l_paren && 1178 !FormatTok->hasWhitespaceBefore()) { 1179 parseParens(); 1180 } 1181 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1182 Line->Level += PPBranchLevel + 1; 1183 addUnwrappedLine(); 1184 ++Line->Level; 1185 1186 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 
0 : 1); 1187 assert((int)Line->PPLevel >= 0); 1188 Line->InMacroBody = true; 1189 1190 if (Style.SkipMacroDefinitionBody) { 1191 while (!eof()) { 1192 FormatTok->Finalized = true; 1193 FormatTok = Tokens->getNextToken(); 1194 } 1195 addUnwrappedLine(); 1196 return; 1197 } 1198 1199 // Errors during a preprocessor directive can only affect the layout of the 1200 // preprocessor directive, and thus we ignore them. An alternative approach 1201 // would be to use the same approach we use on the file level (no 1202 // re-indentation if there was a structural error) within the macro 1203 // definition. 1204 parseFile(); 1205 } 1206 1207 void UnwrappedLineParser::parsePPPragma() { 1208 Line->InPragmaDirective = true; 1209 parsePPUnknown(); 1210 } 1211 1212 void UnwrappedLineParser::parsePPUnknown() { 1213 do { 1214 nextToken(); 1215 } while (!eof()); 1216 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1217 Line->Level += PPBranchLevel + 1; 1218 addUnwrappedLine(); 1219 } 1220 1221 // Here we exclude certain tokens that are not usually the first token in an 1222 // unwrapped line. This is used in attempt to distinguish macro calls without 1223 // trailing semicolons from other constructs split to several lines. 1224 static bool tokenCanStartNewLine(const FormatToken &Tok) { 1225 // Semicolon can be a null-statement, l_square can be a start of a macro or 1226 // a C++11 attribute, but this doesn't seem to be common. 1227 return !Tok.isOneOf(tok::semi, tok::l_brace, 1228 // Tokens that can only be used as binary operators and a 1229 // part of overloaded operator names. 
1230 tok::period, tok::periodstar, tok::arrow, tok::arrowstar, 1231 tok::less, tok::greater, tok::slash, tok::percent, 1232 tok::lessless, tok::greatergreater, tok::equal, 1233 tok::plusequal, tok::minusequal, tok::starequal, 1234 tok::slashequal, tok::percentequal, tok::ampequal, 1235 tok::pipeequal, tok::caretequal, tok::greatergreaterequal, 1236 tok::lesslessequal, 1237 // Colon is used in labels, base class lists, initializer 1238 // lists, range-based for loops, ternary operator, but 1239 // should never be the first token in an unwrapped line. 1240 tok::colon, 1241 // 'noexcept' is a trailing annotation. 1242 tok::kw_noexcept); 1243 } 1244 1245 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 1246 const FormatToken *FormatTok) { 1247 // FIXME: This returns true for C/C++ keywords like 'struct'. 1248 return FormatTok->is(tok::identifier) && 1249 (!FormatTok->Tok.getIdentifierInfo() || 1250 !FormatTok->isOneOf( 1251 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 1252 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 1253 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 1254 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 1255 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 1256 Keywords.kw_instanceof, Keywords.kw_interface, 1257 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from)); 1258 } 1259 1260 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 1261 const FormatToken *FormatTok) { 1262 return FormatTok->Tok.isLiteral() || 1263 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 1264 mustBeJSIdent(Keywords, FormatTok); 1265 } 1266 1267 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 1268 // when encountered after a value (see mustBeJSIdentOrValue). 
1269 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 1270 const FormatToken *FormatTok) { 1271 return FormatTok->isOneOf( 1272 tok::kw_return, Keywords.kw_yield, 1273 // conditionals 1274 tok::kw_if, tok::kw_else, 1275 // loops 1276 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 1277 // switch/case 1278 tok::kw_switch, tok::kw_case, 1279 // exceptions 1280 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 1281 // declaration 1282 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 1283 Keywords.kw_async, Keywords.kw_function, 1284 // import/export 1285 Keywords.kw_import, tok::kw_export); 1286 } 1287 1288 // Checks whether a token is a type in K&R C (aka C78). 1289 static bool isC78Type(const FormatToken &Tok) { 1290 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long, 1291 tok::kw_unsigned, tok::kw_float, tok::kw_double, 1292 tok::identifier); 1293 } 1294 1295 // This function checks whether a token starts the first parameter declaration 1296 // in a K&R C (aka C78) function definition, e.g.: 1297 // int f(a, b) 1298 // short a, b; 1299 // { 1300 // return a + b; 1301 // } 1302 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, 1303 const FormatToken *FuncName) { 1304 assert(Tok); 1305 assert(Next); 1306 assert(FuncName); 1307 1308 if (FuncName->isNot(tok::identifier)) 1309 return false; 1310 1311 const FormatToken *Prev = FuncName->Previous; 1312 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev))) 1313 return false; 1314 1315 if (!isC78Type(*Tok) && 1316 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) { 1317 return false; 1318 } 1319 1320 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo()) 1321 return false; 1322 1323 Tok = Tok->Previous; 1324 if (!Tok || Tok->isNot(tok::r_paren)) 1325 return false; 1326 1327 Tok = Tok->Previous; 1328 if (!Tok || Tok->isNot(tok::identifier)) 1329 return false; 1330 1331 return 
Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma); 1332 } 1333 1334 bool UnwrappedLineParser::parseModuleImport() { 1335 assert(FormatTok->is(Keywords.kw_import) && "'import' expected"); 1336 1337 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true); 1338 !Token->Tok.getIdentifierInfo() && 1339 !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) { 1340 return false; 1341 } 1342 1343 nextToken(); 1344 while (!eof()) { 1345 if (FormatTok->is(tok::colon)) { 1346 FormatTok->setFinalizedType(TT_ModulePartitionColon); 1347 } 1348 // Handle import <foo/bar.h> as we would an include statement. 1349 else if (FormatTok->is(tok::less)) { 1350 nextToken(); 1351 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) { 1352 // Mark tokens up to the trailing line comments as implicit string 1353 // literals. 1354 if (FormatTok->isNot(tok::comment) && 1355 !FormatTok->TokenText.starts_with("//")) { 1356 FormatTok->setFinalizedType(TT_ImplicitStringLiteral); 1357 } 1358 nextToken(); 1359 } 1360 } 1361 if (FormatTok->is(tok::semi)) { 1362 nextToken(); 1363 break; 1364 } 1365 nextToken(); 1366 } 1367 1368 addUnwrappedLine(); 1369 return true; 1370 } 1371 1372 // readTokenWithJavaScriptASI reads the next token and terminates the current 1373 // line if JavaScript Automatic Semicolon Insertion must 1374 // happen between the current token and the next token. 1375 // 1376 // This method is conservative - it cannot cover all edge cases of JavaScript, 1377 // but only aims to correctly handle certain well known cases. It *must not* 1378 // return true in speculative cases. 1379 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 1380 FormatToken *Previous = FormatTok; 1381 readToken(); 1382 FormatToken *Next = FormatTok; 1383 1384 bool IsOnSameLine = 1385 CommentsBeforeNextToken.empty() 1386 ? 
Next->NewlinesBefore == 0 1387 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 1388 if (IsOnSameLine) 1389 return; 1390 1391 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 1392 bool PreviousStartsTemplateExpr = 1393 Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${"); 1394 if (PreviousMustBeValue || Previous->is(tok::r_paren)) { 1395 // If the line contains an '@' sign, the previous token might be an 1396 // annotation, which can precede another identifier/value. 1397 bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) { 1398 return LineNode.Tok->is(tok::at); 1399 }); 1400 if (HasAt) 1401 return; 1402 } 1403 if (Next->is(tok::exclaim) && PreviousMustBeValue) 1404 return addUnwrappedLine(); 1405 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); 1406 bool NextEndsTemplateExpr = 1407 Next->is(TT_TemplateString) && Next->TokenText.starts_with("}"); 1408 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && 1409 (PreviousMustBeValue || 1410 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, 1411 tok::minusminus))) { 1412 return addUnwrappedLine(); 1413 } 1414 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) && 1415 isJSDeclOrStmt(Keywords, Next)) { 1416 return addUnwrappedLine(); 1417 } 1418 } 1419 1420 void UnwrappedLineParser::parseStructuralElement( 1421 const FormatToken *OpeningBrace, IfStmtKind *IfKind, 1422 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) { 1423 if (Style.Language == FormatStyle::LK_TableGen && 1424 FormatTok->is(tok::pp_include)) { 1425 nextToken(); 1426 if (FormatTok->is(tok::string_literal)) 1427 nextToken(); 1428 addUnwrappedLine(); 1429 return; 1430 } 1431 1432 if (IsCpp) { 1433 while (FormatTok->is(tok::l_square) && handleCppAttributes()) { 1434 } 1435 } else if (Style.isVerilog()) { 1436 if (Keywords.isVerilogStructuredProcedure(*FormatTok)) { 1437 parseForOrWhileLoop(/*HasParens=*/false); 1438 return; 1439 
} 1440 if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) { 1441 parseForOrWhileLoop(); 1442 return; 1443 } 1444 if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert, 1445 Keywords.kw_assume, Keywords.kw_cover)) { 1446 parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true); 1447 return; 1448 } 1449 1450 // Skip things that can exist before keywords like 'if' and 'case'. 1451 while (true) { 1452 if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique, 1453 Keywords.kw_unique0)) { 1454 nextToken(); 1455 } else if (FormatTok->is(tok::l_paren) && 1456 Tokens->peekNextToken()->is(tok::star)) { 1457 parseParens(); 1458 } else { 1459 break; 1460 } 1461 } 1462 } 1463 1464 // Tokens that only make sense at the beginning of a line. 1465 if (FormatTok->isAccessSpecifierKeyword()) { 1466 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 1467 Style.isCSharp()) { 1468 nextToken(); 1469 } else { 1470 parseAccessSpecifier(); 1471 } 1472 return; 1473 } 1474 switch (FormatTok->Tok.getKind()) { 1475 case tok::kw_asm: 1476 nextToken(); 1477 if (FormatTok->is(tok::l_brace)) { 1478 FormatTok->setFinalizedType(TT_InlineASMBrace); 1479 nextToken(); 1480 while (FormatTok && !eof()) { 1481 if (FormatTok->is(tok::r_brace)) { 1482 FormatTok->setFinalizedType(TT_InlineASMBrace); 1483 nextToken(); 1484 addUnwrappedLine(); 1485 break; 1486 } 1487 FormatTok->Finalized = true; 1488 nextToken(); 1489 } 1490 } 1491 break; 1492 case tok::kw_namespace: 1493 parseNamespace(); 1494 return; 1495 case tok::kw_if: { 1496 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1497 // field/method declaration. 1498 break; 1499 } 1500 FormatToken *Tok = parseIfThenElse(IfKind); 1501 if (IfLeftBrace) 1502 *IfLeftBrace = Tok; 1503 return; 1504 } 1505 case tok::kw_for: 1506 case tok::kw_while: 1507 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1508 // field/method declaration. 
1509 break; 1510 } 1511 parseForOrWhileLoop(); 1512 return; 1513 case tok::kw_do: 1514 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1515 // field/method declaration. 1516 break; 1517 } 1518 parseDoWhile(); 1519 if (HasDoWhile) 1520 *HasDoWhile = true; 1521 return; 1522 case tok::kw_switch: 1523 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1524 // 'switch: string' field declaration. 1525 break; 1526 } 1527 parseSwitch(/*IsExpr=*/false); 1528 return; 1529 case tok::kw_default: { 1530 // In Verilog default along with other labels are handled in the next loop. 1531 if (Style.isVerilog()) 1532 break; 1533 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1534 // 'default: string' field declaration. 1535 break; 1536 } 1537 auto *Default = FormatTok; 1538 nextToken(); 1539 if (FormatTok->is(tok::colon)) { 1540 FormatTok->setFinalizedType(TT_CaseLabelColon); 1541 parseLabel(); 1542 return; 1543 } 1544 if (FormatTok->is(tok::arrow)) { 1545 FormatTok->setFinalizedType(TT_CaseLabelArrow); 1546 Default->setFinalizedType(TT_SwitchExpressionLabel); 1547 parseLabel(); 1548 return; 1549 } 1550 // e.g. "default void f() {}" in a Java interface. 1551 break; 1552 } 1553 case tok::kw_case: 1554 // Proto: there are no switch/case statements. 1555 if (Style.Language == FormatStyle::LK_Proto) { 1556 nextToken(); 1557 return; 1558 } 1559 if (Style.isVerilog()) { 1560 parseBlock(); 1561 addUnwrappedLine(); 1562 return; 1563 } 1564 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1565 // 'case: string' field declaration. 1566 nextToken(); 1567 break; 1568 } 1569 parseCaseLabel(); 1570 return; 1571 case tok::kw_try: 1572 case tok::kw___try: 1573 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1574 // field/method declaration. 1575 break; 1576 } 1577 parseTryCatch(); 1578 return; 1579 case tok::kw_extern: 1580 nextToken(); 1581 if (Style.isVerilog()) { 1582 // In Verilog and extern module declaration looks like a start of module. 
1583 // But there is no body and endmodule. So we handle it separately. 1584 if (Keywords.isVerilogHierarchy(*FormatTok)) { 1585 parseVerilogHierarchyHeader(); 1586 return; 1587 } 1588 } else if (FormatTok->is(tok::string_literal)) { 1589 nextToken(); 1590 if (FormatTok->is(tok::l_brace)) { 1591 if (Style.BraceWrapping.AfterExternBlock) 1592 addUnwrappedLine(); 1593 // Either we indent or for backwards compatibility we follow the 1594 // AfterExternBlock style. 1595 unsigned AddLevels = 1596 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) || 1597 (Style.BraceWrapping.AfterExternBlock && 1598 Style.IndentExternBlock == 1599 FormatStyle::IEBS_AfterExternBlock) 1600 ? 1u 1601 : 0u; 1602 parseBlock(/*MustBeDeclaration=*/true, AddLevels); 1603 addUnwrappedLine(); 1604 return; 1605 } 1606 } 1607 break; 1608 case tok::kw_export: 1609 if (Style.isJavaScript()) { 1610 parseJavaScriptEs6ImportExport(); 1611 return; 1612 } 1613 if (IsCpp) { 1614 nextToken(); 1615 if (FormatTok->is(tok::kw_namespace)) { 1616 parseNamespace(); 1617 return; 1618 } 1619 if (FormatTok->is(Keywords.kw_import) && parseModuleImport()) 1620 return; 1621 } 1622 break; 1623 case tok::kw_inline: 1624 nextToken(); 1625 if (FormatTok->is(tok::kw_namespace)) { 1626 parseNamespace(); 1627 return; 1628 } 1629 break; 1630 case tok::identifier: 1631 if (FormatTok->is(TT_ForEachMacro)) { 1632 parseForOrWhileLoop(); 1633 return; 1634 } 1635 if (FormatTok->is(TT_MacroBlockBegin)) { 1636 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 1637 /*MunchSemi=*/false); 1638 return; 1639 } 1640 if (FormatTok->is(Keywords.kw_import)) { 1641 if (Style.isJavaScript()) { 1642 parseJavaScriptEs6ImportExport(); 1643 return; 1644 } 1645 if (Style.Language == FormatStyle::LK_Proto) { 1646 nextToken(); 1647 if (FormatTok->is(tok::kw_public)) 1648 nextToken(); 1649 if (FormatTok->isNot(tok::string_literal)) 1650 return; 1651 nextToken(); 1652 if (FormatTok->is(tok::semi)) 1653 nextToken(); 1654 addUnwrappedLine(); 
1655 return; 1656 } 1657 if (IsCpp && parseModuleImport()) 1658 return; 1659 } 1660 if (IsCpp && FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, 1661 Keywords.kw_slots, Keywords.kw_qslots)) { 1662 nextToken(); 1663 if (FormatTok->is(tok::colon)) { 1664 nextToken(); 1665 addUnwrappedLine(); 1666 return; 1667 } 1668 } 1669 if (IsCpp && FormatTok->is(TT_StatementMacro)) { 1670 parseStatementMacro(); 1671 return; 1672 } 1673 if (IsCpp && FormatTok->is(TT_NamespaceMacro)) { 1674 parseNamespace(); 1675 return; 1676 } 1677 // In Verilog labels can be any expression, so we don't do them here. 1678 // JS doesn't have macros, and within classes colons indicate fields, not 1679 // labels. 1680 // TableGen doesn't have labels. 1681 if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() && 1682 Tokens->peekNextToken()->is(tok::colon) && !Line->MustBeDeclaration) { 1683 nextToken(); 1684 if (!Line->InMacroBody || CurrentLines->size() > 1) 1685 Line->Tokens.begin()->Tok->MustBreakBefore = true; 1686 FormatTok->setFinalizedType(TT_GotoLabelColon); 1687 parseLabel(!Style.IndentGotoLabels); 1688 if (HasLabel) 1689 *HasLabel = true; 1690 return; 1691 } 1692 // In all other cases, parse the declaration. 
1693 break; 1694 default: 1695 break; 1696 } 1697 1698 for (const bool InRequiresExpression = 1699 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace); 1700 !eof();) { 1701 if (IsCpp && FormatTok->isCppAlternativeOperatorKeyword()) { 1702 if (auto *Next = Tokens->peekNextToken(/*SkipComment=*/true); 1703 Next && Next->isBinaryOperator()) { 1704 FormatTok->Tok.setKind(tok::identifier); 1705 } 1706 } 1707 const FormatToken *Previous = FormatTok->Previous; 1708 switch (FormatTok->Tok.getKind()) { 1709 case tok::at: 1710 nextToken(); 1711 if (FormatTok->is(tok::l_brace)) { 1712 nextToken(); 1713 parseBracedList(); 1714 break; 1715 } else if (Style.Language == FormatStyle::LK_Java && 1716 FormatTok->is(Keywords.kw_interface)) { 1717 nextToken(); 1718 break; 1719 } 1720 switch (FormatTok->Tok.getObjCKeywordID()) { 1721 case tok::objc_public: 1722 case tok::objc_protected: 1723 case tok::objc_package: 1724 case tok::objc_private: 1725 return parseAccessSpecifier(); 1726 case tok::objc_interface: 1727 case tok::objc_implementation: 1728 return parseObjCInterfaceOrImplementation(); 1729 case tok::objc_protocol: 1730 if (parseObjCProtocol()) 1731 return; 1732 break; 1733 case tok::objc_end: 1734 return; // Handled by the caller. 
1735 case tok::objc_optional: 1736 case tok::objc_required: 1737 nextToken(); 1738 addUnwrappedLine(); 1739 return; 1740 case tok::objc_autoreleasepool: 1741 nextToken(); 1742 if (FormatTok->is(tok::l_brace)) { 1743 if (Style.BraceWrapping.AfterControlStatement == 1744 FormatStyle::BWACS_Always) { 1745 addUnwrappedLine(); 1746 } 1747 parseBlock(); 1748 } 1749 addUnwrappedLine(); 1750 return; 1751 case tok::objc_synchronized: 1752 nextToken(); 1753 if (FormatTok->is(tok::l_paren)) { 1754 // Skip synchronization object 1755 parseParens(); 1756 } 1757 if (FormatTok->is(tok::l_brace)) { 1758 if (Style.BraceWrapping.AfterControlStatement == 1759 FormatStyle::BWACS_Always) { 1760 addUnwrappedLine(); 1761 } 1762 parseBlock(); 1763 } 1764 addUnwrappedLine(); 1765 return; 1766 case tok::objc_try: 1767 // This branch isn't strictly necessary (the kw_try case below would 1768 // do this too after the tok::at is parsed above). But be explicit. 1769 parseTryCatch(); 1770 return; 1771 default: 1772 break; 1773 } 1774 break; 1775 case tok::kw_requires: { 1776 if (IsCpp) { 1777 bool ParsedClause = parseRequires(); 1778 if (ParsedClause) 1779 return; 1780 } else { 1781 nextToken(); 1782 } 1783 break; 1784 } 1785 case tok::kw_enum: 1786 // Ignore if this is part of "template <enum ..." or "... -> enum" or 1787 // "template <..., enum ...>". 1788 if (Previous && Previous->isOneOf(tok::less, tok::arrow, tok::comma)) { 1789 nextToken(); 1790 break; 1791 } 1792 1793 // parseEnum falls through and does not yet add an unwrapped line as an 1794 // enum definition can start a structural element. 1795 if (!parseEnum()) 1796 break; 1797 // This only applies to C++ and Verilog. 
1798 if (!IsCpp && !Style.isVerilog()) { 1799 addUnwrappedLine(); 1800 return; 1801 } 1802 break; 1803 case tok::kw_typedef: 1804 nextToken(); 1805 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1806 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS, 1807 Keywords.kw_CF_CLOSED_ENUM, 1808 Keywords.kw_NS_CLOSED_ENUM)) { 1809 parseEnum(); 1810 } 1811 break; 1812 case tok::kw_class: 1813 if (Style.isVerilog()) { 1814 parseBlock(); 1815 addUnwrappedLine(); 1816 return; 1817 } 1818 if (Style.isTableGen()) { 1819 // Do nothing special. In this case the l_brace becomes FunctionLBrace. 1820 // This is same as def and so on. 1821 nextToken(); 1822 break; 1823 } 1824 [[fallthrough]]; 1825 case tok::kw_struct: 1826 case tok::kw_union: 1827 if (parseStructLike()) 1828 return; 1829 break; 1830 case tok::kw_decltype: 1831 nextToken(); 1832 if (FormatTok->is(tok::l_paren)) { 1833 parseParens(); 1834 assert(FormatTok->Previous); 1835 if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto, 1836 tok::l_paren)) { 1837 Line->SeenDecltypeAuto = true; 1838 } 1839 } 1840 break; 1841 case tok::period: 1842 nextToken(); 1843 // In Java, classes have an implicit static member "class". 1844 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1845 FormatTok->is(tok::kw_class)) { 1846 nextToken(); 1847 } 1848 if (Style.isJavaScript() && FormatTok && 1849 FormatTok->Tok.getIdentifierInfo()) { 1850 // JavaScript only has pseudo keywords, all keywords are allowed to 1851 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 1852 nextToken(); 1853 } 1854 break; 1855 case tok::semi: 1856 nextToken(); 1857 addUnwrappedLine(); 1858 return; 1859 case tok::r_brace: 1860 addUnwrappedLine(); 1861 return; 1862 case tok::l_paren: { 1863 parseParens(); 1864 // Break the unwrapped line if a K&R C function definition has a parameter 1865 // declaration. 
1866 if (OpeningBrace || !IsCpp || !Previous || eof()) 1867 break; 1868 if (isC78ParameterDecl(FormatTok, 1869 Tokens->peekNextToken(/*SkipComment=*/true), 1870 Previous)) { 1871 addUnwrappedLine(); 1872 return; 1873 } 1874 break; 1875 } 1876 case tok::kw_operator: 1877 nextToken(); 1878 if (FormatTok->isBinaryOperator()) 1879 nextToken(); 1880 break; 1881 case tok::caret: 1882 nextToken(); 1883 // Block return type. 1884 if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName(LangOpts)) { 1885 nextToken(); 1886 // Return types: pointers are ok too. 1887 while (FormatTok->is(tok::star)) 1888 nextToken(); 1889 } 1890 // Block argument list. 1891 if (FormatTok->is(tok::l_paren)) 1892 parseParens(); 1893 // Block body. 1894 if (FormatTok->is(tok::l_brace)) 1895 parseChildBlock(); 1896 break; 1897 case tok::l_brace: 1898 if (InRequiresExpression) 1899 FormatTok->setFinalizedType(TT_BracedListLBrace); 1900 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) { 1901 IsDecltypeAutoFunction = Line->SeenDecltypeAuto; 1902 // A block outside of parentheses must be the last part of a 1903 // structural element. 1904 // FIXME: Figure out cases where this is not true, and add projections 1905 // for them (the one we know is missing are lambdas). 1906 if (Style.Language == FormatStyle::LK_Java && 1907 Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) { 1908 // If necessary, we could set the type to something different than 1909 // TT_FunctionLBrace. 1910 if (Style.BraceWrapping.AfterControlStatement == 1911 FormatStyle::BWACS_Always) { 1912 addUnwrappedLine(); 1913 } 1914 } else if (Style.BraceWrapping.AfterFunction) { 1915 addUnwrappedLine(); 1916 } 1917 if (!Previous || Previous->isNot(TT_TypeDeclarationParen)) 1918 FormatTok->setFinalizedType(TT_FunctionLBrace); 1919 parseBlock(); 1920 IsDecltypeAutoFunction = false; 1921 addUnwrappedLine(); 1922 return; 1923 } 1924 // Otherwise this was a braced init list, and the structural 1925 // element continues. 
1926 break; 1927 case tok::kw_try: 1928 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1929 // field/method declaration. 1930 nextToken(); 1931 break; 1932 } 1933 // We arrive here when parsing function-try blocks. 1934 if (Style.BraceWrapping.AfterFunction) 1935 addUnwrappedLine(); 1936 parseTryCatch(); 1937 return; 1938 case tok::identifier: { 1939 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) && 1940 Line->MustBeDeclaration) { 1941 addUnwrappedLine(); 1942 parseCSharpGenericTypeConstraint(); 1943 break; 1944 } 1945 if (FormatTok->is(TT_MacroBlockEnd)) { 1946 addUnwrappedLine(); 1947 return; 1948 } 1949 1950 // Function declarations (as opposed to function expressions) are parsed 1951 // on their own unwrapped line by continuing this loop. Function 1952 // expressions (functions that are not on their own line) must not create 1953 // a new unwrapped line, so they are special cased below. 1954 size_t TokenCount = Line->Tokens.size(); 1955 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) && 1956 (TokenCount > 1 || 1957 (TokenCount == 1 && 1958 Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) { 1959 tryToParseJSFunction(); 1960 break; 1961 } 1962 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) && 1963 FormatTok->is(Keywords.kw_interface)) { 1964 if (Style.isJavaScript()) { 1965 // In JavaScript/TypeScript, "interface" can be used as a standalone 1966 // identifier, e.g. in `var interface = 1;`. If "interface" is 1967 // followed by another identifier, it is very like to be an actual 1968 // interface declaration. 
1969 unsigned StoredPosition = Tokens->getPosition(); 1970 FormatToken *Next = Tokens->getNextToken(); 1971 FormatTok = Tokens->setPosition(StoredPosition); 1972 if (!mustBeJSIdent(Keywords, Next)) { 1973 nextToken(); 1974 break; 1975 } 1976 } 1977 parseRecord(); 1978 addUnwrappedLine(); 1979 return; 1980 } 1981 1982 if (Style.isVerilog()) { 1983 if (FormatTok->is(Keywords.kw_table)) { 1984 parseVerilogTable(); 1985 return; 1986 } 1987 if (Keywords.isVerilogBegin(*FormatTok) || 1988 Keywords.isVerilogHierarchy(*FormatTok)) { 1989 parseBlock(); 1990 addUnwrappedLine(); 1991 return; 1992 } 1993 } 1994 1995 if (!IsCpp && FormatTok->is(Keywords.kw_interface)) { 1996 if (parseStructLike()) 1997 return; 1998 break; 1999 } 2000 2001 if (IsCpp && FormatTok->is(TT_StatementMacro)) { 2002 parseStatementMacro(); 2003 return; 2004 } 2005 2006 // See if the following token should start a new unwrapped line. 2007 StringRef Text = FormatTok->TokenText; 2008 2009 FormatToken *PreviousToken = FormatTok; 2010 nextToken(); 2011 2012 // JS doesn't have macros, and within classes colons indicate fields, not 2013 // labels. 2014 if (Style.isJavaScript()) 2015 break; 2016 2017 auto OneTokenSoFar = [&]() { 2018 auto I = Line->Tokens.begin(), E = Line->Tokens.end(); 2019 while (I != E && I->Tok->is(tok::comment)) 2020 ++I; 2021 if (Style.isVerilog()) 2022 while (I != E && I->Tok->is(tok::hash)) 2023 ++I; 2024 return I != E && (++I == E); 2025 }; 2026 if (OneTokenSoFar()) { 2027 // Recognize function-like macro usages without trailing semicolon as 2028 // well as free-standing macros like Q_OBJECT. 2029 bool FunctionLike = FormatTok->is(tok::l_paren); 2030 if (FunctionLike) 2031 parseParens(); 2032 2033 bool FollowedByNewline = 2034 CommentsBeforeNextToken.empty() 2035 ? 
FormatTok->NewlinesBefore > 0 2036 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 2037 2038 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 2039 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) { 2040 if (PreviousToken->isNot(TT_UntouchableMacroFunc)) 2041 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro); 2042 addUnwrappedLine(); 2043 return; 2044 } 2045 } 2046 break; 2047 } 2048 case tok::equal: 2049 if ((Style.isJavaScript() || Style.isCSharp()) && 2050 FormatTok->is(TT_FatArrow)) { 2051 tryToParseChildBlock(); 2052 break; 2053 } 2054 2055 nextToken(); 2056 if (FormatTok->is(tok::l_brace)) { 2057 // Block kind should probably be set to BK_BracedInit for any language. 2058 // C# needs this change to ensure that array initialisers and object 2059 // initialisers are indented the same way. 2060 if (Style.isCSharp()) 2061 FormatTok->setBlockKind(BK_BracedInit); 2062 // TableGen's defset statement has syntax of the form, 2063 // `defset <type> <name> = { <statement>... }` 2064 if (Style.isTableGen() && 2065 Line->Tokens.begin()->Tok->is(Keywords.kw_defset)) { 2066 FormatTok->setFinalizedType(TT_FunctionLBrace); 2067 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2068 /*MunchSemi=*/false); 2069 addUnwrappedLine(); 2070 break; 2071 } 2072 nextToken(); 2073 parseBracedList(); 2074 } else if (Style.Language == FormatStyle::LK_Proto && 2075 FormatTok->is(tok::less)) { 2076 nextToken(); 2077 parseBracedList(/*IsAngleBracket=*/true); 2078 } 2079 break; 2080 case tok::l_square: 2081 parseSquare(); 2082 break; 2083 case tok::kw_new: 2084 parseNew(); 2085 break; 2086 case tok::kw_switch: 2087 if (Style.Language == FormatStyle::LK_Java) 2088 parseSwitch(/*IsExpr=*/true); 2089 else 2090 nextToken(); 2091 break; 2092 case tok::kw_case: 2093 // Proto: there are no switch/case statements. 2094 if (Style.Language == FormatStyle::LK_Proto) { 2095 nextToken(); 2096 return; 2097 } 2098 // In Verilog switch is called case. 
if (Style.isVerilog()) {
        parseBlock();
        addUnwrappedLine();
        return;
      }
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // 'case: string' field declaration.
        nextToken();
        break;
      }
      parseCaseLabel();
      break;
    case tok::kw_default:
      nextToken();
      if (Style.isVerilog()) {
        if (FormatTok->is(tok::colon)) {
          // The label will be handled in the next iteration.
          break;
        }
        if (FormatTok->is(Keywords.kw_clocking)) {
          // A default clocking block.
          parseBlock();
          addUnwrappedLine();
          return;
        }
        parseVerilogCaseLabel();
        return;
      }
      break;
    case tok::colon:
      nextToken();
      if (Style.isVerilog()) {
        parseVerilogCaseLabel();
        return;
      }
      break;
    case tok::greater:
      nextToken();
      if (FormatTok->is(tok::l_brace))
        FormatTok->Previous->setFinalizedType(TT_TemplateCloser);
      break;
    default:
      nextToken();
      break;
    }
  }
}

/// Tries to parse the braces starting at the current `{` as a C# property
/// accessor list.
///
/// Returns false, leaving the current token untouched, when the style is not
/// C#, when the token before the `{` is not an identifier, or when the
/// look-ahead finds no `get`/`init`/`set` keyword (the token source position
/// is restored in that case). Otherwise the accessor list is consumed and
/// true is returned.
bool UnwrappedLineParser::tryToParsePropertyAccessor() {
  assert(FormatTok->is(tok::l_brace));
  if (!Style.isCSharp())
    return false;
  // See if it's a property accessor.
  if (FormatTok->Previous->isNot(tok::identifier))
    return false;

  // See if we are inside a property accessor.
  //
  // Record the current tokenPosition so that we can advance and
  // reset the current token. `Next` is not set yet so we need
  // another way to advance along the token stream.
  unsigned int StoredPosition = Tokens->getPosition();
  FormatToken *Tok = Tokens->getNextToken();

  // A trivial property accessor is of the form:
  // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
  // Track these as they do not require line breaks to be introduced.
  bool HasSpecialAccessor = false;
  bool IsTrivialPropertyAccessor = true;
  // Look-ahead only: `Tok` walks the token source while `FormatTok` stays put.
  while (!eof()) {
    if (Tok->isAccessSpecifierKeyword() ||
        Tok->isOneOf(tok::semi, Keywords.kw_internal, Keywords.kw_get,
                     Keywords.kw_init, Keywords.kw_set)) {
      if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
        HasSpecialAccessor = true;
      Tok = Tokens->getNextToken();
      continue;
    }
    // The first token outside the trivial set ends the scan; the accessor is
    // trivial only if that token is the closing brace.
    if (Tok->isNot(tok::r_brace))
      IsTrivialPropertyAccessor = false;
    break;
  }

  if (!HasSpecialAccessor) {
    Tokens->setPosition(StoredPosition);
    return false;
  }

  // Try to parse the property accessor:
  // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
  Tokens->setPosition(StoredPosition);
  if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
    addUnwrappedLine();
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::r_brace:
      // End of the accessor list. A following `= ...;` is consumed up to and
      // including the semicolon so that it stays on the same unwrapped line.
      nextToken();
      if (FormatTok->is(tok::equal)) {
        while (!eof() && FormatTok->isNot(tok::semi))
          nextToken();
        nextToken();
      }
      addUnwrappedLine();
      return true;
    case tok::l_brace:
      // Accessor with a block body: parse it one level deeper.
      ++Line->Level;
      parseBlock(/*MustBeDeclaration=*/true);
      addUnwrappedLine();
      --Line->Level;
      break;
    case tok::equal:
      if (FormatTok->is(TT_FatArrow)) {
        // Accessor with a `=>` body: consume through the terminating
        // semicolon, one level deeper.
        ++Line->Level;
        do {
          nextToken();
        } while (!eof() && FormatTok->isNot(tok::semi));
        nextToken();
        addUnwrappedLine();
        --Line->Level;
        break;
      }
      nextToken();
      break;
    default:
      if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
                             Keywords.kw_set) &&
          !IsTrivialPropertyAccessor) {
        // Non-trivial get/set needs to be on its own line.
        addUnwrappedLine();
      }
      nextToken();
    }
  } while (!eof());

  // Unreachable for well-formed code (paired '{' and '}').
  return true;
}

/// Tries to parse a C++ lambda starting at the current `[`.
///
/// For non-C++ input the `[` is consumed and false is returned. False is also
/// returned when tryToParseLambdaIntroducer() rejects the introducer. After
/// an accepted introducer, tokens are scanned up to the next `{`. If a token
/// that this scan does not accept is met first, the function returns true
/// without marking anything as a lambda. Otherwise the `{` and `[` are typed
/// as TT_LambdaLBrace / TT_LambdaLSquare and the body is parsed as a child
/// block.
bool UnwrappedLineParser::tryToParseLambda() {
  assert(FormatTok->is(tok::l_square));
  if (!IsCpp) {
    nextToken();
    return false;
  }
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  bool SeenArrow = false;
  bool InTemplateParameterList = false;

  while (FormatTok->isNot(tok::l_brace)) {
    if (FormatTok->isTypeName(LangOpts) || FormatTok->isAttribute()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      break;
    case tok::l_paren:
      parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::less:
      assert(FormatTok->Previous);
      // A `<` directly after the introducer's `]` opens the lambda's template
      // parameter list.
      if (FormatTok->Previous->is(tok::r_square))
        InTemplateParameterList = true;
      nextToken();
      break;
    case tok::kw_auto:
    case tok::kw_class:
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_template:
    case tok::kw_typename:
    case tok::amp:
    case tok::star:
    case tok::kw_const:
    case tok::kw_constexpr:
    case tok::kw_consteval:
    case tok::comma:
    case tok::greater:
    case tok::identifier:
    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_mutable:
    case tok::kw_noexcept:
    case tok::kw_static:
      nextToken();
      break;
    // Specialization of a template with an integer parameter can contain
    // arithmetic, logical, comparison and ternary operators.
    //
    // FIXME: This also accepts sequences of operators that are not in the scope
    // of a template argument list.
    //
    // In a C++ lambda a template type can only occur after an arrow. We use
    // this as a heuristic to distinguish between Objective-C expressions
    // followed by an `a->b` expression, such as:
    // ([obj func:arg] + a->b)
    // Otherwise the code below would parse as a lambda.
    case tok::plus:
    case tok::minus:
    case tok::exclaim:
    case tok::tilde:
    case tok::slash:
    case tok::percent:
    case tok::lessless:
    case tok::pipe:
    case tok::pipepipe:
    case tok::ampamp:
    case tok::caret:
    case tok::equalequal:
    case tok::exclaimequal:
    case tok::greaterequal:
    case tok::lessequal:
    case tok::question:
    case tok::colon:
    case tok::ellipsis:
    case tok::kw_true:
    case tok::kw_false:
      if (SeenArrow || InTemplateParameterList) {
        nextToken();
        break;
      }
      return true;
    case tok::arrow:
      // This might or might not actually be a lambda arrow (this could be an
      // ObjC method invocation followed by a dereferencing arrow). We might
      // reset this back to TT_Unknown in TokenAnnotator.
      FormatTok->setFinalizedType(TT_LambdaArrow);
      SeenArrow = true;
      nextToken();
      break;
    case tok::kw_requires: {
      auto *RequiresToken = FormatTok;
      nextToken();
      parseRequiresClause(RequiresToken);
      break;
    }
    case tok::equal:
      // `=` is only accepted inside the template parameter list.
      if (!InTemplateParameterList)
        return true;
      nextToken();
      break;
    default:
      return true;
    }
  }

  FormatTok->setFinalizedType(TT_LambdaLBrace);
  LSquare.setFinalizedType(TT_LambdaLSquare);

  // Record the enclosing line's SeenDecltypeAuto for the duration of the body;
  // parseParens() reads NestedLambdas.back() when deciding about return
  // parentheses.
  NestedLambdas.push_back(Line->SeenDecltypeAuto);
  parseChildBlock();
  assert(!NestedLambdas.empty());
  NestedLambdas.pop_back();

  return true;
}

/// Consumes the current `[` and decides whether it can start a lambda
/// introducer.
///
/// Returns false (with only the `[` consumed) when
///  - the previous token has identifier info and is not one of `return`,
///    `co_await`, `co_yield`, `co_return`, or the previous token closes a
///    scope,
///  - the `[` is a C++ structured binding,
///  - the next token is another `[` or a literal, or
///  - the brackets are empty and followed by `>`.
/// Otherwise the square brackets are parsed as an introducer and true is
/// returned.
bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
  const FormatToken *Previous = FormatTok->Previous;
  const FormatToken *LeftSquare = FormatTok;
  nextToken();
  if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
                     !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
                                        tok::kw_co_yield, tok::kw_co_return)) ||
                    Previous->closesScope())) ||
      LeftSquare->isCppStructuredBinding(IsCpp)) {
    return false;
  }
  if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind()))
    return false;
  if (FormatTok->is(tok::r_square)) {
    const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
    if (Next->is(tok::greater))
      return false;
  }
  parseSquare(/*LambdaIntroducer=*/true);
  return true;
}

/// Parses a JavaScript/TypeScript function expression starting at the
/// `function` keyword: an optional `*`, an optional name, the parameter list,
/// an optional `: type` annotation, and finally the body as a child block.
/// Returns early if no `(` follows the (optional) name or if the declaration
/// ends in `;` instead of a body.
void UnwrappedLineParser::tryToParseJSFunction() {
  assert(FormatTok->is(Keywords.kw_function));
  if (FormatTok->is(Keywords.kw_async))
    nextToken();
  // Consume "function".
  nextToken();

  // Consume * (generator function). Treat it like C++'s overloaded operators.
  if (FormatTok->is(tok::star)) {
    FormatTok->setFinalizedType(TT_OverloadedOperator);
    nextToken();
  }

  // Consume function name.
  if (FormatTok->is(tok::identifier))
    nextToken();

  if (FormatTok->isNot(tok::l_paren))
    return;

  // Parse formal parameter list.
  parseParens();

  if (FormatTok->is(tok::colon)) {
    // Parse a type definition.
    nextToken();

    // Eat the type declaration. For braced inline object types, balance braces,
    // otherwise just parse until finding an l_brace for the function body.
    if (FormatTok->is(tok::l_brace))
      tryToParseBracedList();
    else
      while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
        nextToken();
  }

  if (FormatTok->is(tok::semi))
    return;

  parseChildBlock();
}

/// Parses the current `{` as a braced list unless its block kind is BK_Block.
///
/// Computes the brace kinds first if the current brace is still BK_Unknown.
/// Returns false, consuming nothing, for a BK_Block brace; otherwise consumes
/// the list and returns true.
bool UnwrappedLineParser::tryToParseBracedList() {
  if (FormatTok->is(BK_Unknown))
    calculateBraceTypes();
  assert(FormatTok->isNot(BK_Unknown));
  if (FormatTok->is(BK_Block))
    return false;
  nextToken();
  parseBracedList();
  return true;
}

/// Consumes a fat arrow (`=>`) and, if a `{` follows, parses it as a child
/// block. Returns whether a child block was parsed; the arrow is consumed in
/// either case.
bool UnwrappedLineParser::tryToParseChildBlock() {
  assert(Style.isJavaScript() || Style.isCSharp());
  assert(FormatTok->is(TT_FatArrow));
  // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
  // They always start an expression or a child block if followed by a curly
  // brace.
  nextToken();
  if (FormatTok->isNot(tok::l_brace))
    return false;
  parseChildBlock();
  return true;
}

/// Parses the contents of a braced list up to and including its closing
/// token. The opening token must already have been consumed.
///
/// \param IsAngleBracket The list is closed by `>` instead of `}`.
/// \param IsEnum The list is an enum body; its closing brace is marked as
/// BK_Block, and lines are broken after commas and before the closing brace
/// unless Style.AllowShortEnumsOnASingleLine is set.
///
/// Returns true if the closing token was reached without an error. Returns
/// false if a `;` was seen (outside JavaScript) or the input ended first; a
/// `;` aborts parsing immediately unless \p IsEnum is set.
bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
  assert(!IsAngleBracket || !IsEnum);
  bool HasError = false;

  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
  // replace this by using parseAssignmentExpression() inside.
  do {
    if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
        tryToParseChildBlock()) {
      continue;
    }
    if (Style.isJavaScript()) {
      if (FormatTok->is(Keywords.kw_function)) {
        tryToParseJSFunction();
        continue;
      }
      if (FormatTok->is(tok::l_brace)) {
        // Could be a method inside of a braced list `{a() { return 1; }}`.
        if (tryToParseBracedList())
          continue;
        parseChildBlock();
      }
    }
    if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) {
      if (IsEnum) {
        FormatTok->setBlockKind(BK_Block);
        if (!Style.AllowShortEnumsOnASingleLine)
          addUnwrappedLine();
      }
      nextToken();
      return !HasError;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_square:
      if (Style.isCSharp())
        parseSquare();
      else
        tryToParseLambda();
      break;
    case tok::l_paren:
      parseParens();
      // JavaScript can just have free standing methods and getters/setters in
      // object literals. Detect them by a "{" following ")".
      if (Style.isJavaScript()) {
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }
      break;
    case tok::l_brace:
      // Assume there are no blocks inside a braced init list apart
      // from the ones we explicitly parse out (like lambdas).
      FormatTok->setBlockKind(BK_BracedInit);
      if (!IsAngleBracket) {
        // A `>` directly before a nested `{` is typed as a template closer.
        auto *Prev = FormatTok->Previous;
        if (Prev && Prev->is(tok::greater))
          Prev->setFinalizedType(TT_TemplateCloser);
      }
      nextToken();
      parseBracedList();
      break;
    case tok::less:
      nextToken();
      if (IsAngleBracket)
        parseBracedList(/*IsAngleBracket=*/true);
      break;
    case tok::semi:
      // JavaScript (or more precisely TypeScript) can have semicolons in braced
      // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
      // used for error recovery if we have otherwise determined that this is
      // a braced list.
      if (Style.isJavaScript()) {
        nextToken();
        break;
      }
      HasError = true;
      if (!IsEnum)
        return false;
      nextToken();
      break;
    case tok::comma:
      nextToken();
      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
        addUnwrappedLine();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
  return false;
}

/// \brief Parses a pair of parentheses (and everything between them).
/// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
/// double ampersands. This applies for all nested scopes as well.
///
/// Returns whether there is a `=` token between the parentheses.
bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
  assert(FormatTok->is(tok::l_paren) && "'(' expected.");
  auto *LeftParen = FormatTok;
  bool SeenEqual = false;
  bool MightBeFoldExpr = false;
  // `({` – the parentheses may enclose a statement expression.
  const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace);
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_paren:
      if (parseParens(AmpAmpTokenType))
        SeenEqual = true;
      if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::r_paren: {
      auto *Prev = LeftParen->Previous;
      // Decide whether this pair may be marked optional (see
      // Style.RemoveParentheses). Skipped for possible statement or fold
      // expressions and inside macro bodies.
      if (!MightBeStmtExpr && !MightBeFoldExpr && !Line->InMacroBody &&
          Style.RemoveParentheses > FormatStyle::RPS_Leave) {
        const auto *Next = Tokens->peekNextToken();
        const bool DoubleParens =
            Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren);
        const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr;
        // Double parentheses are kept after `__attribute__` and `decltype`,
        // and around a condition containing `=` in `if`, `while` and
        // `if constexpr`.
        const bool Blacklisted =
            PrevPrev &&
            (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
             (SeenEqual &&
              (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
               PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if))));
        const bool ReturnParens =
            Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
            ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
             (!NestedLambdas.empty() && !NestedLambdas.back())) &&
            Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next &&
            Next->is(tok::semi);
        if ((DoubleParens && !Blacklisted) || ReturnParens) {
          LeftParen->Optional = true;
          FormatTok->Optional = true;
        }
      }
      if (Prev) {
        if (Prev->is(TT_TypenameMacro)) {
          LeftParen->setFinalizedType(TT_TypeDeclarationParen);
          FormatTok->setFinalizedType(TT_TypeDeclarationParen);
        } else if (Prev->is(tok::greater) && FormatTok->Previous == LeftParen) {
          // `>()` – empty parentheses directly after `>`.
          Prev->setFinalizedType(TT_TemplateCloser);
        }
      }
      nextToken();
      return SeenEqual;
    }
    case tok::r_brace:
      // A "}" inside parentheses is an error if there wasn't a matching "{".
      return SeenEqual;
    case tok::l_square:
      tryToParseLambda();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList())
        parseChildBlock();
      break;
    case tok::at:
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }
      break;
    case tok::ellipsis:
      MightBeFoldExpr = true;
      nextToken();
      break;
    case tok::equal:
      SeenEqual = true;
      if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
        tryToParseChildBlock();
      else
        nextToken();
      break;
    case tok::kw_class:
      if (Style.isJavaScript())
        parseRecord(/*ParseAsExpr=*/true);
      else
        nextToken();
      break;
    case tok::identifier:
      if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function)))
        tryToParseJSFunction();
      else
        nextToken();
      break;
    case tok::kw_switch:
      if (Style.Language == FormatStyle::LK_Java)
        parseSwitch(/*IsExpr=*/true);
      else
        nextToken();
      break;
    case tok::kw_requires: {
      auto RequiresToken = FormatTok;
      nextToken();
      parseRequiresExpression(RequiresToken);
      break;
    }
    case tok::ampamp:
      if (AmpAmpTokenType != TT_Unknown)
        FormatTok->setFinalizedType(AmpAmpTokenType);
      [[fallthrough]];
    default:
      nextToken();
      break;
    }
  } while (!eof());
  return SeenEqual;
}

/// Parses a pair of square brackets (and everything between them).
///
/// \param LambdaIntroducer If false, the current token must be `[` and a
/// lambda parse is attempted first; if that attempt returns true, nothing
/// else is parsed here. If true, the lambda attempt is skipped and tokens are
/// consumed up to and including the matching `]`.
///
/// Stops without consuming at an unmatched `}`, or at end of input.
void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
  if (!LambdaIntroducer) {
    assert(FormatTok->is(tok::l_square) && "'[' expected.");
    if (tryToParseLambda())
      return;
  }
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_paren:
      parseParens();
      break;
    case tok::r_square:
      nextToken();
      return;
    case tok::r_brace:
      // A "}" inside square brackets is an error if there wasn't a matching
      // "{".
      return;
    case tok::l_square:
      parseSquare();
      break;
    case tok::l_brace: {
      if (!tryToParseBracedList())
        parseChildBlock();
      break;
    }
    case tok::at:
    case tok::colon:
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

/// Pushes a new entry onto NestedTooDeep for the control statement being
/// parsed. Does nothing unless Style.RemoveBracesLLVM is set.
///
/// If the stack already holds at least MaxNestingLevels entries, the entry
/// that many levels up is flagged as nested too deep. The newly pushed entry
/// starts out as false.
void UnwrappedLineParser::keepAncestorBraces() {
  if (!Style.RemoveBracesLLVM)
    return;

  const int MaxNestingLevels = 2;
  const int Size = NestedTooDeep.size();
  if (Size >= MaxNestingLevels)
    NestedTooDeep[Size - MaxNestingLevels] = true;
  NestedTooDeep.push_back(false);
}

/// Returns the last token of \p Line that is not a comment, or nullptr if the
/// line has no such token.
static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
  for (const auto &Token : llvm::reverse(Line.Tokens))
    if (Token.Tok->isNot(tok::comment))
      return Token.Tok;

  return nullptr;
}

/// Parses the body of a control statement that is not enclosed in braces.
///
/// The body is parsed as one structural element on a new unwrapped line, one
/// level deeper. When Style.InsertBraces applies, BraceCount is set to -1 on
/// a token of the control statement's line and incremented on the last token
/// of the body.
///
/// \param CheckEOF If true and the input ends right after the body, the
/// current line is flushed before the level is restored.
void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
  FormatToken *Tok = nullptr;

  if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
      PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
    Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
              ? getLastNonComment(*Line)
              : Line->Tokens.back().Tok;
    assert(Tok);
    if (Tok->BraceCount < 0) {
      // This token was already marked; don't mark it again, and skip the
      // matching increment below.
      assert(Tok->BraceCount == -1);
      Tok = nullptr;
    } else {
      Tok->BraceCount = -1;
    }
  }

  addUnwrappedLine();
  ++Line->Level;
  ++Line->UnbracedBodyLevel;
  parseStructuralElement();
  --Line->UnbracedBodyLevel;

  if (Tok) {
    assert(!Line->InPPDirective);
    Tok = nullptr;
    // Find the most recent non-preprocessor line that has a non-comment
    // token and take that line's last token.
    for (const auto &L : llvm::reverse(*CurrentLines)) {
      if (!L.InPPDirective && getLastNonComment(L)) {
        Tok = L.Tokens.back().Tok;
        break;
      }
    }
    assert(Tok);
    ++Tok->BraceCount;
  }

  if (CheckEOF && eof())
    addUnwrappedLine();

  --Line->Level;
}

/// Marks \p LeftBrace and its matching right brace as optional.
///
/// Does nothing if \p LeftBrace is null or has no MatchingParen.
static void markOptionalBraces(FormatToken *LeftBrace) {
  if (!LeftBrace)
    return;

  assert(LeftBrace->is(tok::l_brace));

  FormatToken *RightBrace = LeftBrace->MatchingParen;
  if (!RightBrace) {
    assert(!LeftBrace->Optional);
    return;
  }

  assert(RightBrace->is(tok::r_brace));
  assert(RightBrace->MatchingParen == LeftBrace);
  assert(LeftBrace->Optional == RightBrace->Optional);

  LeftBrace->Optional = true;
  RightBrace->Optional = true;
}

/// Consumes an attribute at the current position, if any: either a single
/// attribute token or a `[`-introduced C++ attribute (see
/// handleCppAttributes()).
void UnwrappedLineParser::handleAttributes() {
  // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
  if (FormatTok->isAttribute())
    nextToken();
  else if (FormatTok->is(tok::l_square))
    handleCppAttributes();
}

/// Consumes the square brackets at the current `[` if
/// tryToParseSimpleAttribute() accepts them. Returns whether anything was
/// consumed.
bool UnwrappedLineParser::handleCppAttributes() {
  // Handle [[likely]] / [[unlikely]] attributes.
  assert(FormatTok->is(tok::l_square));
  if (!tryToParseSimpleAttribute())
    return false;
  parseSquare();
  return true;
}

/// Returns whether \c Tok begins a block.
bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
  // FIXME: rename the function or make
  // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
  return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
                           : Tok.is(tok::l_brace);
}

/// Parses an `if` statement (or, in Verilog, an assertion-like statement)
/// together with its optional `else` / `else if` chain.
///
/// \param IfKind If non-null, receives the kind of statement parsed (IfOnly,
/// IfElse or IfElseIf). It is only written when Style.RemoveBracesLLVM is
/// set and the function does not return early.
/// \param KeepBraces If true, the braces of this statement are not marked
/// optional.
/// \param IsVerilogAssert The statement is a Verilog assertion; `#0`,
/// `final`, `property` or `sequence` may follow the keyword and the "then"
/// action may be omitted.
///
/// Returns the left brace of the "then" block if there is one and
/// Style.RemoveBracesLLVM is set; otherwise nullptr.
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
                                                  bool KeepBraces,
                                                  bool IsVerilogAssert) {
  assert((FormatTok->is(tok::kw_if) ||
          (Style.isVerilog() &&
           FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
                              Keywords.kw_assume, Keywords.kw_cover))) &&
         "'if' expected");
  nextToken();

  if (IsVerilogAssert) {
    // Handle `assert #0` and `assert final`.
    if (FormatTok->is(Keywords.kw_verilogHash)) {
      nextToken();
      if (FormatTok->is(tok::numeric_constant))
        nextToken();
    } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
                                  Keywords.kw_sequence)) {
      nextToken();
    }
  }

  // TableGen's if statement has the form of `if <cond> then { ... }`.
  if (Style.isTableGen()) {
    while (!eof() && FormatTok->isNot(Keywords.kw_then)) {
      // Simply skip until then. This range only contains a value.
      nextToken();
    }
  }

  // Handle `if !consteval`.
  if (FormatTok->is(tok::exclaim))
    nextToken();

  bool KeepIfBraces = true;
  if (FormatTok->is(tok::kw_consteval)) {
    // `if consteval` leaves KeepIfBraces true and has no condition to parse.
    nextToken();
  } else {
    KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
    if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
      nextToken();
    if (FormatTok->is(tok::l_paren)) {
      FormatTok->setFinalizedType(TT_ConditionLParen);
      parseParens();
    }
  }
  handleAttributes();
  // The then action is optional in Verilog assert statements.
  if (IsVerilogAssert && FormatTok->is(tok::semi)) {
    nextToken();
    addUnwrappedLine();
    return nullptr;
  }

  bool NeedsUnwrappedLine = false;
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind IfBlockKind = IfStmtKind::NotIf;

  if (isBlockBegin(*FormatTok)) {
    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
               /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
    setPreviousRBraceType(TT_ControlStatementRBrace);
    // Without BeforeElse the line is only ended later, and only if no `else`
    // follows, so `} else` can share a line.
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
    addUnwrappedLine();
  } else {
    parseUnbracedBody();
  }

  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    // Keep the braces if the block is unterminated, if this statement is
    // nested too deep, or if the block body is an `if` without a plain `else`.
    KeepIfBraces = KeepIfBraces ||
                   (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
                   NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
                   IfBlockKind == IfStmtKind::IfElseIf;
  }

  bool KeepElseBraces = KeepIfBraces;
  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind Kind = IfStmtKind::IfOnly;

  if (FormatTok->is(tok::kw_else)) {
    if (Style.RemoveBracesLLVM) {
      NestedTooDeep.back() = false;
      Kind = IfStmtKind::IfElse;
    }
    nextToken();
    handleAttributes();
    if (isBlockBegin(*FormatTok)) {
      const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
      FormatTok->setFinalizedType(TT_ElseLBrace);
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
      FormatToken *IfLBrace =
          parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                     /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
      setPreviousRBraceType(TT_ElseRBrace);
      if (FormatTok->is(tok::kw_else)) {
        KeepElseBraces = KeepElseBraces ||
                         ElseBlockKind == IfStmtKind::IfOnly ||
                         ElseBlockKind == IfStmtKind::IfElseIf;
      } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
        // `else { if (...) { ... } }` where the inner braces are kept: the
        // braces of the else block itself are marked optional.
        KeepElseBraces = true;
        assert(ElseLeftBrace->MatchingParen);
        markOptionalBraces(ElseLeftBrace);
      }
      addUnwrappedLine();
    } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      const bool IsPrecededByComment = Previous->is(tok::comment);
      // A comment between `else` and `if` puts the `if` on its own line, one
      // level deeper.
      if (IsPrecededByComment) {
        addUnwrappedLine();
        ++Line->Level;
      }
      bool TooDeep = true;
      if (Style.RemoveBracesLLVM) {
        Kind = IfStmtKind::IfElseIf;
        // The nested call pushes and pops its own entry; take ours off the
        // stack for its duration and restore it afterwards.
        TooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(TooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      parseUnbracedBody(/*CheckEOF=*/true);
    }
  } else {
    KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
    if (NeedsUnwrappedLine)
      addUnwrappedLine();
  }

  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  KeepElseBraces = KeepElseBraces ||
                   (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
                   NestedTooDeep.back();

  NestedTooDeep.pop_back();

  if (!KeepIfBraces && !KeepElseBraces) {
    markOptionalBraces(IfLeftBrace);
    markOptionalBraces(ElseLeftBrace);
  } else if (IfLeftBrace) {
    // The braces are kept: unlink the pair.
    FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
    if (IfRightBrace) {
      assert(IfRightBrace->MatchingParen == IfLeftBrace);
      assert(!IfLeftBrace->Optional);
      assert(!IfRightBrace->Optional);
      IfLeftBrace->MatchingParen = nullptr;
      IfRightBrace->MatchingParen = nullptr;
    }
  }

  if (IfKind)
    *IfKind = Kind;

  return IfLeftBrace;
}

/// Parses a `try` / `__try` statement and all handlers that follow it.
///
/// Handles function-try blocks with a constructor initializer list, Java's
/// try-with-resources, and the handler keywords `catch`, `__except`,
/// `__finally`, Java/JavaScript `finally`, and Objective-C `@catch` /
/// `@finally`. If a handler keyword is followed by `;`, `}` or end of input
/// before any `{`, parsing stops immediately.
void UnwrappedLineParser::parseTryCatch() {
  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
  nextToken();
  bool NeedsUnwrappedLine = false;
  bool HasCtorInitializer = false;
  if (FormatTok->is(tok::colon)) {
    auto *Colon = FormatTok;
    // We are in a function try block, what comes is an initializer list.
    nextToken();
    if (FormatTok->is(tok::identifier)) {
      HasCtorInitializer = true;
      Colon->setFinalizedType(TT_CtorInitializerColon);
    }

    // In case identifiers were removed by clang-tidy, what might follow is
    // multiple commas in sequence - before the first identifier.
    while (FormatTok->is(tok::comma))
      nextToken();

    while (FormatTok->is(tok::identifier)) {
      nextToken();
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
      } else if (FormatTok->is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }

      // In case identifiers were removed by clang-tidy, what might follow is
      // multiple commas in sequence - after the first identifier.
      while (FormatTok->is(tok::comma))
        nextToken();
    }
  }
  // Parse try with resource.
  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
    parseParens();

  keepAncestorBraces();

  if (FormatTok->is(tok::l_brace)) {
    if (HasCtorInitializer)
      FormatTok->setFinalizedType(TT_FunctionLBrace);
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock();
    if (Style.BraceWrapping.BeforeCatch)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else if (FormatTok->isNot(tok::kw_catch)) {
    // The C++ standard requires a compound-statement after a try.
    // If there's none, we try to assume there's a structuralElement
    // and try to continue.
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }
  // One iteration per handler.
  while (true) {
    if (FormatTok->is(tok::at))
      nextToken();
    if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
                             tok::kw___finally) ||
          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
           FormatTok->is(Keywords.kw_finally)) ||
          (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
           FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
      break;
    }
    nextToken();
    while (FormatTok->isNot(tok::l_brace)) {
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
        continue;
      }
      if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
        // No handler body found; undo the push done by keepAncestorBraces()
        // before bailing out.
        if (Style.RemoveBracesLLVM)
          NestedTooDeep.pop_back();
        return;
      }
      nextToken();
    }
    NeedsUnwrappedLine = false;
    Line->MustBeDeclaration = false;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock();
    if (Style.BraceWrapping.BeforeCatch)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  }

  if (Style.RemoveBracesLLVM)
    NestedTooDeep.pop_back();

  if (NeedsUnwrappedLine)
    addUnwrappedLine();
}

/// Parses a namespace declaration, or a namespace macro invocation, and its
/// body.
///
/// The body is indented by one level if Style.NamespaceIndentation is NI_All,
/// or NI_Inner with more than one entry on DeclarationScopeStack. Nothing
/// further is parsed if no `{` follows the namespace name.
void UnwrappedLineParser::parseNamespace() {
  assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
         "'namespace' expected");

  const FormatToken &InitialToken = *FormatTok;
  nextToken();
  if (InitialToken.is(TT_NamespaceMacro)) {
    parseParens();
  } else {
    // Consume the tokens accepted between `namespace` and the `{`; square
    // brackets and parentheses are consumed as balanced groups.
    while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
                              tok::l_square, tok::period, tok::l_paren) ||
           (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
      if (FormatTok->is(tok::l_square))
        parseSquare();
      else if (FormatTok->is(tok::l_paren))
        parseParens();
      else
        nextToken();
    }
  }
  if (FormatTok->is(tok::l_brace)) {
    FormatTok->setFinalizedType(TT_NamespaceLBrace);

    if (ShouldBreakBeforeBrace(Style, InitialToken))
      addUnwrappedLine();

    unsigned AddLevels =
        Style.NamespaceIndentation == FormatStyle::NI_All ||
                (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
                 DeclarationScopeStack.size() > 1)
            ? 1u
            : 0u;
    bool ManageWhitesmithsBraces =
        AddLevels == 0u &&
        Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;

    // If we're in Whitesmiths mode, indent the brace if we're not indenting
    // the whole block.
    if (ManageWhitesmithsBraces)
      ++Line->Level;

    // Munch the semicolon after a namespace. This is more common than one would
    // think. Putting the semicolon into its own line is very ugly.
    parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
               /*KeepBraces=*/true, /*IfKind=*/nullptr,
               ManageWhitesmithsBraces);

    addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);

    if (ManageWhitesmithsBraces)
      --Line->Level;
  }
  // FIXME: Add error handling.
}

/// Parses what follows a `new` keyword.
///
/// For C#, consumes parenthesized arguments and braced initializers until a
/// `;` or `,` is reached (neither is consumed). For Java, consumes tokens up
/// to and including the constructor's parentheses and, if a `{` follows,
/// parses it as a child block. For all other languages only the `new`
/// keyword itself is consumed.
void UnwrappedLineParser::parseNew() {
  assert(FormatTok->is(tok::kw_new) && "'new' expected");
  nextToken();

  if (Style.isCSharp()) {
    do {
      // Handle constructor invocation, e.g. `new(field: value)`.
      if (FormatTok->is(tok::l_paren))
        parseParens();

      // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
      if (FormatTok->is(tok::l_brace))
        parseBracedList();

      if (FormatTok->isOneOf(tok::semi, tok::comma))
        return;

      nextToken();
    } while (!eof());
  }

  if (Style.Language != FormatStyle::LK_Java)
    return;

  // In Java, we can parse everything up to the parens, which aren't optional.
  do {
    // There should not be a ;, { or } before the new's open paren.
    if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
      return;

    // Consume the parens.
    if (FormatTok->is(tok::l_paren)) {
      parseParens();

      // If there is a class body of an anonymous class, consume that as child.
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      return;
    }
    nextToken();
  } while (!eof());
}

/// Parses the body of a loop, which is either a block or a single unbraced
/// statement.
///
/// \param KeepBraces If false, the braces of a block body are marked optional
/// unless the loop is flagged in NestedTooDeep, and the NestedTooDeep entry
/// is popped on exit.
/// \param WrapRightBrace If true, the current unwrapped line is ended after a
/// block body.
void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
  keepAncestorBraces();

  if (isBlockBegin(*FormatTok)) {
    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
    FormatToken *LeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
               /*MunchSemi=*/true, KeepBraces);
    setPreviousRBraceType(TT_ControlStatementRBrace);
    if (!KeepBraces) {
      assert(!NestedTooDeep.empty());
      if (!NestedTooDeep.back())
        markOptionalBraces(LeftBrace);
    }
    if (WrapRightBrace)
      addUnwrappedLine();
  } else {
    parseUnbracedBody();
  }

  if (!KeepBraces)
    NestedTooDeep.pop_back();
}

void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
  assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
          (Style.isVerilog() &&
           FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
                              Keywords.kw_always_ff, Keywords.kw_always_latch,
                              Keywords.kw_final, Keywords.kw_initial,
                              Keywords.kw_foreach, Keywords.kw_forever,
                              Keywords.kw_repeat))) &&
         "'for', 'while' or foreach macro expected");
  const bool KeepBraces = !Style.RemoveBracesLLVM ||
                          !FormatTok->isOneOf(tok::kw_for, tok::kw_while);

  nextToken();
  // JS' for await ( ...
3219 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await)) 3220 nextToken(); 3221 if (IsCpp && FormatTok->is(tok::kw_co_await)) 3222 nextToken(); 3223 if (HasParens && FormatTok->is(tok::l_paren)) { 3224 // The type is only set for Verilog basically because we were afraid to 3225 // change the existing behavior for loops. See the discussion on D121756 for 3226 // details. 3227 if (Style.isVerilog()) 3228 FormatTok->setFinalizedType(TT_ConditionLParen); 3229 parseParens(); 3230 } 3231 3232 if (Style.isVerilog()) { 3233 // Event control. 3234 parseVerilogSensitivityList(); 3235 } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) && 3236 Tokens->getPreviousToken()->is(tok::r_paren)) { 3237 nextToken(); 3238 addUnwrappedLine(); 3239 return; 3240 } 3241 3242 handleAttributes(); 3243 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true); 3244 } 3245 3246 void UnwrappedLineParser::parseDoWhile() { 3247 assert(FormatTok->is(tok::kw_do) && "'do' expected"); 3248 nextToken(); 3249 3250 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile); 3251 3252 // FIXME: Add error handling. 3253 if (FormatTok->isNot(tok::kw_while)) { 3254 addUnwrappedLine(); 3255 return; 3256 } 3257 3258 FormatTok->setFinalizedType(TT_DoWhile); 3259 3260 // If in Whitesmiths mode, the line with the while() needs to be indented 3261 // to the same level as the block. 
3262 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 3263 ++Line->Level; 3264 3265 nextToken(); 3266 parseStructuralElement(); 3267 } 3268 3269 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { 3270 nextToken(); 3271 unsigned OldLineLevel = Line->Level; 3272 3273 if (LeftAlignLabel) 3274 Line->Level = 0; 3275 else if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 3276 --Line->Level; 3277 3278 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && 3279 FormatTok->is(tok::l_brace)) { 3280 3281 CompoundStatementIndenter Indenter(this, Line->Level, 3282 Style.BraceWrapping.AfterCaseLabel, 3283 Style.BraceWrapping.IndentBraces); 3284 parseBlock(); 3285 if (FormatTok->is(tok::kw_break)) { 3286 if (Style.BraceWrapping.AfterControlStatement == 3287 FormatStyle::BWACS_Always) { 3288 addUnwrappedLine(); 3289 if (!Style.IndentCaseBlocks && 3290 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 3291 ++Line->Level; 3292 } 3293 } 3294 parseStructuralElement(); 3295 } 3296 addUnwrappedLine(); 3297 } else { 3298 if (FormatTok->is(tok::semi)) 3299 nextToken(); 3300 addUnwrappedLine(); 3301 } 3302 Line->Level = OldLineLevel; 3303 if (FormatTok->isNot(tok::l_brace)) { 3304 parseStructuralElement(); 3305 addUnwrappedLine(); 3306 } 3307 } 3308 3309 void UnwrappedLineParser::parseCaseLabel() { 3310 assert(FormatTok->is(tok::kw_case) && "'case' expected"); 3311 auto *Case = FormatTok; 3312 3313 // FIXME: fix handling of complex expressions here. 
3314 do { 3315 nextToken(); 3316 if (FormatTok->is(tok::colon)) { 3317 FormatTok->setFinalizedType(TT_CaseLabelColon); 3318 break; 3319 } 3320 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::arrow)) { 3321 FormatTok->setFinalizedType(TT_CaseLabelArrow); 3322 Case->setFinalizedType(TT_SwitchExpressionLabel); 3323 break; 3324 } 3325 } while (!eof()); 3326 parseLabel(); 3327 } 3328 3329 void UnwrappedLineParser::parseSwitch(bool IsExpr) { 3330 assert(FormatTok->is(tok::kw_switch) && "'switch' expected"); 3331 nextToken(); 3332 if (FormatTok->is(tok::l_paren)) 3333 parseParens(); 3334 3335 keepAncestorBraces(); 3336 3337 if (FormatTok->is(tok::l_brace)) { 3338 CompoundStatementIndenter Indenter(this, Style, Line->Level); 3339 FormatTok->setFinalizedType(IsExpr ? TT_SwitchExpressionLBrace 3340 : TT_ControlStatementLBrace); 3341 if (IsExpr) 3342 parseChildBlock(); 3343 else 3344 parseBlock(); 3345 setPreviousRBraceType(TT_ControlStatementRBrace); 3346 if (!IsExpr) 3347 addUnwrappedLine(); 3348 } else { 3349 addUnwrappedLine(); 3350 ++Line->Level; 3351 parseStructuralElement(); 3352 --Line->Level; 3353 } 3354 3355 if (Style.RemoveBracesLLVM) 3356 NestedTooDeep.pop_back(); 3357 } 3358 3359 // Operators that can follow a C variable. 
3360 static bool isCOperatorFollowingVar(tok::TokenKind Kind) { 3361 switch (Kind) { 3362 case tok::ampamp: 3363 case tok::ampequal: 3364 case tok::arrow: 3365 case tok::caret: 3366 case tok::caretequal: 3367 case tok::comma: 3368 case tok::ellipsis: 3369 case tok::equal: 3370 case tok::equalequal: 3371 case tok::exclaim: 3372 case tok::exclaimequal: 3373 case tok::greater: 3374 case tok::greaterequal: 3375 case tok::greatergreater: 3376 case tok::greatergreaterequal: 3377 case tok::l_paren: 3378 case tok::l_square: 3379 case tok::less: 3380 case tok::lessequal: 3381 case tok::lessless: 3382 case tok::lesslessequal: 3383 case tok::minus: 3384 case tok::minusequal: 3385 case tok::minusminus: 3386 case tok::percent: 3387 case tok::percentequal: 3388 case tok::period: 3389 case tok::pipe: 3390 case tok::pipeequal: 3391 case tok::pipepipe: 3392 case tok::plus: 3393 case tok::plusequal: 3394 case tok::plusplus: 3395 case tok::question: 3396 case tok::r_brace: 3397 case tok::r_paren: 3398 case tok::r_square: 3399 case tok::semi: 3400 case tok::slash: 3401 case tok::slashequal: 3402 case tok::star: 3403 case tok::starequal: 3404 return true; 3405 default: 3406 return false; 3407 } 3408 } 3409 3410 void UnwrappedLineParser::parseAccessSpecifier() { 3411 FormatToken *AccessSpecifierCandidate = FormatTok; 3412 nextToken(); 3413 // Understand Qt's slots. 3414 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 3415 nextToken(); 3416 // Otherwise, we don't know what it is, and we'd better keep the next token. 3417 if (FormatTok->is(tok::colon)) { 3418 nextToken(); 3419 addUnwrappedLine(); 3420 } else if (FormatTok->isNot(tok::coloncolon) && 3421 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) { 3422 // Not a variable name nor namespace name. 3423 addUnwrappedLine(); 3424 } else if (AccessSpecifierCandidate) { 3425 // Consider the access specifier to be a C identifier. 
3426 AccessSpecifierCandidate->Tok.setKind(tok::identifier); 3427 } 3428 } 3429 3430 /// \brief Parses a requires, decides if it is a clause or an expression. 3431 /// \pre The current token has to be the requires keyword. 3432 /// \returns true if it parsed a clause. 3433 bool UnwrappedLineParser::parseRequires() { 3434 assert(FormatTok->is(tok::kw_requires) && "'requires' expected"); 3435 auto RequiresToken = FormatTok; 3436 3437 // We try to guess if it is a requires clause, or a requires expression. For 3438 // that we first consume the keyword and check the next token. 3439 nextToken(); 3440 3441 switch (FormatTok->Tok.getKind()) { 3442 case tok::l_brace: 3443 // This can only be an expression, never a clause. 3444 parseRequiresExpression(RequiresToken); 3445 return false; 3446 case tok::l_paren: 3447 // Clauses and expression can start with a paren, it's unclear what we have. 3448 break; 3449 default: 3450 // All other tokens can only be a clause. 3451 parseRequiresClause(RequiresToken); 3452 return true; 3453 } 3454 3455 // Looking forward we would have to decide if there are function declaration 3456 // like arguments to the requires expression: 3457 // requires (T t) { 3458 // Or there is a constraint expression for the requires clause: 3459 // requires (C<T> && ... 3460 3461 // But first let's look behind. 3462 auto *PreviousNonComment = RequiresToken->getPreviousNonComment(); 3463 3464 if (!PreviousNonComment || 3465 PreviousNonComment->is(TT_RequiresExpressionLBrace)) { 3466 // If there is no token, or an expression left brace, we are a requires 3467 // clause within a requires expression. 3468 parseRequiresClause(RequiresToken); 3469 return true; 3470 } 3471 3472 switch (PreviousNonComment->Tok.getKind()) { 3473 case tok::greater: 3474 case tok::r_paren: 3475 case tok::kw_noexcept: 3476 case tok::kw_const: 3477 // This is a requires clause. 
3478 parseRequiresClause(RequiresToken); 3479 return true; 3480 case tok::amp: 3481 case tok::ampamp: { 3482 // This can be either: 3483 // if (... && requires (T t) ...) 3484 // Or 3485 // void member(...) && requires (C<T> ... 3486 // We check the one token before that for a const: 3487 // void member(...) const && requires (C<T> ... 3488 auto PrevPrev = PreviousNonComment->getPreviousNonComment(); 3489 if (PrevPrev && PrevPrev->is(tok::kw_const)) { 3490 parseRequiresClause(RequiresToken); 3491 return true; 3492 } 3493 break; 3494 } 3495 default: 3496 if (PreviousNonComment->isTypeOrIdentifier(LangOpts)) { 3497 // This is a requires clause. 3498 parseRequiresClause(RequiresToken); 3499 return true; 3500 } 3501 // It's an expression. 3502 parseRequiresExpression(RequiresToken); 3503 return false; 3504 } 3505 3506 // Now we look forward and try to check if the paren content is a parameter 3507 // list. The parameters can be cv-qualified and contain references or 3508 // pointers. 3509 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds 3510 // of stuff: typename, const, *, &, &&, ::, identifiers. 3511 3512 unsigned StoredPosition = Tokens->getPosition(); 3513 FormatToken *NextToken = Tokens->getNextToken(); 3514 int Lookahead = 0; 3515 auto PeekNext = [&Lookahead, &NextToken, this] { 3516 ++Lookahead; 3517 NextToken = Tokens->getNextToken(); 3518 }; 3519 3520 bool FoundType = false; 3521 bool LastWasColonColon = false; 3522 int OpenAngles = 0; 3523 3524 for (; Lookahead < 50; PeekNext()) { 3525 switch (NextToken->Tok.getKind()) { 3526 case tok::kw_volatile: 3527 case tok::kw_const: 3528 case tok::comma: 3529 if (OpenAngles == 0) { 3530 FormatTok = Tokens->setPosition(StoredPosition); 3531 parseRequiresExpression(RequiresToken); 3532 return false; 3533 } 3534 break; 3535 case tok::eof: 3536 // Break out of the loop. 
3537 Lookahead = 50; 3538 break; 3539 case tok::coloncolon: 3540 LastWasColonColon = true; 3541 break; 3542 case tok::kw_decltype: 3543 case tok::identifier: 3544 if (FoundType && !LastWasColonColon && OpenAngles == 0) { 3545 FormatTok = Tokens->setPosition(StoredPosition); 3546 parseRequiresExpression(RequiresToken); 3547 return false; 3548 } 3549 FoundType = true; 3550 LastWasColonColon = false; 3551 break; 3552 case tok::less: 3553 ++OpenAngles; 3554 break; 3555 case tok::greater: 3556 --OpenAngles; 3557 break; 3558 default: 3559 if (NextToken->isTypeName(LangOpts)) { 3560 FormatTok = Tokens->setPosition(StoredPosition); 3561 parseRequiresExpression(RequiresToken); 3562 return false; 3563 } 3564 break; 3565 } 3566 } 3567 // This seems to be a complicated expression, just assume it's a clause. 3568 FormatTok = Tokens->setPosition(StoredPosition); 3569 parseRequiresClause(RequiresToken); 3570 return true; 3571 } 3572 3573 /// \brief Parses a requires clause. 3574 /// \param RequiresToken The requires keyword token, which starts this clause. 3575 /// \pre We need to be on the next token after the requires keyword. 3576 /// \sa parseRequiresExpression 3577 /// 3578 /// Returns if it either has finished parsing the clause, or it detects, that 3579 /// the clause is incorrect. 3580 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) { 3581 assert(FormatTok->getPreviousNonComment() == RequiresToken); 3582 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); 3583 3584 // If there is no previous token, we are within a requires expression, 3585 // otherwise we will always have the template or function declaration in front 3586 // of it. 3587 bool InRequiresExpression = 3588 !RequiresToken->Previous || 3589 RequiresToken->Previous->is(TT_RequiresExpressionLBrace); 3590 3591 RequiresToken->setFinalizedType(InRequiresExpression 3592 ? 
TT_RequiresClauseInARequiresExpression 3593 : TT_RequiresClause); 3594 3595 // NOTE: parseConstraintExpression is only ever called from this function. 3596 // It could be inlined into here. 3597 parseConstraintExpression(); 3598 3599 if (!InRequiresExpression) 3600 FormatTok->Previous->ClosesRequiresClause = true; 3601 } 3602 3603 /// \brief Parses a requires expression. 3604 /// \param RequiresToken The requires keyword token, which starts this clause. 3605 /// \pre We need to be on the next token after the requires keyword. 3606 /// \sa parseRequiresClause 3607 /// 3608 /// Returns if it either has finished parsing the expression, or it detects, 3609 /// that the expression is incorrect. 3610 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) { 3611 assert(FormatTok->getPreviousNonComment() == RequiresToken); 3612 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); 3613 3614 RequiresToken->setFinalizedType(TT_RequiresExpression); 3615 3616 if (FormatTok->is(tok::l_paren)) { 3617 FormatTok->setFinalizedType(TT_RequiresExpressionLParen); 3618 parseParens(); 3619 } 3620 3621 if (FormatTok->is(tok::l_brace)) { 3622 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace); 3623 parseChildBlock(); 3624 } 3625 } 3626 3627 /// \brief Parses a constraint expression. 3628 /// 3629 /// This is the body of a requires clause. It returns, when the parsing is 3630 /// complete, or the expression is incorrect. 3631 void UnwrappedLineParser::parseConstraintExpression() { 3632 // The special handling for lambdas is needed since tryToParseLambda() eats a 3633 // token and if a requires expression is the last part of a requires clause 3634 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is 3635 // not set on the correct token. Thus we need to be aware if we even expect a 3636 // lambda to be possible. 3637 // template <typename T> requires requires { ... 
} [[nodiscard]] ...; 3638 bool LambdaNextTimeAllowed = true; 3639 3640 // Within lambda declarations, it is permitted to put a requires clause after 3641 // its template parameter list, which would place the requires clause right 3642 // before the parentheses of the parameters of the lambda declaration. Thus, 3643 // we track if we expect to see grouping parentheses at all. 3644 // Without this check, `requires foo<T> (T t)` in the below example would be 3645 // seen as the whole requires clause, accidentally eating the parameters of 3646 // the lambda. 3647 // [&]<typename T> requires foo<T> (T t) { ... }; 3648 bool TopLevelParensAllowed = true; 3649 3650 do { 3651 bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false); 3652 3653 switch (FormatTok->Tok.getKind()) { 3654 case tok::kw_requires: { 3655 auto RequiresToken = FormatTok; 3656 nextToken(); 3657 parseRequiresExpression(RequiresToken); 3658 break; 3659 } 3660 3661 case tok::l_paren: 3662 if (!TopLevelParensAllowed) 3663 return; 3664 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator); 3665 TopLevelParensAllowed = false; 3666 break; 3667 3668 case tok::l_square: 3669 if (!LambdaThisTimeAllowed || !tryToParseLambda()) 3670 return; 3671 break; 3672 3673 case tok::kw_const: 3674 case tok::semi: 3675 case tok::kw_class: 3676 case tok::kw_struct: 3677 case tok::kw_union: 3678 return; 3679 3680 case tok::l_brace: 3681 // Potential function body. 
3682 return; 3683 3684 case tok::ampamp: 3685 case tok::pipepipe: 3686 FormatTok->setFinalizedType(TT_BinaryOperator); 3687 nextToken(); 3688 LambdaNextTimeAllowed = true; 3689 TopLevelParensAllowed = true; 3690 break; 3691 3692 case tok::comma: 3693 case tok::comment: 3694 LambdaNextTimeAllowed = LambdaThisTimeAllowed; 3695 nextToken(); 3696 break; 3697 3698 case tok::kw_sizeof: 3699 case tok::greater: 3700 case tok::greaterequal: 3701 case tok::greatergreater: 3702 case tok::less: 3703 case tok::lessequal: 3704 case tok::lessless: 3705 case tok::equalequal: 3706 case tok::exclaim: 3707 case tok::exclaimequal: 3708 case tok::plus: 3709 case tok::minus: 3710 case tok::star: 3711 case tok::slash: 3712 LambdaNextTimeAllowed = true; 3713 TopLevelParensAllowed = true; 3714 // Just eat them. 3715 nextToken(); 3716 break; 3717 3718 case tok::numeric_constant: 3719 case tok::coloncolon: 3720 case tok::kw_true: 3721 case tok::kw_false: 3722 TopLevelParensAllowed = false; 3723 // Just eat them. 3724 nextToken(); 3725 break; 3726 3727 case tok::kw_static_cast: 3728 case tok::kw_const_cast: 3729 case tok::kw_reinterpret_cast: 3730 case tok::kw_dynamic_cast: 3731 nextToken(); 3732 if (FormatTok->isNot(tok::less)) 3733 return; 3734 3735 nextToken(); 3736 parseBracedList(/*IsAngleBracket=*/true); 3737 break; 3738 3739 default: 3740 if (!FormatTok->Tok.getIdentifierInfo()) { 3741 // Identifiers are part of the default case, we check for more then 3742 // tok::identifier to handle builtin type traits. 3743 return; 3744 } 3745 3746 // We need to differentiate identifiers for a template deduction guide, 3747 // variables, or function return types (the constraint expression has 3748 // ended before that), and basically all other cases. But it's easier to 3749 // check the other way around. 3750 assert(FormatTok->Previous); 3751 switch (FormatTok->Previous->Tok.getKind()) { 3752 case tok::coloncolon: // Nested identifier. 
3753 case tok::ampamp: // Start of a function or variable for the 3754 case tok::pipepipe: // constraint expression. (binary) 3755 case tok::exclaim: // The same as above, but unary. 3756 case tok::kw_requires: // Initial identifier of a requires clause. 3757 case tok::equal: // Initial identifier of a concept declaration. 3758 break; 3759 default: 3760 return; 3761 } 3762 3763 // Read identifier with optional template declaration. 3764 nextToken(); 3765 if (FormatTok->is(tok::less)) { 3766 nextToken(); 3767 parseBracedList(/*IsAngleBracket=*/true); 3768 } 3769 TopLevelParensAllowed = false; 3770 break; 3771 } 3772 } while (!eof()); 3773 } 3774 3775 bool UnwrappedLineParser::parseEnum() { 3776 const FormatToken &InitialToken = *FormatTok; 3777 3778 // Won't be 'enum' for NS_ENUMs. 3779 if (FormatTok->is(tok::kw_enum)) 3780 nextToken(); 3781 3782 // In TypeScript, "enum" can also be used as property name, e.g. in interface 3783 // declarations. An "enum" keyword followed by a colon would be a syntax 3784 // error and thus assume it is just an identifier. 3785 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question)) 3786 return false; 3787 3788 // In protobuf, "enum" can be used as a field name. 3789 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) 3790 return false; 3791 3792 if (IsCpp) { 3793 // Eat up enum class ... 3794 if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct)) 3795 nextToken(); 3796 while (FormatTok->is(tok::l_square)) 3797 if (!handleCppAttributes()) 3798 return false; 3799 } 3800 3801 while (FormatTok->Tok.getIdentifierInfo() || 3802 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 3803 tok::greater, tok::comma, tok::question, 3804 tok::l_square)) { 3805 if (Style.isVerilog()) { 3806 FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName); 3807 nextToken(); 3808 // In Verilog the base type can have dimensions. 
3809 while (FormatTok->is(tok::l_square)) 3810 parseSquare(); 3811 } else { 3812 nextToken(); 3813 } 3814 // We can have macros or attributes in between 'enum' and the enum name. 3815 if (FormatTok->is(tok::l_paren)) 3816 parseParens(); 3817 if (FormatTok->is(tok::identifier)) { 3818 nextToken(); 3819 // If there are two identifiers in a row, this is likely an elaborate 3820 // return type. In Java, this can be "implements", etc. 3821 if (IsCpp && FormatTok->is(tok::identifier)) 3822 return false; 3823 } 3824 } 3825 3826 // Just a declaration or something is wrong. 3827 if (FormatTok->isNot(tok::l_brace)) 3828 return true; 3829 FormatTok->setFinalizedType(TT_EnumLBrace); 3830 FormatTok->setBlockKind(BK_Block); 3831 3832 if (Style.Language == FormatStyle::LK_Java) { 3833 // Java enums are different. 3834 parseJavaEnumBody(); 3835 return true; 3836 } 3837 if (Style.Language == FormatStyle::LK_Proto) { 3838 parseBlock(/*MustBeDeclaration=*/true); 3839 return true; 3840 } 3841 3842 if (!Style.AllowShortEnumsOnASingleLine && 3843 ShouldBreakBeforeBrace(Style, InitialToken)) { 3844 addUnwrappedLine(); 3845 } 3846 // Parse enum body. 3847 nextToken(); 3848 if (!Style.AllowShortEnumsOnASingleLine) { 3849 addUnwrappedLine(); 3850 Line->Level += 1; 3851 } 3852 bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true); 3853 if (!Style.AllowShortEnumsOnASingleLine) 3854 Line->Level -= 1; 3855 if (HasError) { 3856 if (FormatTok->is(tok::semi)) 3857 nextToken(); 3858 addUnwrappedLine(); 3859 } 3860 setPreviousRBraceType(TT_EnumRBrace); 3861 return true; 3862 3863 // There is no addUnwrappedLine() here so that we fall through to parsing a 3864 // structural element afterwards. Thus, in "enum A {} n, m;", 3865 // "} n, m;" will end up in one unwrapped line. 
3866 } 3867 3868 bool UnwrappedLineParser::parseStructLike() { 3869 // parseRecord falls through and does not yet add an unwrapped line as a 3870 // record declaration or definition can start a structural element. 3871 parseRecord(); 3872 // This does not apply to Java, JavaScript and C#. 3873 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 3874 Style.isCSharp()) { 3875 if (FormatTok->is(tok::semi)) 3876 nextToken(); 3877 addUnwrappedLine(); 3878 return true; 3879 } 3880 return false; 3881 } 3882 3883 namespace { 3884 // A class used to set and restore the Token position when peeking 3885 // ahead in the token source. 3886 class ScopedTokenPosition { 3887 unsigned StoredPosition; 3888 FormatTokenSource *Tokens; 3889 3890 public: 3891 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) { 3892 assert(Tokens && "Tokens expected to not be null"); 3893 StoredPosition = Tokens->getPosition(); 3894 } 3895 3896 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); } 3897 }; 3898 } // namespace 3899 3900 // Look to see if we have [[ by looking ahead, if 3901 // its not then rewind to the original position. 3902 bool UnwrappedLineParser::tryToParseSimpleAttribute() { 3903 ScopedTokenPosition AutoPosition(Tokens); 3904 FormatToken *Tok = Tokens->getNextToken(); 3905 // We already read the first [ check for the second. 3906 if (Tok->isNot(tok::l_square)) 3907 return false; 3908 // Double check that the attribute is just something 3909 // fairly simple. 
3910 while (Tok->isNot(tok::eof)) { 3911 if (Tok->is(tok::r_square)) 3912 break; 3913 Tok = Tokens->getNextToken(); 3914 } 3915 if (Tok->is(tok::eof)) 3916 return false; 3917 Tok = Tokens->getNextToken(); 3918 if (Tok->isNot(tok::r_square)) 3919 return false; 3920 Tok = Tokens->getNextToken(); 3921 if (Tok->is(tok::semi)) 3922 return false; 3923 return true; 3924 } 3925 3926 void UnwrappedLineParser::parseJavaEnumBody() { 3927 assert(FormatTok->is(tok::l_brace)); 3928 const FormatToken *OpeningBrace = FormatTok; 3929 3930 // Determine whether the enum is simple, i.e. does not have a semicolon or 3931 // constants with class bodies. Simple enums can be formatted like braced 3932 // lists, contracted to a single line, etc. 3933 unsigned StoredPosition = Tokens->getPosition(); 3934 bool IsSimple = true; 3935 FormatToken *Tok = Tokens->getNextToken(); 3936 while (Tok->isNot(tok::eof)) { 3937 if (Tok->is(tok::r_brace)) 3938 break; 3939 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 3940 IsSimple = false; 3941 break; 3942 } 3943 // FIXME: This will also mark enums with braces in the arguments to enum 3944 // constants as "not simple". This is probably fine in practice, though. 3945 Tok = Tokens->getNextToken(); 3946 } 3947 FormatTok = Tokens->setPosition(StoredPosition); 3948 3949 if (IsSimple) { 3950 nextToken(); 3951 parseBracedList(); 3952 addUnwrappedLine(); 3953 return; 3954 } 3955 3956 // Parse the body of a more complex enum. 3957 // First add a line for everything up to the "{". 3958 nextToken(); 3959 addUnwrappedLine(); 3960 ++Line->Level; 3961 3962 // Parse the enum constants. 3963 while (!eof()) { 3964 if (FormatTok->is(tok::l_brace)) { 3965 // Parse the constant's class body. 
3966 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u, 3967 /*MunchSemi=*/false); 3968 } else if (FormatTok->is(tok::l_paren)) { 3969 parseParens(); 3970 } else if (FormatTok->is(tok::comma)) { 3971 nextToken(); 3972 addUnwrappedLine(); 3973 } else if (FormatTok->is(tok::semi)) { 3974 nextToken(); 3975 addUnwrappedLine(); 3976 break; 3977 } else if (FormatTok->is(tok::r_brace)) { 3978 addUnwrappedLine(); 3979 break; 3980 } else { 3981 nextToken(); 3982 } 3983 } 3984 3985 // Parse the class body after the enum's ";" if any. 3986 parseLevel(OpeningBrace); 3987 nextToken(); 3988 --Line->Level; 3989 addUnwrappedLine(); 3990 } 3991 3992 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 3993 const FormatToken &InitialToken = *FormatTok; 3994 nextToken(); 3995 3996 const FormatToken *ClassName = nullptr; 3997 bool IsDerived = false; 3998 auto IsNonMacroIdentifier = [](const FormatToken *Tok) { 3999 return Tok->is(tok::identifier) && Tok->TokenText != Tok->TokenText.upper(); 4000 }; 4001 // JavaScript/TypeScript supports anonymous classes like: 4002 // a = class extends foo { } 4003 bool JSPastExtendsOrImplements = false; 4004 // The actual identifier can be a nested name specifier, and in macros 4005 // it is often token-pasted. 4006 // An [[attribute]] can be before the identifier. 
4007 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 4008 tok::kw_alignas, tok::l_square) || 4009 FormatTok->isAttribute() || 4010 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 4011 FormatTok->isOneOf(tok::period, tok::comma))) { 4012 if (Style.isJavaScript() && 4013 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 4014 JSPastExtendsOrImplements = true; 4015 // JavaScript/TypeScript supports inline object types in 4016 // extends/implements positions: 4017 // class Foo implements {bar: number} { } 4018 nextToken(); 4019 if (FormatTok->is(tok::l_brace)) { 4020 tryToParseBracedList(); 4021 continue; 4022 } 4023 } 4024 if (FormatTok->is(tok::l_square) && handleCppAttributes()) 4025 continue; 4026 const auto *Previous = FormatTok; 4027 nextToken(); 4028 switch (FormatTok->Tok.getKind()) { 4029 case tok::l_paren: 4030 // We can have macros in between 'class' and the class name. 4031 if (!IsNonMacroIdentifier(Previous) || 4032 // e.g. 
`struct macro(a) S { int i; };` 4033 Previous->Previous == &InitialToken) { 4034 parseParens(); 4035 } 4036 break; 4037 case tok::coloncolon: 4038 case tok::hashhash: 4039 break; 4040 default: 4041 if (!JSPastExtendsOrImplements && !ClassName && 4042 Previous->is(tok::identifier) && Previous->isNot(TT_AttributeMacro)) { 4043 ClassName = Previous; 4044 } 4045 } 4046 } 4047 4048 auto IsListInitialization = [&] { 4049 if (!ClassName || IsDerived) 4050 return false; 4051 assert(FormatTok->is(tok::l_brace)); 4052 const auto *Prev = FormatTok->getPreviousNonComment(); 4053 assert(Prev); 4054 return Prev != ClassName && Prev->is(tok::identifier) && 4055 Prev->isNot(Keywords.kw_final) && tryToParseBracedList(); 4056 }; 4057 4058 if (FormatTok->isOneOf(tok::colon, tok::less)) { 4059 int AngleNestingLevel = 0; 4060 do { 4061 if (FormatTok->is(tok::less)) 4062 ++AngleNestingLevel; 4063 else if (FormatTok->is(tok::greater)) 4064 --AngleNestingLevel; 4065 4066 if (AngleNestingLevel == 0) { 4067 if (FormatTok->is(tok::colon)) { 4068 IsDerived = true; 4069 } else if (FormatTok->is(tok::identifier) && 4070 FormatTok->Previous->is(tok::coloncolon)) { 4071 ClassName = FormatTok; 4072 } else if (FormatTok->is(tok::l_paren) && 4073 IsNonMacroIdentifier(FormatTok->Previous)) { 4074 break; 4075 } 4076 } 4077 if (FormatTok->is(tok::l_brace)) { 4078 if (AngleNestingLevel == 0 && IsListInitialization()) 4079 return; 4080 calculateBraceTypes(/*ExpectClassBody=*/true); 4081 if (!tryToParseBracedList()) 4082 break; 4083 } 4084 if (FormatTok->is(tok::l_square)) { 4085 FormatToken *Previous = FormatTok->Previous; 4086 if (!Previous || (Previous->isNot(tok::r_paren) && 4087 !Previous->isTypeOrIdentifier(LangOpts))) { 4088 // Don't try parsing a lambda if we had a closing parenthesis before, 4089 // it was probably a pointer to an array: int (*)[]. 
4090 if (!tryToParseLambda()) 4091 continue; 4092 } else { 4093 parseSquare(); 4094 continue; 4095 } 4096 } 4097 if (FormatTok->is(tok::semi)) 4098 return; 4099 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) { 4100 addUnwrappedLine(); 4101 nextToken(); 4102 parseCSharpGenericTypeConstraint(); 4103 break; 4104 } 4105 nextToken(); 4106 } while (!eof()); 4107 } 4108 4109 auto GetBraceTypes = 4110 [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> { 4111 switch (RecordTok.Tok.getKind()) { 4112 case tok::kw_class: 4113 return {TT_ClassLBrace, TT_ClassRBrace}; 4114 case tok::kw_struct: 4115 return {TT_StructLBrace, TT_StructRBrace}; 4116 case tok::kw_union: 4117 return {TT_UnionLBrace, TT_UnionRBrace}; 4118 default: 4119 // Useful for e.g. interface. 4120 return {TT_RecordLBrace, TT_RecordRBrace}; 4121 } 4122 }; 4123 if (FormatTok->is(tok::l_brace)) { 4124 if (IsListInitialization()) 4125 return; 4126 auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken); 4127 FormatTok->setFinalizedType(OpenBraceType); 4128 if (ParseAsExpr) { 4129 parseChildBlock(); 4130 } else { 4131 if (ShouldBreakBeforeBrace(Style, InitialToken)) 4132 addUnwrappedLine(); 4133 4134 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u; 4135 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false); 4136 } 4137 setPreviousRBraceType(ClosingBraceType); 4138 } 4139 // There is no addUnwrappedLine() here so that we fall through to parsing a 4140 // structural element afterwards. Thus, in "class A {} n, m;", 4141 // "} n, m;" will end up in one unwrapped line. 
4142 } 4143 4144 void UnwrappedLineParser::parseObjCMethod() { 4145 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) && 4146 "'(' or identifier expected."); 4147 do { 4148 if (FormatTok->is(tok::semi)) { 4149 nextToken(); 4150 addUnwrappedLine(); 4151 return; 4152 } else if (FormatTok->is(tok::l_brace)) { 4153 if (Style.BraceWrapping.AfterFunction) 4154 addUnwrappedLine(); 4155 parseBlock(); 4156 addUnwrappedLine(); 4157 return; 4158 } else { 4159 nextToken(); 4160 } 4161 } while (!eof()); 4162 } 4163 4164 void UnwrappedLineParser::parseObjCProtocolList() { 4165 assert(FormatTok->is(tok::less) && "'<' expected."); 4166 do { 4167 nextToken(); 4168 // Early exit in case someone forgot a close angle. 4169 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 4170 FormatTok->isObjCAtKeyword(tok::objc_end)) { 4171 return; 4172 } 4173 } while (!eof() && FormatTok->isNot(tok::greater)); 4174 nextToken(); // Skip '>'. 4175 } 4176 4177 void UnwrappedLineParser::parseObjCUntilAtEnd() { 4178 do { 4179 if (FormatTok->isObjCAtKeyword(tok::objc_end)) { 4180 nextToken(); 4181 addUnwrappedLine(); 4182 break; 4183 } 4184 if (FormatTok->is(tok::l_brace)) { 4185 parseBlock(); 4186 // In ObjC interfaces, nothing should be following the "}". 4187 addUnwrappedLine(); 4188 } else if (FormatTok->is(tok::r_brace)) { 4189 // Ignore stray "}". parseStructuralElement doesn't consume them. 
4190 nextToken(); 4191 addUnwrappedLine(); 4192 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 4193 nextToken(); 4194 parseObjCMethod(); 4195 } else { 4196 parseStructuralElement(); 4197 } 4198 } while (!eof()); 4199 } 4200 4201 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 4202 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 4203 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 4204 nextToken(); 4205 nextToken(); // interface name 4206 4207 // @interface can be followed by a lightweight generic 4208 // specialization list, then either a base class or a category. 4209 if (FormatTok->is(tok::less)) 4210 parseObjCLightweightGenerics(); 4211 if (FormatTok->is(tok::colon)) { 4212 nextToken(); 4213 nextToken(); // base class name 4214 // The base class can also have lightweight generics applied to it. 4215 if (FormatTok->is(tok::less)) 4216 parseObjCLightweightGenerics(); 4217 } else if (FormatTok->is(tok::l_paren)) { 4218 // Skip category, if present. 4219 parseParens(); 4220 } 4221 4222 if (FormatTok->is(tok::less)) 4223 parseObjCProtocolList(); 4224 4225 if (FormatTok->is(tok::l_brace)) { 4226 if (Style.BraceWrapping.AfterObjCDeclaration) 4227 addUnwrappedLine(); 4228 parseBlock(/*MustBeDeclaration=*/true); 4229 } 4230 4231 // With instance variables, this puts '}' on its own line. Without instance 4232 // variables, this ends the @interface line. 4233 addUnwrappedLine(); 4234 4235 parseObjCUntilAtEnd(); 4236 } 4237 4238 void UnwrappedLineParser::parseObjCLightweightGenerics() { 4239 assert(FormatTok->is(tok::less)); 4240 // Unlike protocol lists, generic parameterizations support 4241 // nested angles: 4242 // 4243 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 4244 // NSObject <NSCopying, NSSecureCoding> 4245 // 4246 // so we need to count how many open angles we have left. 
4247 unsigned NumOpenAngles = 1; 4248 do { 4249 nextToken(); 4250 // Early exit in case someone forgot a close angle. 4251 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 4252 FormatTok->isObjCAtKeyword(tok::objc_end)) { 4253 break; 4254 } 4255 if (FormatTok->is(tok::less)) { 4256 ++NumOpenAngles; 4257 } else if (FormatTok->is(tok::greater)) { 4258 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 4259 --NumOpenAngles; 4260 } 4261 } while (!eof() && NumOpenAngles != 0); 4262 nextToken(); // Skip '>'. 4263 } 4264 4265 // Returns true for the declaration/definition form of @protocol, 4266 // false for the expression form. 4267 bool UnwrappedLineParser::parseObjCProtocol() { 4268 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 4269 nextToken(); 4270 4271 if (FormatTok->is(tok::l_paren)) { 4272 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 4273 return false; 4274 } 4275 4276 // The definition/declaration form, 4277 // @protocol Foo 4278 // - (int)someMethod; 4279 // @end 4280 4281 nextToken(); // protocol name 4282 4283 if (FormatTok->is(tok::less)) 4284 parseObjCProtocolList(); 4285 4286 // Check for protocol declaration. 4287 if (FormatTok->is(tok::semi)) { 4288 nextToken(); 4289 addUnwrappedLine(); 4290 return true; 4291 } 4292 4293 addUnwrappedLine(); 4294 parseObjCUntilAtEnd(); 4295 return true; 4296 } 4297 4298 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 4299 bool IsImport = FormatTok->is(Keywords.kw_import); 4300 assert(IsImport || FormatTok->is(tok::kw_export)); 4301 nextToken(); 4302 4303 // Consume the "default" in "export default class/function". 4304 if (FormatTok->is(tok::kw_default)) 4305 nextToken(); 4306 4307 // Consume "async function", "function" and "default function", so that these 4308 // get parsed as free-standing JS functions, i.e. do not require a trailing 4309 // semicolon. 
4310 if (FormatTok->is(Keywords.kw_async)) 4311 nextToken(); 4312 if (FormatTok->is(Keywords.kw_function)) { 4313 nextToken(); 4314 return; 4315 } 4316 4317 // For imports, `export *`, `export {...}`, consume the rest of the line up 4318 // to the terminating `;`. For everything else, just return and continue 4319 // parsing the structural element, i.e. the declaration or expression for 4320 // `export default`. 4321 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 4322 !FormatTok->isStringLiteral() && 4323 !(FormatTok->is(Keywords.kw_type) && 4324 Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) { 4325 return; 4326 } 4327 4328 while (!eof()) { 4329 if (FormatTok->is(tok::semi)) 4330 return; 4331 if (Line->Tokens.empty()) { 4332 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 4333 // import statement should terminate. 4334 return; 4335 } 4336 if (FormatTok->is(tok::l_brace)) { 4337 FormatTok->setBlockKind(BK_Block); 4338 nextToken(); 4339 parseBracedList(); 4340 } else { 4341 nextToken(); 4342 } 4343 } 4344 } 4345 4346 void UnwrappedLineParser::parseStatementMacro() { 4347 nextToken(); 4348 if (FormatTok->is(tok::l_paren)) 4349 parseParens(); 4350 if (FormatTok->is(tok::semi)) 4351 nextToken(); 4352 addUnwrappedLine(); 4353 } 4354 4355 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() { 4356 // consume things like a::`b.c[d:e] or a::* 4357 while (true) { 4358 if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar, 4359 tok::coloncolon, tok::hash) || 4360 Keywords.isVerilogIdentifier(*FormatTok)) { 4361 nextToken(); 4362 } else if (FormatTok->is(tok::l_square)) { 4363 parseSquare(); 4364 } else { 4365 break; 4366 } 4367 } 4368 } 4369 4370 void UnwrappedLineParser::parseVerilogSensitivityList() { 4371 if (FormatTok->isNot(tok::at)) 4372 return; 4373 nextToken(); 4374 // A block event expression has 2 at signs. 
4375 if (FormatTok->is(tok::at)) 4376 nextToken(); 4377 switch (FormatTok->Tok.getKind()) { 4378 case tok::star: 4379 nextToken(); 4380 break; 4381 case tok::l_paren: 4382 parseParens(); 4383 break; 4384 default: 4385 parseVerilogHierarchyIdentifier(); 4386 break; 4387 } 4388 } 4389 4390 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() { 4391 unsigned AddLevels = 0; 4392 4393 if (FormatTok->is(Keywords.kw_clocking)) { 4394 nextToken(); 4395 if (Keywords.isVerilogIdentifier(*FormatTok)) 4396 nextToken(); 4397 parseVerilogSensitivityList(); 4398 if (FormatTok->is(tok::semi)) 4399 nextToken(); 4400 } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex, 4401 Keywords.kw_casez, Keywords.kw_randcase, 4402 Keywords.kw_randsequence)) { 4403 if (Style.IndentCaseLabels) 4404 AddLevels++; 4405 nextToken(); 4406 if (FormatTok->is(tok::l_paren)) { 4407 FormatTok->setFinalizedType(TT_ConditionLParen); 4408 parseParens(); 4409 } 4410 if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches)) 4411 nextToken(); 4412 // The case header has no semicolon. 4413 } else { 4414 // "module" etc. 
4415 nextToken(); 4416 // all the words like the name of the module and specifiers like 4417 // "automatic" and the width of function return type 4418 while (true) { 4419 if (FormatTok->is(tok::l_square)) { 4420 auto Prev = FormatTok->getPreviousNonComment(); 4421 if (Prev && Keywords.isVerilogIdentifier(*Prev)) 4422 Prev->setFinalizedType(TT_VerilogDimensionedTypeName); 4423 parseSquare(); 4424 } else if (Keywords.isVerilogIdentifier(*FormatTok) || 4425 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) { 4426 nextToken(); 4427 } else { 4428 break; 4429 } 4430 } 4431 4432 auto NewLine = [this]() { 4433 addUnwrappedLine(); 4434 Line->IsContinuation = true; 4435 }; 4436 4437 // package imports 4438 while (FormatTok->is(Keywords.kw_import)) { 4439 NewLine(); 4440 nextToken(); 4441 parseVerilogHierarchyIdentifier(); 4442 if (FormatTok->is(tok::semi)) 4443 nextToken(); 4444 } 4445 4446 // parameters and ports 4447 if (FormatTok->is(Keywords.kw_verilogHash)) { 4448 NewLine(); 4449 nextToken(); 4450 if (FormatTok->is(tok::l_paren)) { 4451 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen); 4452 parseParens(); 4453 } 4454 } 4455 if (FormatTok->is(tok::l_paren)) { 4456 NewLine(); 4457 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen); 4458 parseParens(); 4459 } 4460 4461 // extends and implements 4462 if (FormatTok->is(Keywords.kw_extends)) { 4463 NewLine(); 4464 nextToken(); 4465 parseVerilogHierarchyIdentifier(); 4466 if (FormatTok->is(tok::l_paren)) 4467 parseParens(); 4468 } 4469 if (FormatTok->is(Keywords.kw_implements)) { 4470 NewLine(); 4471 do { 4472 nextToken(); 4473 parseVerilogHierarchyIdentifier(); 4474 } while (FormatTok->is(tok::comma)); 4475 } 4476 4477 // Coverage event for cover groups. 
4478 if (FormatTok->is(tok::at)) { 4479 NewLine(); 4480 parseVerilogSensitivityList(); 4481 } 4482 4483 if (FormatTok->is(tok::semi)) 4484 nextToken(/*LevelDifference=*/1); 4485 addUnwrappedLine(); 4486 } 4487 4488 return AddLevels; 4489 } 4490 4491 void UnwrappedLineParser::parseVerilogTable() { 4492 assert(FormatTok->is(Keywords.kw_table)); 4493 nextToken(/*LevelDifference=*/1); 4494 addUnwrappedLine(); 4495 4496 auto InitialLevel = Line->Level++; 4497 while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) { 4498 FormatToken *Tok = FormatTok; 4499 nextToken(); 4500 if (Tok->is(tok::semi)) 4501 addUnwrappedLine(); 4502 else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus)) 4503 Tok->setFinalizedType(TT_VerilogTableItem); 4504 } 4505 Line->Level = InitialLevel; 4506 nextToken(/*LevelDifference=*/-1); 4507 addUnwrappedLine(); 4508 } 4509 4510 void UnwrappedLineParser::parseVerilogCaseLabel() { 4511 // The label will get unindented in AnnotatingParser. If there are no leading 4512 // spaces, indent the rest here so that things inside the block will be 4513 // indented relative to things outside. We don't use parseLabel because we 4514 // don't know whether this colon is a label or a ternary expression at this 4515 // point. 4516 auto OrigLevel = Line->Level; 4517 auto FirstLine = CurrentLines->size(); 4518 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1)) 4519 ++Line->Level; 4520 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok)) 4521 --Line->Level; 4522 parseStructuralElement(); 4523 // Restore the indentation in both the new line and the line that has the 4524 // label. 
4525 if (CurrentLines->size() > FirstLine) 4526 (*CurrentLines)[FirstLine].Level = OrigLevel; 4527 Line->Level = OrigLevel; 4528 } 4529 4530 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const { 4531 for (const auto &N : Line.Tokens) { 4532 if (N.Tok->MacroCtx) 4533 return true; 4534 for (const UnwrappedLine &Child : N.Children) 4535 if (containsExpansion(Child)) 4536 return true; 4537 } 4538 return false; 4539 } 4540 4541 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { 4542 if (Line->Tokens.empty()) 4543 return; 4544 LLVM_DEBUG({ 4545 if (!parsingPPDirective()) { 4546 llvm::dbgs() << "Adding unwrapped line:\n"; 4547 printDebugInfo(*Line); 4548 } 4549 }); 4550 4551 // If this line closes a block when in Whitesmiths mode, remember that 4552 // information so that the level can be decreased after the line is added. 4553 // This has to happen after the addition of the line since the line itself 4554 // needs to be indented. 4555 bool ClosesWhitesmithsBlock = 4556 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && 4557 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 4558 4559 // If the current line was expanded from a macro call, we use it to 4560 // reconstruct an unwrapped line from the structure of the expanded unwrapped 4561 // line and the unexpanded token stream. 4562 if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) { 4563 if (!Reconstruct) 4564 Reconstruct.emplace(Line->Level, Unexpanded); 4565 Reconstruct->addLine(*Line); 4566 4567 // While the reconstructed unexpanded lines are stored in the normal 4568 // flow of lines, the expanded lines are stored on the side to be analyzed 4569 // in an extra step. 
4570 CurrentExpandedLines.push_back(std::move(*Line)); 4571 4572 if (Reconstruct->finished()) { 4573 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult(); 4574 assert(!Reconstructed.Tokens.empty() && 4575 "Reconstructed must at least contain the macro identifier."); 4576 assert(!parsingPPDirective()); 4577 LLVM_DEBUG({ 4578 llvm::dbgs() << "Adding unexpanded line:\n"; 4579 printDebugInfo(Reconstructed); 4580 }); 4581 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines; 4582 Lines.push_back(std::move(Reconstructed)); 4583 CurrentExpandedLines.clear(); 4584 Reconstruct.reset(); 4585 } 4586 } else { 4587 // At the top level we only get here when no unexpansion is going on, or 4588 // when conditional formatting led to unfinished macro reconstructions. 4589 assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0); 4590 CurrentLines->push_back(std::move(*Line)); 4591 } 4592 Line->Tokens.clear(); 4593 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 4594 Line->FirstStartColumn = 0; 4595 Line->IsContinuation = false; 4596 Line->SeenDecltypeAuto = false; 4597 4598 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) 4599 --Line->Level; 4600 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) { 4601 CurrentLines->append( 4602 std::make_move_iterator(PreprocessorDirectives.begin()), 4603 std::make_move_iterator(PreprocessorDirectives.end())); 4604 PreprocessorDirectives.clear(); 4605 } 4606 // Disconnect the current token from the last token on the previous line. 4607 FormatTok->Previous = nullptr; 4608 } 4609 4610 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); } 4611 4612 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 4613 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 4614 FormatTok.NewlinesBefore > 0; 4615 } 4616 4617 // Checks if \p FormatTok is a line comment that continues the line comment 4618 // section on \p Line. 
4619 static bool 4620 continuesLineCommentSection(const FormatToken &FormatTok, 4621 const UnwrappedLine &Line, 4622 const llvm::Regex &CommentPragmasRegex) { 4623 if (Line.Tokens.empty()) 4624 return false; 4625 4626 StringRef IndentContent = FormatTok.TokenText; 4627 if (FormatTok.TokenText.starts_with("//") || 4628 FormatTok.TokenText.starts_with("/*")) { 4629 IndentContent = FormatTok.TokenText.substr(2); 4630 } 4631 if (CommentPragmasRegex.match(IndentContent)) 4632 return false; 4633 4634 // If Line starts with a line comment, then FormatTok continues the comment 4635 // section if its original column is greater or equal to the original start 4636 // column of the line. 4637 // 4638 // Define the min column token of a line as follows: if a line ends in '{' or 4639 // contains a '{' followed by a line comment, then the min column token is 4640 // that '{'. Otherwise, the min column token of the line is the first token of 4641 // the line. 4642 // 4643 // If Line starts with a token other than a line comment, then FormatTok 4644 // continues the comment section if its original column is greater than the 4645 // original start column of the min column token of the line. 
4646 // 4647 // For example, the second line comment continues the first in these cases: 4648 // 4649 // // first line 4650 // // second line 4651 // 4652 // and: 4653 // 4654 // // first line 4655 // // second line 4656 // 4657 // and: 4658 // 4659 // int i; // first line 4660 // // second line 4661 // 4662 // and: 4663 // 4664 // do { // first line 4665 // // second line 4666 // int i; 4667 // } while (true); 4668 // 4669 // and: 4670 // 4671 // enum { 4672 // a, // first line 4673 // // second line 4674 // b 4675 // }; 4676 // 4677 // The second line comment doesn't continue the first in these cases: 4678 // 4679 // // first line 4680 // // second line 4681 // 4682 // and: 4683 // 4684 // int i; // first line 4685 // // second line 4686 // 4687 // and: 4688 // 4689 // do { // first line 4690 // // second line 4691 // int i; 4692 // } while (true); 4693 // 4694 // and: 4695 // 4696 // enum { 4697 // a, // first line 4698 // // second line 4699 // }; 4700 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 4701 4702 // Scan for '{//'. If found, use the column of '{' as a min column for line 4703 // comment section continuation. 4704 const FormatToken *PreviousToken = nullptr; 4705 for (const UnwrappedLineNode &Node : Line.Tokens) { 4706 if (PreviousToken && PreviousToken->is(tok::l_brace) && 4707 isLineComment(*Node.Tok)) { 4708 MinColumnToken = PreviousToken; 4709 break; 4710 } 4711 PreviousToken = Node.Tok; 4712 4713 // Grab the last newline preceding a token in this unwrapped line. 
4714 if (Node.Tok->NewlinesBefore > 0) 4715 MinColumnToken = Node.Tok; 4716 } 4717 if (PreviousToken && PreviousToken->is(tok::l_brace)) 4718 MinColumnToken = PreviousToken; 4719 4720 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 4721 MinColumnToken); 4722 } 4723 4724 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 4725 bool JustComments = Line->Tokens.empty(); 4726 for (FormatToken *Tok : CommentsBeforeNextToken) { 4727 // Line comments that belong to the same line comment section are put on the 4728 // same line since later we might want to reflow content between them. 4729 // Additional fine-grained breaking of line comment sections is controlled 4730 // by the class BreakableLineCommentSection in case it is desirable to keep 4731 // several line comment sections in the same unwrapped line. 4732 // 4733 // FIXME: Consider putting separate line comment sections as children to the 4734 // unwrapped line instead. 4735 Tok->ContinuesLineCommentSection = 4736 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex); 4737 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection) 4738 addUnwrappedLine(); 4739 pushToken(Tok); 4740 } 4741 if (NewlineBeforeNext && JustComments) 4742 addUnwrappedLine(); 4743 CommentsBeforeNextToken.clear(); 4744 } 4745 4746 void UnwrappedLineParser::nextToken(int LevelDifference) { 4747 if (eof()) 4748 return; 4749 flushComments(isOnNewLine(*FormatTok)); 4750 pushToken(FormatTok); 4751 FormatToken *Previous = FormatTok; 4752 if (!Style.isJavaScript()) 4753 readToken(LevelDifference); 4754 else 4755 readTokenWithJavaScriptASI(); 4756 FormatTok->Previous = Previous; 4757 if (Style.isVerilog()) { 4758 // Blocks in Verilog can have `begin` and `end` instead of braces. For 4759 // keywords like `begin`, we can't treat them the same as left braces 4760 // because some contexts require one of them. 
For example structs use 4761 // braces and if blocks use keywords, and a left brace can occur in an if 4762 // statement, but it is not a block. For keywords like `end`, we simply 4763 // treat them the same as right braces. 4764 if (Keywords.isVerilogEnd(*FormatTok)) 4765 FormatTok->Tok.setKind(tok::r_brace); 4766 } 4767 } 4768 4769 void UnwrappedLineParser::distributeComments( 4770 const SmallVectorImpl<FormatToken *> &Comments, 4771 const FormatToken *NextTok) { 4772 // Whether or not a line comment token continues a line is controlled by 4773 // the method continuesLineCommentSection, with the following caveat: 4774 // 4775 // Define a trail of Comments to be a nonempty proper postfix of Comments such 4776 // that each comment line from the trail is aligned with the next token, if 4777 // the next token exists. If a trail exists, the beginning of the maximal 4778 // trail is marked as a start of a new comment section. 4779 // 4780 // For example in this code: 4781 // 4782 // int a; // line about a 4783 // // line 1 about b 4784 // // line 2 about b 4785 // int b; 4786 // 4787 // the two lines about b form a maximal trail, so there are two sections, the 4788 // first one consisting of the single comment "// line about a" and the 4789 // second one consisting of the next two comments. 4790 if (Comments.empty()) 4791 return; 4792 bool ShouldPushCommentsInCurrentLine = true; 4793 bool HasTrailAlignedWithNextToken = false; 4794 unsigned StartOfTrailAlignedWithNextToken = 0; 4795 if (NextTok) { 4796 // We are skipping the first element intentionally. 
4797 for (unsigned i = Comments.size() - 1; i > 0; --i) { 4798 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 4799 HasTrailAlignedWithNextToken = true; 4800 StartOfTrailAlignedWithNextToken = i; 4801 } 4802 } 4803 } 4804 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 4805 FormatToken *FormatTok = Comments[i]; 4806 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 4807 FormatTok->ContinuesLineCommentSection = false; 4808 } else { 4809 FormatTok->ContinuesLineCommentSection = 4810 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 4811 } 4812 if (!FormatTok->ContinuesLineCommentSection && 4813 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 4814 ShouldPushCommentsInCurrentLine = false; 4815 } 4816 if (ShouldPushCommentsInCurrentLine) 4817 pushToken(FormatTok); 4818 else 4819 CommentsBeforeNextToken.push_back(FormatTok); 4820 } 4821 } 4822 4823 void UnwrappedLineParser::readToken(int LevelDifference) { 4824 SmallVector<FormatToken *, 1> Comments; 4825 bool PreviousWasComment = false; 4826 bool FirstNonCommentOnLine = false; 4827 do { 4828 FormatTok = Tokens->getNextToken(); 4829 assert(FormatTok); 4830 while (FormatTok->isOneOf(TT_ConflictStart, TT_ConflictEnd, 4831 TT_ConflictAlternative)) { 4832 if (FormatTok->is(TT_ConflictStart)) 4833 conditionalCompilationStart(/*Unreachable=*/false); 4834 else if (FormatTok->is(TT_ConflictAlternative)) 4835 conditionalCompilationAlternative(); 4836 else if (FormatTok->is(TT_ConflictEnd)) 4837 conditionalCompilationEnd(); 4838 FormatTok = Tokens->getNextToken(); 4839 FormatTok->MustBreakBefore = true; 4840 FormatTok->MustBreakBeforeFinalized = true; 4841 } 4842 4843 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine, 4844 const FormatToken &Tok, 4845 bool PreviousWasComment) { 4846 auto IsFirstOnLine = [](const FormatToken &Tok) { 4847 return Tok.HasUnescapedNewline || Tok.IsFirst; 4848 }; 4849 4850 // Consider preprocessor directives preceded by 
block comments as first 4851 // on line. 4852 if (PreviousWasComment) 4853 return FirstNonCommentOnLine || IsFirstOnLine(Tok); 4854 return IsFirstOnLine(Tok); 4855 }; 4856 4857 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4858 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4859 PreviousWasComment = FormatTok->is(tok::comment); 4860 4861 while (!Line->InPPDirective && FormatTok->is(tok::hash) && 4862 (!Style.isVerilog() || 4863 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) && 4864 FirstNonCommentOnLine) { 4865 distributeComments(Comments, FormatTok); 4866 Comments.clear(); 4867 // If there is an unfinished unwrapped line, we flush the preprocessor 4868 // directives only after that unwrapped line was finished later. 4869 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 4870 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 4871 assert((LevelDifference >= 0 || 4872 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 4873 "LevelDifference makes Line->Level negative"); 4874 Line->Level += LevelDifference; 4875 // Comments stored before the preprocessor directive need to be output 4876 // before the preprocessor directive, at the same level as the 4877 // preprocessor directive, as we consider them to apply to the directive. 
4878 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 4879 PPBranchLevel > 0) { 4880 Line->Level += PPBranchLevel; 4881 } 4882 assert(Line->Level >= Line->UnbracedBodyLevel); 4883 Line->Level -= Line->UnbracedBodyLevel; 4884 flushComments(isOnNewLine(*FormatTok)); 4885 parsePPDirective(); 4886 PreviousWasComment = FormatTok->is(tok::comment); 4887 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4888 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4889 } 4890 4891 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 4892 !Line->InPPDirective) { 4893 continue; 4894 } 4895 4896 if (FormatTok->is(tok::identifier) && 4897 Macros.defined(FormatTok->TokenText) && 4898 // FIXME: Allow expanding macros in preprocessor directives. 4899 !Line->InPPDirective) { 4900 FormatToken *ID = FormatTok; 4901 unsigned Position = Tokens->getPosition(); 4902 4903 // To correctly parse the code, we need to replace the tokens of the macro 4904 // call with its expansion. 4905 auto PreCall = std::move(Line); 4906 Line.reset(new UnwrappedLine); 4907 bool OldInExpansion = InExpansion; 4908 InExpansion = true; 4909 // We parse the macro call into a new line. 4910 auto Args = parseMacroCall(); 4911 InExpansion = OldInExpansion; 4912 assert(Line->Tokens.front().Tok == ID); 4913 // And remember the unexpanded macro call tokens. 4914 auto UnexpandedLine = std::move(Line); 4915 // Reset to the old line. 
4916 Line = std::move(PreCall); 4917 4918 LLVM_DEBUG({ 4919 llvm::dbgs() << "Macro call: " << ID->TokenText << "("; 4920 if (Args) { 4921 llvm::dbgs() << "("; 4922 for (const auto &Arg : Args.value()) 4923 for (const auto &T : Arg) 4924 llvm::dbgs() << T->TokenText << " "; 4925 llvm::dbgs() << ")"; 4926 } 4927 llvm::dbgs() << "\n"; 4928 }); 4929 if (Macros.objectLike(ID->TokenText) && Args && 4930 !Macros.hasArity(ID->TokenText, Args->size())) { 4931 // The macro is either 4932 // - object-like, but we got argumnets, or 4933 // - overloaded to be both object-like and function-like, but none of 4934 // the function-like arities match the number of arguments. 4935 // Thus, expand as object-like macro. 4936 LLVM_DEBUG(llvm::dbgs() 4937 << "Macro \"" << ID->TokenText 4938 << "\" not overloaded for arity " << Args->size() 4939 << "or not function-like, using object-like overload."); 4940 Args.reset(); 4941 UnexpandedLine->Tokens.resize(1); 4942 Tokens->setPosition(Position); 4943 nextToken(); 4944 assert(!Args && Macros.objectLike(ID->TokenText)); 4945 } 4946 if ((!Args && Macros.objectLike(ID->TokenText)) || 4947 (Args && Macros.hasArity(ID->TokenText, Args->size()))) { 4948 // Next, we insert the expanded tokens in the token stream at the 4949 // current position, and continue parsing. 
4950 Unexpanded[ID] = std::move(UnexpandedLine); 4951 SmallVector<FormatToken *, 8> Expansion = 4952 Macros.expand(ID, std::move(Args)); 4953 if (!Expansion.empty()) 4954 FormatTok = Tokens->insertTokens(Expansion); 4955 4956 LLVM_DEBUG({ 4957 llvm::dbgs() << "Expanded: "; 4958 for (const auto &T : Expansion) 4959 llvm::dbgs() << T->TokenText << " "; 4960 llvm::dbgs() << "\n"; 4961 }); 4962 } else { 4963 LLVM_DEBUG({ 4964 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText 4965 << "\", because it was used "; 4966 if (Args) 4967 llvm::dbgs() << "with " << Args->size(); 4968 else 4969 llvm::dbgs() << "without"; 4970 llvm::dbgs() << " arguments, which doesn't match any definition.\n"; 4971 }); 4972 Tokens->setPosition(Position); 4973 FormatTok = ID; 4974 } 4975 } 4976 4977 if (FormatTok->isNot(tok::comment)) { 4978 distributeComments(Comments, FormatTok); 4979 Comments.clear(); 4980 return; 4981 } 4982 4983 Comments.push_back(FormatTok); 4984 } while (!eof()); 4985 4986 distributeComments(Comments, nullptr); 4987 Comments.clear(); 4988 } 4989 4990 namespace { 4991 template <typename Iterator> 4992 void pushTokens(Iterator Begin, Iterator End, 4993 llvm::SmallVectorImpl<FormatToken *> &Into) { 4994 for (auto I = Begin; I != End; ++I) { 4995 Into.push_back(I->Tok); 4996 for (const auto &Child : I->Children) 4997 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into); 4998 } 4999 } 5000 } // namespace 5001 5002 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> 5003 UnwrappedLineParser::parseMacroCall() { 5004 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args; 5005 assert(Line->Tokens.empty()); 5006 nextToken(); 5007 if (FormatTok->isNot(tok::l_paren)) 5008 return Args; 5009 unsigned Position = Tokens->getPosition(); 5010 FormatToken *Tok = FormatTok; 5011 nextToken(); 5012 Args.emplace(); 5013 auto ArgStart = std::prev(Line->Tokens.end()); 5014 5015 int Parens = 0; 5016 do { 5017 switch 
(FormatTok->Tok.getKind()) { 5018 case tok::l_paren: 5019 ++Parens; 5020 nextToken(); 5021 break; 5022 case tok::r_paren: { 5023 if (Parens > 0) { 5024 --Parens; 5025 nextToken(); 5026 break; 5027 } 5028 Args->push_back({}); 5029 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); 5030 nextToken(); 5031 return Args; 5032 } 5033 case tok::comma: { 5034 if (Parens > 0) { 5035 nextToken(); 5036 break; 5037 } 5038 Args->push_back({}); 5039 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); 5040 nextToken(); 5041 ArgStart = std::prev(Line->Tokens.end()); 5042 break; 5043 } 5044 default: 5045 nextToken(); 5046 break; 5047 } 5048 } while (!eof()); 5049 Line->Tokens.resize(1); 5050 Tokens->setPosition(Position); 5051 FormatTok = Tok; 5052 return {}; 5053 } 5054 5055 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 5056 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 5057 if (MustBreakBeforeNextToken) { 5058 Line->Tokens.back().Tok->MustBreakBefore = true; 5059 Line->Tokens.back().Tok->MustBreakBeforeFinalized = true; 5060 MustBreakBeforeNextToken = false; 5061 } 5062 } 5063 5064 } // end namespace format 5065 } // end namespace clang 5066