1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "FormatToken.h" 17 #include "FormatTokenLexer.h" 18 #include "FormatTokenSource.h" 19 #include "Macros.h" 20 #include "TokenAnnotator.h" 21 #include "clang/Basic/TokenKinds.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/StringRef.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/raw_os_ostream.h" 26 #include "llvm/Support/raw_ostream.h" 27 28 #include <algorithm> 29 #include <utility> 30 31 #define DEBUG_TYPE "format-parser" 32 33 namespace clang { 34 namespace format { 35 36 namespace { 37 38 void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line, 39 StringRef Prefix = "", bool PrintText = false) { 40 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn 41 << ")" << (Line.InPPDirective ? 
" MACRO" : "") << ": "; 42 bool NewLine = false; 43 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 44 E = Line.Tokens.end(); 45 I != E; ++I) { 46 if (NewLine) { 47 OS << Prefix; 48 NewLine = false; 49 } 50 OS << I->Tok->Tok.getName() << "[" 51 << "T=" << (unsigned)I->Tok->getType() 52 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText 53 << "\"] "; 54 for (SmallVectorImpl<UnwrappedLine>::const_iterator 55 CI = I->Children.begin(), 56 CE = I->Children.end(); 57 CI != CE; ++CI) { 58 OS << "\n"; 59 printLine(OS, *CI, (Prefix + " ").str()); 60 NewLine = true; 61 } 62 } 63 if (!NewLine) 64 OS << "\n"; 65 } 66 67 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) { 68 printLine(llvm::dbgs(), Line); 69 } 70 71 class ScopedDeclarationState { 72 public: 73 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack, 74 bool MustBeDeclaration) 75 : Line(Line), Stack(Stack) { 76 Line.MustBeDeclaration = MustBeDeclaration; 77 Stack.push_back(MustBeDeclaration); 78 } 79 ~ScopedDeclarationState() { 80 Stack.pop_back(); 81 if (!Stack.empty()) 82 Line.MustBeDeclaration = Stack.back(); 83 else 84 Line.MustBeDeclaration = true; 85 } 86 87 private: 88 UnwrappedLine &Line; 89 llvm::BitVector &Stack; 90 }; 91 92 } // end anonymous namespace 93 94 class ScopedLineState { 95 public: 96 ScopedLineState(UnwrappedLineParser &Parser, 97 bool SwitchToPreprocessorLines = false) 98 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 99 if (SwitchToPreprocessorLines) 100 Parser.CurrentLines = &Parser.PreprocessorDirectives; 101 else if (!Parser.Line->Tokens.empty()) 102 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 103 PreBlockLine = std::move(Parser.Line); 104 Parser.Line = std::make_unique<UnwrappedLine>(); 105 Parser.Line->Level = PreBlockLine->Level; 106 Parser.Line->PPLevel = PreBlockLine->PPLevel; 107 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 108 Parser.Line->InMacroBody = 
PreBlockLine->InMacroBody; 109 } 110 111 ~ScopedLineState() { 112 if (!Parser.Line->Tokens.empty()) 113 Parser.addUnwrappedLine(); 114 assert(Parser.Line->Tokens.empty()); 115 Parser.Line = std::move(PreBlockLine); 116 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 117 Parser.MustBreakBeforeNextToken = true; 118 Parser.CurrentLines = OriginalLines; 119 } 120 121 private: 122 UnwrappedLineParser &Parser; 123 124 std::unique_ptr<UnwrappedLine> PreBlockLine; 125 SmallVectorImpl<UnwrappedLine> *OriginalLines; 126 }; 127 128 class CompoundStatementIndenter { 129 public: 130 CompoundStatementIndenter(UnwrappedLineParser *Parser, 131 const FormatStyle &Style, unsigned &LineLevel) 132 : CompoundStatementIndenter(Parser, LineLevel, 133 Style.BraceWrapping.AfterControlStatement, 134 Style.BraceWrapping.IndentBraces) {} 135 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, 136 bool WrapBrace, bool IndentBrace) 137 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 138 if (WrapBrace) 139 Parser->addUnwrappedLine(); 140 if (IndentBrace) 141 ++LineLevel; 142 } 143 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 144 145 private: 146 unsigned &LineLevel; 147 unsigned OldLineLevel; 148 }; 149 150 UnwrappedLineParser::UnwrappedLineParser( 151 SourceManager &SourceMgr, const FormatStyle &Style, 152 const AdditionalKeywords &Keywords, unsigned FirstStartColumn, 153 ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback, 154 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator, 155 IdentifierTable &IdentTable) 156 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 157 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 158 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 159 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 160 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 161 ? 
IG_Rejected 162 : IG_Inited), 163 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn), 164 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {} 165 166 void UnwrappedLineParser::reset() { 167 PPBranchLevel = -1; 168 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 169 ? IG_Rejected 170 : IG_Inited; 171 IncludeGuardToken = nullptr; 172 Line.reset(new UnwrappedLine); 173 CommentsBeforeNextToken.clear(); 174 FormatTok = nullptr; 175 MustBreakBeforeNextToken = false; 176 PreprocessorDirectives.clear(); 177 CurrentLines = &Lines; 178 DeclarationScopeStack.clear(); 179 NestedTooDeep.clear(); 180 PPStack.clear(); 181 Line->FirstStartColumn = FirstStartColumn; 182 183 if (!Unexpanded.empty()) 184 for (FormatToken *Token : AllTokens) 185 Token->MacroCtx.reset(); 186 CurrentExpandedLines.clear(); 187 ExpandedLines.clear(); 188 Unexpanded.clear(); 189 InExpansion = false; 190 Reconstruct.reset(); 191 } 192 193 void UnwrappedLineParser::parse() { 194 IndexedTokenSource TokenSource(AllTokens); 195 Line->FirstStartColumn = FirstStartColumn; 196 do { 197 LLVM_DEBUG(llvm::dbgs() << "----\n"); 198 reset(); 199 Tokens = &TokenSource; 200 TokenSource.reset(); 201 202 readToken(); 203 parseFile(); 204 205 // If we found an include guard then all preprocessor directives (other than 206 // the guard) are over-indented by one. 207 if (IncludeGuard == IG_Found) { 208 for (auto &Line : Lines) 209 if (Line.InPPDirective && Line.Level > 0) 210 --Line.Level; 211 } 212 213 // Create line with eof token. 214 assert(FormatTok->is(tok::eof)); 215 pushToken(FormatTok); 216 addUnwrappedLine(); 217 218 // In a first run, format everything with the lines containing macro calls 219 // replaced by the expansion. 
220 if (!ExpandedLines.empty()) { 221 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n"); 222 for (const auto &Line : Lines) { 223 if (!Line.Tokens.empty()) { 224 auto it = ExpandedLines.find(Line.Tokens.begin()->Tok); 225 if (it != ExpandedLines.end()) { 226 for (const auto &Expanded : it->second) { 227 LLVM_DEBUG(printDebugInfo(Expanded)); 228 Callback.consumeUnwrappedLine(Expanded); 229 } 230 continue; 231 } 232 } 233 LLVM_DEBUG(printDebugInfo(Line)); 234 Callback.consumeUnwrappedLine(Line); 235 } 236 Callback.finishRun(); 237 } 238 239 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n"); 240 for (const UnwrappedLine &Line : Lines) { 241 LLVM_DEBUG(printDebugInfo(Line)); 242 Callback.consumeUnwrappedLine(Line); 243 } 244 Callback.finishRun(); 245 Lines.clear(); 246 while (!PPLevelBranchIndex.empty() && 247 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 248 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 249 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 250 } 251 if (!PPLevelBranchIndex.empty()) { 252 ++PPLevelBranchIndex.back(); 253 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 254 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 255 } 256 } while (!PPLevelBranchIndex.empty()); 257 } 258 259 void UnwrappedLineParser::parseFile() { 260 // The top-level context in a file always has declarations, except for pre- 261 // processor directives and JavaScript files. 262 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript(); 263 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 264 MustBeDeclaration); 265 if (Style.Language == FormatStyle::LK_TextProto) 266 parseBracedList(); 267 else 268 parseLevel(); 269 // Make sure to format the remaining tokens. 270 // 271 // LK_TextProto is special since its top-level is parsed as the body of a 272 // braced list, which does not necessarily have natural line separators such 273 // as a semicolon. 
Comments after the last entry that have been determined to 274 // not belong to that line, as in: 275 // key: value 276 // // endfile comment 277 // do not have a chance to be put on a line of their own until this point. 278 // Here we add this newline before end-of-file comments. 279 if (Style.Language == FormatStyle::LK_TextProto && 280 !CommentsBeforeNextToken.empty()) { 281 addUnwrappedLine(); 282 } 283 flushComments(true); 284 addUnwrappedLine(); 285 } 286 287 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() { 288 do { 289 switch (FormatTok->Tok.getKind()) { 290 case tok::l_brace: 291 return; 292 default: 293 if (FormatTok->is(Keywords.kw_where)) { 294 addUnwrappedLine(); 295 nextToken(); 296 parseCSharpGenericTypeConstraint(); 297 break; 298 } 299 nextToken(); 300 break; 301 } 302 } while (!eof()); 303 } 304 305 void UnwrappedLineParser::parseCSharpAttribute() { 306 int UnpairedSquareBrackets = 1; 307 do { 308 switch (FormatTok->Tok.getKind()) { 309 case tok::r_square: 310 nextToken(); 311 --UnpairedSquareBrackets; 312 if (UnpairedSquareBrackets == 0) { 313 addUnwrappedLine(); 314 return; 315 } 316 break; 317 case tok::l_square: 318 ++UnpairedSquareBrackets; 319 nextToken(); 320 break; 321 default: 322 nextToken(); 323 break; 324 } 325 } while (!eof()); 326 } 327 328 bool UnwrappedLineParser::precededByCommentOrPPDirective() const { 329 if (!Lines.empty() && Lines.back().InPPDirective) 330 return true; 331 332 const FormatToken *Previous = Tokens->getPreviousToken(); 333 return Previous && Previous->is(tok::comment) && 334 (Previous->IsMultiline || Previous->NewlinesBefore > 0); 335 } 336 337 /// \brief Parses a level, that is ???. 338 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level 339 /// \param CanContainBracedList If the content can contain (at any level) a 340 /// braced list. 341 /// \param NextLBracesType The type for left brace found in this level. 342 /// \param IfKind The \p if statement kind in the level. 
343 /// \param IfLeftBrace The left brace of the \p if block in the level. 344 /// \returns true if a simple block of if/else/for/while, or false otherwise. 345 /// (A simple block has a single statement.) 346 bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace, 347 bool CanContainBracedList, 348 TokenType NextLBracesType, 349 IfStmtKind *IfKind, 350 FormatToken **IfLeftBrace) { 351 auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace 352 ? TT_BracedListLBrace 353 : TT_Unknown; 354 const bool IsPrecededByCommentOrPPDirective = 355 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective(); 356 FormatToken *IfLBrace = nullptr; 357 bool HasDoWhile = false; 358 bool HasLabel = false; 359 unsigned StatementCount = 0; 360 bool SwitchLabelEncountered = false; 361 362 do { 363 if (FormatTok->getType() == TT_AttributeMacro) { 364 nextToken(); 365 continue; 366 } 367 tok::TokenKind kind = FormatTok->Tok.getKind(); 368 if (FormatTok->getType() == TT_MacroBlockBegin) 369 kind = tok::l_brace; 370 else if (FormatTok->getType() == TT_MacroBlockEnd) 371 kind = tok::r_brace; 372 373 auto ParseDefault = [this, OpeningBrace, NextLevelLBracesType, IfKind, 374 &IfLBrace, &HasDoWhile, &HasLabel, &StatementCount] { 375 parseStructuralElement(!OpeningBrace, NextLevelLBracesType, IfKind, 376 &IfLBrace, HasDoWhile ? nullptr : &HasDoWhile, 377 HasLabel ? nullptr : &HasLabel); 378 ++StatementCount; 379 assert(StatementCount > 0 && "StatementCount overflow!"); 380 }; 381 382 switch (kind) { 383 case tok::comment: 384 nextToken(); 385 addUnwrappedLine(); 386 break; 387 case tok::l_brace: 388 if (NextLBracesType != TT_Unknown) { 389 FormatTok->setFinalizedType(NextLBracesType); 390 } else if (FormatTok->Previous && 391 FormatTok->Previous->ClosesRequiresClause) { 392 // We need the 'default' case here to correctly parse a function 393 // l_brace. 
394 ParseDefault(); 395 continue; 396 } 397 if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) && 398 tryToParseBracedList()) { 399 continue; 400 } 401 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 402 /*MunchSemi=*/true, /*KeepBraces=*/true, /*IfKind=*/nullptr, 403 /*UnindentWhitesmithsBraces=*/false, CanContainBracedList, 404 NextLBracesType); 405 ++StatementCount; 406 assert(StatementCount > 0 && "StatementCount overflow!"); 407 addUnwrappedLine(); 408 break; 409 case tok::r_brace: 410 if (OpeningBrace) { 411 if (!Style.RemoveBracesLLVM || Line->InPPDirective || 412 !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) { 413 return false; 414 } 415 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel || 416 HasDoWhile || IsPrecededByCommentOrPPDirective || 417 precededByCommentOrPPDirective()) { 418 return false; 419 } 420 const FormatToken *Next = Tokens->peekNextToken(); 421 if (Next->is(tok::comment) && Next->NewlinesBefore == 0) 422 return false; 423 if (IfLeftBrace) 424 *IfLeftBrace = IfLBrace; 425 return true; 426 } 427 nextToken(); 428 addUnwrappedLine(); 429 break; 430 case tok::kw_default: { 431 unsigned StoredPosition = Tokens->getPosition(); 432 FormatToken *Next; 433 do { 434 Next = Tokens->getNextToken(); 435 assert(Next); 436 } while (Next->is(tok::comment)); 437 FormatTok = Tokens->setPosition(StoredPosition); 438 if (Next->isNot(tok::colon)) { 439 // default not followed by ':' is not a case label; treat it like 440 // an identifier. 441 parseStructuralElement(); 442 break; 443 } 444 // Else, if it is 'default:', fall through to the case handling. 445 [[fallthrough]]; 446 } 447 case tok::kw_case: 448 if (Style.isProto() || Style.isVerilog() || 449 (Style.isJavaScript() && Line->MustBeDeclaration)) { 450 // Proto: there are no switch/case statements 451 // Verilog: Case labels don't have this word. We handle case 452 // labels including default in TokenAnnotator. 
453 // JavaScript: A 'case: string' style field declaration. 454 ParseDefault(); 455 break; 456 } 457 if (!SwitchLabelEncountered && 458 (Style.IndentCaseLabels || 459 (Line->InPPDirective && Line->Level == 1))) { 460 ++Line->Level; 461 } 462 SwitchLabelEncountered = true; 463 parseStructuralElement(); 464 break; 465 case tok::l_square: 466 if (Style.isCSharp()) { 467 nextToken(); 468 parseCSharpAttribute(); 469 break; 470 } 471 if (handleCppAttributes()) 472 break; 473 [[fallthrough]]; 474 default: 475 ParseDefault(); 476 break; 477 } 478 } while (!eof()); 479 480 return false; 481 } 482 483 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 484 // We'll parse forward through the tokens until we hit 485 // a closing brace or eof - note that getNextToken() will 486 // parse macros, so this will magically work inside macro 487 // definitions, too. 488 unsigned StoredPosition = Tokens->getPosition(); 489 FormatToken *Tok = FormatTok; 490 const FormatToken *PrevTok = Tok->Previous; 491 // Keep a stack of positions of lbrace tokens. We will 492 // update information about whether an lbrace starts a 493 // braced init list or a different block during the loop. 494 struct StackEntry { 495 FormatToken *Tok; 496 const FormatToken *PrevTok; 497 }; 498 SmallVector<StackEntry, 8> LBraceStack; 499 assert(Tok->is(tok::l_brace)); 500 do { 501 // Get next non-comment token. 502 FormatToken *NextTok; 503 do { 504 NextTok = Tokens->getNextToken(); 505 } while (NextTok->is(tok::comment)); 506 507 switch (Tok->Tok.getKind()) { 508 case tok::l_brace: 509 if (Style.isJavaScript() && PrevTok) { 510 if (PrevTok->isOneOf(tok::colon, tok::less)) { 511 // A ':' indicates this code is in a type, or a braced list 512 // following a label in an object literal ({a: {b: 1}}). 513 // A '<' could be an object used in a comparison, but that is nonsense 514 // code (can never return true), so more likely it is a generic type 515 // argument (`X<{a: string; b: number}>`). 
516 // The code below could be confused by semicolons between the 517 // individual members in a type member list, which would normally 518 // trigger BK_Block. In both cases, this must be parsed as an inline 519 // braced init. 520 Tok->setBlockKind(BK_BracedInit); 521 } else if (PrevTok->is(tok::r_paren)) { 522 // `) { }` can only occur in function or method declarations in JS. 523 Tok->setBlockKind(BK_Block); 524 } 525 } else { 526 Tok->setBlockKind(BK_Unknown); 527 } 528 LBraceStack.push_back({Tok, PrevTok}); 529 break; 530 case tok::r_brace: 531 if (LBraceStack.empty()) 532 break; 533 if (LBraceStack.back().Tok->is(BK_Unknown)) { 534 bool ProbablyBracedList = false; 535 if (Style.Language == FormatStyle::LK_Proto) { 536 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 537 } else { 538 // Skip NextTok over preprocessor lines, otherwise we may not 539 // properly diagnose the block as a braced intializer 540 // if the comma separator appears after the pp directive. 541 while (NextTok->is(tok::hash)) { 542 ScopedMacroState MacroState(*Line, Tokens, NextTok); 543 do { 544 NextTok = Tokens->getNextToken(); 545 } while (NextTok->isNot(tok::eof)); 546 } 547 548 // Using OriginalColumn to distinguish between ObjC methods and 549 // binary operators is a bit hacky. 550 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 551 NextTok->OriginalColumn == 0; 552 553 // Try to detect a braced list. Note that regardless how we mark inner 554 // braces here, we will overwrite the BlockKind later if we parse a 555 // braced list (where all blocks inside are by default braced lists), 556 // or when we explicitly detect blocks (for example while parsing 557 // lambdas). 558 559 // If we already marked the opening brace as braced list, the closing 560 // must also be part of it. 
561 ProbablyBracedList = LBraceStack.back().Tok->is(TT_BracedListLBrace); 562 563 ProbablyBracedList = ProbablyBracedList || 564 (Style.isJavaScript() && 565 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 566 Keywords.kw_as)); 567 ProbablyBracedList = ProbablyBracedList || 568 (Style.isCpp() && NextTok->is(tok::l_paren)); 569 570 // If there is a comma, semicolon or right paren after the closing 571 // brace, we assume this is a braced initializer list. 572 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 573 // braced list in JS. 574 ProbablyBracedList = 575 ProbablyBracedList || 576 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 577 tok::r_paren, tok::r_square, tok::ellipsis); 578 579 // Distinguish between braced list in a constructor initializer list 580 // followed by constructor body, or just adjacent blocks. 581 ProbablyBracedList = 582 ProbablyBracedList || 583 (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok && 584 LBraceStack.back().PrevTok->is(tok::identifier)); 585 586 ProbablyBracedList = 587 ProbablyBracedList || 588 (NextTok->is(tok::identifier) && 589 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)); 590 591 ProbablyBracedList = ProbablyBracedList || 592 (NextTok->is(tok::semi) && 593 (!ExpectClassBody || LBraceStack.size() != 1)); 594 595 ProbablyBracedList = 596 ProbablyBracedList || 597 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 598 599 if (!Style.isCSharp() && NextTok->is(tok::l_square)) { 600 // We can have an array subscript after a braced init 601 // list, but C++11 attributes are expected after blocks. 
602 NextTok = Tokens->getNextToken(); 603 ProbablyBracedList = NextTok->isNot(tok::l_square); 604 } 605 } 606 if (ProbablyBracedList) { 607 Tok->setBlockKind(BK_BracedInit); 608 LBraceStack.back().Tok->setBlockKind(BK_BracedInit); 609 } else { 610 Tok->setBlockKind(BK_Block); 611 LBraceStack.back().Tok->setBlockKind(BK_Block); 612 } 613 } 614 LBraceStack.pop_back(); 615 break; 616 case tok::identifier: 617 if (!Tok->is(TT_StatementMacro)) 618 break; 619 [[fallthrough]]; 620 case tok::at: 621 case tok::semi: 622 case tok::kw_if: 623 case tok::kw_while: 624 case tok::kw_for: 625 case tok::kw_switch: 626 case tok::kw_try: 627 case tok::kw___try: 628 if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown)) 629 LBraceStack.back().Tok->setBlockKind(BK_Block); 630 break; 631 default: 632 break; 633 } 634 PrevTok = Tok; 635 Tok = NextTok; 636 } while (Tok->isNot(tok::eof) && !LBraceStack.empty()); 637 638 // Assume other blocks for all unclosed opening braces. 639 for (const auto &Entry : LBraceStack) 640 if (Entry.Tok->is(BK_Unknown)) 641 Entry.Tok->setBlockKind(BK_Block); 642 643 FormatTok = Tokens->setPosition(StoredPosition); 644 } 645 646 template <class T> 647 static inline void hash_combine(std::size_t &seed, const T &v) { 648 std::hash<T> hasher; 649 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 650 } 651 652 size_t UnwrappedLineParser::computePPHash() const { 653 size_t h = 0; 654 for (const auto &i : PPStack) { 655 hash_combine(h, size_t(i.Kind)); 656 hash_combine(h, i.Line); 657 } 658 return h; 659 } 660 661 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace 662 // is not null, subtracts its length (plus the preceding space) when computing 663 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before 664 // running the token annotator on it so that we can restore them afterward. 
665 bool UnwrappedLineParser::mightFitOnOneLine( 666 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const { 667 const auto ColumnLimit = Style.ColumnLimit; 668 if (ColumnLimit == 0) 669 return true; 670 671 auto &Tokens = ParsedLine.Tokens; 672 assert(!Tokens.empty()); 673 674 const auto *LastToken = Tokens.back().Tok; 675 assert(LastToken); 676 677 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size()); 678 679 int Index = 0; 680 for (const auto &Token : Tokens) { 681 assert(Token.Tok); 682 auto &SavedToken = SavedTokens[Index++]; 683 SavedToken.Tok = new FormatToken; 684 SavedToken.Tok->copyFrom(*Token.Tok); 685 SavedToken.Children = std::move(Token.Children); 686 } 687 688 AnnotatedLine Line(ParsedLine); 689 assert(Line.Last == LastToken); 690 691 TokenAnnotator Annotator(Style, Keywords); 692 Annotator.annotate(Line); 693 Annotator.calculateFormattingInformation(Line); 694 695 auto Length = LastToken->TotalLength; 696 if (OpeningBrace) { 697 assert(OpeningBrace != Tokens.front().Tok); 698 if (auto Prev = OpeningBrace->Previous; 699 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) { 700 Length -= ColumnLimit; 701 } 702 Length -= OpeningBrace->TokenText.size() + 1; 703 } 704 705 if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) { 706 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace)); 707 Length -= FirstToken->TokenText.size() + 1; 708 } 709 710 Index = 0; 711 for (auto &Token : Tokens) { 712 const auto &SavedToken = SavedTokens[Index++]; 713 Token.Tok->copyFrom(*SavedToken.Tok); 714 Token.Children = std::move(SavedToken.Children); 715 delete SavedToken.Tok; 716 } 717 718 // If these change PPLevel needs to be used for get correct indentation. 
719 assert(!Line.InMacroBody); 720 assert(!Line.InPPDirective); 721 return Line.Level * Style.IndentWidth + Length <= ColumnLimit; 722 } 723 724 FormatToken *UnwrappedLineParser::parseBlock( 725 bool MustBeDeclaration, unsigned AddLevels, bool MunchSemi, bool KeepBraces, 726 IfStmtKind *IfKind, bool UnindentWhitesmithsBraces, 727 bool CanContainBracedList, TokenType NextLBracesType) { 728 auto HandleVerilogBlockLabel = [this]() { 729 // ":" name 730 if (Style.isVerilog() && FormatTok->is(tok::colon)) { 731 nextToken(); 732 if (Keywords.isVerilogIdentifier(*FormatTok)) 733 nextToken(); 734 } 735 }; 736 737 // Whether this is a Verilog-specific block that has a special header like a 738 // module. 739 const bool VerilogHierarchy = 740 Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok); 741 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) || 742 (Style.isVerilog() && 743 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) && 744 "'{' or macro block token expected"); 745 FormatToken *Tok = FormatTok; 746 const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment); 747 auto Index = CurrentLines->size(); 748 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 749 FormatTok->setBlockKind(BK_Block); 750 751 // For Whitesmiths mode, jump to the next level prior to skipping over the 752 // braces. 753 if (!VerilogHierarchy && AddLevels > 0 && 754 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 755 ++Line->Level; 756 } 757 758 size_t PPStartHash = computePPHash(); 759 760 const unsigned InitialLevel = Line->Level; 761 if (VerilogHierarchy) { 762 AddLevels += parseVerilogHierarchyHeader(); 763 } else { 764 nextToken(/*LevelDifference=*/AddLevels); 765 HandleVerilogBlockLabel(); 766 } 767 768 // Bail out if there are too many levels. Otherwise, the stack might overflow. 
769 if (Line->Level > 300) 770 return nullptr; 771 772 if (MacroBlock && FormatTok->is(tok::l_paren)) 773 parseParens(); 774 775 size_t NbPreprocessorDirectives = 776 !parsingPPDirective() ? PreprocessorDirectives.size() : 0; 777 addUnwrappedLine(); 778 size_t OpeningLineIndex = 779 CurrentLines->empty() 780 ? (UnwrappedLine::kInvalidIndex) 781 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 782 783 // Whitesmiths is weird here. The brace needs to be indented for the namespace 784 // block, but the block itself may not be indented depending on the style 785 // settings. This allows the format to back up one level in those cases. 786 if (UnindentWhitesmithsBraces) 787 --Line->Level; 788 789 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 790 MustBeDeclaration); 791 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths) 792 Line->Level += AddLevels; 793 794 FormatToken *IfLBrace = nullptr; 795 const bool SimpleBlock = 796 parseLevel(Tok, CanContainBracedList, NextLBracesType, IfKind, &IfLBrace); 797 798 if (eof()) 799 return IfLBrace; 800 801 if (MacroBlock ? 
!FormatTok->is(TT_MacroBlockEnd) 802 : !FormatTok->is(tok::r_brace)) { 803 Line->Level = InitialLevel; 804 FormatTok->setBlockKind(BK_Block); 805 return IfLBrace; 806 } 807 808 const bool IsFunctionRBrace = 809 FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace); 810 811 auto RemoveBraces = [=]() mutable { 812 if (!SimpleBlock) 813 return false; 814 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)); 815 assert(FormatTok->is(tok::r_brace)); 816 const bool WrappedOpeningBrace = !Tok->Previous; 817 if (WrappedOpeningBrace && FollowedByComment) 818 return false; 819 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional; 820 if (KeepBraces && !HasRequiredIfBraces) 821 return false; 822 if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) { 823 const FormatToken *Previous = Tokens->getPreviousToken(); 824 assert(Previous); 825 if (Previous->is(tok::r_brace) && !Previous->Optional) 826 return false; 827 } 828 assert(!CurrentLines->empty()); 829 auto &LastLine = CurrentLines->back(); 830 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine)) 831 return false; 832 if (Tok->is(TT_ElseLBrace)) 833 return true; 834 if (WrappedOpeningBrace) { 835 assert(Index > 0); 836 --Index; // The line above the wrapped l_brace. 837 Tok = nullptr; 838 } 839 return mightFitOnOneLine((*CurrentLines)[Index], Tok); 840 }; 841 if (RemoveBraces()) { 842 Tok->MatchingParen = FormatTok; 843 FormatTok->MatchingParen = Tok; 844 } 845 846 size_t PPEndHash = computePPHash(); 847 848 // Munch the closing brace. 849 nextToken(/*LevelDifference=*/-AddLevels); 850 851 // When this is a function block and there is an unnecessary semicolon 852 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of 853 // it later). 
854 if (Style.RemoveSemicolon && IsFunctionRBrace) { 855 while (FormatTok->is(tok::semi)) { 856 FormatTok->Optional = true; 857 nextToken(); 858 } 859 } 860 861 HandleVerilogBlockLabel(); 862 863 if (MacroBlock && FormatTok->is(tok::l_paren)) 864 parseParens(); 865 866 Line->Level = InitialLevel; 867 868 if (FormatTok->is(tok::kw_noexcept)) { 869 // A noexcept in a requires expression. 870 nextToken(); 871 } 872 873 if (FormatTok->is(tok::arrow)) { 874 // Following the } or noexcept we can find a trailing return type arrow 875 // as part of an implicit conversion constraint. 876 nextToken(); 877 parseStructuralElement(); 878 } 879 880 if (MunchSemi && FormatTok->is(tok::semi)) 881 nextToken(); 882 883 if (PPStartHash == PPEndHash) { 884 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 885 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 886 // Update the opening line to add the forward reference as well 887 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 888 CurrentLines->size() - 1; 889 } 890 } 891 892 return IfLBrace; 893 } 894 895 static bool isGoogScope(const UnwrappedLine &Line) { 896 // FIXME: Closure-library specific stuff should not be hard-coded but be 897 // configurable. 898 if (Line.Tokens.size() < 4) 899 return false; 900 auto I = Line.Tokens.begin(); 901 if (I->Tok->TokenText != "goog") 902 return false; 903 ++I; 904 if (I->Tok->isNot(tok::period)) 905 return false; 906 ++I; 907 if (I->Tok->TokenText != "scope") 908 return false; 909 ++I; 910 return I->Tok->is(tok::l_paren); 911 } 912 913 static bool isIIFE(const UnwrappedLine &Line, 914 const AdditionalKeywords &Keywords) { 915 // Look for the start of an immediately invoked anonymous function. 916 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 917 // This is commonly done in JavaScript to create a new, anonymous scope. 918 // Example: (function() { ... 
})() 919 if (Line.Tokens.size() < 3) 920 return false; 921 auto I = Line.Tokens.begin(); 922 if (I->Tok->isNot(tok::l_paren)) 923 return false; 924 ++I; 925 if (I->Tok->isNot(Keywords.kw_function)) 926 return false; 927 ++I; 928 return I->Tok->is(tok::l_paren); 929 } 930 931 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 932 const FormatToken &InitialToken) { 933 tok::TokenKind Kind = InitialToken.Tok.getKind(); 934 if (InitialToken.is(TT_NamespaceMacro)) 935 Kind = tok::kw_namespace; 936 937 switch (Kind) { 938 case tok::kw_namespace: 939 return Style.BraceWrapping.AfterNamespace; 940 case tok::kw_class: 941 return Style.BraceWrapping.AfterClass; 942 case tok::kw_union: 943 return Style.BraceWrapping.AfterUnion; 944 case tok::kw_struct: 945 return Style.BraceWrapping.AfterStruct; 946 case tok::kw_enum: 947 return Style.BraceWrapping.AfterEnum; 948 default: 949 return false; 950 } 951 } 952 953 void UnwrappedLineParser::parseChildBlock( 954 bool CanContainBracedList, clang::format::TokenType NextLBracesType) { 955 assert(FormatTok->is(tok::l_brace)); 956 FormatTok->setBlockKind(BK_Block); 957 const FormatToken *OpeningBrace = FormatTok; 958 nextToken(); 959 { 960 bool SkipIndent = (Style.isJavaScript() && 961 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 962 ScopedLineState LineState(*this); 963 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 964 /*MustBeDeclaration=*/false); 965 Line->Level += SkipIndent ? 0 : 1; 966 parseLevel(OpeningBrace, CanContainBracedList, NextLBracesType); 967 flushComments(isOnNewLine(*FormatTok)); 968 Line->Level -= SkipIndent ? 
0 : 1; 969 } 970 nextToken(); 971 } 972 973 void UnwrappedLineParser::parsePPDirective() { 974 assert(FormatTok->is(tok::hash) && "'#' expected"); 975 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 976 977 nextToken(); 978 979 if (!FormatTok->Tok.getIdentifierInfo()) { 980 parsePPUnknown(); 981 return; 982 } 983 984 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 985 case tok::pp_define: 986 parsePPDefine(); 987 return; 988 case tok::pp_if: 989 parsePPIf(/*IfDef=*/false); 990 break; 991 case tok::pp_ifdef: 992 case tok::pp_ifndef: 993 parsePPIf(/*IfDef=*/true); 994 break; 995 case tok::pp_else: 996 case tok::pp_elifdef: 997 case tok::pp_elifndef: 998 case tok::pp_elif: 999 parsePPElse(); 1000 break; 1001 case tok::pp_endif: 1002 parsePPEndIf(); 1003 break; 1004 case tok::pp_pragma: 1005 parsePPPragma(); 1006 break; 1007 default: 1008 parsePPUnknown(); 1009 break; 1010 } 1011 } 1012 1013 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 1014 size_t Line = CurrentLines->size(); 1015 if (CurrentLines == &PreprocessorDirectives) 1016 Line += Lines.size(); 1017 1018 if (Unreachable || 1019 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) { 1020 PPStack.push_back({PP_Unreachable, Line}); 1021 } else { 1022 PPStack.push_back({PP_Conditional, Line}); 1023 } 1024 } 1025 1026 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 1027 ++PPBranchLevel; 1028 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 1029 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 1030 PPLevelBranchIndex.push_back(0); 1031 PPLevelBranchCount.push_back(0); 1032 } 1033 PPChainBranchIndex.push(Unreachable ? 
-1 : 0); 1034 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 1035 conditionalCompilationCondition(Unreachable || Skip); 1036 } 1037 1038 void UnwrappedLineParser::conditionalCompilationAlternative() { 1039 if (!PPStack.empty()) 1040 PPStack.pop_back(); 1041 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 1042 if (!PPChainBranchIndex.empty()) 1043 ++PPChainBranchIndex.top(); 1044 conditionalCompilationCondition( 1045 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 1046 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 1047 } 1048 1049 void UnwrappedLineParser::conditionalCompilationEnd() { 1050 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 1051 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 1052 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) 1053 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 1054 } 1055 // Guard against #endif's without #if. 1056 if (PPBranchLevel > -1) 1057 --PPBranchLevel; 1058 if (!PPChainBranchIndex.empty()) 1059 PPChainBranchIndex.pop(); 1060 if (!PPStack.empty()) 1061 PPStack.pop_back(); 1062 } 1063 1064 void UnwrappedLineParser::parsePPIf(bool IfDef) { 1065 bool IfNDef = FormatTok->is(tok::pp_ifndef); 1066 nextToken(); 1067 bool Unreachable = false; 1068 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 1069 Unreachable = true; 1070 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 1071 Unreachable = true; 1072 conditionalCompilationStart(Unreachable); 1073 FormatToken *IfCondition = FormatTok; 1074 // If there's a #ifndef on the first line, and the only lines before it are 1075 // comments, it could be an include guard. 
1076 bool MaybeIncludeGuard = IfNDef; 1077 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1078 for (auto &Line : Lines) { 1079 if (!Line.Tokens.front().Tok->is(tok::comment)) { 1080 MaybeIncludeGuard = false; 1081 IncludeGuard = IG_Rejected; 1082 break; 1083 } 1084 } 1085 } 1086 --PPBranchLevel; 1087 parsePPUnknown(); 1088 ++PPBranchLevel; 1089 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1090 IncludeGuard = IG_IfNdefed; 1091 IncludeGuardToken = IfCondition; 1092 } 1093 } 1094 1095 void UnwrappedLineParser::parsePPElse() { 1096 // If a potential include guard has an #else, it's not an include guard. 1097 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 1098 IncludeGuard = IG_Rejected; 1099 // Don't crash when there is an #else without an #if. 1100 assert(PPBranchLevel >= -1); 1101 if (PPBranchLevel == -1) 1102 conditionalCompilationStart(/*Unreachable=*/true); 1103 conditionalCompilationAlternative(); 1104 --PPBranchLevel; 1105 parsePPUnknown(); 1106 ++PPBranchLevel; 1107 } 1108 1109 void UnwrappedLineParser::parsePPEndIf() { 1110 conditionalCompilationEnd(); 1111 parsePPUnknown(); 1112 // If the #endif of a potential include guard is the last thing in the file, 1113 // then we found an include guard. 
1114 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && 1115 Style.IndentPPDirectives != FormatStyle::PPDIS_None) { 1116 IncludeGuard = IG_Found; 1117 } 1118 } 1119 1120 void UnwrappedLineParser::parsePPDefine() { 1121 nextToken(); 1122 1123 if (!FormatTok->Tok.getIdentifierInfo()) { 1124 IncludeGuard = IG_Rejected; 1125 IncludeGuardToken = nullptr; 1126 parsePPUnknown(); 1127 return; 1128 } 1129 1130 if (IncludeGuard == IG_IfNdefed && 1131 IncludeGuardToken->TokenText == FormatTok->TokenText) { 1132 IncludeGuard = IG_Defined; 1133 IncludeGuardToken = nullptr; 1134 for (auto &Line : Lines) { 1135 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 1136 IncludeGuard = IG_Rejected; 1137 break; 1138 } 1139 } 1140 } 1141 1142 // In the context of a define, even keywords should be treated as normal 1143 // identifiers. Setting the kind to identifier is not enough, because we need 1144 // to treat additional keywords like __except as well, which are already 1145 // identifiers. Setting the identifier info to null interferes with include 1146 // guard processing above, and changes preprocessing nesting. 1147 FormatTok->Tok.setKind(tok::identifier); 1148 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define); 1149 nextToken(); 1150 if (FormatTok->Tok.getKind() == tok::l_paren && 1151 !FormatTok->hasWhitespaceBefore()) { 1152 parseParens(); 1153 } 1154 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1155 Line->Level += PPBranchLevel + 1; 1156 addUnwrappedLine(); 1157 ++Line->Level; 1158 1159 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1); 1160 assert((int)Line->PPLevel >= 0); 1161 Line->InMacroBody = true; 1162 1163 // Errors during a preprocessor directive can only affect the layout of the 1164 // preprocessor directive, and thus we ignore them. 
An alternative approach 1165 // would be to use the same approach we use on the file level (no 1166 // re-indentation if there was a structural error) within the macro 1167 // definition. 1168 parseFile(); 1169 } 1170 1171 void UnwrappedLineParser::parsePPPragma() { 1172 Line->InPragmaDirective = true; 1173 parsePPUnknown(); 1174 } 1175 1176 void UnwrappedLineParser::parsePPUnknown() { 1177 do { 1178 nextToken(); 1179 } while (!eof()); 1180 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1181 Line->Level += PPBranchLevel + 1; 1182 addUnwrappedLine(); 1183 } 1184 1185 // Here we exclude certain tokens that are not usually the first token in an 1186 // unwrapped line. This is used in attempt to distinguish macro calls without 1187 // trailing semicolons from other constructs split to several lines. 1188 static bool tokenCanStartNewLine(const FormatToken &Tok) { 1189 // Semicolon can be a null-statement, l_square can be a start of a macro or 1190 // a C++11 attribute, but this doesn't seem to be common. 1191 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 1192 Tok.isNot(TT_AttributeSquare) && 1193 // Tokens that can only be used as binary operators and a part of 1194 // overloaded operator names. 
1195 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 1196 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 1197 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 1198 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 1199 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 1200 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 1201 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 1202 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 1203 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 1204 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 1205 Tok.isNot(tok::lesslessequal) && 1206 // Colon is used in labels, base class lists, initializer lists, 1207 // range-based for loops, ternary operator, but should never be the 1208 // first token in an unwrapped line. 1209 Tok.isNot(tok::colon) && 1210 // 'noexcept' is a trailing annotation. 1211 Tok.isNot(tok::kw_noexcept); 1212 } 1213 1214 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 1215 const FormatToken *FormatTok) { 1216 // FIXME: This returns true for C/C++ keywords like 'struct'. 
1217 return FormatTok->is(tok::identifier) && 1218 (!FormatTok->Tok.getIdentifierInfo() || 1219 !FormatTok->isOneOf( 1220 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 1221 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 1222 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 1223 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 1224 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 1225 Keywords.kw_instanceof, Keywords.kw_interface, 1226 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from)); 1227 } 1228 1229 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 1230 const FormatToken *FormatTok) { 1231 return FormatTok->Tok.isLiteral() || 1232 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 1233 mustBeJSIdent(Keywords, FormatTok); 1234 } 1235 1236 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 1237 // when encountered after a value (see mustBeJSIdentOrValue). 1238 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 1239 const FormatToken *FormatTok) { 1240 return FormatTok->isOneOf( 1241 tok::kw_return, Keywords.kw_yield, 1242 // conditionals 1243 tok::kw_if, tok::kw_else, 1244 // loops 1245 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 1246 // switch/case 1247 tok::kw_switch, tok::kw_case, 1248 // exceptions 1249 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 1250 // declaration 1251 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 1252 Keywords.kw_async, Keywords.kw_function, 1253 // import/export 1254 Keywords.kw_import, tok::kw_export); 1255 } 1256 1257 // Checks whether a token is a type in K&R C (aka C78). 
1258 static bool isC78Type(const FormatToken &Tok) { 1259 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long, 1260 tok::kw_unsigned, tok::kw_float, tok::kw_double, 1261 tok::identifier); 1262 } 1263 1264 // This function checks whether a token starts the first parameter declaration 1265 // in a K&R C (aka C78) function definition, e.g.: 1266 // int f(a, b) 1267 // short a, b; 1268 // { 1269 // return a + b; 1270 // } 1271 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, 1272 const FormatToken *FuncName) { 1273 assert(Tok); 1274 assert(Next); 1275 assert(FuncName); 1276 1277 if (FuncName->isNot(tok::identifier)) 1278 return false; 1279 1280 const FormatToken *Prev = FuncName->Previous; 1281 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev))) 1282 return false; 1283 1284 if (!isC78Type(*Tok) && 1285 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) { 1286 return false; 1287 } 1288 1289 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo()) 1290 return false; 1291 1292 Tok = Tok->Previous; 1293 if (!Tok || Tok->isNot(tok::r_paren)) 1294 return false; 1295 1296 Tok = Tok->Previous; 1297 if (!Tok || Tok->isNot(tok::identifier)) 1298 return false; 1299 1300 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma); 1301 } 1302 1303 bool UnwrappedLineParser::parseModuleImport() { 1304 assert(FormatTok->is(Keywords.kw_import) && "'import' expected"); 1305 1306 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true); 1307 !Token->Tok.getIdentifierInfo() && 1308 !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) { 1309 return false; 1310 } 1311 1312 nextToken(); 1313 while (!eof()) { 1314 if (FormatTok->is(tok::colon)) { 1315 FormatTok->setFinalizedType(TT_ModulePartitionColon); 1316 } 1317 // Handle import <foo/bar.h> as we would an include statement. 
1318 else if (FormatTok->is(tok::less)) { 1319 nextToken(); 1320 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) { 1321 // Mark tokens up to the trailing line comments as implicit string 1322 // literals. 1323 if (FormatTok->isNot(tok::comment) && 1324 !FormatTok->TokenText.startswith("//")) { 1325 FormatTok->setFinalizedType(TT_ImplicitStringLiteral); 1326 } 1327 nextToken(); 1328 } 1329 } 1330 if (FormatTok->is(tok::semi)) { 1331 nextToken(); 1332 break; 1333 } 1334 nextToken(); 1335 } 1336 1337 addUnwrappedLine(); 1338 return true; 1339 } 1340 1341 // readTokenWithJavaScriptASI reads the next token and terminates the current 1342 // line if JavaScript Automatic Semicolon Insertion must 1343 // happen between the current token and the next token. 1344 // 1345 // This method is conservative - it cannot cover all edge cases of JavaScript, 1346 // but only aims to correctly handle certain well known cases. It *must not* 1347 // return true in speculative cases. 1348 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 1349 FormatToken *Previous = FormatTok; 1350 readToken(); 1351 FormatToken *Next = FormatTok; 1352 1353 bool IsOnSameLine = 1354 CommentsBeforeNextToken.empty() 1355 ? Next->NewlinesBefore == 0 1356 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 1357 if (IsOnSameLine) 1358 return; 1359 1360 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 1361 bool PreviousStartsTemplateExpr = 1362 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); 1363 if (PreviousMustBeValue || Previous->is(tok::r_paren)) { 1364 // If the line contains an '@' sign, the previous token might be an 1365 // annotation, which can precede another identifier/value. 
1366 bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) { 1367 return LineNode.Tok->is(tok::at); 1368 }); 1369 if (HasAt) 1370 return; 1371 } 1372 if (Next->is(tok::exclaim) && PreviousMustBeValue) 1373 return addUnwrappedLine(); 1374 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); 1375 bool NextEndsTemplateExpr = 1376 Next->is(TT_TemplateString) && Next->TokenText.startswith("}"); 1377 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && 1378 (PreviousMustBeValue || 1379 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, 1380 tok::minusminus))) { 1381 return addUnwrappedLine(); 1382 } 1383 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) && 1384 isJSDeclOrStmt(Keywords, Next)) { 1385 return addUnwrappedLine(); 1386 } 1387 } 1388 1389 void UnwrappedLineParser::parseStructuralElement( 1390 bool IsTopLevel, TokenType NextLBracesType, IfStmtKind *IfKind, 1391 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) { 1392 if (Style.Language == FormatStyle::LK_TableGen && 1393 FormatTok->is(tok::pp_include)) { 1394 nextToken(); 1395 if (FormatTok->is(tok::string_literal)) 1396 nextToken(); 1397 addUnwrappedLine(); 1398 return; 1399 } 1400 1401 if (Style.isVerilog()) { 1402 if (Keywords.isVerilogStructuredProcedure(*FormatTok)) { 1403 parseForOrWhileLoop(/*HasParens=*/false); 1404 return; 1405 } 1406 if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) { 1407 parseForOrWhileLoop(); 1408 return; 1409 } 1410 if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert, 1411 Keywords.kw_assume, Keywords.kw_cover)) { 1412 parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true); 1413 return; 1414 } 1415 1416 // Skip things that can exist before keywords like 'if' and 'case'. 
1417 while (true) { 1418 if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique, 1419 Keywords.kw_unique0)) { 1420 nextToken(); 1421 } else if (FormatTok->is(tok::l_paren) && 1422 Tokens->peekNextToken()->is(tok::star)) { 1423 parseParens(); 1424 } else { 1425 break; 1426 } 1427 } 1428 } 1429 1430 // Tokens that only make sense at the beginning of a line. 1431 switch (FormatTok->Tok.getKind()) { 1432 case tok::kw_asm: 1433 nextToken(); 1434 if (FormatTok->is(tok::l_brace)) { 1435 FormatTok->setFinalizedType(TT_InlineASMBrace); 1436 nextToken(); 1437 while (FormatTok && !eof()) { 1438 if (FormatTok->is(tok::r_brace)) { 1439 FormatTok->setFinalizedType(TT_InlineASMBrace); 1440 nextToken(); 1441 addUnwrappedLine(); 1442 break; 1443 } 1444 FormatTok->Finalized = true; 1445 nextToken(); 1446 } 1447 } 1448 break; 1449 case tok::kw_namespace: 1450 parseNamespace(); 1451 return; 1452 case tok::kw_public: 1453 case tok::kw_protected: 1454 case tok::kw_private: 1455 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 1456 Style.isCSharp()) { 1457 nextToken(); 1458 } else { 1459 parseAccessSpecifier(); 1460 } 1461 return; 1462 case tok::kw_if: { 1463 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1464 // field/method declaration. 1465 break; 1466 } 1467 FormatToken *Tok = parseIfThenElse(IfKind); 1468 if (IfLeftBrace) 1469 *IfLeftBrace = Tok; 1470 return; 1471 } 1472 case tok::kw_for: 1473 case tok::kw_while: 1474 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1475 // field/method declaration. 1476 break; 1477 } 1478 parseForOrWhileLoop(); 1479 return; 1480 case tok::kw_do: 1481 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1482 // field/method declaration. 1483 break; 1484 } 1485 parseDoWhile(); 1486 if (HasDoWhile) 1487 *HasDoWhile = true; 1488 return; 1489 case tok::kw_switch: 1490 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1491 // 'switch: string' field declaration. 
1492 break; 1493 } 1494 parseSwitch(); 1495 return; 1496 case tok::kw_default: 1497 // In Verilog default along with other labels are handled in the next loop. 1498 if (Style.isVerilog()) 1499 break; 1500 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1501 // 'default: string' field declaration. 1502 break; 1503 } 1504 nextToken(); 1505 if (FormatTok->is(tok::colon)) { 1506 FormatTok->setFinalizedType(TT_CaseLabelColon); 1507 parseLabel(); 1508 return; 1509 } 1510 // e.g. "default void f() {}" in a Java interface. 1511 break; 1512 case tok::kw_case: 1513 // Proto: there are no switch/case statements. 1514 if (Style.isProto()) { 1515 nextToken(); 1516 return; 1517 } 1518 if (Style.isVerilog()) { 1519 parseBlock(); 1520 addUnwrappedLine(); 1521 return; 1522 } 1523 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1524 // 'case: string' field declaration. 1525 nextToken(); 1526 break; 1527 } 1528 parseCaseLabel(); 1529 return; 1530 case tok::kw_try: 1531 case tok::kw___try: 1532 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1533 // field/method declaration. 1534 break; 1535 } 1536 parseTryCatch(); 1537 return; 1538 case tok::kw_extern: 1539 nextToken(); 1540 if (Style.isVerilog()) { 1541 // In Verilog and extern module declaration looks like a start of module. 1542 // But there is no body and endmodule. So we handle it separately. 1543 if (Keywords.isVerilogHierarchy(*FormatTok)) { 1544 parseVerilogHierarchyHeader(); 1545 return; 1546 } 1547 } else if (FormatTok->is(tok::string_literal)) { 1548 nextToken(); 1549 if (FormatTok->is(tok::l_brace)) { 1550 if (Style.BraceWrapping.AfterExternBlock) 1551 addUnwrappedLine(); 1552 // Either we indent or for backwards compatibility we follow the 1553 // AfterExternBlock style. 1554 unsigned AddLevels = 1555 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) || 1556 (Style.BraceWrapping.AfterExternBlock && 1557 Style.IndentExternBlock == 1558 FormatStyle::IEBS_AfterExternBlock) 1559 ? 
1u 1560 : 0u; 1561 parseBlock(/*MustBeDeclaration=*/true, AddLevels); 1562 addUnwrappedLine(); 1563 return; 1564 } 1565 } 1566 break; 1567 case tok::kw_export: 1568 if (Style.isJavaScript()) { 1569 parseJavaScriptEs6ImportExport(); 1570 return; 1571 } 1572 if (Style.isCpp()) { 1573 nextToken(); 1574 if (FormatTok->is(tok::kw_namespace)) { 1575 parseNamespace(); 1576 return; 1577 } 1578 if (FormatTok->is(Keywords.kw_import) && parseModuleImport()) 1579 return; 1580 } 1581 break; 1582 case tok::kw_inline: 1583 nextToken(); 1584 if (FormatTok->is(tok::kw_namespace)) { 1585 parseNamespace(); 1586 return; 1587 } 1588 break; 1589 case tok::identifier: 1590 if (FormatTok->is(TT_ForEachMacro)) { 1591 parseForOrWhileLoop(); 1592 return; 1593 } 1594 if (FormatTok->is(TT_MacroBlockBegin)) { 1595 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 1596 /*MunchSemi=*/false); 1597 return; 1598 } 1599 if (FormatTok->is(Keywords.kw_import)) { 1600 if (Style.isJavaScript()) { 1601 parseJavaScriptEs6ImportExport(); 1602 return; 1603 } 1604 if (Style.Language == FormatStyle::LK_Proto) { 1605 nextToken(); 1606 if (FormatTok->is(tok::kw_public)) 1607 nextToken(); 1608 if (!FormatTok->is(tok::string_literal)) 1609 return; 1610 nextToken(); 1611 if (FormatTok->is(tok::semi)) 1612 nextToken(); 1613 addUnwrappedLine(); 1614 return; 1615 } 1616 if (Style.isCpp() && parseModuleImport()) 1617 return; 1618 } 1619 if (Style.isCpp() && 1620 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, 1621 Keywords.kw_slots, Keywords.kw_qslots)) { 1622 nextToken(); 1623 if (FormatTok->is(tok::colon)) { 1624 nextToken(); 1625 addUnwrappedLine(); 1626 return; 1627 } 1628 } 1629 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 1630 parseStatementMacro(); 1631 return; 1632 } 1633 if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) { 1634 parseNamespace(); 1635 return; 1636 } 1637 // In all other cases, parse the declaration. 
1638 break; 1639 default: 1640 break; 1641 } 1642 do { 1643 const FormatToken *Previous = FormatTok->Previous; 1644 switch (FormatTok->Tok.getKind()) { 1645 case tok::at: 1646 nextToken(); 1647 if (FormatTok->is(tok::l_brace)) { 1648 nextToken(); 1649 parseBracedList(); 1650 break; 1651 } else if (Style.Language == FormatStyle::LK_Java && 1652 FormatTok->is(Keywords.kw_interface)) { 1653 nextToken(); 1654 break; 1655 } 1656 switch (FormatTok->Tok.getObjCKeywordID()) { 1657 case tok::objc_public: 1658 case tok::objc_protected: 1659 case tok::objc_package: 1660 case tok::objc_private: 1661 return parseAccessSpecifier(); 1662 case tok::objc_interface: 1663 case tok::objc_implementation: 1664 return parseObjCInterfaceOrImplementation(); 1665 case tok::objc_protocol: 1666 if (parseObjCProtocol()) 1667 return; 1668 break; 1669 case tok::objc_end: 1670 return; // Handled by the caller. 1671 case tok::objc_optional: 1672 case tok::objc_required: 1673 nextToken(); 1674 addUnwrappedLine(); 1675 return; 1676 case tok::objc_autoreleasepool: 1677 nextToken(); 1678 if (FormatTok->is(tok::l_brace)) { 1679 if (Style.BraceWrapping.AfterControlStatement == 1680 FormatStyle::BWACS_Always) { 1681 addUnwrappedLine(); 1682 } 1683 parseBlock(); 1684 } 1685 addUnwrappedLine(); 1686 return; 1687 case tok::objc_synchronized: 1688 nextToken(); 1689 if (FormatTok->is(tok::l_paren)) { 1690 // Skip synchronization object 1691 parseParens(); 1692 } 1693 if (FormatTok->is(tok::l_brace)) { 1694 if (Style.BraceWrapping.AfterControlStatement == 1695 FormatStyle::BWACS_Always) { 1696 addUnwrappedLine(); 1697 } 1698 parseBlock(); 1699 } 1700 addUnwrappedLine(); 1701 return; 1702 case tok::objc_try: 1703 // This branch isn't strictly necessary (the kw_try case below would 1704 // do this too after the tok::at is parsed above). But be explicit. 
1705 parseTryCatch(); 1706 return; 1707 default: 1708 break; 1709 } 1710 break; 1711 case tok::kw_requires: { 1712 if (Style.isCpp()) { 1713 bool ParsedClause = parseRequires(); 1714 if (ParsedClause) 1715 return; 1716 } else { 1717 nextToken(); 1718 } 1719 break; 1720 } 1721 case tok::kw_enum: 1722 // Ignore if this is part of "template <enum ...". 1723 if (Previous && Previous->is(tok::less)) { 1724 nextToken(); 1725 break; 1726 } 1727 1728 // parseEnum falls through and does not yet add an unwrapped line as an 1729 // enum definition can start a structural element. 1730 if (!parseEnum()) 1731 break; 1732 // This only applies to C++ and Verilog. 1733 if (!Style.isCpp() && !Style.isVerilog()) { 1734 addUnwrappedLine(); 1735 return; 1736 } 1737 break; 1738 case tok::kw_typedef: 1739 nextToken(); 1740 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1741 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS, 1742 Keywords.kw_CF_CLOSED_ENUM, 1743 Keywords.kw_NS_CLOSED_ENUM)) { 1744 parseEnum(); 1745 } 1746 break; 1747 case tok::kw_class: 1748 if (Style.isVerilog()) { 1749 parseBlock(); 1750 addUnwrappedLine(); 1751 return; 1752 } 1753 [[fallthrough]]; 1754 case tok::kw_struct: 1755 case tok::kw_union: 1756 if (parseStructLike()) 1757 return; 1758 break; 1759 case tok::period: 1760 nextToken(); 1761 // In Java, classes have an implicit static member "class". 1762 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1763 FormatTok->is(tok::kw_class)) { 1764 nextToken(); 1765 } 1766 if (Style.isJavaScript() && FormatTok && 1767 FormatTok->Tok.getIdentifierInfo()) { 1768 // JavaScript only has pseudo keywords, all keywords are allowed to 1769 // appear in "IdentifierName" positions. 
See http://es5.github.io/#x7.6 1770 nextToken(); 1771 } 1772 break; 1773 case tok::semi: 1774 nextToken(); 1775 addUnwrappedLine(); 1776 return; 1777 case tok::r_brace: 1778 addUnwrappedLine(); 1779 return; 1780 case tok::l_paren: { 1781 parseParens(); 1782 // Break the unwrapped line if a K&R C function definition has a parameter 1783 // declaration. 1784 if (!IsTopLevel || !Style.isCpp() || !Previous || eof()) 1785 break; 1786 if (isC78ParameterDecl(FormatTok, 1787 Tokens->peekNextToken(/*SkipComment=*/true), 1788 Previous)) { 1789 addUnwrappedLine(); 1790 return; 1791 } 1792 break; 1793 } 1794 case tok::kw_operator: 1795 nextToken(); 1796 if (FormatTok->isBinaryOperator()) 1797 nextToken(); 1798 break; 1799 case tok::caret: 1800 nextToken(); 1801 // Block return type. 1802 if (FormatTok->Tok.isAnyIdentifier() || 1803 FormatTok->isSimpleTypeSpecifier()) { 1804 nextToken(); 1805 // Return types: pointers are ok too. 1806 while (FormatTok->is(tok::star)) 1807 nextToken(); 1808 } 1809 // Block argument list. 1810 if (FormatTok->is(tok::l_paren)) 1811 parseParens(); 1812 // Block body. 1813 if (FormatTok->is(tok::l_brace)) 1814 parseChildBlock(); 1815 break; 1816 case tok::l_brace: 1817 if (NextLBracesType != TT_Unknown) 1818 FormatTok->setFinalizedType(NextLBracesType); 1819 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) { 1820 // A block outside of parentheses must be the last part of a 1821 // structural element. 1822 // FIXME: Figure out cases where this is not true, and add projections 1823 // for them (the one we know is missing are lambdas). 1824 if (Style.Language == FormatStyle::LK_Java && 1825 Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) { 1826 // If necessary, we could set the type to something different than 1827 // TT_FunctionLBrace. 
1828 if (Style.BraceWrapping.AfterControlStatement == 1829 FormatStyle::BWACS_Always) { 1830 addUnwrappedLine(); 1831 } 1832 } else if (Style.BraceWrapping.AfterFunction) { 1833 addUnwrappedLine(); 1834 } 1835 FormatTok->setFinalizedType(TT_FunctionLBrace); 1836 parseBlock(); 1837 addUnwrappedLine(); 1838 return; 1839 } 1840 // Otherwise this was a braced init list, and the structural 1841 // element continues. 1842 break; 1843 case tok::kw_try: 1844 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1845 // field/method declaration. 1846 nextToken(); 1847 break; 1848 } 1849 // We arrive here when parsing function-try blocks. 1850 if (Style.BraceWrapping.AfterFunction) 1851 addUnwrappedLine(); 1852 parseTryCatch(); 1853 return; 1854 case tok::identifier: { 1855 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) && 1856 Line->MustBeDeclaration) { 1857 addUnwrappedLine(); 1858 parseCSharpGenericTypeConstraint(); 1859 break; 1860 } 1861 if (FormatTok->is(TT_MacroBlockEnd)) { 1862 addUnwrappedLine(); 1863 return; 1864 } 1865 1866 // Function declarations (as opposed to function expressions) are parsed 1867 // on their own unwrapped line by continuing this loop. Function 1868 // expressions (functions that are not on their own line) must not create 1869 // a new unwrapped line, so they are special cased below. 1870 size_t TokenCount = Line->Tokens.size(); 1871 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) && 1872 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is( 1873 Keywords.kw_async)))) { 1874 tryToParseJSFunction(); 1875 break; 1876 } 1877 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) && 1878 FormatTok->is(Keywords.kw_interface)) { 1879 if (Style.isJavaScript()) { 1880 // In JavaScript/TypeScript, "interface" can be used as a standalone 1881 // identifier, e.g. in `var interface = 1;`. 
If "interface" is 1882 // followed by another identifier, it is very like to be an actual 1883 // interface declaration. 1884 unsigned StoredPosition = Tokens->getPosition(); 1885 FormatToken *Next = Tokens->getNextToken(); 1886 FormatTok = Tokens->setPosition(StoredPosition); 1887 if (!mustBeJSIdent(Keywords, Next)) { 1888 nextToken(); 1889 break; 1890 } 1891 } 1892 parseRecord(); 1893 addUnwrappedLine(); 1894 return; 1895 } 1896 1897 if (Style.isVerilog()) { 1898 if (FormatTok->is(Keywords.kw_table)) { 1899 parseVerilogTable(); 1900 return; 1901 } 1902 if (Keywords.isVerilogBegin(*FormatTok) || 1903 Keywords.isVerilogHierarchy(*FormatTok)) { 1904 parseBlock(); 1905 addUnwrappedLine(); 1906 return; 1907 } 1908 } 1909 1910 if (!Style.isCpp() && FormatTok->is(Keywords.kw_interface)) { 1911 if (parseStructLike()) 1912 return; 1913 break; 1914 } 1915 1916 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 1917 parseStatementMacro(); 1918 return; 1919 } 1920 1921 // See if the following token should start a new unwrapped line. 1922 StringRef Text = FormatTok->TokenText; 1923 1924 FormatToken *PreviousToken = FormatTok; 1925 nextToken(); 1926 1927 // JS doesn't have macros, and within classes colons indicate fields, not 1928 // labels. 1929 if (Style.isJavaScript()) 1930 break; 1931 1932 auto OneTokenSoFar = [&]() { 1933 auto I = Line->Tokens.begin(), E = Line->Tokens.end(); 1934 while (I != E && I->Tok->is(tok::comment)) 1935 ++I; 1936 while (I != E && Style.isVerilog() && I->Tok->is(tok::hash)) 1937 ++I; 1938 return I != E && (++I == E); 1939 }; 1940 if (OneTokenSoFar()) { 1941 // In Verilog labels can be any expression, so we don't do them here. 
1942 if (!Style.isVerilog() && FormatTok->is(tok::colon) && 1943 !Line->MustBeDeclaration) { 1944 Line->Tokens.begin()->Tok->MustBreakBefore = true; 1945 FormatTok->setFinalizedType(TT_GotoLabelColon); 1946 parseLabel(!Style.IndentGotoLabels); 1947 if (HasLabel) 1948 *HasLabel = true; 1949 return; 1950 } 1951 // Recognize function-like macro usages without trailing semicolon as 1952 // well as free-standing macros like Q_OBJECT. 1953 bool FunctionLike = FormatTok->is(tok::l_paren); 1954 if (FunctionLike) 1955 parseParens(); 1956 1957 bool FollowedByNewline = 1958 CommentsBeforeNextToken.empty() 1959 ? FormatTok->NewlinesBefore > 0 1960 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 1961 1962 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 1963 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) { 1964 if (PreviousToken->isNot(TT_UntouchableMacroFunc)) 1965 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro); 1966 addUnwrappedLine(); 1967 return; 1968 } 1969 } 1970 break; 1971 } 1972 case tok::equal: 1973 if ((Style.isJavaScript() || Style.isCSharp()) && 1974 FormatTok->is(TT_FatArrow)) { 1975 tryToParseChildBlock(); 1976 break; 1977 } 1978 1979 nextToken(); 1980 if (FormatTok->is(tok::l_brace)) { 1981 // Block kind should probably be set to BK_BracedInit for any language. 1982 // C# needs this change to ensure that array initialisers and object 1983 // initialisers are indented the same way. 1984 if (Style.isCSharp()) 1985 FormatTok->setBlockKind(BK_BracedInit); 1986 nextToken(); 1987 parseBracedList(); 1988 } else if (Style.Language == FormatStyle::LK_Proto && 1989 FormatTok->is(tok::less)) { 1990 nextToken(); 1991 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 1992 /*ClosingBraceKind=*/tok::greater); 1993 } 1994 break; 1995 case tok::l_square: 1996 parseSquare(); 1997 break; 1998 case tok::kw_new: 1999 parseNew(); 2000 break; 2001 case tok::kw_case: 2002 // Proto: there are no switch/case statements. 
      if (Style.isProto()) {
        nextToken();
        return;
      }
      // In Verilog switch is called case.
      if (Style.isVerilog()) {
        parseBlock();
        addUnwrappedLine();
        return;
      }
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // 'case: string' field declaration.
        nextToken();
        break;
      }
      parseCaseLabel();
      break;
    case tok::kw_default:
      nextToken();
      if (Style.isVerilog()) {
        if (FormatTok->is(tok::colon)) {
          // The label will be handled in the next iteration.
          break;
        }
        if (FormatTok->is(Keywords.kw_clocking)) {
          // A default clocking block.
          parseBlock();
          addUnwrappedLine();
          return;
        }
        parseVerilogCaseLabel();
        return;
      }
      break;
    case tok::colon:
      nextToken();
      if (Style.isVerilog()) {
        parseVerilogCaseLabel();
        return;
      }
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

/// Tries to parse a C# property accessor block that starts at the current `{`.
///
/// Returns false without consuming anything when the style is not C#, when the
/// brace is not preceded by an identifier, or when the run of accessor-like
/// tokens directly after the brace contains no `get`, `set` or `init`.
/// Otherwise the accessor body is consumed up to and including the closing
/// `}` (plus a trailing `= ...;` initializer if present) and true is returned.
bool UnwrappedLineParser::tryToParsePropertyAccessor() {
  assert(FormatTok->is(tok::l_brace));
  if (!Style.isCSharp())
    return false;
  // See if it's a property accessor.
  if (FormatTok->Previous->isNot(tok::identifier))
    return false;

  // See if we are inside a property accessor.
  //
  // Record the current tokenPosition so that we can advance and
  // reset the current token. `Next` is not set yet so we need
  // another way to advance along the token stream.
  unsigned int StoredPosition = Tokens->getPosition();
  FormatToken *Tok = Tokens->getNextToken();

  // A trivial property accessor is of the form:
  // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
  // Track these as they do not require line breaks to be introduced.
  bool HasSpecialAccessor = false;
  bool IsTrivialPropertyAccessor = true;
  // Lookahead only: `Tok` walks the stream while FormatTok stays on the `{`.
  // NOTE(review): eof() presumably inspects FormatTok, which this loop never
  // advances, so the loop effectively terminates via the `break` below.
  while (!eof()) {
    if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
                     tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
                     Keywords.kw_init, Keywords.kw_set)) {
      if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
        HasSpecialAccessor = true;
      Tok = Tokens->getNextToken();
      continue;
    }
    // The first token outside the accessor vocabulary decides: anything other
    // than the closing brace means the accessor has a non-trivial body.
    if (Tok->isNot(tok::r_brace))
      IsTrivialPropertyAccessor = false;
    break;
  }

  if (!HasSpecialAccessor) {
    // Not an accessor after all; undo the lookahead.
    Tokens->setPosition(StoredPosition);
    return false;
  }

  // Try to parse the property accessor:
  // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
  Tokens->setPosition(StoredPosition);
  if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
    addUnwrappedLine();
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::r_brace:
      nextToken();
      // A property initializer (`} = value;`) stays on the accessor's line.
      if (FormatTok->is(tok::equal)) {
        while (!eof() && FormatTok->isNot(tok::semi))
          nextToken();
        nextToken();
      }
      addUnwrappedLine();
      return true;
    case tok::l_brace:
      // Body of an individual accessor, parsed one level deeper.
      ++Line->Level;
      parseBlock(/*MustBeDeclaration=*/true);
      addUnwrappedLine();
      --Line->Level;
      break;
    case tok::equal:
      // Expression-bodied accessor (`get => expr;`): consume through the `;`.
      if (FormatTok->is(TT_FatArrow)) {
        ++Line->Level;
        do {
          nextToken();
        } while (!eof() && FormatTok->isNot(tok::semi));
        nextToken();
        addUnwrappedLine();
        --Line->Level;
        break;
      }
      nextToken();
      break;
    default:
      if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
                             Keywords.kw_set) &&
          !IsTrivialPropertyAccessor) {
        // Non-trivial get/set needs to be on its own line.
        addUnwrappedLine();
      }
      nextToken();
    }
  } while (!eof());

  // Unreachable for well-formed code (paired '{' and '}').
  return true;
}

/// Tries to parse a C++ lambda that starts at the current `[`.
///
/// Returns false when the style is not C++ (the `[` is consumed in that case)
/// or when tryToParseLambdaIntroducer() rejects the bracket. Otherwise returns
/// true. On the path that reaches a `{`, the brace and the opening `[` are
/// typed as TT_LambdaLBrace / TT_LambdaLSquare and the body is parsed as a
/// child block.
/// NOTE(review): the early `return true` paths inside the loop give up without
/// typing any token as a lambda; presumably `true` there only tells the caller
/// that tokens were consumed -- confirm against callers.
bool UnwrappedLineParser::tryToParseLambda() {
  assert(FormatTok->is(tok::l_square));
  if (!Style.isCpp()) {
    nextToken();
    return false;
  }
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  bool SeenArrow = false;
  bool InTemplateParameterList = false;

  // Skip over everything that may legally sit between the introducer and the
  // body: template/parameter lists, specifiers, trailing return type, requires
  // clause.
  while (FormatTok->isNot(tok::l_brace)) {
    if (FormatTok->isSimpleTypeSpecifier()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      break;
    case tok::l_paren:
      parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::less:
      assert(FormatTok->Previous);
      // `]<` opens an explicit template parameter list.
      if (FormatTok->Previous->is(tok::r_square))
        InTemplateParameterList = true;
      nextToken();
      break;
    case tok::kw_auto:
    case tok::kw_class:
    case tok::kw_template:
    case tok::kw_typename:
    case tok::amp:
    case tok::star:
    case tok::kw_const:
    case tok::kw_constexpr:
    case tok::kw_consteval:
    case tok::comma:
    case tok::greater:
    case tok::identifier:
    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_mutable:
    case tok::kw_noexcept:
    case tok::kw_static:
      nextToken();
      break;
    // Specialization of a template with an integer parameter can contain
    // arithmetic, logical, comparison and ternary operators.
    //
    // FIXME: This also accepts sequences of operators that are not in the scope
    // of a template argument list.
    //
    // In a C++ lambda a template type can only occur after an arrow. We use
    // this as an heuristic to distinguish between Objective-C expressions
    // followed by an `a->b` expression, such as:
    // ([obj func:arg] + a->b)
    // Otherwise the code below would parse as a lambda.
    //
    // FIXME: This heuristic is incorrect for C++20 generic lambdas with
    // explicit template lists: []<bool b = true && false>(U &&u){}
    case tok::plus:
    case tok::minus:
    case tok::exclaim:
    case tok::tilde:
    case tok::slash:
    case tok::percent:
    case tok::lessless:
    case tok::pipe:
    case tok::pipepipe:
    case tok::ampamp:
    case tok::caret:
    case tok::equalequal:
    case tok::exclaimequal:
    case tok::greaterequal:
    case tok::lessequal:
    case tok::question:
    case tok::colon:
    case tok::ellipsis:
    case tok::kw_true:
    case tok::kw_false:
      if (SeenArrow || InTemplateParameterList) {
        nextToken();
        break;
      }
      return true;
    case tok::arrow:
      // This might or might not actually be a lambda arrow (this could be an
      // ObjC method invocation followed by a dereferencing arrow). We might
      // reset this back to TT_Unknown in TokenAnnotator.
      FormatTok->setFinalizedType(TT_LambdaArrow);
      SeenArrow = true;
      nextToken();
      break;
    case tok::kw_requires: {
      auto *RequiresToken = FormatTok;
      nextToken();
      parseRequiresClause(RequiresToken);
      break;
    }
    default:
      return true;
    }
  }
  FormatTok->setFinalizedType(TT_LambdaLBrace);
  LSquare.setFinalizedType(TT_LambdaLSquare);
  parseChildBlock();
  return true;
}

/// Consumes the current `[` and decides whether it can start a lambda
/// introducer.
///
/// Returns false (with only the `[` consumed) when the previous token is an
/// identifier-like token other than `return`/`co_await`/`co_yield`/
/// `co_return`, when the previous token closes a scope, when the bracket is a
/// structured binding, when the next token is another `[`, or when the
/// bracket is `[]` followed by `>`. Otherwise parses through the matching `]`
/// and returns true.
bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
  const FormatToken *Previous = FormatTok->Previous;
  const FormatToken *LeftSquare = FormatTok;
  nextToken();
  if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
                     !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
                                        tok::kw_co_yield, tok::kw_co_return)) ||
                    Previous->closesScope())) ||
      LeftSquare->isCppStructuredBinding(Style)) {
    return false;
  }
  if (FormatTok->is(tok::l_square))
    return false;
  if (FormatTok->is(tok::r_square)) {
    const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
    if (Next->is(tok::greater))
      return false;
  }
  parseSquare(/*LambdaIntroducer=*/true);
  return true;
}

/// Parses a JavaScript `function` (optionally `async function`) expression
/// starting at the current token: optional generator `*`, optional name, the
/// parameter list, an optional `: type` annotation and finally the body as a
/// child block. Returns early if no `(` follows the name or if a `;` is found
/// where the body would start.
void UnwrappedLineParser::tryToParseJSFunction() {
  assert(FormatTok->is(Keywords.kw_function) ||
         FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
  if (FormatTok->is(Keywords.kw_async))
    nextToken();
  // Consume "function".
  nextToken();

  // Consume * (generator function). Treat it like C++'s overloaded operators.
  if (FormatTok->is(tok::star)) {
    FormatTok->setFinalizedType(TT_OverloadedOperator);
    nextToken();
  }

  // Consume function name.
  if (FormatTok->is(tok::identifier))
    nextToken();

  if (FormatTok->isNot(tok::l_paren))
    return;

  // Parse formal parameter list.
  parseParens();

  if (FormatTok->is(tok::colon)) {
    // Parse a type definition.
    nextToken();

    // Eat the type declaration. For braced inline object types, balance braces,
    // otherwise just parse until finding an l_brace for the function body.
    if (FormatTok->is(tok::l_brace))
      tryToParseBracedList();
    else
      while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
        nextToken();
  }

  if (FormatTok->is(tok::semi))
    return;

  parseChildBlock();
}

/// Parses the current `{` as a braced list if its block kind says so.
///
/// The block kind is computed first when it is still BK_Unknown. Returns false
/// without consuming anything for BK_Block; otherwise consumes the brace,
/// parses the list and returns true.
bool UnwrappedLineParser::tryToParseBracedList() {
  if (FormatTok->is(BK_Unknown))
    calculateBraceTypes();
  assert(FormatTok->isNot(BK_Unknown));
  if (FormatTok->is(BK_Block))
    return false;
  nextToken();
  parseBracedList();
  return true;
}

/// Consumes a fat arrow (`=>`) and, if a `{` follows, parses it as a child
/// block. Returns whether a child block was parsed; the arrow itself is
/// consumed either way.
bool UnwrappedLineParser::tryToParseChildBlock() {
  assert(Style.isJavaScript() || Style.isCSharp());
  assert(FormatTok->is(TT_FatArrow));
  // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
  // They always start an expression or a child block if followed by a curly
  // brace.
  nextToken();
  if (FormatTok->isNot(tok::l_brace))
    return false;
  parseChildBlock();
  return true;
}

/// Parses the contents of a braced list up to and including the closing token.
///
/// \param ContinueOnSemicolons If false, parsing stops at the first `;` that
/// is treated as an error (the `;` is left unconsumed).
/// \param IsEnum If true and short enums are not allowed on a single line, an
/// unwrapped line is emitted after every `,` and before the closing token.
/// \param ClosingBraceKind The token kind that terminates the list.
///
/// Returns true only if the closing token was reached without such an
/// erroneous `;`. Returns false on end of file.
bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
                                          bool IsEnum,
                                          tok::TokenKind ClosingBraceKind) {
  bool HasError = false;

  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
  // replace this by using parseAssignmentExpression() inside.
  do {
    if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
        tryToParseChildBlock()) {
      continue;
    }
    if (Style.isJavaScript()) {
      if (FormatTok->is(Keywords.kw_function) ||
          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
        tryToParseJSFunction();
        continue;
      }
      if (FormatTok->is(tok::l_brace)) {
        // Could be a method inside of a braced list `{a() { return 1; }}`.
        if (tryToParseBracedList())
          continue;
        parseChildBlock();
      }
    }
    // Checked before the switch so that a custom closing kind (e.g. `>`) wins
    // over the generic handling below.
    if (FormatTok->Tok.getKind() == ClosingBraceKind) {
      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
        addUnwrappedLine();
      nextToken();
      return !HasError;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_square:
      if (Style.isCSharp())
        parseSquare();
      else
        tryToParseLambda();
      break;
    case tok::l_paren:
      parseParens();
      // JavaScript can just have free standing methods and getters/setters in
      // object literals. Detect them by a "{" following ")".
      if (Style.isJavaScript()) {
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }
      break;
    case tok::l_brace:
      // Assume there are no blocks inside a braced init list apart
      // from the ones we explicitly parse out (like lambdas).
      FormatTok->setBlockKind(BK_BracedInit);
      nextToken();
      parseBracedList();
      break;
    case tok::less:
      // Angle brackets nest as lists only in Proto or inside another
      // angle-bracket list; elsewhere `<` is consumed as an ordinary token.
      if (Style.Language == FormatStyle::LK_Proto ||
          ClosingBraceKind == tok::greater) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      } else {
        nextToken();
      }
      break;
    case tok::semi:
      // JavaScript (or more precisely TypeScript) can have semicolons in braced
      // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
      // used for error recovery if we have otherwise determined that this is
      // a braced list.
      if (Style.isJavaScript()) {
        nextToken();
        break;
      }
      HasError = true;
      if (!ContinueOnSemicolons)
        return !HasError;
      nextToken();
      break;
    case tok::comma:
      nextToken();
      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
        addUnwrappedLine();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
  return false;
}

/// \brief Parses a pair of parentheses (and everything between them).
/// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
/// double ampersands. This applies for all nested scopes as well.
///
/// Returns whether there is a `=` token between the parentheses.
2443 bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { 2444 assert(FormatTok->is(tok::l_paren) && "'(' expected."); 2445 auto *LeftParen = FormatTok; 2446 bool SeenEqual = false; 2447 const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace); 2448 nextToken(); 2449 do { 2450 switch (FormatTok->Tok.getKind()) { 2451 case tok::l_paren: 2452 if (parseParens(AmpAmpTokenType)) 2453 SeenEqual = true; 2454 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 2455 parseChildBlock(); 2456 break; 2457 case tok::r_paren: 2458 if (!MightBeStmtExpr && 2459 Style.RemoveParentheses > FormatStyle::RPS_Leave) { 2460 const auto *Prev = LeftParen->Previous; 2461 const auto *Next = Tokens->peekNextToken(); 2462 const bool DoubleParens = 2463 Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren); 2464 const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr; 2465 const bool Blacklisted = 2466 PrevPrev && 2467 (PrevPrev->is(tok::kw___attribute) || 2468 (SeenEqual && 2469 (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) || 2470 PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if)))); 2471 const bool ReturnParens = 2472 Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement && 2473 Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next && 2474 Next->is(tok::semi); 2475 if ((DoubleParens && !Blacklisted) || ReturnParens) { 2476 LeftParen->Optional = true; 2477 FormatTok->Optional = true; 2478 } 2479 } 2480 nextToken(); 2481 return SeenEqual; 2482 case tok::r_brace: 2483 // A "}" inside parenthesis is an error if there wasn't a matching "{". 
2484 return SeenEqual; 2485 case tok::l_square: 2486 tryToParseLambda(); 2487 break; 2488 case tok::l_brace: 2489 if (!tryToParseBracedList()) 2490 parseChildBlock(); 2491 break; 2492 case tok::at: 2493 nextToken(); 2494 if (FormatTok->is(tok::l_brace)) { 2495 nextToken(); 2496 parseBracedList(); 2497 } 2498 break; 2499 case tok::equal: 2500 SeenEqual = true; 2501 if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) 2502 tryToParseChildBlock(); 2503 else 2504 nextToken(); 2505 break; 2506 case tok::kw_class: 2507 if (Style.isJavaScript()) 2508 parseRecord(/*ParseAsExpr=*/true); 2509 else 2510 nextToken(); 2511 break; 2512 case tok::identifier: 2513 if (Style.isJavaScript() && 2514 (FormatTok->is(Keywords.kw_function) || 2515 FormatTok->startsSequence(Keywords.kw_async, 2516 Keywords.kw_function))) { 2517 tryToParseJSFunction(); 2518 } else { 2519 nextToken(); 2520 } 2521 break; 2522 case tok::kw_requires: { 2523 auto RequiresToken = FormatTok; 2524 nextToken(); 2525 parseRequiresExpression(RequiresToken); 2526 break; 2527 } 2528 case tok::ampamp: 2529 if (AmpAmpTokenType != TT_Unknown) 2530 FormatTok->setFinalizedType(AmpAmpTokenType); 2531 [[fallthrough]]; 2532 default: 2533 nextToken(); 2534 break; 2535 } 2536 } while (!eof()); 2537 return SeenEqual; 2538 } 2539 2540 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 2541 if (!LambdaIntroducer) { 2542 assert(FormatTok->is(tok::l_square) && "'[' expected."); 2543 if (tryToParseLambda()) 2544 return; 2545 } 2546 do { 2547 switch (FormatTok->Tok.getKind()) { 2548 case tok::l_paren: 2549 parseParens(); 2550 break; 2551 case tok::r_square: 2552 nextToken(); 2553 return; 2554 case tok::r_brace: 2555 // A "}" inside parenthesis is an error if there wasn't a matching "{". 
2556 return; 2557 case tok::l_square: 2558 parseSquare(); 2559 break; 2560 case tok::l_brace: { 2561 if (!tryToParseBracedList()) 2562 parseChildBlock(); 2563 break; 2564 } 2565 case tok::at: 2566 nextToken(); 2567 if (FormatTok->is(tok::l_brace)) { 2568 nextToken(); 2569 parseBracedList(); 2570 } 2571 break; 2572 default: 2573 nextToken(); 2574 break; 2575 } 2576 } while (!eof()); 2577 } 2578 2579 void UnwrappedLineParser::keepAncestorBraces() { 2580 if (!Style.RemoveBracesLLVM) 2581 return; 2582 2583 const int MaxNestingLevels = 2; 2584 const int Size = NestedTooDeep.size(); 2585 if (Size >= MaxNestingLevels) 2586 NestedTooDeep[Size - MaxNestingLevels] = true; 2587 NestedTooDeep.push_back(false); 2588 } 2589 2590 static FormatToken *getLastNonComment(const UnwrappedLine &Line) { 2591 for (const auto &Token : llvm::reverse(Line.Tokens)) 2592 if (Token.Tok->isNot(tok::comment)) 2593 return Token.Tok; 2594 2595 return nullptr; 2596 } 2597 2598 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) { 2599 FormatToken *Tok = nullptr; 2600 2601 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() && 2602 PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) { 2603 Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never 2604 ? 
getLastNonComment(*Line) 2605 : Line->Tokens.back().Tok; 2606 assert(Tok); 2607 if (Tok->BraceCount < 0) { 2608 assert(Tok->BraceCount == -1); 2609 Tok = nullptr; 2610 } else { 2611 Tok->BraceCount = -1; 2612 } 2613 } 2614 2615 addUnwrappedLine(); 2616 ++Line->Level; 2617 parseStructuralElement(); 2618 2619 if (Tok) { 2620 assert(!Line->InPPDirective); 2621 Tok = nullptr; 2622 for (const auto &L : llvm::reverse(*CurrentLines)) { 2623 if (!L.InPPDirective && getLastNonComment(L)) { 2624 Tok = L.Tokens.back().Tok; 2625 break; 2626 } 2627 } 2628 assert(Tok); 2629 ++Tok->BraceCount; 2630 } 2631 2632 if (CheckEOF && eof()) 2633 addUnwrappedLine(); 2634 2635 --Line->Level; 2636 } 2637 2638 static void markOptionalBraces(FormatToken *LeftBrace) { 2639 if (!LeftBrace) 2640 return; 2641 2642 assert(LeftBrace->is(tok::l_brace)); 2643 2644 FormatToken *RightBrace = LeftBrace->MatchingParen; 2645 if (!RightBrace) { 2646 assert(!LeftBrace->Optional); 2647 return; 2648 } 2649 2650 assert(RightBrace->is(tok::r_brace)); 2651 assert(RightBrace->MatchingParen == LeftBrace); 2652 assert(LeftBrace->Optional == RightBrace->Optional); 2653 2654 LeftBrace->Optional = true; 2655 RightBrace->Optional = true; 2656 } 2657 2658 void UnwrappedLineParser::handleAttributes() { 2659 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`. 2660 if (FormatTok->is(TT_AttributeMacro)) 2661 nextToken(); 2662 if (FormatTok->is(tok::l_square)) 2663 handleCppAttributes(); 2664 } 2665 2666 bool UnwrappedLineParser::handleCppAttributes() { 2667 // Handle [[likely]] / [[unlikely]] attributes. 2668 assert(FormatTok->is(tok::l_square)); 2669 if (!tryToParseSimpleAttribute()) 2670 return false; 2671 parseSquare(); 2672 return true; 2673 } 2674 2675 /// Returns whether \c Tok begins a block. 2676 bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const { 2677 // FIXME: rename the function or make 2678 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work. 2679 return Style.isVerilog() ? 
Keywords.isVerilogBegin(Tok) 2680 : Tok.is(tok::l_brace); 2681 } 2682 2683 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind, 2684 bool KeepBraces, 2685 bool IsVerilogAssert) { 2686 assert((FormatTok->is(tok::kw_if) || 2687 (Style.isVerilog() && 2688 FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert, 2689 Keywords.kw_assume, Keywords.kw_cover))) && 2690 "'if' expected"); 2691 nextToken(); 2692 2693 if (IsVerilogAssert) { 2694 // Handle `assert #0` and `assert final`. 2695 if (FormatTok->is(Keywords.kw_verilogHash)) { 2696 nextToken(); 2697 if (FormatTok->is(tok::numeric_constant)) 2698 nextToken(); 2699 } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property, 2700 Keywords.kw_sequence)) { 2701 nextToken(); 2702 } 2703 } 2704 2705 // Handle `if !consteval`. 2706 if (FormatTok->is(tok::exclaim)) 2707 nextToken(); 2708 2709 bool KeepIfBraces = true; 2710 if (FormatTok->is(tok::kw_consteval)) { 2711 nextToken(); 2712 } else { 2713 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces; 2714 if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier)) 2715 nextToken(); 2716 if (FormatTok->is(tok::l_paren)) { 2717 FormatTok->setFinalizedType(TT_ConditionLParen); 2718 parseParens(); 2719 } 2720 } 2721 handleAttributes(); 2722 // The then action is optional in Verilog assert statements. 
2723 if (IsVerilogAssert && FormatTok->is(tok::semi)) { 2724 nextToken(); 2725 addUnwrappedLine(); 2726 return nullptr; 2727 } 2728 2729 bool NeedsUnwrappedLine = false; 2730 keepAncestorBraces(); 2731 2732 FormatToken *IfLeftBrace = nullptr; 2733 IfStmtKind IfBlockKind = IfStmtKind::NotIf; 2734 2735 if (isBlockBegin(*FormatTok)) { 2736 FormatTok->setFinalizedType(TT_ControlStatementLBrace); 2737 IfLeftBrace = FormatTok; 2738 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2739 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2740 /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind); 2741 if (Style.BraceWrapping.BeforeElse) 2742 addUnwrappedLine(); 2743 else 2744 NeedsUnwrappedLine = true; 2745 } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) { 2746 addUnwrappedLine(); 2747 } else { 2748 parseUnbracedBody(); 2749 } 2750 2751 if (Style.RemoveBracesLLVM) { 2752 assert(!NestedTooDeep.empty()); 2753 KeepIfBraces = KeepIfBraces || 2754 (IfLeftBrace && !IfLeftBrace->MatchingParen) || 2755 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly || 2756 IfBlockKind == IfStmtKind::IfElseIf; 2757 } 2758 2759 bool KeepElseBraces = KeepIfBraces; 2760 FormatToken *ElseLeftBrace = nullptr; 2761 IfStmtKind Kind = IfStmtKind::IfOnly; 2762 2763 if (FormatTok->is(tok::kw_else)) { 2764 if (Style.RemoveBracesLLVM) { 2765 NestedTooDeep.back() = false; 2766 Kind = IfStmtKind::IfElse; 2767 } 2768 nextToken(); 2769 handleAttributes(); 2770 if (isBlockBegin(*FormatTok)) { 2771 const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if); 2772 FormatTok->setFinalizedType(TT_ElseLBrace); 2773 ElseLeftBrace = FormatTok; 2774 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2775 IfStmtKind ElseBlockKind = IfStmtKind::NotIf; 2776 FormatToken *IfLBrace = 2777 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2778 /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind); 2779 if (FormatTok->is(tok::kw_else)) { 2780 KeepElseBraces = KeepElseBraces 
|| 2781 ElseBlockKind == IfStmtKind::IfOnly || 2782 ElseBlockKind == IfStmtKind::IfElseIf; 2783 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) { 2784 KeepElseBraces = true; 2785 assert(ElseLeftBrace->MatchingParen); 2786 markOptionalBraces(ElseLeftBrace); 2787 } 2788 addUnwrappedLine(); 2789 } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) { 2790 const FormatToken *Previous = Tokens->getPreviousToken(); 2791 assert(Previous); 2792 const bool IsPrecededByComment = Previous->is(tok::comment); 2793 if (IsPrecededByComment) { 2794 addUnwrappedLine(); 2795 ++Line->Level; 2796 } 2797 bool TooDeep = true; 2798 if (Style.RemoveBracesLLVM) { 2799 Kind = IfStmtKind::IfElseIf; 2800 TooDeep = NestedTooDeep.pop_back_val(); 2801 } 2802 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces); 2803 if (Style.RemoveBracesLLVM) 2804 NestedTooDeep.push_back(TooDeep); 2805 if (IsPrecededByComment) 2806 --Line->Level; 2807 } else { 2808 parseUnbracedBody(/*CheckEOF=*/true); 2809 } 2810 } else { 2811 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse; 2812 if (NeedsUnwrappedLine) 2813 addUnwrappedLine(); 2814 } 2815 2816 if (!Style.RemoveBracesLLVM) 2817 return nullptr; 2818 2819 assert(!NestedTooDeep.empty()); 2820 KeepElseBraces = KeepElseBraces || 2821 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || 2822 NestedTooDeep.back(); 2823 2824 NestedTooDeep.pop_back(); 2825 2826 if (!KeepIfBraces && !KeepElseBraces) { 2827 markOptionalBraces(IfLeftBrace); 2828 markOptionalBraces(ElseLeftBrace); 2829 } else if (IfLeftBrace) { 2830 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen; 2831 if (IfRightBrace) { 2832 assert(IfRightBrace->MatchingParen == IfLeftBrace); 2833 assert(!IfLeftBrace->Optional); 2834 assert(!IfRightBrace->Optional); 2835 IfLeftBrace->MatchingParen = nullptr; 2836 IfRightBrace->MatchingParen = nullptr; 2837 } 2838 } 2839 2840 if (IfKind) 2841 *IfKind = Kind; 2842 2843 return IfLeftBrace; 2844 } 2845 2846 void 
UnwrappedLineParser::parseTryCatch() { 2847 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 2848 nextToken(); 2849 bool NeedsUnwrappedLine = false; 2850 if (FormatTok->is(tok::colon)) { 2851 // We are in a function try block, what comes is an initializer list. 2852 nextToken(); 2853 2854 // In case identifiers were removed by clang-tidy, what might follow is 2855 // multiple commas in sequence - before the first identifier. 2856 while (FormatTok->is(tok::comma)) 2857 nextToken(); 2858 2859 while (FormatTok->is(tok::identifier)) { 2860 nextToken(); 2861 if (FormatTok->is(tok::l_paren)) 2862 parseParens(); 2863 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) && 2864 FormatTok->is(tok::l_brace)) { 2865 do { 2866 nextToken(); 2867 } while (!FormatTok->is(tok::r_brace)); 2868 nextToken(); 2869 } 2870 2871 // In case identifiers were removed by clang-tidy, what might follow is 2872 // multiple commas in sequence - after the first identifier. 2873 while (FormatTok->is(tok::comma)) 2874 nextToken(); 2875 } 2876 } 2877 // Parse try with resource. 2878 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) 2879 parseParens(); 2880 2881 keepAncestorBraces(); 2882 2883 if (FormatTok->is(tok::l_brace)) { 2884 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2885 parseBlock(); 2886 if (Style.BraceWrapping.BeforeCatch) 2887 addUnwrappedLine(); 2888 else 2889 NeedsUnwrappedLine = true; 2890 } else if (!FormatTok->is(tok::kw_catch)) { 2891 // The C++ standard requires a compound-statement after a try. 2892 // If there's none, we try to assume there's a structuralElement 2893 // and try to continue. 
2894 addUnwrappedLine(); 2895 ++Line->Level; 2896 parseStructuralElement(); 2897 --Line->Level; 2898 } 2899 while (true) { 2900 if (FormatTok->is(tok::at)) 2901 nextToken(); 2902 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 2903 tok::kw___finally) || 2904 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 2905 FormatTok->is(Keywords.kw_finally)) || 2906 (FormatTok->isObjCAtKeyword(tok::objc_catch) || 2907 FormatTok->isObjCAtKeyword(tok::objc_finally)))) { 2908 break; 2909 } 2910 nextToken(); 2911 while (FormatTok->isNot(tok::l_brace)) { 2912 if (FormatTok->is(tok::l_paren)) { 2913 parseParens(); 2914 continue; 2915 } 2916 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) { 2917 if (Style.RemoveBracesLLVM) 2918 NestedTooDeep.pop_back(); 2919 return; 2920 } 2921 nextToken(); 2922 } 2923 NeedsUnwrappedLine = false; 2924 Line->MustBeDeclaration = false; 2925 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2926 parseBlock(); 2927 if (Style.BraceWrapping.BeforeCatch) 2928 addUnwrappedLine(); 2929 else 2930 NeedsUnwrappedLine = true; 2931 } 2932 2933 if (Style.RemoveBracesLLVM) 2934 NestedTooDeep.pop_back(); 2935 2936 if (NeedsUnwrappedLine) 2937 addUnwrappedLine(); 2938 } 2939 2940 void UnwrappedLineParser::parseNamespace() { 2941 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) && 2942 "'namespace' expected"); 2943 2944 const FormatToken &InitialToken = *FormatTok; 2945 nextToken(); 2946 if (InitialToken.is(TT_NamespaceMacro)) { 2947 parseParens(); 2948 } else { 2949 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline, 2950 tok::l_square, tok::period, tok::l_paren) || 2951 (Style.isCSharp() && FormatTok->is(tok::kw_union))) { 2952 if (FormatTok->is(tok::l_square)) 2953 parseSquare(); 2954 else if (FormatTok->is(tok::l_paren)) 2955 parseParens(); 2956 else 2957 nextToken(); 2958 } 2959 } 2960 if (FormatTok->is(tok::l_brace)) { 2961 if (ShouldBreakBeforeBrace(Style, 
InitialToken)) 2962 addUnwrappedLine(); 2963 2964 unsigned AddLevels = 2965 Style.NamespaceIndentation == FormatStyle::NI_All || 2966 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 2967 DeclarationScopeStack.size() > 1) 2968 ? 1u 2969 : 0u; 2970 bool ManageWhitesmithsBraces = 2971 AddLevels == 0u && 2972 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 2973 2974 // If we're in Whitesmiths mode, indent the brace if we're not indenting 2975 // the whole block. 2976 if (ManageWhitesmithsBraces) 2977 ++Line->Level; 2978 2979 // Munch the semicolon after a namespace. This is more common than one would 2980 // think. Putting the semicolon into its own line is very ugly. 2981 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true, 2982 /*KeepBraces=*/true, /*IfKind=*/nullptr, 2983 ManageWhitesmithsBraces); 2984 2985 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep); 2986 2987 if (ManageWhitesmithsBraces) 2988 --Line->Level; 2989 } 2990 // FIXME: Add error handling. 2991 } 2992 2993 void UnwrappedLineParser::parseNew() { 2994 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 2995 nextToken(); 2996 2997 if (Style.isCSharp()) { 2998 do { 2999 // Handle constructor invocation, e.g. `new(field: value)`. 3000 if (FormatTok->is(tok::l_paren)) 3001 parseParens(); 3002 3003 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`. 3004 if (FormatTok->is(tok::l_brace)) 3005 parseBracedList(); 3006 3007 if (FormatTok->isOneOf(tok::semi, tok::comma)) 3008 return; 3009 3010 nextToken(); 3011 } while (!eof()); 3012 } 3013 3014 if (Style.Language != FormatStyle::LK_Java) 3015 return; 3016 3017 // In Java, we can parse everything up to the parens, which aren't optional. 3018 do { 3019 // There should not be a ;, { or } before the new's open paren. 3020 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 3021 return; 3022 3023 // Consume the parens. 
3024 if (FormatTok->is(tok::l_paren)) { 3025 parseParens(); 3026 3027 // If there is a class body of an anonymous class, consume that as child. 3028 if (FormatTok->is(tok::l_brace)) 3029 parseChildBlock(); 3030 return; 3031 } 3032 nextToken(); 3033 } while (!eof()); 3034 } 3035 3036 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) { 3037 keepAncestorBraces(); 3038 3039 if (isBlockBegin(*FormatTok)) { 3040 if (!KeepBraces) 3041 FormatTok->setFinalizedType(TT_ControlStatementLBrace); 3042 FormatToken *LeftBrace = FormatTok; 3043 CompoundStatementIndenter Indenter(this, Style, Line->Level); 3044 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 3045 /*MunchSemi=*/true, KeepBraces); 3046 if (!KeepBraces) { 3047 assert(!NestedTooDeep.empty()); 3048 if (!NestedTooDeep.back()) 3049 markOptionalBraces(LeftBrace); 3050 } 3051 if (WrapRightBrace) 3052 addUnwrappedLine(); 3053 } else { 3054 parseUnbracedBody(); 3055 } 3056 3057 if (!KeepBraces) 3058 NestedTooDeep.pop_back(); 3059 } 3060 3061 void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) { 3062 assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) || 3063 (Style.isVerilog() && 3064 FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb, 3065 Keywords.kw_always_ff, Keywords.kw_always_latch, 3066 Keywords.kw_final, Keywords.kw_initial, 3067 Keywords.kw_foreach, Keywords.kw_forever, 3068 Keywords.kw_repeat))) && 3069 "'for', 'while' or foreach macro expected"); 3070 const bool KeepBraces = !Style.RemoveBracesLLVM || 3071 !FormatTok->isOneOf(tok::kw_for, tok::kw_while); 3072 3073 nextToken(); 3074 // JS' for await ( ... 3075 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await)) 3076 nextToken(); 3077 if (Style.isCpp() && FormatTok->is(tok::kw_co_await)) 3078 nextToken(); 3079 if (HasParens && FormatTok->is(tok::l_paren)) { 3080 // The type is only set for Verilog basically because we were afraid to 3081 // change the existing behavior for loops. 
See the discussion on D121756 for 3082 // details. 3083 if (Style.isVerilog()) 3084 FormatTok->setFinalizedType(TT_ConditionLParen); 3085 parseParens(); 3086 } 3087 // Event control. 3088 if (Style.isVerilog()) 3089 parseVerilogSensitivityList(); 3090 3091 handleAttributes(); 3092 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true); 3093 } 3094 3095 void UnwrappedLineParser::parseDoWhile() { 3096 assert(FormatTok->is(tok::kw_do) && "'do' expected"); 3097 nextToken(); 3098 3099 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile); 3100 3101 // FIXME: Add error handling. 3102 if (!FormatTok->is(tok::kw_while)) { 3103 addUnwrappedLine(); 3104 return; 3105 } 3106 3107 // If in Whitesmiths mode, the line with the while() needs to be indented 3108 // to the same level as the block. 3109 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 3110 ++Line->Level; 3111 3112 nextToken(); 3113 parseStructuralElement(); 3114 } 3115 3116 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { 3117 nextToken(); 3118 unsigned OldLineLevel = Line->Level; 3119 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 3120 --Line->Level; 3121 if (LeftAlignLabel) 3122 Line->Level = 0; 3123 3124 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && 3125 FormatTok->is(tok::l_brace)) { 3126 3127 CompoundStatementIndenter Indenter(this, Line->Level, 3128 Style.BraceWrapping.AfterCaseLabel, 3129 Style.BraceWrapping.IndentBraces); 3130 parseBlock(); 3131 if (FormatTok->is(tok::kw_break)) { 3132 if (Style.BraceWrapping.AfterControlStatement == 3133 FormatStyle::BWACS_Always) { 3134 addUnwrappedLine(); 3135 if (!Style.IndentCaseBlocks && 3136 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 3137 ++Line->Level; 3138 } 3139 } 3140 parseStructuralElement(); 3141 } 3142 addUnwrappedLine(); 3143 } else { 3144 if (FormatTok->is(tok::semi)) 3145 nextToken(); 3146 addUnwrappedLine(); 3147 } 3148 Line->Level = OldLineLevel; 3149 if 
(FormatTok->isNot(tok::l_brace)) { 3150 parseStructuralElement(); 3151 addUnwrappedLine(); 3152 } 3153 } 3154 3155 void UnwrappedLineParser::parseCaseLabel() { 3156 assert(FormatTok->is(tok::kw_case) && "'case' expected"); 3157 3158 // FIXME: fix handling of complex expressions here. 3159 do { 3160 nextToken(); 3161 if (FormatTok->is(tok::colon)) { 3162 FormatTok->setFinalizedType(TT_CaseLabelColon); 3163 break; 3164 } 3165 } while (!eof()); 3166 parseLabel(); 3167 } 3168 3169 void UnwrappedLineParser::parseSwitch() { 3170 assert(FormatTok->is(tok::kw_switch) && "'switch' expected"); 3171 nextToken(); 3172 if (FormatTok->is(tok::l_paren)) 3173 parseParens(); 3174 3175 keepAncestorBraces(); 3176 3177 if (FormatTok->is(tok::l_brace)) { 3178 CompoundStatementIndenter Indenter(this, Style, Line->Level); 3179 parseBlock(); 3180 addUnwrappedLine(); 3181 } else { 3182 addUnwrappedLine(); 3183 ++Line->Level; 3184 parseStructuralElement(); 3185 --Line->Level; 3186 } 3187 3188 if (Style.RemoveBracesLLVM) 3189 NestedTooDeep.pop_back(); 3190 } 3191 3192 // Operators that can follow a C variable. 
3193 static bool isCOperatorFollowingVar(tok::TokenKind kind) { 3194 switch (kind) { 3195 case tok::ampamp: 3196 case tok::ampequal: 3197 case tok::arrow: 3198 case tok::caret: 3199 case tok::caretequal: 3200 case tok::comma: 3201 case tok::ellipsis: 3202 case tok::equal: 3203 case tok::equalequal: 3204 case tok::exclaim: 3205 case tok::exclaimequal: 3206 case tok::greater: 3207 case tok::greaterequal: 3208 case tok::greatergreater: 3209 case tok::greatergreaterequal: 3210 case tok::l_paren: 3211 case tok::l_square: 3212 case tok::less: 3213 case tok::lessequal: 3214 case tok::lessless: 3215 case tok::lesslessequal: 3216 case tok::minus: 3217 case tok::minusequal: 3218 case tok::minusminus: 3219 case tok::percent: 3220 case tok::percentequal: 3221 case tok::period: 3222 case tok::pipe: 3223 case tok::pipeequal: 3224 case tok::pipepipe: 3225 case tok::plus: 3226 case tok::plusequal: 3227 case tok::plusplus: 3228 case tok::question: 3229 case tok::r_brace: 3230 case tok::r_paren: 3231 case tok::r_square: 3232 case tok::semi: 3233 case tok::slash: 3234 case tok::slashequal: 3235 case tok::star: 3236 case tok::starequal: 3237 return true; 3238 default: 3239 return false; 3240 } 3241 } 3242 3243 void UnwrappedLineParser::parseAccessSpecifier() { 3244 FormatToken *AccessSpecifierCandidate = FormatTok; 3245 nextToken(); 3246 // Understand Qt's slots. 3247 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 3248 nextToken(); 3249 // Otherwise, we don't know what it is, and we'd better keep the next token. 3250 if (FormatTok->is(tok::colon)) { 3251 nextToken(); 3252 addUnwrappedLine(); 3253 } else if (!FormatTok->is(tok::coloncolon) && 3254 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) { 3255 // Not a variable name nor namespace name. 3256 addUnwrappedLine(); 3257 } else if (AccessSpecifierCandidate) { 3258 // Consider the access specifier to be a C identifier. 
3259 AccessSpecifierCandidate->Tok.setKind(tok::identifier); 3260 } 3261 } 3262 3263 /// \brief Parses a requires, decides if it is a clause or an expression. 3264 /// \pre The current token has to be the requires keyword. 3265 /// \returns true if it parsed a clause. 3266 bool clang::format::UnwrappedLineParser::parseRequires() { 3267 assert(FormatTok->is(tok::kw_requires) && "'requires' expected"); 3268 auto RequiresToken = FormatTok; 3269 3270 // We try to guess if it is a requires clause, or a requires expression. For 3271 // that we first consume the keyword and check the next token. 3272 nextToken(); 3273 3274 switch (FormatTok->Tok.getKind()) { 3275 case tok::l_brace: 3276 // This can only be an expression, never a clause. 3277 parseRequiresExpression(RequiresToken); 3278 return false; 3279 case tok::l_paren: 3280 // Clauses and expression can start with a paren, it's unclear what we have. 3281 break; 3282 default: 3283 // All other tokens can only be a clause. 3284 parseRequiresClause(RequiresToken); 3285 return true; 3286 } 3287 3288 // Looking forward we would have to decide if there are function declaration 3289 // like arguments to the requires expression: 3290 // requires (T t) { 3291 // Or there is a constraint expression for the requires clause: 3292 // requires (C<T> && ... 3293 3294 // But first let's look behind. 3295 auto *PreviousNonComment = RequiresToken->getPreviousNonComment(); 3296 3297 if (!PreviousNonComment || 3298 PreviousNonComment->is(TT_RequiresExpressionLBrace)) { 3299 // If there is no token, or an expression left brace, we are a requires 3300 // clause within a requires expression. 3301 parseRequiresClause(RequiresToken); 3302 return true; 3303 } 3304 3305 switch (PreviousNonComment->Tok.getKind()) { 3306 case tok::greater: 3307 case tok::r_paren: 3308 case tok::kw_noexcept: 3309 case tok::kw_const: 3310 // This is a requires clause. 
3311 parseRequiresClause(RequiresToken); 3312 return true; 3313 case tok::amp: 3314 case tok::ampamp: { 3315 // This can be either: 3316 // if (... && requires (T t) ...) 3317 // Or 3318 // void member(...) && requires (C<T> ... 3319 // We check the one token before that for a const: 3320 // void member(...) const && requires (C<T> ... 3321 auto PrevPrev = PreviousNonComment->getPreviousNonComment(); 3322 if (PrevPrev && PrevPrev->is(tok::kw_const)) { 3323 parseRequiresClause(RequiresToken); 3324 return true; 3325 } 3326 break; 3327 } 3328 default: 3329 if (PreviousNonComment->isTypeOrIdentifier()) { 3330 // This is a requires clause. 3331 parseRequiresClause(RequiresToken); 3332 return true; 3333 } 3334 // It's an expression. 3335 parseRequiresExpression(RequiresToken); 3336 return false; 3337 } 3338 3339 // Now we look forward and try to check if the paren content is a parameter 3340 // list. The parameters can be cv-qualified and contain references or 3341 // pointers. 3342 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds 3343 // of stuff: typename, const, *, &, &&, ::, identifiers. 3344 3345 unsigned StoredPosition = Tokens->getPosition(); 3346 FormatToken *NextToken = Tokens->getNextToken(); 3347 int Lookahead = 0; 3348 auto PeekNext = [&Lookahead, &NextToken, this] { 3349 ++Lookahead; 3350 NextToken = Tokens->getNextToken(); 3351 }; 3352 3353 bool FoundType = false; 3354 bool LastWasColonColon = false; 3355 int OpenAngles = 0; 3356 3357 for (; Lookahead < 50; PeekNext()) { 3358 switch (NextToken->Tok.getKind()) { 3359 case tok::kw_volatile: 3360 case tok::kw_const: 3361 case tok::comma: 3362 FormatTok = Tokens->setPosition(StoredPosition); 3363 parseRequiresExpression(RequiresToken); 3364 return false; 3365 case tok::r_paren: 3366 case tok::pipepipe: 3367 FormatTok = Tokens->setPosition(StoredPosition); 3368 parseRequiresClause(RequiresToken); 3369 return true; 3370 case tok::eof: 3371 // Break out of the loop. 
3372 Lookahead = 50; 3373 break; 3374 case tok::coloncolon: 3375 LastWasColonColon = true; 3376 break; 3377 case tok::identifier: 3378 if (FoundType && !LastWasColonColon && OpenAngles == 0) { 3379 FormatTok = Tokens->setPosition(StoredPosition); 3380 parseRequiresExpression(RequiresToken); 3381 return false; 3382 } 3383 FoundType = true; 3384 LastWasColonColon = false; 3385 break; 3386 case tok::less: 3387 ++OpenAngles; 3388 break; 3389 case tok::greater: 3390 --OpenAngles; 3391 break; 3392 default: 3393 if (NextToken->isSimpleTypeSpecifier()) { 3394 FormatTok = Tokens->setPosition(StoredPosition); 3395 parseRequiresExpression(RequiresToken); 3396 return false; 3397 } 3398 break; 3399 } 3400 } 3401 // This seems to be a complicated expression, just assume it's a clause. 3402 FormatTok = Tokens->setPosition(StoredPosition); 3403 parseRequiresClause(RequiresToken); 3404 return true; 3405 } 3406 3407 /// \brief Parses a requires clause. 3408 /// \param RequiresToken The requires keyword token, which starts this clause. 3409 /// \pre We need to be on the next token after the requires keyword. 3410 /// \sa parseRequiresExpression 3411 /// 3412 /// Returns if it either has finished parsing the clause, or it detects, that 3413 /// the clause is incorrect. 3414 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) { 3415 assert(FormatTok->getPreviousNonComment() == RequiresToken); 3416 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); 3417 3418 // If there is no previous token, we are within a requires expression, 3419 // otherwise we will always have the template or function declaration in front 3420 // of it. 3421 bool InRequiresExpression = 3422 !RequiresToken->Previous || 3423 RequiresToken->Previous->is(TT_RequiresExpressionLBrace); 3424 3425 RequiresToken->setFinalizedType(InRequiresExpression 3426 ? 
TT_RequiresClauseInARequiresExpression 3427 : TT_RequiresClause); 3428 3429 // NOTE: parseConstraintExpression is only ever called from this function. 3430 // It could be inlined into here. 3431 parseConstraintExpression(); 3432 3433 if (!InRequiresExpression) 3434 FormatTok->Previous->ClosesRequiresClause = true; 3435 } 3436 3437 /// \brief Parses a requires expression. 3438 /// \param RequiresToken The requires keyword token, which starts this clause. 3439 /// \pre We need to be on the next token after the requires keyword. 3440 /// \sa parseRequiresClause 3441 /// 3442 /// Returns if it either has finished parsing the expression, or it detects, 3443 /// that the expression is incorrect. 3444 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) { 3445 assert(FormatTok->getPreviousNonComment() == RequiresToken); 3446 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); 3447 3448 RequiresToken->setFinalizedType(TT_RequiresExpression); 3449 3450 if (FormatTok->is(tok::l_paren)) { 3451 FormatTok->setFinalizedType(TT_RequiresExpressionLParen); 3452 parseParens(); 3453 } 3454 3455 if (FormatTok->is(tok::l_brace)) { 3456 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace); 3457 parseChildBlock(/*CanContainBracedList=*/false, 3458 /*NextLBracesType=*/TT_CompoundRequirementLBrace); 3459 } 3460 } 3461 3462 /// \brief Parses a constraint expression. 3463 /// 3464 /// This is the body of a requires clause. It returns, when the parsing is 3465 /// complete, or the expression is incorrect. 3466 void UnwrappedLineParser::parseConstraintExpression() { 3467 // The special handling for lambdas is needed since tryToParseLambda() eats a 3468 // token and if a requires expression is the last part of a requires clause 3469 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is 3470 // not set on the correct token. Thus we need to be aware if we even expect a 3471 // lambda to be possible. 
3472 // template <typename T> requires requires { ... } [[nodiscard]] ...; 3473 bool LambdaNextTimeAllowed = true; 3474 3475 // Within lambda declarations, it is permitted to put a requires clause after 3476 // its template parameter list, which would place the requires clause right 3477 // before the parentheses of the parameters of the lambda declaration. Thus, 3478 // we track if we expect to see grouping parentheses at all. 3479 // Without this check, `requires foo<T> (T t)` in the below example would be 3480 // seen as the whole requires clause, accidentally eating the parameters of 3481 // the lambda. 3482 // [&]<typename T> requires foo<T> (T t) { ... }; 3483 bool TopLevelParensAllowed = true; 3484 3485 do { 3486 bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false); 3487 3488 switch (FormatTok->Tok.getKind()) { 3489 case tok::kw_requires: { 3490 auto RequiresToken = FormatTok; 3491 nextToken(); 3492 parseRequiresExpression(RequiresToken); 3493 break; 3494 } 3495 3496 case tok::l_paren: 3497 if (!TopLevelParensAllowed) 3498 return; 3499 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator); 3500 TopLevelParensAllowed = false; 3501 break; 3502 3503 case tok::l_square: 3504 if (!LambdaThisTimeAllowed || !tryToParseLambda()) 3505 return; 3506 break; 3507 3508 case tok::kw_const: 3509 case tok::semi: 3510 case tok::kw_class: 3511 case tok::kw_struct: 3512 case tok::kw_union: 3513 return; 3514 3515 case tok::l_brace: 3516 // Potential function body. 
3517 return; 3518 3519 case tok::ampamp: 3520 case tok::pipepipe: 3521 FormatTok->setFinalizedType(TT_BinaryOperator); 3522 nextToken(); 3523 LambdaNextTimeAllowed = true; 3524 TopLevelParensAllowed = true; 3525 break; 3526 3527 case tok::comma: 3528 case tok::comment: 3529 LambdaNextTimeAllowed = LambdaThisTimeAllowed; 3530 nextToken(); 3531 break; 3532 3533 case tok::kw_sizeof: 3534 case tok::greater: 3535 case tok::greaterequal: 3536 case tok::greatergreater: 3537 case tok::less: 3538 case tok::lessequal: 3539 case tok::lessless: 3540 case tok::equalequal: 3541 case tok::exclaim: 3542 case tok::exclaimequal: 3543 case tok::plus: 3544 case tok::minus: 3545 case tok::star: 3546 case tok::slash: 3547 LambdaNextTimeAllowed = true; 3548 TopLevelParensAllowed = true; 3549 // Just eat them. 3550 nextToken(); 3551 break; 3552 3553 case tok::numeric_constant: 3554 case tok::coloncolon: 3555 case tok::kw_true: 3556 case tok::kw_false: 3557 TopLevelParensAllowed = false; 3558 // Just eat them. 3559 nextToken(); 3560 break; 3561 3562 case tok::kw_static_cast: 3563 case tok::kw_const_cast: 3564 case tok::kw_reinterpret_cast: 3565 case tok::kw_dynamic_cast: 3566 nextToken(); 3567 if (!FormatTok->is(tok::less)) 3568 return; 3569 3570 nextToken(); 3571 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 3572 /*ClosingBraceKind=*/tok::greater); 3573 break; 3574 3575 default: 3576 if (!FormatTok->Tok.getIdentifierInfo()) { 3577 // Identifiers are part of the default case, we check for more then 3578 // tok::identifier to handle builtin type traits. 3579 return; 3580 } 3581 3582 // We need to differentiate identifiers for a template deduction guide, 3583 // variables, or function return types (the constraint expression has 3584 // ended before that), and basically all other cases. But it's easier to 3585 // check the other way around. 3586 assert(FormatTok->Previous); 3587 switch (FormatTok->Previous->Tok.getKind()) { 3588 case tok::coloncolon: // Nested identifier. 
3589 case tok::ampamp: // Start of a function or variable for the 3590 case tok::pipepipe: // constraint expression. (binary) 3591 case tok::exclaim: // The same as above, but unary. 3592 case tok::kw_requires: // Initial identifier of a requires clause. 3593 case tok::equal: // Initial identifier of a concept declaration. 3594 break; 3595 default: 3596 return; 3597 } 3598 3599 // Read identifier with optional template declaration. 3600 nextToken(); 3601 if (FormatTok->is(tok::less)) { 3602 nextToken(); 3603 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 3604 /*ClosingBraceKind=*/tok::greater); 3605 } 3606 TopLevelParensAllowed = false; 3607 break; 3608 } 3609 } while (!eof()); 3610 } 3611 3612 bool UnwrappedLineParser::parseEnum() { 3613 const FormatToken &InitialToken = *FormatTok; 3614 3615 // Won't be 'enum' for NS_ENUMs. 3616 if (FormatTok->is(tok::kw_enum)) 3617 nextToken(); 3618 3619 // In TypeScript, "enum" can also be used as property name, e.g. in interface 3620 // declarations. An "enum" keyword followed by a colon would be a syntax 3621 // error and thus assume it is just an identifier. 3622 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question)) 3623 return false; 3624 3625 // In protobuf, "enum" can be used as a field name. 3626 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) 3627 return false; 3628 3629 // Eat up enum class ... 3630 if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct)) 3631 nextToken(); 3632 3633 while (FormatTok->Tok.getIdentifierInfo() || 3634 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 3635 tok::greater, tok::comma, tok::question, 3636 tok::l_square, tok::r_square)) { 3637 if (Style.isVerilog()) { 3638 FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName); 3639 nextToken(); 3640 // In Verilog the base type can have dimensions. 
3641 while (FormatTok->is(tok::l_square)) 3642 parseSquare(); 3643 } else { 3644 nextToken(); 3645 } 3646 // We can have macros or attributes in between 'enum' and the enum name. 3647 if (FormatTok->is(tok::l_paren)) 3648 parseParens(); 3649 if (FormatTok->is(TT_AttributeSquare)) { 3650 parseSquare(); 3651 // Consume the closing TT_AttributeSquare. 3652 if (FormatTok->Next && FormatTok->is(TT_AttributeSquare)) 3653 nextToken(); 3654 } 3655 if (FormatTok->is(tok::identifier)) { 3656 nextToken(); 3657 // If there are two identifiers in a row, this is likely an elaborate 3658 // return type. In Java, this can be "implements", etc. 3659 if (Style.isCpp() && FormatTok->is(tok::identifier)) 3660 return false; 3661 } 3662 } 3663 3664 // Just a declaration or something is wrong. 3665 if (FormatTok->isNot(tok::l_brace)) 3666 return true; 3667 FormatTok->setFinalizedType(TT_EnumLBrace); 3668 FormatTok->setBlockKind(BK_Block); 3669 3670 if (Style.Language == FormatStyle::LK_Java) { 3671 // Java enums are different. 3672 parseJavaEnumBody(); 3673 return true; 3674 } 3675 if (Style.Language == FormatStyle::LK_Proto) { 3676 parseBlock(/*MustBeDeclaration=*/true); 3677 return true; 3678 } 3679 3680 if (!Style.AllowShortEnumsOnASingleLine && 3681 ShouldBreakBeforeBrace(Style, InitialToken)) { 3682 addUnwrappedLine(); 3683 } 3684 // Parse enum body. 3685 nextToken(); 3686 if (!Style.AllowShortEnumsOnASingleLine) { 3687 addUnwrappedLine(); 3688 Line->Level += 1; 3689 } 3690 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true, 3691 /*IsEnum=*/true); 3692 if (!Style.AllowShortEnumsOnASingleLine) 3693 Line->Level -= 1; 3694 if (HasError) { 3695 if (FormatTok->is(tok::semi)) 3696 nextToken(); 3697 addUnwrappedLine(); 3698 } 3699 return true; 3700 3701 // There is no addUnwrappedLine() here so that we fall through to parsing a 3702 // structural element afterwards. Thus, in "enum A {} n, m;", 3703 // "} n, m;" will end up in one unwrapped line. 
3704 } 3705 3706 bool UnwrappedLineParser::parseStructLike() { 3707 // parseRecord falls through and does not yet add an unwrapped line as a 3708 // record declaration or definition can start a structural element. 3709 parseRecord(); 3710 // This does not apply to Java, JavaScript and C#. 3711 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 3712 Style.isCSharp()) { 3713 if (FormatTok->is(tok::semi)) 3714 nextToken(); 3715 addUnwrappedLine(); 3716 return true; 3717 } 3718 return false; 3719 } 3720 3721 namespace { 3722 // A class used to set and restore the Token position when peeking 3723 // ahead in the token source. 3724 class ScopedTokenPosition { 3725 unsigned StoredPosition; 3726 FormatTokenSource *Tokens; 3727 3728 public: 3729 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) { 3730 assert(Tokens && "Tokens expected to not be null"); 3731 StoredPosition = Tokens->getPosition(); 3732 } 3733 3734 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); } 3735 }; 3736 } // namespace 3737 3738 // Look to see if we have [[ by looking ahead, if 3739 // its not then rewind to the original position. 3740 bool UnwrappedLineParser::tryToParseSimpleAttribute() { 3741 ScopedTokenPosition AutoPosition(Tokens); 3742 FormatToken *Tok = Tokens->getNextToken(); 3743 // We already read the first [ check for the second. 3744 if (!Tok->is(tok::l_square)) 3745 return false; 3746 // Double check that the attribute is just something 3747 // fairly simple. 
3748 while (Tok->isNot(tok::eof)) { 3749 if (Tok->is(tok::r_square)) 3750 break; 3751 Tok = Tokens->getNextToken(); 3752 } 3753 if (Tok->is(tok::eof)) 3754 return false; 3755 Tok = Tokens->getNextToken(); 3756 if (!Tok->is(tok::r_square)) 3757 return false; 3758 Tok = Tokens->getNextToken(); 3759 if (Tok->is(tok::semi)) 3760 return false; 3761 return true; 3762 } 3763 3764 void UnwrappedLineParser::parseJavaEnumBody() { 3765 assert(FormatTok->is(tok::l_brace)); 3766 const FormatToken *OpeningBrace = FormatTok; 3767 3768 // Determine whether the enum is simple, i.e. does not have a semicolon or 3769 // constants with class bodies. Simple enums can be formatted like braced 3770 // lists, contracted to a single line, etc. 3771 unsigned StoredPosition = Tokens->getPosition(); 3772 bool IsSimple = true; 3773 FormatToken *Tok = Tokens->getNextToken(); 3774 while (!Tok->is(tok::eof)) { 3775 if (Tok->is(tok::r_brace)) 3776 break; 3777 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 3778 IsSimple = false; 3779 break; 3780 } 3781 // FIXME: This will also mark enums with braces in the arguments to enum 3782 // constants as "not simple". This is probably fine in practice, though. 3783 Tok = Tokens->getNextToken(); 3784 } 3785 FormatTok = Tokens->setPosition(StoredPosition); 3786 3787 if (IsSimple) { 3788 nextToken(); 3789 parseBracedList(); 3790 addUnwrappedLine(); 3791 return; 3792 } 3793 3794 // Parse the body of a more complex enum. 3795 // First add a line for everything up to the "{". 3796 nextToken(); 3797 addUnwrappedLine(); 3798 ++Line->Level; 3799 3800 // Parse the enum constants. 3801 while (!eof()) { 3802 if (FormatTok->is(tok::l_brace)) { 3803 // Parse the constant's class body. 
3804 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u, 3805 /*MunchSemi=*/false); 3806 } else if (FormatTok->is(tok::l_paren)) { 3807 parseParens(); 3808 } else if (FormatTok->is(tok::comma)) { 3809 nextToken(); 3810 addUnwrappedLine(); 3811 } else if (FormatTok->is(tok::semi)) { 3812 nextToken(); 3813 addUnwrappedLine(); 3814 break; 3815 } else if (FormatTok->is(tok::r_brace)) { 3816 addUnwrappedLine(); 3817 break; 3818 } else { 3819 nextToken(); 3820 } 3821 } 3822 3823 // Parse the class body after the enum's ";" if any. 3824 parseLevel(OpeningBrace); 3825 nextToken(); 3826 --Line->Level; 3827 addUnwrappedLine(); 3828 } 3829 3830 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 3831 const FormatToken &InitialToken = *FormatTok; 3832 nextToken(); 3833 3834 // The actual identifier can be a nested name specifier, and in macros 3835 // it is often token-pasted. 3836 // An [[attribute]] can be before the identifier. 3837 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 3838 tok::kw___attribute, tok::kw___declspec, 3839 tok::kw_alignas, tok::l_square) || 3840 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 3841 FormatTok->isOneOf(tok::period, tok::comma))) { 3842 if (Style.isJavaScript() && 3843 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 3844 // JavaScript/TypeScript supports inline object types in 3845 // extends/implements positions: 3846 // class Foo implements {bar: number} { } 3847 nextToken(); 3848 if (FormatTok->is(tok::l_brace)) { 3849 tryToParseBracedList(); 3850 continue; 3851 } 3852 } 3853 if (FormatTok->is(tok::l_square) && handleCppAttributes()) 3854 continue; 3855 bool IsNonMacroIdentifier = 3856 FormatTok->is(tok::identifier) && 3857 FormatTok->TokenText != FormatTok->TokenText.upper(); 3858 nextToken(); 3859 // We can have macros in between 'class' and the class name. 
3860 if (!IsNonMacroIdentifier && FormatTok->is(tok::l_paren)) 3861 parseParens(); 3862 } 3863 3864 // Note that parsing away template declarations here leads to incorrectly 3865 // accepting function declarations as record declarations. 3866 // In general, we cannot solve this problem. Consider: 3867 // class A<int> B() {} 3868 // which can be a function definition or a class definition when B() is a 3869 // macro. If we find enough real-world cases where this is a problem, we 3870 // can parse for the 'template' keyword in the beginning of the statement, 3871 // and thus rule out the record production in case there is no template 3872 // (this would still leave us with an ambiguity between template function 3873 // and class declarations). 3874 if (FormatTok->isOneOf(tok::colon, tok::less)) { 3875 do { 3876 if (FormatTok->is(tok::l_brace)) { 3877 calculateBraceTypes(/*ExpectClassBody=*/true); 3878 if (!tryToParseBracedList()) 3879 break; 3880 } 3881 if (FormatTok->is(tok::l_square)) { 3882 FormatToken *Previous = FormatTok->Previous; 3883 if (!Previous || 3884 !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) { 3885 // Don't try parsing a lambda if we had a closing parenthesis before, 3886 // it was probably a pointer to an array: int (*)[]. 3887 if (!tryToParseLambda()) 3888 continue; 3889 } else { 3890 parseSquare(); 3891 continue; 3892 } 3893 } 3894 if (FormatTok->is(tok::semi)) 3895 return; 3896 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) { 3897 addUnwrappedLine(); 3898 nextToken(); 3899 parseCSharpGenericTypeConstraint(); 3900 break; 3901 } 3902 nextToken(); 3903 } while (!eof()); 3904 } 3905 3906 auto GetBraceType = [](const FormatToken &RecordTok) { 3907 switch (RecordTok.Tok.getKind()) { 3908 case tok::kw_class: 3909 return TT_ClassLBrace; 3910 case tok::kw_struct: 3911 return TT_StructLBrace; 3912 case tok::kw_union: 3913 return TT_UnionLBrace; 3914 default: 3915 // Useful for e.g. interface. 
3916 return TT_RecordLBrace; 3917 } 3918 }; 3919 if (FormatTok->is(tok::l_brace)) { 3920 FormatTok->setFinalizedType(GetBraceType(InitialToken)); 3921 if (ParseAsExpr) { 3922 parseChildBlock(); 3923 } else { 3924 if (ShouldBreakBeforeBrace(Style, InitialToken)) 3925 addUnwrappedLine(); 3926 3927 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u; 3928 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false); 3929 } 3930 } 3931 // There is no addUnwrappedLine() here so that we fall through to parsing a 3932 // structural element afterwards. Thus, in "class A {} n, m;", 3933 // "} n, m;" will end up in one unwrapped line. 3934 } 3935 3936 void UnwrappedLineParser::parseObjCMethod() { 3937 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) && 3938 "'(' or identifier expected."); 3939 do { 3940 if (FormatTok->is(tok::semi)) { 3941 nextToken(); 3942 addUnwrappedLine(); 3943 return; 3944 } else if (FormatTok->is(tok::l_brace)) { 3945 if (Style.BraceWrapping.AfterFunction) 3946 addUnwrappedLine(); 3947 parseBlock(); 3948 addUnwrappedLine(); 3949 return; 3950 } else { 3951 nextToken(); 3952 } 3953 } while (!eof()); 3954 } 3955 3956 void UnwrappedLineParser::parseObjCProtocolList() { 3957 assert(FormatTok->is(tok::less) && "'<' expected."); 3958 do { 3959 nextToken(); 3960 // Early exit in case someone forgot a close angle. 3961 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 3962 FormatTok->isObjCAtKeyword(tok::objc_end)) { 3963 return; 3964 } 3965 } while (!eof() && FormatTok->isNot(tok::greater)); 3966 nextToken(); // Skip '>'. 3967 } 3968 3969 void UnwrappedLineParser::parseObjCUntilAtEnd() { 3970 do { 3971 if (FormatTok->isObjCAtKeyword(tok::objc_end)) { 3972 nextToken(); 3973 addUnwrappedLine(); 3974 break; 3975 } 3976 if (FormatTok->is(tok::l_brace)) { 3977 parseBlock(); 3978 // In ObjC interfaces, nothing should be following the "}". 3979 addUnwrappedLine(); 3980 } else if (FormatTok->is(tok::r_brace)) { 3981 // Ignore stray "}". 
parseStructuralElement doesn't consume them. 3982 nextToken(); 3983 addUnwrappedLine(); 3984 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 3985 nextToken(); 3986 parseObjCMethod(); 3987 } else { 3988 parseStructuralElement(); 3989 } 3990 } while (!eof()); 3991 } 3992 3993 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 3994 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 3995 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 3996 nextToken(); 3997 nextToken(); // interface name 3998 3999 // @interface can be followed by a lightweight generic 4000 // specialization list, then either a base class or a category. 4001 if (FormatTok->is(tok::less)) 4002 parseObjCLightweightGenerics(); 4003 if (FormatTok->is(tok::colon)) { 4004 nextToken(); 4005 nextToken(); // base class name 4006 // The base class can also have lightweight generics applied to it. 4007 if (FormatTok->is(tok::less)) 4008 parseObjCLightweightGenerics(); 4009 } else if (FormatTok->is(tok::l_paren)) { 4010 // Skip category, if present. 4011 parseParens(); 4012 } 4013 4014 if (FormatTok->is(tok::less)) 4015 parseObjCProtocolList(); 4016 4017 if (FormatTok->is(tok::l_brace)) { 4018 if (Style.BraceWrapping.AfterObjCDeclaration) 4019 addUnwrappedLine(); 4020 parseBlock(/*MustBeDeclaration=*/true); 4021 } 4022 4023 // With instance variables, this puts '}' on its own line. Without instance 4024 // variables, this ends the @interface line. 4025 addUnwrappedLine(); 4026 4027 parseObjCUntilAtEnd(); 4028 } 4029 4030 void UnwrappedLineParser::parseObjCLightweightGenerics() { 4031 assert(FormatTok->is(tok::less)); 4032 // Unlike protocol lists, generic parameterizations support 4033 // nested angles: 4034 // 4035 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 4036 // NSObject <NSCopying, NSSecureCoding> 4037 // 4038 // so we need to count how many open angles we have left. 
4039 unsigned NumOpenAngles = 1; 4040 do { 4041 nextToken(); 4042 // Early exit in case someone forgot a close angle. 4043 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 4044 FormatTok->isObjCAtKeyword(tok::objc_end)) { 4045 break; 4046 } 4047 if (FormatTok->is(tok::less)) { 4048 ++NumOpenAngles; 4049 } else if (FormatTok->is(tok::greater)) { 4050 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 4051 --NumOpenAngles; 4052 } 4053 } while (!eof() && NumOpenAngles != 0); 4054 nextToken(); // Skip '>'. 4055 } 4056 4057 // Returns true for the declaration/definition form of @protocol, 4058 // false for the expression form. 4059 bool UnwrappedLineParser::parseObjCProtocol() { 4060 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 4061 nextToken(); 4062 4063 if (FormatTok->is(tok::l_paren)) { 4064 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 4065 return false; 4066 } 4067 4068 // The definition/declaration form, 4069 // @protocol Foo 4070 // - (int)someMethod; 4071 // @end 4072 4073 nextToken(); // protocol name 4074 4075 if (FormatTok->is(tok::less)) 4076 parseObjCProtocolList(); 4077 4078 // Check for protocol declaration. 4079 if (FormatTok->is(tok::semi)) { 4080 nextToken(); 4081 addUnwrappedLine(); 4082 return true; 4083 } 4084 4085 addUnwrappedLine(); 4086 parseObjCUntilAtEnd(); 4087 return true; 4088 } 4089 4090 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 4091 bool IsImport = FormatTok->is(Keywords.kw_import); 4092 assert(IsImport || FormatTok->is(tok::kw_export)); 4093 nextToken(); 4094 4095 // Consume the "default" in "export default class/function". 4096 if (FormatTok->is(tok::kw_default)) 4097 nextToken(); 4098 4099 // Consume "async function", "function" and "default function", so that these 4100 // get parsed as free-standing JS functions, i.e. do not require a trailing 4101 // semicolon. 
4102 if (FormatTok->is(Keywords.kw_async)) 4103 nextToken(); 4104 if (FormatTok->is(Keywords.kw_function)) { 4105 nextToken(); 4106 return; 4107 } 4108 4109 // For imports, `export *`, `export {...}`, consume the rest of the line up 4110 // to the terminating `;`. For everything else, just return and continue 4111 // parsing the structural element, i.e. the declaration or expression for 4112 // `export default`. 4113 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 4114 !FormatTok->isStringLiteral() && 4115 !(FormatTok->is(Keywords.kw_type) && 4116 Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) { 4117 return; 4118 } 4119 4120 while (!eof()) { 4121 if (FormatTok->is(tok::semi)) 4122 return; 4123 if (Line->Tokens.empty()) { 4124 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 4125 // import statement should terminate. 4126 return; 4127 } 4128 if (FormatTok->is(tok::l_brace)) { 4129 FormatTok->setBlockKind(BK_Block); 4130 nextToken(); 4131 parseBracedList(); 4132 } else { 4133 nextToken(); 4134 } 4135 } 4136 } 4137 4138 void UnwrappedLineParser::parseStatementMacro() { 4139 nextToken(); 4140 if (FormatTok->is(tok::l_paren)) 4141 parseParens(); 4142 if (FormatTok->is(tok::semi)) 4143 nextToken(); 4144 addUnwrappedLine(); 4145 } 4146 4147 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() { 4148 // consume things like a::`b.c[d:e] or a::* 4149 while (true) { 4150 if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar, 4151 tok::coloncolon, tok::hash) || 4152 Keywords.isVerilogIdentifier(*FormatTok)) { 4153 nextToken(); 4154 } else if (FormatTok->is(tok::l_square)) { 4155 parseSquare(); 4156 } else { 4157 break; 4158 } 4159 } 4160 } 4161 4162 void UnwrappedLineParser::parseVerilogSensitivityList() { 4163 if (!FormatTok->is(tok::at)) 4164 return; 4165 nextToken(); 4166 // A block event expression has 2 at signs. 
4167 if (FormatTok->is(tok::at)) 4168 nextToken(); 4169 switch (FormatTok->Tok.getKind()) { 4170 case tok::star: 4171 nextToken(); 4172 break; 4173 case tok::l_paren: 4174 parseParens(); 4175 break; 4176 default: 4177 parseVerilogHierarchyIdentifier(); 4178 break; 4179 } 4180 } 4181 4182 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() { 4183 unsigned AddLevels = 0; 4184 4185 if (FormatTok->is(Keywords.kw_clocking)) { 4186 nextToken(); 4187 if (Keywords.isVerilogIdentifier(*FormatTok)) 4188 nextToken(); 4189 parseVerilogSensitivityList(); 4190 if (FormatTok->is(tok::semi)) 4191 nextToken(); 4192 } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex, 4193 Keywords.kw_casez, Keywords.kw_randcase, 4194 Keywords.kw_randsequence)) { 4195 if (Style.IndentCaseLabels) 4196 AddLevels++; 4197 nextToken(); 4198 if (FormatTok->is(tok::l_paren)) { 4199 FormatTok->setFinalizedType(TT_ConditionLParen); 4200 parseParens(); 4201 } 4202 if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches)) 4203 nextToken(); 4204 // The case header has no semicolon. 4205 } else { 4206 // "module" etc. 
4207 nextToken(); 4208 // all the words like the name of the module and specifiers like 4209 // "automatic" and the width of function return type 4210 while (true) { 4211 if (FormatTok->is(tok::l_square)) { 4212 auto Prev = FormatTok->getPreviousNonComment(); 4213 if (Prev && Keywords.isVerilogIdentifier(*Prev)) 4214 Prev->setFinalizedType(TT_VerilogDimensionedTypeName); 4215 parseSquare(); 4216 } else if (Keywords.isVerilogIdentifier(*FormatTok) || 4217 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) { 4218 nextToken(); 4219 } else { 4220 break; 4221 } 4222 } 4223 4224 auto NewLine = [this]() { 4225 addUnwrappedLine(); 4226 Line->IsContinuation = true; 4227 }; 4228 4229 // package imports 4230 while (FormatTok->is(Keywords.kw_import)) { 4231 NewLine(); 4232 nextToken(); 4233 parseVerilogHierarchyIdentifier(); 4234 if (FormatTok->is(tok::semi)) 4235 nextToken(); 4236 } 4237 4238 // parameters and ports 4239 if (FormatTok->is(Keywords.kw_verilogHash)) { 4240 NewLine(); 4241 nextToken(); 4242 if (FormatTok->is(tok::l_paren)) { 4243 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen); 4244 parseParens(); 4245 } 4246 } 4247 if (FormatTok->is(tok::l_paren)) { 4248 NewLine(); 4249 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen); 4250 parseParens(); 4251 } 4252 4253 // extends and implements 4254 if (FormatTok->is(Keywords.kw_extends)) { 4255 NewLine(); 4256 nextToken(); 4257 parseVerilogHierarchyIdentifier(); 4258 if (FormatTok->is(tok::l_paren)) 4259 parseParens(); 4260 } 4261 if (FormatTok->is(Keywords.kw_implements)) { 4262 NewLine(); 4263 do { 4264 nextToken(); 4265 parseVerilogHierarchyIdentifier(); 4266 } while (FormatTok->is(tok::comma)); 4267 } 4268 4269 // Coverage event for cover groups. 
4270 if (FormatTok->is(tok::at)) { 4271 NewLine(); 4272 parseVerilogSensitivityList(); 4273 } 4274 4275 if (FormatTok->is(tok::semi)) 4276 nextToken(/*LevelDifference=*/1); 4277 addUnwrappedLine(); 4278 } 4279 4280 return AddLevels; 4281 } 4282 4283 void UnwrappedLineParser::parseVerilogTable() { 4284 assert(FormatTok->is(Keywords.kw_table)); 4285 nextToken(/*LevelDifference=*/1); 4286 addUnwrappedLine(); 4287 4288 auto InitialLevel = Line->Level++; 4289 while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) { 4290 FormatToken *Tok = FormatTok; 4291 nextToken(); 4292 if (Tok->is(tok::semi)) 4293 addUnwrappedLine(); 4294 else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus)) 4295 Tok->setFinalizedType(TT_VerilogTableItem); 4296 } 4297 Line->Level = InitialLevel; 4298 nextToken(/*LevelDifference=*/-1); 4299 addUnwrappedLine(); 4300 } 4301 4302 void UnwrappedLineParser::parseVerilogCaseLabel() { 4303 // The label will get unindented in AnnotatingParser. If there are no leading 4304 // spaces, indent the rest here so that things inside the block will be 4305 // indented relative to things outside. We don't use parseLabel because we 4306 // don't know whether this colon is a label or a ternary expression at this 4307 // point. 4308 auto OrigLevel = Line->Level; 4309 auto FirstLine = CurrentLines->size(); 4310 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1)) 4311 ++Line->Level; 4312 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok)) 4313 --Line->Level; 4314 parseStructuralElement(); 4315 // Restore the indentation in both the new line and the line that has the 4316 // label. 
4317 if (CurrentLines->size() > FirstLine) 4318 (*CurrentLines)[FirstLine].Level = OrigLevel; 4319 Line->Level = OrigLevel; 4320 } 4321 4322 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const { 4323 for (const auto &N : Line.Tokens) { 4324 if (N.Tok->MacroCtx) 4325 return true; 4326 for (const UnwrappedLine &Child : N.Children) 4327 if (containsExpansion(Child)) 4328 return true; 4329 } 4330 return false; 4331 } 4332 4333 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { 4334 if (Line->Tokens.empty()) 4335 return; 4336 LLVM_DEBUG({ 4337 if (!parsingPPDirective()) { 4338 llvm::dbgs() << "Adding unwrapped line:\n"; 4339 printDebugInfo(*Line); 4340 } 4341 }); 4342 4343 // If this line closes a block when in Whitesmiths mode, remember that 4344 // information so that the level can be decreased after the line is added. 4345 // This has to happen after the addition of the line since the line itself 4346 // needs to be indented. 4347 bool ClosesWhitesmithsBlock = 4348 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && 4349 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 4350 4351 // If the current line was expanded from a macro call, we use it to 4352 // reconstruct an unwrapped line from the structure of the expanded unwrapped 4353 // line and the unexpanded token stream. 4354 if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) { 4355 if (!Reconstruct) 4356 Reconstruct.emplace(Line->Level, Unexpanded); 4357 Reconstruct->addLine(*Line); 4358 4359 // While the reconstructed unexpanded lines are stored in the normal 4360 // flow of lines, the expanded lines are stored on the side to be analyzed 4361 // in an extra step. 
4362 CurrentExpandedLines.push_back(std::move(*Line)); 4363 4364 if (Reconstruct->finished()) { 4365 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult(); 4366 assert(!Reconstructed.Tokens.empty() && 4367 "Reconstructed must at least contain the macro identifier."); 4368 assert(!parsingPPDirective()); 4369 LLVM_DEBUG({ 4370 llvm::dbgs() << "Adding unexpanded line:\n"; 4371 printDebugInfo(Reconstructed); 4372 }); 4373 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines; 4374 Lines.push_back(std::move(Reconstructed)); 4375 CurrentExpandedLines.clear(); 4376 Reconstruct.reset(); 4377 } 4378 } else { 4379 // At the top level we only get here when no unexpansion is going on, or 4380 // when conditional formatting led to unfinished macro reconstructions. 4381 assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0); 4382 CurrentLines->push_back(std::move(*Line)); 4383 } 4384 Line->Tokens.clear(); 4385 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 4386 Line->FirstStartColumn = 0; 4387 Line->IsContinuation = false; 4388 4389 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) 4390 --Line->Level; 4391 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) { 4392 CurrentLines->append( 4393 std::make_move_iterator(PreprocessorDirectives.begin()), 4394 std::make_move_iterator(PreprocessorDirectives.end())); 4395 PreprocessorDirectives.clear(); 4396 } 4397 // Disconnect the current token from the last token on the previous line. 4398 FormatTok->Previous = nullptr; 4399 } 4400 4401 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); } 4402 4403 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 4404 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 4405 FormatTok.NewlinesBefore > 0; 4406 } 4407 4408 // Checks if \p FormatTok is a line comment that continues the line comment 4409 // section on \p Line. 
4410 static bool 4411 continuesLineCommentSection(const FormatToken &FormatTok, 4412 const UnwrappedLine &Line, 4413 const llvm::Regex &CommentPragmasRegex) { 4414 if (Line.Tokens.empty()) 4415 return false; 4416 4417 StringRef IndentContent = FormatTok.TokenText; 4418 if (FormatTok.TokenText.startswith("//") || 4419 FormatTok.TokenText.startswith("/*")) { 4420 IndentContent = FormatTok.TokenText.substr(2); 4421 } 4422 if (CommentPragmasRegex.match(IndentContent)) 4423 return false; 4424 4425 // If Line starts with a line comment, then FormatTok continues the comment 4426 // section if its original column is greater or equal to the original start 4427 // column of the line. 4428 // 4429 // Define the min column token of a line as follows: if a line ends in '{' or 4430 // contains a '{' followed by a line comment, then the min column token is 4431 // that '{'. Otherwise, the min column token of the line is the first token of 4432 // the line. 4433 // 4434 // If Line starts with a token other than a line comment, then FormatTok 4435 // continues the comment section if its original column is greater than the 4436 // original start column of the min column token of the line. 
4437 // 4438 // For example, the second line comment continues the first in these cases: 4439 // 4440 // // first line 4441 // // second line 4442 // 4443 // and: 4444 // 4445 // // first line 4446 // // second line 4447 // 4448 // and: 4449 // 4450 // int i; // first line 4451 // // second line 4452 // 4453 // and: 4454 // 4455 // do { // first line 4456 // // second line 4457 // int i; 4458 // } while (true); 4459 // 4460 // and: 4461 // 4462 // enum { 4463 // a, // first line 4464 // // second line 4465 // b 4466 // }; 4467 // 4468 // The second line comment doesn't continue the first in these cases: 4469 // 4470 // // first line 4471 // // second line 4472 // 4473 // and: 4474 // 4475 // int i; // first line 4476 // // second line 4477 // 4478 // and: 4479 // 4480 // do { // first line 4481 // // second line 4482 // int i; 4483 // } while (true); 4484 // 4485 // and: 4486 // 4487 // enum { 4488 // a, // first line 4489 // // second line 4490 // }; 4491 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 4492 4493 // Scan for '{//'. If found, use the column of '{' as a min column for line 4494 // comment section continuation. 4495 const FormatToken *PreviousToken = nullptr; 4496 for (const UnwrappedLineNode &Node : Line.Tokens) { 4497 if (PreviousToken && PreviousToken->is(tok::l_brace) && 4498 isLineComment(*Node.Tok)) { 4499 MinColumnToken = PreviousToken; 4500 break; 4501 } 4502 PreviousToken = Node.Tok; 4503 4504 // Grab the last newline preceding a token in this unwrapped line. 
4505 if (Node.Tok->NewlinesBefore > 0) 4506 MinColumnToken = Node.Tok; 4507 } 4508 if (PreviousToken && PreviousToken->is(tok::l_brace)) 4509 MinColumnToken = PreviousToken; 4510 4511 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 4512 MinColumnToken); 4513 } 4514 4515 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 4516 bool JustComments = Line->Tokens.empty(); 4517 for (FormatToken *Tok : CommentsBeforeNextToken) { 4518 // Line comments that belong to the same line comment section are put on the 4519 // same line since later we might want to reflow content between them. 4520 // Additional fine-grained breaking of line comment sections is controlled 4521 // by the class BreakableLineCommentSection in case it is desirable to keep 4522 // several line comment sections in the same unwrapped line. 4523 // 4524 // FIXME: Consider putting separate line comment sections as children to the 4525 // unwrapped line instead. 4526 Tok->ContinuesLineCommentSection = 4527 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex); 4528 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection) 4529 addUnwrappedLine(); 4530 pushToken(Tok); 4531 } 4532 if (NewlineBeforeNext && JustComments) 4533 addUnwrappedLine(); 4534 CommentsBeforeNextToken.clear(); 4535 } 4536 4537 void UnwrappedLineParser::nextToken(int LevelDifference) { 4538 if (eof()) 4539 return; 4540 flushComments(isOnNewLine(*FormatTok)); 4541 pushToken(FormatTok); 4542 FormatToken *Previous = FormatTok; 4543 if (!Style.isJavaScript()) 4544 readToken(LevelDifference); 4545 else 4546 readTokenWithJavaScriptASI(); 4547 FormatTok->Previous = Previous; 4548 if (Style.isVerilog()) { 4549 // Blocks in Verilog can have `begin` and `end` instead of braces. For 4550 // keywords like `begin`, we can't treat them the same as left braces 4551 // because some contexts require one of them. 
For example structs use 4552 // braces and if blocks use keywords, and a left brace can occur in an if 4553 // statement, but it is not a block. For keywords like `end`, we simply 4554 // treat them the same as right braces. 4555 if (Keywords.isVerilogEnd(*FormatTok)) 4556 FormatTok->Tok.setKind(tok::r_brace); 4557 } 4558 } 4559 4560 void UnwrappedLineParser::distributeComments( 4561 const SmallVectorImpl<FormatToken *> &Comments, 4562 const FormatToken *NextTok) { 4563 // Whether or not a line comment token continues a line is controlled by 4564 // the method continuesLineCommentSection, with the following caveat: 4565 // 4566 // Define a trail of Comments to be a nonempty proper postfix of Comments such 4567 // that each comment line from the trail is aligned with the next token, if 4568 // the next token exists. If a trail exists, the beginning of the maximal 4569 // trail is marked as a start of a new comment section. 4570 // 4571 // For example in this code: 4572 // 4573 // int a; // line about a 4574 // // line 1 about b 4575 // // line 2 about b 4576 // int b; 4577 // 4578 // the two lines about b form a maximal trail, so there are two sections, the 4579 // first one consisting of the single comment "// line about a" and the 4580 // second one consisting of the next two comments. 4581 if (Comments.empty()) 4582 return; 4583 bool ShouldPushCommentsInCurrentLine = true; 4584 bool HasTrailAlignedWithNextToken = false; 4585 unsigned StartOfTrailAlignedWithNextToken = 0; 4586 if (NextTok) { 4587 // We are skipping the first element intentionally. 
4588 for (unsigned i = Comments.size() - 1; i > 0; --i) { 4589 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 4590 HasTrailAlignedWithNextToken = true; 4591 StartOfTrailAlignedWithNextToken = i; 4592 } 4593 } 4594 } 4595 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 4596 FormatToken *FormatTok = Comments[i]; 4597 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 4598 FormatTok->ContinuesLineCommentSection = false; 4599 } else { 4600 FormatTok->ContinuesLineCommentSection = 4601 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 4602 } 4603 if (!FormatTok->ContinuesLineCommentSection && 4604 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 4605 ShouldPushCommentsInCurrentLine = false; 4606 } 4607 if (ShouldPushCommentsInCurrentLine) 4608 pushToken(FormatTok); 4609 else 4610 CommentsBeforeNextToken.push_back(FormatTok); 4611 } 4612 } 4613 4614 void UnwrappedLineParser::readToken(int LevelDifference) { 4615 SmallVector<FormatToken *, 1> Comments; 4616 bool PreviousWasComment = false; 4617 bool FirstNonCommentOnLine = false; 4618 do { 4619 FormatTok = Tokens->getNextToken(); 4620 assert(FormatTok); 4621 while (FormatTok->getType() == TT_ConflictStart || 4622 FormatTok->getType() == TT_ConflictEnd || 4623 FormatTok->getType() == TT_ConflictAlternative) { 4624 if (FormatTok->getType() == TT_ConflictStart) 4625 conditionalCompilationStart(/*Unreachable=*/false); 4626 else if (FormatTok->getType() == TT_ConflictAlternative) 4627 conditionalCompilationAlternative(); 4628 else if (FormatTok->getType() == TT_ConflictEnd) 4629 conditionalCompilationEnd(); 4630 FormatTok = Tokens->getNextToken(); 4631 FormatTok->MustBreakBefore = true; 4632 } 4633 4634 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine, 4635 const FormatToken &Tok, 4636 bool PreviousWasComment) { 4637 auto IsFirstOnLine = [](const FormatToken &Tok) { 4638 return Tok.HasUnescapedNewline || Tok.IsFirst; 4639 }; 4640 4641 // Consider 
preprocessor directives preceded by block comments as first 4642 // on line. 4643 if (PreviousWasComment) 4644 return FirstNonCommentOnLine || IsFirstOnLine(Tok); 4645 return IsFirstOnLine(Tok); 4646 }; 4647 4648 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4649 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4650 PreviousWasComment = FormatTok->is(tok::comment); 4651 4652 while (!Line->InPPDirective && FormatTok->is(tok::hash) && 4653 (!Style.isVerilog() || 4654 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) && 4655 FirstNonCommentOnLine) { 4656 distributeComments(Comments, FormatTok); 4657 Comments.clear(); 4658 // If there is an unfinished unwrapped line, we flush the preprocessor 4659 // directives only after that unwrapped line was finished later. 4660 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 4661 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 4662 assert((LevelDifference >= 0 || 4663 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 4664 "LevelDifference makes Line->Level negative"); 4665 Line->Level += LevelDifference; 4666 // Comments stored before the preprocessor directive need to be output 4667 // before the preprocessor directive, at the same level as the 4668 // preprocessor directive, as we consider them to apply to the directive. 4669 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 4670 PPBranchLevel > 0) { 4671 Line->Level += PPBranchLevel; 4672 } 4673 flushComments(isOnNewLine(*FormatTok)); 4674 parsePPDirective(); 4675 PreviousWasComment = FormatTok->is(tok::comment); 4676 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4677 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4678 } 4679 4680 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 4681 !Line->InPPDirective) { 4682 continue; 4683 } 4684 4685 if (FormatTok->is(tok::identifier) && 4686 Macros.defined(FormatTok->TokenText) && 4687 // FIXME: Allow expanding macros in preprocessor directives. 
4688 !Line->InPPDirective) { 4689 FormatToken *ID = FormatTok; 4690 unsigned Position = Tokens->getPosition(); 4691 4692 // To correctly parse the code, we need to replace the tokens of the macro 4693 // call with its expansion. 4694 auto PreCall = std::move(Line); 4695 Line.reset(new UnwrappedLine); 4696 bool OldInExpansion = InExpansion; 4697 InExpansion = true; 4698 // We parse the macro call into a new line. 4699 auto Args = parseMacroCall(); 4700 InExpansion = OldInExpansion; 4701 assert(Line->Tokens.front().Tok == ID); 4702 // And remember the unexpanded macro call tokens. 4703 auto UnexpandedLine = std::move(Line); 4704 // Reset to the old line. 4705 Line = std::move(PreCall); 4706 4707 LLVM_DEBUG({ 4708 llvm::dbgs() << "Macro call: " << ID->TokenText << "("; 4709 if (Args) { 4710 llvm::dbgs() << "("; 4711 for (const auto &Arg : Args.value()) 4712 for (const auto &T : Arg) 4713 llvm::dbgs() << T->TokenText << " "; 4714 llvm::dbgs() << ")"; 4715 } 4716 llvm::dbgs() << "\n"; 4717 }); 4718 if (Macros.objectLike(ID->TokenText) && Args && 4719 !Macros.hasArity(ID->TokenText, Args->size())) { 4720 // The macro is either 4721 // - object-like, but we got argumnets, or 4722 // - overloaded to be both object-like and function-like, but none of 4723 // the function-like arities match the number of arguments. 4724 // Thus, expand as object-like macro. 4725 LLVM_DEBUG(llvm::dbgs() 4726 << "Macro \"" << ID->TokenText 4727 << "\" not overloaded for arity " << Args->size() 4728 << "or not function-like, using object-like overload."); 4729 Args.reset(); 4730 UnexpandedLine->Tokens.resize(1); 4731 Tokens->setPosition(Position); 4732 nextToken(); 4733 assert(!Args && Macros.objectLike(ID->TokenText)); 4734 } 4735 if ((!Args && Macros.objectLike(ID->TokenText)) || 4736 (Args && Macros.hasArity(ID->TokenText, Args->size()))) { 4737 // Next, we insert the expanded tokens in the token stream at the 4738 // current position, and continue parsing. 
4739 Unexpanded[ID] = std::move(UnexpandedLine); 4740 SmallVector<FormatToken *, 8> Expansion = 4741 Macros.expand(ID, std::move(Args)); 4742 if (!Expansion.empty()) 4743 FormatTok = Tokens->insertTokens(Expansion); 4744 4745 LLVM_DEBUG({ 4746 llvm::dbgs() << "Expanded: "; 4747 for (const auto &T : Expansion) 4748 llvm::dbgs() << T->TokenText << " "; 4749 llvm::dbgs() << "\n"; 4750 }); 4751 } else { 4752 LLVM_DEBUG({ 4753 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText 4754 << "\", because it was used "; 4755 if (Args) 4756 llvm::dbgs() << "with " << Args->size(); 4757 else 4758 llvm::dbgs() << "without"; 4759 llvm::dbgs() << " arguments, which doesn't match any definition.\n"; 4760 }); 4761 Tokens->setPosition(Position); 4762 FormatTok = ID; 4763 } 4764 } 4765 4766 if (!FormatTok->is(tok::comment)) { 4767 distributeComments(Comments, FormatTok); 4768 Comments.clear(); 4769 return; 4770 } 4771 4772 Comments.push_back(FormatTok); 4773 } while (!eof()); 4774 4775 distributeComments(Comments, nullptr); 4776 Comments.clear(); 4777 } 4778 4779 namespace { 4780 template <typename Iterator> 4781 void pushTokens(Iterator Begin, Iterator End, 4782 llvm::SmallVectorImpl<FormatToken *> &Into) { 4783 for (auto I = Begin; I != End; ++I) { 4784 Into.push_back(I->Tok); 4785 for (const auto &Child : I->Children) 4786 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into); 4787 } 4788 } 4789 } // namespace 4790 4791 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> 4792 UnwrappedLineParser::parseMacroCall() { 4793 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args; 4794 assert(Line->Tokens.empty()); 4795 nextToken(); 4796 if (!FormatTok->is(tok::l_paren)) 4797 return Args; 4798 unsigned Position = Tokens->getPosition(); 4799 FormatToken *Tok = FormatTok; 4800 nextToken(); 4801 Args.emplace(); 4802 auto ArgStart = std::prev(Line->Tokens.end()); 4803 4804 int Parens = 0; 4805 do { 4806 switch (FormatTok->Tok.getKind()) 
{ 4807 case tok::l_paren: 4808 ++Parens; 4809 nextToken(); 4810 break; 4811 case tok::r_paren: { 4812 if (Parens > 0) { 4813 --Parens; 4814 nextToken(); 4815 break; 4816 } 4817 Args->push_back({}); 4818 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); 4819 nextToken(); 4820 return Args; 4821 } 4822 case tok::comma: { 4823 if (Parens > 0) { 4824 nextToken(); 4825 break; 4826 } 4827 Args->push_back({}); 4828 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); 4829 nextToken(); 4830 ArgStart = std::prev(Line->Tokens.end()); 4831 break; 4832 } 4833 default: 4834 nextToken(); 4835 break; 4836 } 4837 } while (!eof()); 4838 Line->Tokens.resize(1); 4839 Tokens->setPosition(Position); 4840 FormatTok = Tok; 4841 return {}; 4842 } 4843 4844 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 4845 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 4846 if (MustBreakBeforeNextToken) { 4847 Line->Tokens.back().Tok->MustBreakBefore = true; 4848 MustBreakBeforeNextToken = false; 4849 } 4850 } 4851 4852 } // end namespace format 4853 } // end namespace clang 4854