//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the implementation of the UnwrappedLineParser,
/// which turns a stream of tokens into UnwrappedLines.
///
//===----------------------------------------------------------------------===//

#include "UnwrappedLineParser.h"
#include "FormatToken.h"
#include "FormatTokenLexer.h"
#include "FormatTokenSource.h"
#include "Macros.h"
#include "TokenAnnotator.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Support/raw_ostream.h"

#include <algorithm>
#include <utility>

#define DEBUG_TYPE "format-parser"

namespace clang {
namespace format {

namespace {

/// Writes a debug rendering of \p Line to \p OS.
///
/// The output starts with \p Prefix and a header holding the line's level, its
/// first start column and a " MACRO" marker when the line is inside a
/// preprocessor directive. Each token is then printed with its kind name,
/// numeric token type, original column and text. Child lines attached to a
/// token are printed recursively, each on its own output line, with \p Prefix
/// extended.
///
/// \param PrintText Not read by this implementation.
void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
               StringRef Prefix = "", bool PrintText = false) {
  OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
     << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
  // Set after children were printed: the next token (if any) then starts on a
  // fresh output line and needs the prefix again.
  bool NewLine = false;
  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
                                                    E = Line.Tokens.end();
       I != E; ++I) {
    if (NewLine) {
      OS << Prefix;
      NewLine = false;
    }
    OS << I->Tok->Tok.getName() << "["
       << "T=" << (unsigned)I->Tok->getType()
       << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
       << "\"] ";
    for (SmallVectorImpl<UnwrappedLine>::const_iterator
             CI = I->Children.begin(),
             CE = I->Children.end();
         CI != CE; ++CI) {
      OS << "\n";
      printLine(OS, *CI, (Prefix + " ").str());
      NewLine = true;
    }
  }
  // A recursive call for the last child already terminated the output line.
  if (!NewLine)
    OS << "\n";
}

/// Prints \p Line to llvm::dbgs(). Only referenced from LLVM_DEBUG blocks in
/// this file, hence LLVM_ATTRIBUTE_UNUSED.
LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
  printLine(llvm::dbgs(), Line);
}

/// Scope guard for the "must be a declaration" state.
///
/// On construction, stores \p MustBeDeclaration in \p Line and pushes it onto
/// \p Stack. On destruction, pops the value again and restores
/// Line.MustBeDeclaration from the new top of the stack, or to true when the
/// stack became empty.
class ScopedDeclarationState {
public:
  ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
                         bool MustBeDeclaration)
      : Line(Line), Stack(Stack) {
    Line.MustBeDeclaration = MustBeDeclaration;
    Stack.push_back(MustBeDeclaration);
  }
  ~ScopedDeclarationState() {
    Stack.pop_back();
    if (!Stack.empty())
      Line.MustBeDeclaration = Stack.back();
    else
      Line.MustBeDeclaration = true;
  }

private:
  UnwrappedLine &Line;
  llvm::BitVector &Stack;
};

} // end anonymous namespace

/// Scope guard that temporarily replaces the parser's current line and the
/// container that finished lines are appended to.
///
/// On construction, the parser's current line is set aside and a fresh line
/// inheriting Level, PPLevel, InPPDirective and InMacroBody is installed.
/// Finished lines are redirected either to Parser.PreprocessorDirectives (when
/// \p SwitchToPreprocessorLines is set) or, if the set-aside line has tokens,
/// to the Children of its last token. On destruction, any pending tokens are
/// flushed with addUnwrappedLine() and the previous line and container are
/// restored.
class ScopedLineState {
public:
  ScopedLineState(UnwrappedLineParser &Parser,
                  bool SwitchToPreprocessorLines = false)
      : Parser(Parser), OriginalLines(Parser.CurrentLines) {
    if (SwitchToPreprocessorLines)
      Parser.CurrentLines = &Parser.PreprocessorDirectives;
    else if (!Parser.Line->Tokens.empty())
      Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
    PreBlockLine = std::move(Parser.Line);
    Parser.Line = std::make_unique<UnwrappedLine>();
    Parser.Line->Level = PreBlockLine->Level;
    Parser.Line->PPLevel = PreBlockLine->PPLevel;
    Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
    Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
  }

  ~ScopedLineState() {
    if (!Parser.Line->Tokens.empty())
      Parser.addUnwrappedLine();
    assert(Parser.Line->Tokens.empty());
    Parser.Line = std::move(PreBlockLine);
    // Lines were collected as preprocessor directives: request a break before
    // the next token of the restored line.
    if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
      Parser.MustBreakBeforeNextToken = true;
    Parser.CurrentLines = OriginalLines;
  }

private:
  UnwrappedLineParser &Parser;

  // The line that was current when this object was constructed.
  std::unique_ptr<UnwrappedLine> PreBlockLine;
  // The value of Parser.CurrentLines when this object was constructed.
  SmallVectorImpl<UnwrappedLine> *OriginalLines;
};

/// Scope guard for the indentation level around a compound statement.
///
/// On construction, optionally finishes the current unwrapped line (when the
/// brace is wrapped) and optionally increments the referenced level (when
/// braces are indented). On destruction, the level is reset to the value it
/// had at construction.
class CompoundStatementIndenter {
public:
  /// Uses Style.BraceWrapping.AfterControlStatement as the wrap condition and
  /// Style.BraceWrapping.IndentBraces as the indent condition.
  CompoundStatementIndenter(UnwrappedLineParser *Parser,
                            const FormatStyle &Style, unsigned &LineLevel)
      : CompoundStatementIndenter(Parser, LineLevel,
                                  Style.BraceWrapping.AfterControlStatement,
                                  Style.BraceWrapping.IndentBraces) {}
  CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
                            bool WrapBrace, bool IndentBrace)
      : LineLevel(LineLevel), OldLineLevel(LineLevel) {
    if (WrapBrace)
      Parser->addUnwrappedLine();
    if (IndentBrace)
      ++LineLevel;
  }
  ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }

private:
  unsigned &LineLevel;
  unsigned OldLineLevel;
};

/// Constructs a parser over \p Tokens that reports finished lines to
/// \p Callback.
///
/// Include guard detection starts out as IG_Rejected when
/// Style.IndentPPDirectives is PPDIS_None and as IG_Inited otherwise.
UnwrappedLineParser::UnwrappedLineParser(
    SourceManager &SourceMgr, const FormatStyle &Style,
    const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
    ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
    llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
    IdentifierTable &IdentTable)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Style(Style), Keywords(Keywords),
      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
      IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
                       ? IG_Rejected
                       : IG_Inited),
      IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
      Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}

/// Resets the per-run parsing state so that parse() can start another run
/// over the same tokens. PPLevelBranchIndex and PPLevelBranchCount are not
/// touched here; parse() advances them between runs.
void UnwrappedLineParser::reset() {
  PPBranchLevel = -1;
  IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
                     ? IG_Rejected
                     : IG_Inited;
  IncludeGuardToken = nullptr;
  Line.reset(new UnwrappedLine);
  CommentsBeforeNextToken.clear();
  FormatTok = nullptr;
  MustBreakBeforeNextToken = false;
  IsDecltypeAutoFunction = false;
  PreprocessorDirectives.clear();
  CurrentLines = &Lines;
  DeclarationScopeStack.clear();
  NestedTooDeep.clear();
  NestedLambdas.clear();
  PPStack.clear();
  Line->FirstStartColumn = FirstStartColumn;

  // Only clear the per-token macro context when Unexpanded is non-empty.
  if (!Unexpanded.empty())
    for (FormatToken *Token : AllTokens)
      Token->MacroCtx.reset();
  CurrentExpandedLines.clear();
  ExpandedLines.clear();
  Unexpanded.clear();
  InExpansion = false;
  Reconstruct.reset();
}

/// Parses all tokens into unwrapped lines and hands them to the callback.
///
/// The token stream is parsed repeatedly: after each run the innermost entry
/// of PPLevelBranchIndex is advanced (entries that reached their branch count
/// are dropped first), and the loop ends once PPLevelBranchIndex is empty.
/// Every run ends with Callback.finishRun().
void UnwrappedLineParser::parse() {
  IndexedTokenSource TokenSource(AllTokens);
  // NOTE(review): reset() below replaces Line and assigns FirstStartColumn
  // again, so this assignment looks redundant - confirm before removing.
  Line->FirstStartColumn = FirstStartColumn;
  do {
    LLVM_DEBUG(llvm::dbgs() << "----\n");
    reset();
    Tokens = &TokenSource;
    TokenSource.reset();

    readToken();
    parseFile();

    // If we found an include guard then all preprocessor directives (other than
    // the guard) are over-indented by one.
    if (IncludeGuard == IG_Found) {
      for (auto &Line : Lines)
        if (Line.InPPDirective && Line.Level > 0)
          --Line.Level;
    }

    // Create line with eof token.
    assert(FormatTok->is(tok::eof));
    pushToken(FormatTok);
    addUnwrappedLine();

    // In a first run, format everything with the lines containing macro calls
    // replaced by the expansion.
    if (!ExpandedLines.empty()) {
      LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
      for (const auto &Line : Lines) {
        if (!Line.Tokens.empty()) {
          // ExpandedLines is looked up by the first token of the line.
          auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
          if (it != ExpandedLines.end()) {
            for (const auto &Expanded : it->second) {
              LLVM_DEBUG(printDebugInfo(Expanded));
              Callback.consumeUnwrappedLine(Expanded);
            }
            continue;
          }
        }
        LLVM_DEBUG(printDebugInfo(Line));
        Callback.consumeUnwrappedLine(Line);
      }
      Callback.finishRun();
    }

    LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
    for (const UnwrappedLine &Line : Lines) {
      LLVM_DEBUG(printDebugInfo(Line));
      Callback.consumeUnwrappedLine(Line);
    }
    Callback.finishRun();
    Lines.clear();
    // Drop all trailing levels whose last branch has been handled, then
    // advance the innermost remaining level to its next branch.
    while (!PPLevelBranchIndex.empty() &&
           PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
      PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
      PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
    }
    if (!PPLevelBranchIndex.empty()) {
      ++PPLevelBranchIndex.back();
      assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
      assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
    }
  } while (!PPLevelBranchIndex.empty());
}

/// Parses the top level of the file: a braced list for LK_TextProto, a
/// regular level otherwise. Afterwards flushes pending comments and finishes
/// the last line.
void UnwrappedLineParser::parseFile() {
  // The top-level context in a file always has declarations, except for pre-
  // processor directives and JavaScript files.
  bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (Style.Language == FormatStyle::LK_TextProto)
    parseBracedList();
  else
    parseLevel();
  // Make sure to format the remaining tokens.
  //
  // LK_TextProto is special since its top-level is parsed as the body of a
  // braced list, which does not necessarily have natural line separators such
  // as a semicolon. Comments after the last entry that have been determined to
  // not belong to that line, as in:
  //   key: value
  //   // endfile comment
  // do not have a chance to be put on a line of their own until this point.
  // Here we add this newline before end-of-file comments.
  if (Style.Language == FormatStyle::LK_TextProto &&
      !CommentsBeforeNextToken.empty()) {
    addUnwrappedLine();
  }
  flushComments(true);
  addUnwrappedLine();
}

/// Consumes tokens up to (not including) the next '{' or the end of the file.
/// Each 'where' keyword encountered first finishes the current unwrapped
/// line; the remainder is then handled by a recursive call.
void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      return;
    default:
      if (FormatTok->is(Keywords.kw_where)) {
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
      break;
    }
  } while (!eof());
}

/// Consumes tokens until the square bracket that balances an already consumed
/// '[' has been consumed, then finishes the unwrapped line. Nested brackets
/// are counted. Stops at the end of the file if no balancing ']' is found.
void UnwrappedLineParser::parseCSharpAttribute() {
  // Starts at 1: the caller has consumed the opening '['.
  int UnpairedSquareBrackets = 1;
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::r_square:
      nextToken();
      --UnpairedSquareBrackets;
      if (UnpairedSquareBrackets == 0) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::l_square:
      ++UnpairedSquareBrackets;
      nextToken();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

/// Returns true if the last finished line is a preprocessor directive, or if
/// the previous token is a comment that is multiline or was preceded by at
/// least one newline.
bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
  if (!Lines.empty() && Lines.back().InPPDirective)
    return true;

  const FormatToken *Previous = Tokens->getPreviousToken();
  return Previous && Previous->is(tok::comment) &&
         (Previous->IsMultiline || Previous->NewlinesBefore > 0);
}

/// \brief Parses a level, that is, the sequence of comments, blocks and
/// structural elements up to the closing brace matching \p OpeningBrace, or up
/// to the end of the file if there is no opening brace.
/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level
/// \param CanContainBracedList If the content can contain (at any level) a
/// braced list.
/// \param NextLBracesType The type for left brace found in this level.
/// \param IfKind The \p if statement kind in the level.
/// \param IfLeftBrace The left brace of the \p if block in the level.
/// \returns true if a simple block of if/else/for/while, or false otherwise.
/// (A simple block has a single statement.)
bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
                                     bool CanContainBracedList,
                                     TokenType NextLBracesType,
                                     IfStmtKind *IfKind,
                                     FormatToken **IfLeftBrace) {
  // Only a compound-requirement brace type is translated (to a braced list
  // brace type) for the elements nested in this level.
  auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace
                                  ? TT_BracedListLBrace
                                  : TT_Unknown;
  // Evaluated once on entry. When RemoveBracesLLVM is off this is true, so
  // the "simple block" result below can never be true.
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  FormatToken *IfLBrace = nullptr;
  bool HasDoWhile = false;
  bool HasLabel = false;
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;

  do {
    if (FormatTok->getType() == TT_AttributeMacro) {
      nextToken();
      continue;
    }
    // Macro block begin/end tokens are dispatched like braces.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin)
      kind = tok::l_brace;
    else if (FormatTok->getType() == TT_MacroBlockEnd)
      kind = tok::r_brace;

    // Parses one structural element and counts it as a statement. Once
    // HasDoWhile / HasLabel have been set, nullptr is passed instead so that
    // later elements cannot modify them.
    auto ParseDefault = [this, OpeningBrace, NextLevelLBracesType, IfKind,
                         &IfLBrace, &HasDoWhile, &HasLabel, &StatementCount] {
      parseStructuralElement(!OpeningBrace, NextLevelLBracesType, IfKind,
                             &IfLBrace, HasDoWhile ? nullptr : &HasDoWhile,
                             HasLabel ? nullptr : &HasLabel);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
    };

    switch (kind) {
    case tok::comment:
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      if (NextLBracesType != TT_Unknown) {
        FormatTok->setFinalizedType(NextLBracesType);
      } else if (FormatTok->Previous &&
                 FormatTok->Previous->ClosesRequiresClause) {
        // We need the 'default' case here to correctly parse a function
        // l_brace.
        ParseDefault();
        continue;
      }
      if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) &&
          tryToParseBracedList()) {
        continue;
      }
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/true, /*KeepBraces=*/true, /*IfKind=*/nullptr,
                 /*UnindentWhitesmithsBraces=*/false, CanContainBracedList,
                 NextLBracesType);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (OpeningBrace) {
        // The closing brace is left for the caller to consume. The remaining
        // checks only decide whether this was a "simple block".
        if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
            !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
          return false;
        }
        // FormatTok may be a macro block end mapped to r_brace above, hence
        // the explicit token check.
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
            HasDoWhile || IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective()) {
          return false;
        }
        // A comment on the same line as the closing brace also disqualifies
        // the block.
        const FormatToken *Next = Tokens->peekNextToken();
        if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
          return false;
        if (IfLeftBrace)
          *IfLeftBrace = IfLBrace;
        return true;
      }
      // No opening brace for this level: consume the stray '}' and go on.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Look ahead (skipping comments) for the token after 'default', then
      // rewind to the stored position.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
        assert(Next);
      } while (Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      [[fallthrough]];
    }
    case tok::kw_case:
      if (Style.isProto() || Style.isVerilog() ||
          (Style.isJavaScript() && Line->MustBeDeclaration)) {
        // Proto: there are no switch/case statements
        // Verilog: Case labels don't have this word. We handle case
        // labels including default in TokenAnnotator.
        // JavaScript: A 'case: string' style field declaration.
        ParseDefault();
        break;
      }
      // The level is raised at most once, for the first label in this level.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels ||
           (Line->InPPDirective && Line->Level == 1))) {
        ++Line->Level;
      }
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        nextToken();
        parseCSharpAttribute();
        break;
      }
      if (handleCppAttributes())
        break;
      [[fallthrough]];
    default:
      ParseDefault();
      break;
    }
  } while (!eof());

  return false;
}

/// Classifies the braces starting at the current '{' token as blocks
/// (BK_Block) or braced initializer lists (BK_BracedInit).
///
/// Scans forward from FormatTok until the brace stack is empty or the end of
/// the file is reached, deciding the kind of each still-unknown brace pair
/// mostly from the token following the closing brace. Braces that remain
/// unclosed are marked as blocks. The token position is restored before
/// returning.
///
/// \param ExpectClassBody If set, a ';' after the closing brace of the
/// outermost brace pair is not taken as a sign of a braced list.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  struct StackEntry {
    FormatToken *Tok;
    const FormatToken *PrevTok;
  };
  SmallVector<StackEntry, 8> LBraceStack;
  assert(Tok->is(tok::l_brace));
  do {
    // Get next non-comment token.
    FormatToken *NextTok;
    do {
      NextTok = Tokens->getNextToken();
    } while (NextTok->is(tok::comment));

    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.isJavaScript() && PrevTok) {
        if (PrevTok->isOneOf(tok::colon, tok::less)) {
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->setBlockKind(BK_BracedInit);
        } else if (PrevTok->is(tok::r_paren)) {
          // `) { }` can only occur in function or method declarations in JS.
          Tok->setBlockKind(BK_Block);
        }
      } else {
        Tok->setBlockKind(BK_Unknown);
      }
      LBraceStack.push_back({Tok, PrevTok});
      break;
    case tok::r_brace:
      if (LBraceStack.empty())
        break;
      if (LBraceStack.back().Tok->is(BK_Unknown)) {
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else {
          // Skip NextTok over preprocessor lines, otherwise we may not
          // properly diagnose the block as a braced initializer
          // if the comma separator appears after the pp directive.
          while (NextTok->is(tok::hash)) {
            ScopedMacroState MacroState(*Line, Tokens, NextTok);
            do {
              NextTok = Tokens->getNextToken();
            } while (NextTok->isNot(tok::eof));
          }

          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // Try to detect a braced list. Note that regardless how we mark inner
          // braces here, we will overwrite the BlockKind later if we parse a
          // braced list (where all blocks inside are by default braced lists),
          // or when we explicitly detect blocks (for example while parsing
          // lambdas).

          // If we already marked the opening brace as braced list, the closing
          // must also be part of it.
          ProbablyBracedList = LBraceStack.back().Tok->is(TT_BracedListLBrace);

          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isJavaScript() &&
                                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                                 Keywords.kw_as));
          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isCpp() && NextTok->is(tok::l_paren));

          // If there is a comma, semicolon or right paren after the closing
          // brace, we assume this is a braced initializer list.
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              ProbablyBracedList ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::ellipsis);

          // Distinguish between braced list in a constructor initializer list
          // followed by constructor body, or just adjacent blocks.
          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
               LBraceStack.back().PrevTok->isOneOf(tok::identifier,
                                                   tok::greater));

          // PrevTok here is the token directly before this closing brace.
          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));

          ProbablyBracedList = ProbablyBracedList ||
                               (NextTok->is(tok::semi) &&
                                (!ExpectClassBody || LBraceStack.size() != 1));

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);

          // Note: this overrides (rather than extends) the result computed so
          // far.
          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            NextTok = Tokens->getNextToken();
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }
        }
        // Both braces of the pair get the same kind.
        if (ProbablyBracedList) {
          Tok->setBlockKind(BK_BracedInit);
          LBraceStack.back().Tok->setBlockKind(BK_BracedInit);
        } else {
          Tok->setBlockKind(BK_Block);
          LBraceStack.back().Tok->setBlockKind(BK_Block);
        }
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      if (!Tok->is(TT_StatementMacro))
        break;
      [[fallthrough]];
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      // Any of these tokens inside a still-unknown brace marks it as a block.
      if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
        LBraceStack.back().Tok->setBlockKind(BK_Block);
      break;
    default:
      break;
    }
    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (const auto &Entry : LBraceStack)
    if (Entry.Tok->is(BK_Unknown))
      Entry.Tok->setBlockKind(BK_Block);

  FormatTok = Tokens->setPosition(StoredPosition);
}

/// Mixes the std::hash of \p v into \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  std::hash<T> hasher;
  seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}

/// Returns a hash over the Kind and Line of every entry in PPStack.
/// parseBlock() compares this value before and after a block.
size_t UnwrappedLineParser::computePPHash() const {
  size_t h = 0;
  for (const auto &i : PPStack) {
    hash_combine(h, size_t(i.Kind));
    hash_combine(h, i.Line);
  }
  return h;
}

// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
// is not null, subtracts its length (plus the preceding space) when computing
// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
// running the token annotator on it so that we can restore them afterward.
bool UnwrappedLineParser::mightFitOnOneLine(
    UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
  // Without a column limit every line fits.
  const auto ColumnLimit = Style.ColumnLimit;
  if (ColumnLimit == 0)
    return true;

  auto &Tokens = ParsedLine.Tokens;
  assert(!Tokens.empty());

  const auto *LastToken = Tokens.back().Tok;
  assert(LastToken);

  SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());

  // Snapshot every token so the annotator's changes can be undone below.
  // NOTE(review): Token is a const reference, so std::move yields a const
  // rvalue; this presumably selects copy assignment and leaves
  // ParsedLine's children in place for the annotator - confirm before
  // changing it.
  int Index = 0;
  for (const auto &Token : Tokens) {
    assert(Token.Tok);
    auto &SavedToken = SavedTokens[Index++];
    SavedToken.Tok = new FormatToken;
    SavedToken.Tok->copyFrom(*Token.Tok);
    SavedToken.Children = std::move(Token.Children);
  }

  AnnotatedLine Line(ParsedLine);
  assert(Line.Last == LastToken);

  TokenAnnotator Annotator(Style, Keywords);
  Annotator.annotate(Line);
  Annotator.calculateFormattingInformation(Line);

  auto Length = LastToken->TotalLength;
  if (OpeningBrace) {
    assert(OpeningBrace != Tokens.front().Tok);
    // NOTE(review): this looks like it undoes a ColumnLimit-sized penalty
    // that was added to the brace's TotalLength - confirm against
    // TokenAnnotator.
    if (auto Prev = OpeningBrace->Previous;
        Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
      Length -= ColumnLimit;
    }
    // The brace itself plus the space before it.
    Length -= OpeningBrace->TokenText.size() + 1;
  }

  // A leading '}' is subtracted in the same way as the opening brace above.
  if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
    assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
    Length -= FirstToken->TokenText.size() + 1;
  }

  // Restore the tokens from the snapshot and release the clones.
  Index = 0;
  for (auto &Token : Tokens) {
    const auto &SavedToken = SavedTokens[Index++];
    Token.Tok->copyFrom(*SavedToken.Tok);
    Token.Children = std::move(SavedToken.Children);
    delete SavedToken.Tok;
  }

  // If these change, PPLevel needs to be used to get the correct indentation.
  assert(!Line.InMacroBody);
  assert(!Line.InPPDirective);
  return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
}

/// Parses a block that starts at the current token: a '{', a macro block
/// begin token, or (for Verilog) a begin keyword or hierarchy header.
///
/// \param MustBeDeclaration Declaration state installed for the block's
/// contents.
/// \param AddLevels Number of levels the block's contents are indented by.
/// \param MunchSemi If set, a ';' directly after the block is consumed.
/// \param KeepBraces If set, the braces are only considered removable when
/// the enclosed \p if has required (non-optional) braces.
/// \param IfKind Forwarded to parseLevel().
/// \param UnindentWhitesmithsBraces If set, the level is decremented after
/// the opening line has been added.
/// \param CanContainBracedList Forwarded to parseLevel().
/// \param NextLBracesType Forwarded to parseLevel().
/// \returns The \p if left brace reported by parseLevel() (may be null), or
/// \p nullptr if the nesting level exceeded the limit.
FormatToken *UnwrappedLineParser::parseBlock(
    bool MustBeDeclaration, unsigned AddLevels, bool MunchSemi, bool KeepBraces,
    IfStmtKind *IfKind, bool UnindentWhitesmithsBraces,
    bool CanContainBracedList, TokenType NextLBracesType) {
  auto HandleVerilogBlockLabel = [this]() {
    // ":" name
    if (Style.isVerilog() && FormatTok->is(tok::colon)) {
      nextToken();
      if (Keywords.isVerilogIdentifier(*FormatTok))
        nextToken();
    }
  };

  // Whether this is a Verilog-specific block that has a special header like a
  // module.
  const bool VerilogHierarchy =
      Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
  assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
          (Style.isVerilog() &&
           (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
         "'{' or macro block token expected");
  // The opening token of the block.
  FormatToken *Tok = FormatTok;
  const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
  // Number of finished lines before the block; used by RemoveBraces below to
  // find the line that holds the opening brace.
  auto Index = CurrentLines->size();
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->setBlockKind(BK_Block);

  // For Whitesmiths mode, jump to the next level prior to skipping over the
  // braces.
  if (!VerilogHierarchy && AddLevels > 0 &&
      Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
    ++Line->Level;
  }

  size_t PPStartHash = computePPHash();

  const unsigned InitialLevel = Line->Level;
  if (VerilogHierarchy) {
    AddLevels += parseVerilogHierarchyHeader();
  } else {
    nextToken(/*LevelDifference=*/AddLevels);
    HandleVerilogBlockLabel();
  }

  // Bail out if there are too many levels. Otherwise, the stack might overflow.
  if (Line->Level > 300)
    return nullptr;

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  size_t NbPreprocessorDirectives =
      !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  // Whitesmiths is weird here. The brace needs to be indented for the namespace
  // block, but the block itself may not be indented depending on the style
  // settings. This allows the format to back up one level in those cases.
  if (UnindentWhitesmithsBraces)
    --Line->Level;

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
    Line->Level += AddLevels;

  FormatToken *IfLBrace = nullptr;
  const bool SimpleBlock =
      parseLevel(Tok, CanContainBracedList, NextLBracesType, IfKind, &IfLBrace);

  if (eof())
    return IfLBrace;

  // The level did not end at the expected closing token: restore the level
  // and give up on this block.
  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
                 : !FormatTok->is(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->setBlockKind(BK_Block);
    return IfLBrace;
  }

  const bool IsFunctionRBrace =
      FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);

  // Decides whether the braces of this block may be removed. Locals are
  // captured by copy (the lambda is mutable), so the changes to Index and Tok
  // below only affect the lambda's own copies.
  auto RemoveBraces = [=]() mutable {
    if (!SimpleBlock)
      return false;
    assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
    assert(FormatTok->is(tok::r_brace));
    const bool WrappedOpeningBrace = !Tok->Previous;
    if (WrappedOpeningBrace && FollowedByComment)
      return false;
    const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
    if (KeepBraces && !HasRequiredIfBraces)
      return false;
    if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      if (Previous->is(tok::r_brace) && !Previous->Optional)
        return false;
    }
    assert(!CurrentLines->empty());
    auto &LastLine = CurrentLines->back();
    if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
      return false;
    if (Tok->is(TT_ElseLBrace))
      return true;
    if (WrappedOpeningBrace) {
      assert(Index > 0);
      --Index; // The line above the wrapped l_brace.
      Tok = nullptr;
    }
    return mightFitOnOneLine((*CurrentLines)[Index], Tok);
  };
  // Removable braces are recorded by linking the two brace tokens through
  // MatchingParen.
  if (RemoveBraces()) {
    Tok->MatchingParen = FormatTok;
    FormatTok->MatchingParen = Tok;
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  nextToken(/*LevelDifference=*/-AddLevels);

  // When this is a function block and there is an unnecessary semicolon
  // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
  // it later).
  if (Style.RemoveSemicolon && IsFunctionRBrace) {
    while (FormatTok->is(tok::semi)) {
      FormatTok->Optional = true;
      nextToken();
    }
  }

  HandleVerilogBlockLabel();

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  Line->Level = InitialLevel;

  if (FormatTok->is(tok::kw_noexcept)) {
    // A noexcept in a requires expression.
    nextToken();
  }

  if (FormatTok->is(tok::arrow)) {
    // Following the } or noexcept we can find a trailing return type arrow
    // as part of an implicit conversion constraint.
    nextToken();
    parseStructuralElement();
  }

  if (MunchSemi && FormatTok->is(tok::semi))
    nextToken();

  // Only link the opening and closing lines when the preprocessor stack hash
  // is the same at both ends of the block.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }

  return IfLBrace;
}

/// Returns true if \p Line starts with the tokens `goog`, `.`, `scope`, `(`.
static bool isGoogScope(const UnwrappedLine &Line) {
  // FIXME: Closure-library specific stuff should not be hard-coded but be
  // configurable.
  if (Line.Tokens.size() < 4)
    return false;
  auto I = Line.Tokens.begin();
  if (I->Tok->TokenText != "goog")
    return false;
  ++I;
  if (I->Tok->isNot(tok::period))
    return false;
  ++I;
  if (I->Tok->TokenText != "scope")
    return false;
  ++I;
  return I->Tok->is(tok::l_paren);
}

/// Returns true if \p Line starts with `(`, the `function` keyword and `(`.
static bool isIIFE(const UnwrappedLine &Line,
                   const AdditionalKeywords &Keywords) {
  // Look for the start of an immediately invoked anonymous function.
  // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
  // This is commonly done in JavaScript to create a new, anonymous scope.
  // Example: (function() { ... })()
  if (Line.Tokens.size() < 3)
    return false;
  auto I = Line.Tokens.begin();
  if (I->Tok->isNot(tok::l_paren))
    return false;
  ++I;
  if (I->Tok->isNot(Keywords.kw_function))
    return false;
  ++I;
  return I->Tok->is(tok::l_paren);
}

/// Returns the BraceWrapping setting of \p Style that corresponds to the kind
/// of \p InitialToken (namespace, class, union, struct or enum), or false for
/// any other token. A TT_NamespaceMacro token is treated like `namespace`.
static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
                                   const FormatToken &InitialToken) {
  tok::TokenKind Kind = InitialToken.Tok.getKind();
  if (InitialToken.is(TT_NamespaceMacro))
    Kind = tok::kw_namespace;

  switch (Kind) {
  case tok::kw_namespace:
    return Style.BraceWrapping.AfterNamespace;
  case tok::kw_class:
    return Style.BraceWrapping.AfterClass;
  case tok::kw_union:
    return Style.BraceWrapping.AfterUnion;
  case tok::kw_struct:
    return Style.BraceWrapping.AfterStruct;
  case tok::kw_enum:
    return Style.BraceWrapping.AfterEnum;
  default:
    return false;
  }
}

/// Parses a braced block whose lines become children of the current line
/// instead of top-level lines. Expects the current token to be '{' and
/// consumes through the token following the parsed level.
void UnwrappedLineParser::parseChildBlock(
    bool CanContainBracedList, clang::format::TokenType NextLBracesType) {
  assert(FormatTok->is(tok::l_brace));
  FormatTok->setBlockKind(BK_Block);
  const FormatToken *OpeningBrace = FormatTok;
  nextToken();
  {
    // For JavaScript goog.scope(...) and IIFE lines the body is not indented.
    bool SkipIndent = (Style.isJavaScript() &&
                       (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
    ScopedLineState LineState(*this);
    ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                            /*MustBeDeclaration=*/false);
    Line->Level += SkipIndent ? 0 : 1;
    parseLevel(OpeningBrace, CanContainBracedList, NextLBracesType);
    flushComments(isOnNewLine(*FormatTok));
    Line->Level -= SkipIndent ? 0 : 1;
  }
  nextToken();
}

/// Parses a preprocessor directive starting at the current '#' token and
/// dispatches on the directive keyword. Directives without identifier info
/// and unrecognized keywords are handled by parsePPUnknown().
void UnwrappedLineParser::parsePPDirective() {
  assert(FormatTok->is(tok::hash) && "'#' expected");
  ScopedMacroState MacroState(*Line, Tokens, FormatTok);

  nextToken();

  if (!FormatTok->Tok.getIdentifierInfo()) {
    parsePPUnknown();
    return;
  }

  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
  case tok::pp_define:
    parsePPDefine();
    return;
  case tok::pp_if:
    parsePPIf(/*IfDef=*/false);
    break;
  case tok::pp_ifdef:
  case tok::pp_ifndef:
    parsePPIf(/*IfDef=*/true);
    break;
  case tok::pp_else:
  case tok::pp_elifdef:
  case tok::pp_elifndef:
  case tok::pp_elif:
    parsePPElse();
    break;
  case tok::pp_endif:
    parsePPEndIf();
    break;
  case tok::pp_pragma:
    parsePPPragma();
    break;
  default:
    parsePPUnknown();
    break;
  }
}

/// Pushes a new entry for a conditional branch onto PPStack. The entry is
/// PP_Unreachable if \p Unreachable is set or if the enclosing entry is
/// already unreachable, and PP_Conditional otherwise.
void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
  // The recorded line is the number of lines collected so far; when
  // currently collecting preprocessor directives, the regular lines are
  // added on top.
  size_t Line = CurrentLines->size();
  if (CurrentLines == &PreprocessorDirectives)
    Line += Lines.size();

  if (Unreachable ||
      (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
    PPStack.push_back({PP_Unreachable, Line});
  } else {
    PPStack.push_back({PP_Conditional, Line});
  }
}

/// Enters a new conditional compilation chain: increments PPBranchLevel,
/// creates the per-level bookkeeping when the level is seen for the first
/// time, and pushes the condition for the chain's first branch.
void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
  ++PPBranchLevel;
  assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
  if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
    PPLevelBranchIndex.push_back(0);
    PPLevelBranchCount.push_back(0);
  }
  PPChainBranchIndex.push(Unreachable ? -1 : 0);
  // The first branch is skipped when a later branch is selected for this
  // level.
  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
  conditionalCompilationCondition(Unreachable || Skip);
}

/// Switches to the next branch of the current chain: replaces the top PPStack
/// entry and advances the chain's branch index. The new branch is marked
/// unreachable unless its index equals the one selected for this level.
void UnwrappedLineParser::conditionalCompilationAlternative() {
  if (!PPStack.empty())
    PPStack.pop_back();
  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
  if (!PPChainBranchIndex.empty())
    ++PPChainBranchIndex.top();
  conditionalCompilationCondition(
      PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
      PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
}

/// Leaves the current conditional compilation chain. Records the largest
/// number of branches seen for this level, then pops the level, the chain
/// index and the PPStack entry (each only if present).
void UnwrappedLineParser::conditionalCompilationEnd() {
  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
    if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
      PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
  }
  // Guard against #endif's without #if.
  if (PPBranchLevel > -1)
    --PPBranchLevel;
  if (!PPChainBranchIndex.empty())
    PPChainBranchIndex.pop();
  if (!PPStack.empty())
    PPStack.pop_back();
}

void UnwrappedLineParser::parsePPIf(bool IfDef) {
  bool IfNDef = FormatTok->is(tok::pp_ifndef);
  nextToken();
  bool Unreachable = false;
  if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
    Unreachable = true;
  if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
    Unreachable = true;
  conditionalCompilationStart(Unreachable);
  FormatToken *IfCondition = FormatTok;
  // If there's a #ifndef on the first line, and the only lines before it are
  // comments, it could be an include guard.
1079 bool MaybeIncludeGuard = IfNDef; 1080 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1081 for (auto &Line : Lines) { 1082 if (!Line.Tokens.front().Tok->is(tok::comment)) { 1083 MaybeIncludeGuard = false; 1084 IncludeGuard = IG_Rejected; 1085 break; 1086 } 1087 } 1088 } 1089 --PPBranchLevel; 1090 parsePPUnknown(); 1091 ++PPBranchLevel; 1092 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1093 IncludeGuard = IG_IfNdefed; 1094 IncludeGuardToken = IfCondition; 1095 } 1096 } 1097 1098 void UnwrappedLineParser::parsePPElse() { 1099 // If a potential include guard has an #else, it's not an include guard. 1100 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 1101 IncludeGuard = IG_Rejected; 1102 // Don't crash when there is an #else without an #if. 1103 assert(PPBranchLevel >= -1); 1104 if (PPBranchLevel == -1) 1105 conditionalCompilationStart(/*Unreachable=*/true); 1106 conditionalCompilationAlternative(); 1107 --PPBranchLevel; 1108 parsePPUnknown(); 1109 ++PPBranchLevel; 1110 } 1111 1112 void UnwrappedLineParser::parsePPEndIf() { 1113 conditionalCompilationEnd(); 1114 parsePPUnknown(); 1115 // If the #endif of a potential include guard is the last thing in the file, 1116 // then we found an include guard. 
1117 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && 1118 Style.IndentPPDirectives != FormatStyle::PPDIS_None) { 1119 IncludeGuard = IG_Found; 1120 } 1121 } 1122 1123 void UnwrappedLineParser::parsePPDefine() { 1124 nextToken(); 1125 1126 if (!FormatTok->Tok.getIdentifierInfo()) { 1127 IncludeGuard = IG_Rejected; 1128 IncludeGuardToken = nullptr; 1129 parsePPUnknown(); 1130 return; 1131 } 1132 1133 if (IncludeGuard == IG_IfNdefed && 1134 IncludeGuardToken->TokenText == FormatTok->TokenText) { 1135 IncludeGuard = IG_Defined; 1136 IncludeGuardToken = nullptr; 1137 for (auto &Line : Lines) { 1138 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 1139 IncludeGuard = IG_Rejected; 1140 break; 1141 } 1142 } 1143 } 1144 1145 // In the context of a define, even keywords should be treated as normal 1146 // identifiers. Setting the kind to identifier is not enough, because we need 1147 // to treat additional keywords like __except as well, which are already 1148 // identifiers. Setting the identifier info to null interferes with include 1149 // guard processing above, and changes preprocessing nesting. 1150 FormatTok->Tok.setKind(tok::identifier); 1151 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define); 1152 nextToken(); 1153 if (FormatTok->Tok.getKind() == tok::l_paren && 1154 !FormatTok->hasWhitespaceBefore()) { 1155 parseParens(); 1156 } 1157 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1158 Line->Level += PPBranchLevel + 1; 1159 addUnwrappedLine(); 1160 ++Line->Level; 1161 1162 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1); 1163 assert((int)Line->PPLevel >= 0); 1164 Line->InMacroBody = true; 1165 1166 // Errors during a preprocessor directive can only affect the layout of the 1167 // preprocessor directive, and thus we ignore them. 
An alternative approach 1168 // would be to use the same approach we use on the file level (no 1169 // re-indentation if there was a structural error) within the macro 1170 // definition. 1171 parseFile(); 1172 } 1173 1174 void UnwrappedLineParser::parsePPPragma() { 1175 Line->InPragmaDirective = true; 1176 parsePPUnknown(); 1177 } 1178 1179 void UnwrappedLineParser::parsePPUnknown() { 1180 do { 1181 nextToken(); 1182 } while (!eof()); 1183 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1184 Line->Level += PPBranchLevel + 1; 1185 addUnwrappedLine(); 1186 } 1187 1188 // Here we exclude certain tokens that are not usually the first token in an 1189 // unwrapped line. This is used in attempt to distinguish macro calls without 1190 // trailing semicolons from other constructs split to several lines. 1191 static bool tokenCanStartNewLine(const FormatToken &Tok) { 1192 // Semicolon can be a null-statement, l_square can be a start of a macro or 1193 // a C++11 attribute, but this doesn't seem to be common. 1194 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 1195 Tok.isNot(TT_AttributeSquare) && 1196 // Tokens that can only be used as binary operators and a part of 1197 // overloaded operator names. 
1198 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 1199 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 1200 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 1201 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 1202 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 1203 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 1204 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 1205 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 1206 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 1207 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 1208 Tok.isNot(tok::lesslessequal) && 1209 // Colon is used in labels, base class lists, initializer lists, 1210 // range-based for loops, ternary operator, but should never be the 1211 // first token in an unwrapped line. 1212 Tok.isNot(tok::colon) && 1213 // 'noexcept' is a trailing annotation. 1214 Tok.isNot(tok::kw_noexcept); 1215 } 1216 1217 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 1218 const FormatToken *FormatTok) { 1219 // FIXME: This returns true for C/C++ keywords like 'struct'. 
1220 return FormatTok->is(tok::identifier) && 1221 (!FormatTok->Tok.getIdentifierInfo() || 1222 !FormatTok->isOneOf( 1223 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 1224 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 1225 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 1226 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 1227 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 1228 Keywords.kw_instanceof, Keywords.kw_interface, 1229 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from)); 1230 } 1231 1232 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 1233 const FormatToken *FormatTok) { 1234 return FormatTok->Tok.isLiteral() || 1235 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 1236 mustBeJSIdent(Keywords, FormatTok); 1237 } 1238 1239 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 1240 // when encountered after a value (see mustBeJSIdentOrValue). 1241 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 1242 const FormatToken *FormatTok) { 1243 return FormatTok->isOneOf( 1244 tok::kw_return, Keywords.kw_yield, 1245 // conditionals 1246 tok::kw_if, tok::kw_else, 1247 // loops 1248 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 1249 // switch/case 1250 tok::kw_switch, tok::kw_case, 1251 // exceptions 1252 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 1253 // declaration 1254 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 1255 Keywords.kw_async, Keywords.kw_function, 1256 // import/export 1257 Keywords.kw_import, tok::kw_export); 1258 } 1259 1260 // Checks whether a token is a type in K&R C (aka C78). 
static bool isC78Type(const FormatToken &Tok) {
  // tok::identifier is accepted as well -- presumably so that typedef'd type
  // names count as types; TODO confirm.
  return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
                     tok::kw_unsigned, tok::kw_float, tok::kw_double,
                     tok::identifier);
}

// This function checks whether a token starts the first parameter declaration
// in a K&R C (aka C78) function definition, e.g.:
//   int f(a, b)
//   short a, b;
//   {
//      return a + b;
//   }
//
// \p Tok is the candidate token, \p Next the token after it, and \p FuncName
// the token expected to be the function name. All three must be non-null.
static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
                               const FormatToken *FuncName) {
  assert(Tok);
  assert(Next);
  assert(FuncName);

  if (FuncName->isNot(tok::identifier))
    return false;

  // The function name must be preceded by a `*` or by a C78 type.
  const FormatToken *Prev = FuncName->Previous;
  if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
    return false;

  // The candidate itself must look like the start of a declaration.
  if (!isC78Type(*Tok) &&
      !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
    return false;
  }

  if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
    return false;

  // Walk backwards from the candidate: it must directly follow `)`, which in
  // turn must follow an identifier that is preceded by `(` or `,`.
  Tok = Tok->Previous;
  if (!Tok || Tok->isNot(tok::r_paren))
    return false;

  Tok = Tok->Previous;
  if (!Tok || Tok->isNot(tok::identifier))
    return false;

  return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
}

// Parses an `import` declaration and emits it as one unwrapped line. The
// current token must be `import`.
//
// Returns false, without consuming anything, if the token after `import`
// (comments skipped) has no identifier info and is not `:`, `<` or a string
// literal. Returns true once the line has been added.
bool UnwrappedLineParser::parseModuleImport() {
  assert(FormatTok->is(Keywords.kw_import) && "'import' expected");

  if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
      !Token->Tok.getIdentifierInfo() &&
      !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
    return false;
  }

  nextToken();
  while (!eof()) {
    if (FormatTok->is(tok::colon)) {
      FormatTok->setFinalizedType(TT_ModulePartitionColon);
    }
    // Handle import <foo/bar.h> as we would an include statement.
    else if (FormatTok->is(tok::less)) {
      nextToken();
      while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
        // Mark tokens up to the trailing line comments as implicit string
        // literals.
        if (FormatTok->isNot(tok::comment) &&
            !FormatTok->TokenText.startswith("//")) {
          FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
        }
        nextToken();
      }
    }
    // The semicolon ends the declaration; it is consumed as part of the line.
    if (FormatTok->is(tok::semi)) {
      nextToken();
      break;
    }
    nextToken();
  }

  addUnwrappedLine();
  return true;
}

// readTokenWithJavaScriptASI reads the next token and terminates the current
// line if JavaScript Automatic Semicolon Insertion must
// happen between the current token and the next token.
//
// This method is conservative - it cannot cover all edge cases of JavaScript,
// but only aims to correctly handle certain well known cases. It *must not*
// terminate the line in speculative cases.
void UnwrappedLineParser::readTokenWithJavaScriptASI() {
  FormatToken *Previous = FormatTok;
  readToken();
  FormatToken *Next = FormatTok;

  // Nothing is inserted unless the next token, or the first comment queued in
  // front of it, starts on a new line.
  bool IsOnSameLine =
      CommentsBeforeNextToken.empty()
          ? Next->NewlinesBefore == 0
          : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
  if (IsOnSameLine)
    return;

  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
  bool PreviousStartsTemplateExpr =
      Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
    // If the line contains an '@' sign, the previous token might be an
    // annotation, which can precede another identifier/value.
    bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
      return LineNode.Tok->is(tok::at);
    });
    if (HasAt)
      return;
  }
  // A value followed by `!` on the next line: end the line before the `!`.
  if (Next->is(tok::exclaim) && PreviousMustBeValue)
    return addUnwrappedLine();
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  bool NextEndsTemplateExpr =
      Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
  // A value on the next line after a value, `]`, `)`, `++` or `--`, unless
  // either side belongs to a template string expression.
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
      (PreviousMustBeValue ||
       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
                         tok::minusminus))) {
    return addUnwrappedLine();
  }
  // A declaration or statement keyword on the next line after a value or `)`.
  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
      isJSDeclOrStmt(Keywords, Next)) {
    return addUnwrappedLine();
  }
}

// Parses one structural element starting at the current token, dispatching to
// the specialised parsers for the constructs it recognises.
//
// The function works in two stages: a first switch handles tokens that only
// make sense at the beginning of a line, then a loop consumes tokens until a
// construct that ends the element is found or eof() is reached.
//
// \p IsTopLevel       enables the detection of K&R C parameter declarations
//                     after a parameter list.
// \p NextLBracesType  if not TT_Unknown, is set as the finalized type of an
//                     `{` reached by the loop.
// \p IfKind           is forwarded to parseIfThenElse().
// \p IfLeftBrace      if non-null, receives the result of parseIfThenElse().
// \p HasDoWhile       if non-null, is set to true after a do-while was parsed.
// \p HasLabel         if non-null, is set to true after a goto label was
//                     parsed.
void UnwrappedLineParser::parseStructuralElement(
    bool IsTopLevel, TokenType NextLBracesType, IfStmtKind *IfKind,
    FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
  // TableGen `include "file"` directive.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }

  if (Style.isVerilog()) {
    if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
      parseForOrWhileLoop(/*HasParens=*/false);
      return;
    }
    if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
                           Keywords.kw_assume, Keywords.kw_cover)) {
      parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
      return;
    }

    // Skip things that can exist before keywords like 'if' and 'case'.
    while (true) {
      if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
                             Keywords.kw_unique0)) {
        nextToken();
      } else if (FormatTok->is(tok::l_paren) &&
                 Tokens->peekNextToken()->is(tok::star)) {
        parseParens();
      } else {
        break;
      }
    }
  }

  // Tokens that only make sense at the beginning of a line.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setFinalizedType(TT_InlineASMBrace);
      nextToken();
      // Everything up to the closing brace is marked as finalized.
      while (FormatTok && !eof()) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setFinalizedType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# the keyword is simply consumed; elsewhere it
    // is parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
        Style.isCSharp()) {
      nextToken();
    } else {
      parseAccessSpecifier();
    }
    return;
  case tok::kw_if: {
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    FormatToken *Tok = parseIfThenElse(IfKind);
    if (IfLeftBrace)
      *IfLeftBrace = Tok;
    return;
  }
  case tok::kw_for:
  case tok::kw_while:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    parseDoWhile();
    if (HasDoWhile)
      *HasDoWhile = true;
    return;
  case tok::kw_switch:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'switch: string' field declaration.
      break;
    }
    parseSwitch();
    return;
  case tok::kw_default:
    // In Verilog default along with other labels are handled in the next loop.
    if (Style.isVerilog())
      break;
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'default: string' field declaration.
      break;
    }
    nextToken();
    if (FormatTok->is(tok::colon)) {
      FormatTok->setFinalizedType(TT_CaseLabelColon);
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    // Proto: there are no switch/case statements.
    if (Style.isProto()) {
      nextToken();
      return;
    }
    if (Style.isVerilog()) {
      parseBlock();
      addUnwrappedLine();
      return;
    }
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'case: string' field declaration.
      nextToken();
      break;
    }
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    if (Style.isVerilog()) {
      // In Verilog an extern module declaration looks like a start of module.
      // But there is no body and endmodule. So we handle it separately.
      if (Keywords.isVerilogHierarchy(*FormatTok)) {
        parseVerilogHierarchyHeader();
        return;
      }
    } else if (FormatTok->is(tok::string_literal)) {
      // extern "C" { ... } style linkage block.
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock)
          addUnwrappedLine();
        // Either we indent or for backwards compatibility we follow the
        // AfterExternBlock style.
        unsigned AddLevels =
            (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
                    (Style.BraceWrapping.AfterExternBlock &&
                     Style.IndentExternBlock ==
                         FormatStyle::IEBS_AfterExternBlock)
                ? 1u
                : 0u;
        parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.isJavaScript()) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (Style.isCpp()) {
      nextToken();
      if (FormatTok->is(tok::kw_namespace)) {
        parseNamespace();
        return;
      }
      if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
        return;
    }
    break;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.isJavaScript()) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        // Proto import: `import [public] "file";`.
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      if (Style.isCpp() && parseModuleImport())
        return;
    }
    // `signals:` / `slots:` style section labels (with their Q_ variants).
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Consume the rest of the element token by token. Each case either returns
  // once the element is complete or breaks to continue with the next token.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always) {
            addUnwrappedLine();
          }
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->is(tok::l_paren)) {
          // Skip synchronization object
          parseParens();
        }
        if (FormatTok->is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always) {
            addUnwrappedLine();
          }
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above). But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_requires: {
      if (Style.isCpp()) {
        bool ParsedClause = parseRequires();
        if (ParsedClause)
          return;
      } else {
        nextToken();
      }
      break;
    }
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies to C++ and Verilog.
      if (!Style.isCpp() && !Style.isVerilog()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM)) {
        parseEnum();
      }
      break;
    case tok::kw_class:
      if (Style.isVerilog()) {
        parseBlock();
        addUnwrappedLine();
        return;
      }
      [[fallthrough]];
    case tok::kw_struct:
    case tok::kw_union:
      if (parseStructLike())
        return;
      break;
    case tok::kw_decltype:
      nextToken();
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
        assert(FormatTok->Previous);
        // Remember `decltype(auto)` for the function body handling below.
        if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto,
                                              tok::l_paren)) {
          Line->SeenDecltypeAuto = true;
        }
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class)) {
        nextToken();
      }
      if (Style.isJavaScript() && FormatTok &&
          FormatTok->Tok.getIdentifierInfo()) {
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      }
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The closing brace is not consumed here.
      addUnwrappedLine();
      return;
    case tok::l_paren: {
      parseParens();
      // Break the unwrapped line if a K&R C function definition has a parameter
      // declaration.
      if (!IsTopLevel || !Style.isCpp() || !Previous || eof())
        break;
      if (isC78ParameterDecl(FormatTok,
                             Tokens->peekNextToken(/*SkipComment=*/true),
                             Previous)) {
        addUnwrappedLine();
        return;
      }
      break;
    }
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      nextToken();
      // Block return type.
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier()) {
        nextToken();
        // Return types: pointers are ok too.
        while (FormatTok->is(tok::star))
          nextToken();
      }
      // Block argument list.
      if (FormatTok->is(tok::l_paren))
        parseParens();
      // Block body.
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (NextLBracesType != TT_Unknown)
        FormatTok->setFinalizedType(NextLBracesType);
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.Language == FormatStyle::LK_Java &&
            Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
          // If necessary, we could set the type to something different than
          // TT_FunctionLBrace.
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always) {
            addUnwrappedLine();
          }
        } else if (Style.BraceWrapping.AfterFunction) {
          addUnwrappedLine();
        }
        FormatTok->setFinalizedType(TT_FunctionLBrace);
        parseBlock();
        IsDecltypeAutoFunction = false;
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.isJavaScript()) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Look one token ahead and then restore the token position.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (!mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (Style.isVerilog()) {
        if (FormatTok->is(Keywords.kw_table)) {
          parseVerilogTable();
          return;
        }
        if (Keywords.isVerilogBegin(*FormatTok) ||
            Keywords.isVerilogHierarchy(*FormatTok)) {
          parseBlock();
          addUnwrappedLine();
          return;
        }
      }

      if (!Style.isCpp() && FormatTok->is(Keywords.kw_interface)) {
        if (parseStructLike())
          return;
        break;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;

      FormatToken *PreviousToken = FormatTok;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.isJavaScript())
        break;

      // True if the line holds exactly one token once leading comments (and,
      // in Verilog, `#` tokens following them) are skipped.
      auto OneTokenSoFar = [&]() {
        auto I = Line->Tokens.begin(), E = Line->Tokens.end();
        while (I != E && I->Tok->is(tok::comment))
          ++I;
        while (I != E && Style.isVerilog() && I->Tok->is(tok::hash))
          ++I;
        return I != E && (++I == E);
      };
      if (OneTokenSoFar()) {
        // In Verilog labels can be any expression, so we don't do them here.
        if (!Style.isVerilog() && FormatTok->is(tok::colon) &&
            !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          FormatTok->setFinalizedType(TT_GotoLabelColon);
          parseLabel(!Style.IndentGotoLabels);
          if (HasLabel)
            *HasLabel = true;
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Only all-uppercase names qualify; without an argument list the name
        // must also be at least five characters long.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          if (PreviousToken->isNot(TT_UntouchableMacroFunc))
            PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      if ((Style.isJavaScript() || Style.isCSharp()) &&
          FormatTok->is(TT_FatArrow)) {
        tryToParseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->setBlockKind(BK_BracedInit);
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->is(tok::less)) {
        // In Proto, `= <` opens a list that is closed by `>`.
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    case tok::kw_case:
      // Proto: there are no switch/case statements.
      if (Style.isProto()) {
        nextToken();
        return;
      }
      // In Verilog switch is called case.
      if (Style.isVerilog()) {
        parseBlock();
        addUnwrappedLine();
        return;
      }
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // 'case: string' field declaration.
        nextToken();
        break;
      }
      parseCaseLabel();
      break;
    case tok::kw_default:
      nextToken();
      if (Style.isVerilog()) {
        if (FormatTok->is(tok::colon)) {
          // The label will be handled in the next iteration.
          break;
        }
        if (FormatTok->is(Keywords.kw_clocking)) {
          // A default clocking block.
          parseBlock();
          addUnwrappedLine();
          return;
        }
        parseVerilogCaseLabel();
        return;
      }
      break;
    case tok::colon:
      nextToken();
      if (Style.isVerilog()) {
        parseVerilogCaseLabel();
        return;
      }
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

// NOTE: Only the beginning of this definition is visible in this chunk; it
// continues in the text that follows and is reproduced unchanged.
bool UnwrappedLineParser::tryToParsePropertyAccessor() {
  assert(FormatTok->is(tok::l_brace));
  if (!Style.isCSharp())
    return false;
  // See if it's a property accessor.
  if (FormatTok->Previous->isNot(tok::identifier))
    return false;

  // See if we are inside a property accessor.
  //
  // Record the current tokenPosition so that we can advance and
  // reset the current token.
`Next` is not set yet so we need 2079 // another way to advance along the token stream. 2080 unsigned int StoredPosition = Tokens->getPosition(); 2081 FormatToken *Tok = Tokens->getNextToken(); 2082 2083 // A trivial property accessor is of the form: 2084 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] } 2085 // Track these as they do not require line breaks to be introduced. 2086 bool HasSpecialAccessor = false; 2087 bool IsTrivialPropertyAccessor = true; 2088 while (!eof()) { 2089 if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private, 2090 tok::kw_protected, Keywords.kw_internal, Keywords.kw_get, 2091 Keywords.kw_init, Keywords.kw_set)) { 2092 if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set)) 2093 HasSpecialAccessor = true; 2094 Tok = Tokens->getNextToken(); 2095 continue; 2096 } 2097 if (Tok->isNot(tok::r_brace)) 2098 IsTrivialPropertyAccessor = false; 2099 break; 2100 } 2101 2102 if (!HasSpecialAccessor) { 2103 Tokens->setPosition(StoredPosition); 2104 return false; 2105 } 2106 2107 // Try to parse the property accessor: 2108 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties 2109 Tokens->setPosition(StoredPosition); 2110 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction) 2111 addUnwrappedLine(); 2112 nextToken(); 2113 do { 2114 switch (FormatTok->Tok.getKind()) { 2115 case tok::r_brace: 2116 nextToken(); 2117 if (FormatTok->is(tok::equal)) { 2118 while (!eof() && FormatTok->isNot(tok::semi)) 2119 nextToken(); 2120 nextToken(); 2121 } 2122 addUnwrappedLine(); 2123 return true; 2124 case tok::l_brace: 2125 ++Line->Level; 2126 parseBlock(/*MustBeDeclaration=*/true); 2127 addUnwrappedLine(); 2128 --Line->Level; 2129 break; 2130 case tok::equal: 2131 if (FormatTok->is(TT_FatArrow)) { 2132 ++Line->Level; 2133 do { 2134 nextToken(); 2135 } while (!eof() && FormatTok->isNot(tok::semi)); 2136 nextToken(); 2137 addUnwrappedLine(); 2138 --Line->Level; 2139 break; 2140 
} 2141 nextToken(); 2142 break; 2143 default: 2144 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init, 2145 Keywords.kw_set) && 2146 !IsTrivialPropertyAccessor) { 2147 // Non-trivial get/set needs to be on its own line. 2148 addUnwrappedLine(); 2149 } 2150 nextToken(); 2151 } 2152 } while (!eof()); 2153 2154 // Unreachable for well-formed code (paired '{' and '}'). 2155 return true; 2156 } 2157 2158 bool UnwrappedLineParser::tryToParseLambda() { 2159 assert(FormatTok->is(tok::l_square)); 2160 if (!Style.isCpp()) { 2161 nextToken(); 2162 return false; 2163 } 2164 FormatToken &LSquare = *FormatTok; 2165 if (!tryToParseLambdaIntroducer()) 2166 return false; 2167 2168 bool SeenArrow = false; 2169 bool InTemplateParameterList = false; 2170 2171 while (FormatTok->isNot(tok::l_brace)) { 2172 if (FormatTok->isSimpleTypeSpecifier()) { 2173 nextToken(); 2174 continue; 2175 } 2176 switch (FormatTok->Tok.getKind()) { 2177 case tok::l_brace: 2178 break; 2179 case tok::l_paren: 2180 parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference); 2181 break; 2182 case tok::l_square: 2183 parseSquare(); 2184 break; 2185 case tok::less: 2186 assert(FormatTok->Previous); 2187 if (FormatTok->Previous->is(tok::r_square)) 2188 InTemplateParameterList = true; 2189 nextToken(); 2190 break; 2191 case tok::kw_auto: 2192 case tok::kw_class: 2193 case tok::kw_template: 2194 case tok::kw_typename: 2195 case tok::amp: 2196 case tok::star: 2197 case tok::kw_const: 2198 case tok::kw_constexpr: 2199 case tok::kw_consteval: 2200 case tok::comma: 2201 case tok::greater: 2202 case tok::identifier: 2203 case tok::numeric_constant: 2204 case tok::coloncolon: 2205 case tok::kw_mutable: 2206 case tok::kw_noexcept: 2207 case tok::kw_static: 2208 nextToken(); 2209 break; 2210 // Specialization of a template with an integer parameter can contain 2211 // arithmetic, logical, comparison and ternary operators. 
//
    // FIXME: This also accepts sequences of operators that are not in the scope
    // of a template argument list.
    //
    // In a C++ lambda a template type can only occur after an arrow. We use
    // this as an heuristic to distinguish between Objective-C expressions
    // followed by an `a->b` expression, such as:
    // ([obj func:arg] + a->b)
    // Otherwise the code below would parse as a lambda.
    //
    // FIXME: This heuristic is incorrect for C++20 generic lambdas with
    // explicit template lists: []<bool b = true && false>(U &&u){}
    case tok::plus:
    case tok::minus:
    case tok::exclaim:
    case tok::tilde:
    case tok::slash:
    case tok::percent:
    case tok::lessless:
    case tok::pipe:
    case tok::pipepipe:
    case tok::ampamp:
    case tok::caret:
    case tok::equalequal:
    case tok::exclaimequal:
    case tok::greaterequal:
    case tok::lessequal:
    case tok::question:
    case tok::colon:
    case tok::ellipsis:
    case tok::kw_true:
    case tok::kw_false:
      if (SeenArrow || InTemplateParameterList) {
        nextToken();
        break;
      }
      // An operator outside a trailing return type / template parameter list:
      // stop scanning. The introducer has already been consumed, so the
      // function still reports true.
      return true;
    case tok::arrow:
      // This might or might not actually be a lambda arrow (this could be an
      // ObjC method invocation followed by a dereferencing arrow). We might
      // reset this back to TT_Unknown in TokenAnnotator.
      FormatTok->setFinalizedType(TT_LambdaArrow);
      SeenArrow = true;
      nextToken();
      break;
    case tok::kw_requires: {
      auto *RequiresToken = FormatTok;
      nextToken();
      parseRequiresClause(RequiresToken);
      break;
    }
    default:
      // Any other token ends the scan without a lambda body.
      return true;
    }
  }

  // FormatTok is the '{' that opens the lambda body.
  FormatTok->setFinalizedType(TT_LambdaLBrace);
  LSquare.setFinalizedType(TT_LambdaLSquare);

  // Keep the current line's SeenDecltypeAuto flag on NestedLambdas while the
  // body is parsed; parseParens() reads NestedLambdas.back() when deciding
  // whether the parentheses of a return statement are removable.
  NestedLambdas.push_back(Line->SeenDecltypeAuto);
  parseChildBlock();
  assert(!NestedLambdas.empty());
  NestedLambdas.pop_back();

  return true;
}

// Consumes the '[' in FormatTok and decides whether it can start a lambda
// introducer.
//
// Returns false, with only the '[' consumed, when:
//  - the token before '[' carries identifier info and is not one of `return`,
//    `co_await`, `co_yield`, `co_return`, or it closes a scope;
//  - the '[' is a C++ structured binding;
//  - the token after '[' is another '[';
//  - the brackets are empty and the next non-comment token is '>'.
// Otherwise parses the rest of the bracket group with
// parseSquare(/*LambdaIntroducer=*/true) and returns true.
bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
  const FormatToken *Previous = FormatTok->Previous;
  const FormatToken *LeftSquare = FormatTok;
  nextToken();
  if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
                     !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
                                        tok::kw_co_yield, tok::kw_co_return)) ||
                    Previous->closesScope())) ||
      LeftSquare->isCppStructuredBinding(Style)) {
    return false;
  }
  if (FormatTok->is(tok::l_square))
    return false;
  if (FormatTok->is(tok::r_square)) {
    const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
    if (Next->is(tok::greater))
      return false;
  }
  parseSquare(/*LambdaIntroducer=*/true);
  return true;
}

// Parses a JavaScript function starting at `function` or `async function`:
// an optional generator '*', an optional name, the parameter list, an optional
// `: type` annotation and finally the body as a child block. Returns early if
// no '(' follows the name, or if a ';' is found where the body would start.
void UnwrappedLineParser::tryToParseJSFunction() {
  assert(FormatTok->is(Keywords.kw_function) ||
         FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
  if (FormatTok->is(Keywords.kw_async))
    nextToken();
  // Consume "function".
  nextToken();

  // Consume * (generator function). Treat it like C++'s overloaded operators.
  if (FormatTok->is(tok::star)) {
    FormatTok->setFinalizedType(TT_OverloadedOperator);
    nextToken();
  }

  // Consume function name.
if (FormatTok->is(tok::identifier))
    nextToken();

  // Without a parameter list this is not a function we can parse.
  if (FormatTok->isNot(tok::l_paren))
    return;

  // Parse formal parameter list.
  parseParens();

  if (FormatTok->is(tok::colon)) {
    // Parse a type definition.
    nextToken();

    // Eat the type declaration. For braced inline object types, balance braces,
    // otherwise just parse until finding an l_brace for the function body.
    if (FormatTok->is(tok::l_brace))
      tryToParseBracedList();
    else
      while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
        nextToken();
  }

  // A ';' here means there is no body to parse.
  if (FormatTok->is(tok::semi))
    return;

  parseChildBlock();
}

// Parses the '{' in FormatTok as a braced list unless its block kind is
// BK_Block. Computes the block kind first if it is still BK_Unknown.
// Returns true if a braced list was parsed; returns false, consuming nothing,
// for a block.
bool UnwrappedLineParser::tryToParseBracedList() {
  if (FormatTok->is(BK_Unknown))
    calculateBraceTypes();
  assert(FormatTok->isNot(BK_Unknown));
  if (FormatTok->is(BK_Block))
    return false;
  nextToken();
  parseBracedList();
  return true;
}

// Consumes a fat arrow (JavaScript / C#) and, if a '{' follows, parses it as
// a child block. Returns whether a child block was parsed; the arrow itself
// is consumed in both cases.
bool UnwrappedLineParser::tryToParseChildBlock() {
  assert(Style.isJavaScript() || Style.isCSharp());
  assert(FormatTok->is(TT_FatArrow));
  // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
  // They always start an expression or a child block if followed by a curly
  // brace.
  nextToken();
  if (FormatTok->isNot(tok::l_brace))
    return false;
  parseChildBlock();
  return true;
}

// Parses the contents of a braced list up to and including the token of kind
// ClosingBraceKind. The opening token must already have been consumed.
//
// ContinueOnSemicolons: keep parsing after a ';' (still recorded as an error)
//   instead of returning at it.
// IsEnum: with !Style.AllowShortEnumsOnASingleLine, emit an unwrapped line
//   after every ',' and before the closing token.
//
// Returns true only if the closing token was reached without a ';' error.
bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
                                          bool IsEnum,
                                          tok::TokenKind ClosingBraceKind) {
  bool HasError = false;

  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
  // replace this by using parseAssignmentExpression() inside.
2375 do { 2376 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) && 2377 tryToParseChildBlock()) { 2378 continue; 2379 } 2380 if (Style.isJavaScript()) { 2381 if (FormatTok->is(Keywords.kw_function) || 2382 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 2383 tryToParseJSFunction(); 2384 continue; 2385 } 2386 if (FormatTok->is(tok::l_brace)) { 2387 // Could be a method inside of a braced list `{a() { return 1; }}`. 2388 if (tryToParseBracedList()) 2389 continue; 2390 parseChildBlock(); 2391 } 2392 } 2393 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 2394 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2395 addUnwrappedLine(); 2396 nextToken(); 2397 return !HasError; 2398 } 2399 switch (FormatTok->Tok.getKind()) { 2400 case tok::l_square: 2401 if (Style.isCSharp()) 2402 parseSquare(); 2403 else 2404 tryToParseLambda(); 2405 break; 2406 case tok::l_paren: 2407 parseParens(); 2408 // JavaScript can just have free standing methods and getters/setters in 2409 // object literals. Detect them by a "{" following ")". 2410 if (Style.isJavaScript()) { 2411 if (FormatTok->is(tok::l_brace)) 2412 parseChildBlock(); 2413 break; 2414 } 2415 break; 2416 case tok::l_brace: 2417 // Assume there are no blocks inside a braced init list apart 2418 // from the ones we explicitly parse out (like lambdas). 2419 FormatTok->setBlockKind(BK_BracedInit); 2420 nextToken(); 2421 parseBracedList(); 2422 break; 2423 case tok::less: 2424 if (Style.Language == FormatStyle::LK_Proto || 2425 ClosingBraceKind == tok::greater) { 2426 nextToken(); 2427 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 2428 /*ClosingBraceKind=*/tok::greater); 2429 } else { 2430 nextToken(); 2431 } 2432 break; 2433 case tok::semi: 2434 // JavaScript (or more precisely TypeScript) can have semicolons in braced 2435 // lists (in so-called TypeMemberLists). 
Thus, the semicolon cannot be 2436 // used for error recovery if we have otherwise determined that this is 2437 // a braced list. 2438 if (Style.isJavaScript()) { 2439 nextToken(); 2440 break; 2441 } 2442 HasError = true; 2443 if (!ContinueOnSemicolons) 2444 return !HasError; 2445 nextToken(); 2446 break; 2447 case tok::comma: 2448 nextToken(); 2449 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2450 addUnwrappedLine(); 2451 break; 2452 default: 2453 nextToken(); 2454 break; 2455 } 2456 } while (!eof()); 2457 return false; 2458 } 2459 2460 /// \brief Parses a pair of parentheses (and everything between them). 2461 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all 2462 /// double ampersands. This applies for all nested scopes as well. 2463 /// 2464 /// Returns whether there is a `=` token between the parentheses. 2465 bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { 2466 assert(FormatTok->is(tok::l_paren) && "'(' expected."); 2467 auto *LeftParen = FormatTok; 2468 bool SeenEqual = false; 2469 const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace); 2470 nextToken(); 2471 do { 2472 switch (FormatTok->Tok.getKind()) { 2473 case tok::l_paren: 2474 if (parseParens(AmpAmpTokenType)) 2475 SeenEqual = true; 2476 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 2477 parseChildBlock(); 2478 break; 2479 case tok::r_paren: 2480 if (!MightBeStmtExpr && 2481 Style.RemoveParentheses > FormatStyle::RPS_Leave) { 2482 const auto *Prev = LeftParen->Previous; 2483 const auto *Next = Tokens->peekNextToken(); 2484 const bool DoubleParens = 2485 Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren); 2486 const auto *PrevPrev = Prev ? 
Prev->getPreviousNonComment() : nullptr; 2487 const bool Blacklisted = 2488 PrevPrev && 2489 (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) || 2490 (SeenEqual && 2491 (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) || 2492 PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if)))); 2493 const bool ReturnParens = 2494 Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement && 2495 ((NestedLambdas.empty() && !IsDecltypeAutoFunction) || 2496 (!NestedLambdas.empty() && !NestedLambdas.back())) && 2497 Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next && 2498 Next->is(tok::semi); 2499 if ((DoubleParens && !Blacklisted) || ReturnParens) { 2500 LeftParen->Optional = true; 2501 FormatTok->Optional = true; 2502 } 2503 } 2504 nextToken(); 2505 return SeenEqual; 2506 case tok::r_brace: 2507 // A "}" inside parenthesis is an error if there wasn't a matching "{". 2508 return SeenEqual; 2509 case tok::l_square: 2510 tryToParseLambda(); 2511 break; 2512 case tok::l_brace: 2513 if (!tryToParseBracedList()) 2514 parseChildBlock(); 2515 break; 2516 case tok::at: 2517 nextToken(); 2518 if (FormatTok->is(tok::l_brace)) { 2519 nextToken(); 2520 parseBracedList(); 2521 } 2522 break; 2523 case tok::equal: 2524 SeenEqual = true; 2525 if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) 2526 tryToParseChildBlock(); 2527 else 2528 nextToken(); 2529 break; 2530 case tok::kw_class: 2531 if (Style.isJavaScript()) 2532 parseRecord(/*ParseAsExpr=*/true); 2533 else 2534 nextToken(); 2535 break; 2536 case tok::identifier: 2537 if (Style.isJavaScript() && 2538 (FormatTok->is(Keywords.kw_function) || 2539 FormatTok->startsSequence(Keywords.kw_async, 2540 Keywords.kw_function))) { 2541 tryToParseJSFunction(); 2542 } else { 2543 nextToken(); 2544 } 2545 break; 2546 case tok::kw_requires: { 2547 auto RequiresToken = FormatTok; 2548 nextToken(); 2549 parseRequiresExpression(RequiresToken); 2550 break; 2551 } 2552 case tok::ampamp: 2553 if (AmpAmpTokenType != TT_Unknown) 2554 
FormatTok->setFinalizedType(AmpAmpTokenType); 2555 [[fallthrough]]; 2556 default: 2557 nextToken(); 2558 break; 2559 } 2560 } while (!eof()); 2561 return SeenEqual; 2562 } 2563 2564 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 2565 if (!LambdaIntroducer) { 2566 assert(FormatTok->is(tok::l_square) && "'[' expected."); 2567 if (tryToParseLambda()) 2568 return; 2569 } 2570 do { 2571 switch (FormatTok->Tok.getKind()) { 2572 case tok::l_paren: 2573 parseParens(); 2574 break; 2575 case tok::r_square: 2576 nextToken(); 2577 return; 2578 case tok::r_brace: 2579 // A "}" inside parenthesis is an error if there wasn't a matching "{". 2580 return; 2581 case tok::l_square: 2582 parseSquare(); 2583 break; 2584 case tok::l_brace: { 2585 if (!tryToParseBracedList()) 2586 parseChildBlock(); 2587 break; 2588 } 2589 case tok::at: 2590 nextToken(); 2591 if (FormatTok->is(tok::l_brace)) { 2592 nextToken(); 2593 parseBracedList(); 2594 } 2595 break; 2596 default: 2597 nextToken(); 2598 break; 2599 } 2600 } while (!eof()); 2601 } 2602 2603 void UnwrappedLineParser::keepAncestorBraces() { 2604 if (!Style.RemoveBracesLLVM) 2605 return; 2606 2607 const int MaxNestingLevels = 2; 2608 const int Size = NestedTooDeep.size(); 2609 if (Size >= MaxNestingLevels) 2610 NestedTooDeep[Size - MaxNestingLevels] = true; 2611 NestedTooDeep.push_back(false); 2612 } 2613 2614 static FormatToken *getLastNonComment(const UnwrappedLine &Line) { 2615 for (const auto &Token : llvm::reverse(Line.Tokens)) 2616 if (Token.Tok->isNot(tok::comment)) 2617 return Token.Tok; 2618 2619 return nullptr; 2620 } 2621 2622 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) { 2623 FormatToken *Tok = nullptr; 2624 2625 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() && 2626 PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) { 2627 Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never 2628 ? 
getLastNonComment(*Line) 2629 : Line->Tokens.back().Tok; 2630 assert(Tok); 2631 if (Tok->BraceCount < 0) { 2632 assert(Tok->BraceCount == -1); 2633 Tok = nullptr; 2634 } else { 2635 Tok->BraceCount = -1; 2636 } 2637 } 2638 2639 addUnwrappedLine(); 2640 ++Line->Level; 2641 parseStructuralElement(); 2642 2643 if (Tok) { 2644 assert(!Line->InPPDirective); 2645 Tok = nullptr; 2646 for (const auto &L : llvm::reverse(*CurrentLines)) { 2647 if (!L.InPPDirective && getLastNonComment(L)) { 2648 Tok = L.Tokens.back().Tok; 2649 break; 2650 } 2651 } 2652 assert(Tok); 2653 ++Tok->BraceCount; 2654 } 2655 2656 if (CheckEOF && eof()) 2657 addUnwrappedLine(); 2658 2659 --Line->Level; 2660 } 2661 2662 static void markOptionalBraces(FormatToken *LeftBrace) { 2663 if (!LeftBrace) 2664 return; 2665 2666 assert(LeftBrace->is(tok::l_brace)); 2667 2668 FormatToken *RightBrace = LeftBrace->MatchingParen; 2669 if (!RightBrace) { 2670 assert(!LeftBrace->Optional); 2671 return; 2672 } 2673 2674 assert(RightBrace->is(tok::r_brace)); 2675 assert(RightBrace->MatchingParen == LeftBrace); 2676 assert(LeftBrace->Optional == RightBrace->Optional); 2677 2678 LeftBrace->Optional = true; 2679 RightBrace->Optional = true; 2680 } 2681 2682 void UnwrappedLineParser::handleAttributes() { 2683 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`. 2684 if (FormatTok->is(TT_AttributeMacro)) 2685 nextToken(); 2686 if (FormatTok->is(tok::l_square)) 2687 handleCppAttributes(); 2688 } 2689 2690 bool UnwrappedLineParser::handleCppAttributes() { 2691 // Handle [[likely]] / [[unlikely]] attributes. 2692 assert(FormatTok->is(tok::l_square)); 2693 if (!tryToParseSimpleAttribute()) 2694 return false; 2695 parseSquare(); 2696 return true; 2697 } 2698 2699 /// Returns whether \c Tok begins a block. 2700 bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const { 2701 // FIXME: rename the function or make 2702 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work. 2703 return Style.isVerilog() ? 
Keywords.isVerilogBegin(Tok) 2704 : Tok.is(tok::l_brace); 2705 } 2706 2707 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind, 2708 bool KeepBraces, 2709 bool IsVerilogAssert) { 2710 assert((FormatTok->is(tok::kw_if) || 2711 (Style.isVerilog() && 2712 FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert, 2713 Keywords.kw_assume, Keywords.kw_cover))) && 2714 "'if' expected"); 2715 nextToken(); 2716 2717 if (IsVerilogAssert) { 2718 // Handle `assert #0` and `assert final`. 2719 if (FormatTok->is(Keywords.kw_verilogHash)) { 2720 nextToken(); 2721 if (FormatTok->is(tok::numeric_constant)) 2722 nextToken(); 2723 } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property, 2724 Keywords.kw_sequence)) { 2725 nextToken(); 2726 } 2727 } 2728 2729 // Handle `if !consteval`. 2730 if (FormatTok->is(tok::exclaim)) 2731 nextToken(); 2732 2733 bool KeepIfBraces = true; 2734 if (FormatTok->is(tok::kw_consteval)) { 2735 nextToken(); 2736 } else { 2737 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces; 2738 if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier)) 2739 nextToken(); 2740 if (FormatTok->is(tok::l_paren)) { 2741 FormatTok->setFinalizedType(TT_ConditionLParen); 2742 parseParens(); 2743 } 2744 } 2745 handleAttributes(); 2746 // The then action is optional in Verilog assert statements. 
2747 if (IsVerilogAssert && FormatTok->is(tok::semi)) { 2748 nextToken(); 2749 addUnwrappedLine(); 2750 return nullptr; 2751 } 2752 2753 bool NeedsUnwrappedLine = false; 2754 keepAncestorBraces(); 2755 2756 FormatToken *IfLeftBrace = nullptr; 2757 IfStmtKind IfBlockKind = IfStmtKind::NotIf; 2758 2759 if (isBlockBegin(*FormatTok)) { 2760 FormatTok->setFinalizedType(TT_ControlStatementLBrace); 2761 IfLeftBrace = FormatTok; 2762 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2763 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2764 /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind); 2765 if (Style.BraceWrapping.BeforeElse) 2766 addUnwrappedLine(); 2767 else 2768 NeedsUnwrappedLine = true; 2769 } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) { 2770 addUnwrappedLine(); 2771 } else { 2772 parseUnbracedBody(); 2773 } 2774 2775 if (Style.RemoveBracesLLVM) { 2776 assert(!NestedTooDeep.empty()); 2777 KeepIfBraces = KeepIfBraces || 2778 (IfLeftBrace && !IfLeftBrace->MatchingParen) || 2779 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly || 2780 IfBlockKind == IfStmtKind::IfElseIf; 2781 } 2782 2783 bool KeepElseBraces = KeepIfBraces; 2784 FormatToken *ElseLeftBrace = nullptr; 2785 IfStmtKind Kind = IfStmtKind::IfOnly; 2786 2787 if (FormatTok->is(tok::kw_else)) { 2788 if (Style.RemoveBracesLLVM) { 2789 NestedTooDeep.back() = false; 2790 Kind = IfStmtKind::IfElse; 2791 } 2792 nextToken(); 2793 handleAttributes(); 2794 if (isBlockBegin(*FormatTok)) { 2795 const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if); 2796 FormatTok->setFinalizedType(TT_ElseLBrace); 2797 ElseLeftBrace = FormatTok; 2798 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2799 IfStmtKind ElseBlockKind = IfStmtKind::NotIf; 2800 FormatToken *IfLBrace = 2801 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2802 /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind); 2803 if (FormatTok->is(tok::kw_else)) { 2804 KeepElseBraces = KeepElseBraces 
|| 2805 ElseBlockKind == IfStmtKind::IfOnly || 2806 ElseBlockKind == IfStmtKind::IfElseIf; 2807 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) { 2808 KeepElseBraces = true; 2809 assert(ElseLeftBrace->MatchingParen); 2810 markOptionalBraces(ElseLeftBrace); 2811 } 2812 addUnwrappedLine(); 2813 } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) { 2814 const FormatToken *Previous = Tokens->getPreviousToken(); 2815 assert(Previous); 2816 const bool IsPrecededByComment = Previous->is(tok::comment); 2817 if (IsPrecededByComment) { 2818 addUnwrappedLine(); 2819 ++Line->Level; 2820 } 2821 bool TooDeep = true; 2822 if (Style.RemoveBracesLLVM) { 2823 Kind = IfStmtKind::IfElseIf; 2824 TooDeep = NestedTooDeep.pop_back_val(); 2825 } 2826 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces); 2827 if (Style.RemoveBracesLLVM) 2828 NestedTooDeep.push_back(TooDeep); 2829 if (IsPrecededByComment) 2830 --Line->Level; 2831 } else { 2832 parseUnbracedBody(/*CheckEOF=*/true); 2833 } 2834 } else { 2835 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse; 2836 if (NeedsUnwrappedLine) 2837 addUnwrappedLine(); 2838 } 2839 2840 if (!Style.RemoveBracesLLVM) 2841 return nullptr; 2842 2843 assert(!NestedTooDeep.empty()); 2844 KeepElseBraces = KeepElseBraces || 2845 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || 2846 NestedTooDeep.back(); 2847 2848 NestedTooDeep.pop_back(); 2849 2850 if (!KeepIfBraces && !KeepElseBraces) { 2851 markOptionalBraces(IfLeftBrace); 2852 markOptionalBraces(ElseLeftBrace); 2853 } else if (IfLeftBrace) { 2854 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen; 2855 if (IfRightBrace) { 2856 assert(IfRightBrace->MatchingParen == IfLeftBrace); 2857 assert(!IfLeftBrace->Optional); 2858 assert(!IfRightBrace->Optional); 2859 IfLeftBrace->MatchingParen = nullptr; 2860 IfRightBrace->MatchingParen = nullptr; 2861 } 2862 } 2863 2864 if (IfKind) 2865 *IfKind = Kind; 2866 2867 return IfLeftBrace; 2868 } 2869 2870 void 
UnwrappedLineParser::parseTryCatch() { 2871 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 2872 nextToken(); 2873 bool NeedsUnwrappedLine = false; 2874 if (FormatTok->is(tok::colon)) { 2875 // We are in a function try block, what comes is an initializer list. 2876 nextToken(); 2877 2878 // In case identifiers were removed by clang-tidy, what might follow is 2879 // multiple commas in sequence - before the first identifier. 2880 while (FormatTok->is(tok::comma)) 2881 nextToken(); 2882 2883 while (FormatTok->is(tok::identifier)) { 2884 nextToken(); 2885 if (FormatTok->is(tok::l_paren)) 2886 parseParens(); 2887 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) && 2888 FormatTok->is(tok::l_brace)) { 2889 do { 2890 nextToken(); 2891 } while (!FormatTok->is(tok::r_brace)); 2892 nextToken(); 2893 } 2894 2895 // In case identifiers were removed by clang-tidy, what might follow is 2896 // multiple commas in sequence - after the first identifier. 2897 while (FormatTok->is(tok::comma)) 2898 nextToken(); 2899 } 2900 } 2901 // Parse try with resource. 2902 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) 2903 parseParens(); 2904 2905 keepAncestorBraces(); 2906 2907 if (FormatTok->is(tok::l_brace)) { 2908 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2909 parseBlock(); 2910 if (Style.BraceWrapping.BeforeCatch) 2911 addUnwrappedLine(); 2912 else 2913 NeedsUnwrappedLine = true; 2914 } else if (!FormatTok->is(tok::kw_catch)) { 2915 // The C++ standard requires a compound-statement after a try. 2916 // If there's none, we try to assume there's a structuralElement 2917 // and try to continue. 
addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }
  // Parse the handlers that follow the try block.
  while (true) {
    if (FormatTok->is(tok::at))
      nextToken();
    // Stop at the first token that does not start a handler: catch, __except,
    // __finally, Java/JavaScript finally, or Objective-C @catch / @finally.
    if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
                             tok::kw___finally) ||
          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
           FormatTok->is(Keywords.kw_finally)) ||
          (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
           FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
      break;
    }
    nextToken();
    // Skip the handler's header up to its '{'.
    while (FormatTok->isNot(tok::l_brace)) {
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
        continue;
      }
      if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
        // No handler block follows: give up, undoing keepAncestorBraces().
        if (Style.RemoveBracesLLVM)
          NestedTooDeep.pop_back();
        return;
      }
      nextToken();
    }
    NeedsUnwrappedLine = false;
    Line->MustBeDeclaration = false;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock();
    if (Style.BraceWrapping.BeforeCatch)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  }

  // Undo the push done by keepAncestorBraces().
  if (Style.RemoveBracesLLVM)
    NestedTooDeep.pop_back();

  if (NeedsUnwrappedLine)
    addUnwrappedLine();
}

// Parses a namespace (or namespace macro) header and, if a '{' follows, its
// body. The body is indented by one level when Style.NamespaceIndentation is
// NI_All, or NI_Inner with DeclarationScopeStack.size() > 1.
void UnwrappedLineParser::parseNamespace() {
  assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
         "'namespace' expected");

  const FormatToken &InitialToken = *FormatTok;
  nextToken();
  if (InitialToken.is(TT_NamespaceMacro)) {
    parseParens();
  } else {
    // Skip the namespace name, including `inline`, attributes and nested
    // `::` / `.` separators.
    while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
                              tok::l_square, tok::period, tok::l_paren) ||
           (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
      if (FormatTok->is(tok::l_square))
        parseSquare();
      else if (FormatTok->is(tok::l_paren))
        parseParens();
      else
        nextToken();
    }
  }
  if (FormatTok->is(tok::l_brace)) {
    if (ShouldBreakBeforeBrace(Style, InitialToken))
      addUnwrappedLine();

    unsigned AddLevels =
        Style.NamespaceIndentation == FormatStyle::NI_All ||
                (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
                 DeclarationScopeStack.size() > 1)
            ? 1u
            : 0u;
    bool ManageWhitesmithsBraces =
        AddLevels == 0u &&
        Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;

    // If we're in Whitesmiths mode, indent the brace if we're not indenting
    // the whole block.
    if (ManageWhitesmithsBraces)
      ++Line->Level;

    // Munch the semicolon after a namespace. This is more common than one would
    // think. Putting the semicolon into its own line is very ugly.
    parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
               /*KeepBraces=*/true, /*IfKind=*/nullptr,
               ManageWhitesmithsBraces);

    addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);

    if (ManageWhitesmithsBraces)
      --Line->Level;
  }
  // FIXME: Add error handling.
}

// Parses what follows a `new` keyword. C# and Java get dedicated handling;
// for every other language only the keyword itself is consumed here.
void UnwrappedLineParser::parseNew() {
  assert(FormatTok->is(tok::kw_new) && "'new' expected");
  nextToken();

  if (Style.isCSharp()) {
    do {
      // Handle constructor invocation, e.g. `new(field: value)`.
      if (FormatTok->is(tok::l_paren))
        parseParens();

      // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
      if (FormatTok->is(tok::l_brace))
        parseBracedList();

      if (FormatTok->isOneOf(tok::semi, tok::comma))
        return;

      nextToken();
    } while (!eof());
  }

  if (Style.Language != FormatStyle::LK_Java)
    return;

  // In Java, we can parse everything up to the parens, which aren't optional.
  do {
    // There should not be a ;, { or } before the new's open paren.
    if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
      return;

    // Consume the parens.
3048 if (FormatTok->is(tok::l_paren)) { 3049 parseParens(); 3050 3051 // If there is a class body of an anonymous class, consume that as child. 3052 if (FormatTok->is(tok::l_brace)) 3053 parseChildBlock(); 3054 return; 3055 } 3056 nextToken(); 3057 } while (!eof()); 3058 } 3059 3060 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) { 3061 keepAncestorBraces(); 3062 3063 if (isBlockBegin(*FormatTok)) { 3064 if (!KeepBraces) 3065 FormatTok->setFinalizedType(TT_ControlStatementLBrace); 3066 FormatToken *LeftBrace = FormatTok; 3067 CompoundStatementIndenter Indenter(this, Style, Line->Level); 3068 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 3069 /*MunchSemi=*/true, KeepBraces); 3070 if (!KeepBraces) { 3071 assert(!NestedTooDeep.empty()); 3072 if (!NestedTooDeep.back()) 3073 markOptionalBraces(LeftBrace); 3074 } 3075 if (WrapRightBrace) 3076 addUnwrappedLine(); 3077 } else { 3078 parseUnbracedBody(); 3079 } 3080 3081 if (!KeepBraces) 3082 NestedTooDeep.pop_back(); 3083 } 3084 3085 void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) { 3086 assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) || 3087 (Style.isVerilog() && 3088 FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb, 3089 Keywords.kw_always_ff, Keywords.kw_always_latch, 3090 Keywords.kw_final, Keywords.kw_initial, 3091 Keywords.kw_foreach, Keywords.kw_forever, 3092 Keywords.kw_repeat))) && 3093 "'for', 'while' or foreach macro expected"); 3094 const bool KeepBraces = !Style.RemoveBracesLLVM || 3095 !FormatTok->isOneOf(tok::kw_for, tok::kw_while); 3096 3097 nextToken(); 3098 // JS' for await ( ... 3099 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await)) 3100 nextToken(); 3101 if (Style.isCpp() && FormatTok->is(tok::kw_co_await)) 3102 nextToken(); 3103 if (HasParens && FormatTok->is(tok::l_paren)) { 3104 // The type is only set for Verilog basically because we were afraid to 3105 // change the existing behavior for loops. 
See the discussion on D121756 for 3106 // details. 3107 if (Style.isVerilog()) 3108 FormatTok->setFinalizedType(TT_ConditionLParen); 3109 parseParens(); 3110 } 3111 // Event control. 3112 if (Style.isVerilog()) 3113 parseVerilogSensitivityList(); 3114 3115 handleAttributes(); 3116 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true); 3117 } 3118 3119 void UnwrappedLineParser::parseDoWhile() { 3120 assert(FormatTok->is(tok::kw_do) && "'do' expected"); 3121 nextToken(); 3122 3123 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile); 3124 3125 // FIXME: Add error handling. 3126 if (!FormatTok->is(tok::kw_while)) { 3127 addUnwrappedLine(); 3128 return; 3129 } 3130 3131 // If in Whitesmiths mode, the line with the while() needs to be indented 3132 // to the same level as the block. 3133 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 3134 ++Line->Level; 3135 3136 nextToken(); 3137 parseStructuralElement(); 3138 } 3139 3140 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { 3141 nextToken(); 3142 unsigned OldLineLevel = Line->Level; 3143 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 3144 --Line->Level; 3145 if (LeftAlignLabel) 3146 Line->Level = 0; 3147 3148 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && 3149 FormatTok->is(tok::l_brace)) { 3150 3151 CompoundStatementIndenter Indenter(this, Line->Level, 3152 Style.BraceWrapping.AfterCaseLabel, 3153 Style.BraceWrapping.IndentBraces); 3154 parseBlock(); 3155 if (FormatTok->is(tok::kw_break)) { 3156 if (Style.BraceWrapping.AfterControlStatement == 3157 FormatStyle::BWACS_Always) { 3158 addUnwrappedLine(); 3159 if (!Style.IndentCaseBlocks && 3160 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 3161 ++Line->Level; 3162 } 3163 } 3164 parseStructuralElement(); 3165 } 3166 addUnwrappedLine(); 3167 } else { 3168 if (FormatTok->is(tok::semi)) 3169 nextToken(); 3170 addUnwrappedLine(); 3171 } 3172 Line->Level = OldLineLevel; 3173 if 
(FormatTok->isNot(tok::l_brace)) { 3174 parseStructuralElement(); 3175 addUnwrappedLine(); 3176 } 3177 } 3178 3179 void UnwrappedLineParser::parseCaseLabel() { 3180 assert(FormatTok->is(tok::kw_case) && "'case' expected"); 3181 3182 // FIXME: fix handling of complex expressions here. 3183 do { 3184 nextToken(); 3185 if (FormatTok->is(tok::colon)) { 3186 FormatTok->setFinalizedType(TT_CaseLabelColon); 3187 break; 3188 } 3189 } while (!eof()); 3190 parseLabel(); 3191 } 3192 3193 void UnwrappedLineParser::parseSwitch() { 3194 assert(FormatTok->is(tok::kw_switch) && "'switch' expected"); 3195 nextToken(); 3196 if (FormatTok->is(tok::l_paren)) 3197 parseParens(); 3198 3199 keepAncestorBraces(); 3200 3201 if (FormatTok->is(tok::l_brace)) { 3202 CompoundStatementIndenter Indenter(this, Style, Line->Level); 3203 parseBlock(); 3204 addUnwrappedLine(); 3205 } else { 3206 addUnwrappedLine(); 3207 ++Line->Level; 3208 parseStructuralElement(); 3209 --Line->Level; 3210 } 3211 3212 if (Style.RemoveBracesLLVM) 3213 NestedTooDeep.pop_back(); 3214 } 3215 3216 // Operators that can follow a C variable. 
static bool isCOperatorFollowingVar(tok::TokenKind kind) {
  // Returns true for exactly the token kinds listed below and false for
  // everything else.
  switch (kind) {
  case tok::ampamp:
  case tok::ampequal:
  case tok::arrow:
  case tok::caret:
  case tok::caretequal:
  case tok::comma:
  case tok::ellipsis:
  case tok::equal:
  case tok::equalequal:
  case tok::exclaim:
  case tok::exclaimequal:
  case tok::greater:
  case tok::greaterequal:
  case tok::greatergreater:
  case tok::greatergreaterequal:
  case tok::l_paren:
  case tok::l_square:
  case tok::less:
  case tok::lessequal:
  case tok::lessless:
  case tok::lesslessequal:
  case tok::minus:
  case tok::minusequal:
  case tok::minusminus:
  case tok::percent:
  case tok::percentequal:
  case tok::period:
  case tok::pipe:
  case tok::pipeequal:
  case tok::pipepipe:
  case tok::plus:
  case tok::plusequal:
  case tok::plusplus:
  case tok::question:
  case tok::r_brace:
  case tok::r_paren:
  case tok::r_square:
  case tok::semi:
  case tok::slash:
  case tok::slashequal:
  case tok::star:
  case tok::starequal:
    return true;
  default:
    return false;
  }
}

/// Handles a token that may be an access specifier.
///
/// The candidate token (and an optional Qt 'slots' keyword after it) is
/// consumed. If a ':' follows, it is consumed as well and the line is ended.
/// If the following token is neither '::' nor accepted by
/// isCOperatorFollowingVar(), the line is ended without consuming that token.
/// Otherwise the candidate is assumed to be used as an ordinary name and its
/// token kind is rewritten to tok::identifier.
void UnwrappedLineParser::parseAccessSpecifier() {
  FormatToken *AccessSpecifierCandidate = FormatTok;
  nextToken();
  // Understand Qt's slots.
  if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
    nextToken();
  // Otherwise, we don't know what it is, and we'd better keep the next token.
  if (FormatTok->is(tok::colon)) {
    nextToken();
    addUnwrappedLine();
  } else if (!FormatTok->is(tok::coloncolon) &&
             !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
    // Not a variable name nor namespace name.
    addUnwrappedLine();
  } else if (AccessSpecifierCandidate) {
    // Consider the access specifier to be a C identifier.
    AccessSpecifierCandidate->Tok.setKind(tok::identifier);
  }
}

/// \brief Parses a requires, decides if it is a clause or an expression.
/// \pre The current token has to be the requires keyword.
/// \returns true if it parsed a clause.
bool clang::format::UnwrappedLineParser::parseRequires() {
  assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
  auto RequiresToken = FormatTok;

  // We try to guess if it is a requires clause, or a requires expression. For
  // that we first consume the keyword and check the next token.
  nextToken();

  switch (FormatTok->Tok.getKind()) {
  case tok::l_brace:
    // This can only be an expression, never a clause.
    parseRequiresExpression(RequiresToken);
    return false;
  case tok::l_paren:
    // Clauses and expression can start with a paren, it's unclear what we have.
    break;
  default:
    // All other tokens can only be a clause.
    parseRequiresClause(RequiresToken);
    return true;
  }

  // Looking forward we would have to decide if there are function declaration
  // like arguments to the requires expression:
  //   requires (T t) {
  // Or there is a constraint expression for the requires clause:
  //   requires (C<T> && ...

  // But first let's look behind.
  auto *PreviousNonComment = RequiresToken->getPreviousNonComment();

  if (!PreviousNonComment ||
      PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
    // If there is no token, or an expression left brace, we are a requires
    // clause within a requires expression.
    parseRequiresClause(RequiresToken);
    return true;
  }

  switch (PreviousNonComment->Tok.getKind()) {
  case tok::greater:
  case tok::r_paren:
  case tok::kw_noexcept:
  case tok::kw_const:
    // This is a requires clause.
    parseRequiresClause(RequiresToken);
    return true;
  case tok::amp:
  case tok::ampamp: {
    // This can be either:
    //   if (... && requires (T t) ...)
    // Or
    //   void member(...) && requires (C<T> ...
    // We check the one token before that for a const:
    //   void member(...) const && requires (C<T> ...
    auto PrevPrev = PreviousNonComment->getPreviousNonComment();
    if (PrevPrev && PrevPrev->is(tok::kw_const)) {
      parseRequiresClause(RequiresToken);
      return true;
    }
    // Still undecided: fall out of the switch to the forward scan below.
    break;
  }
  default:
    if (PreviousNonComment->isTypeOrIdentifier()) {
      // This is a requires clause.
      parseRequiresClause(RequiresToken);
      return true;
    }
    // It's an expression.
    parseRequiresExpression(RequiresToken);
    return false;
  }

  // Now we look forward and try to check if the paren content is a parameter
  // list. The parameters can be cv-qualified and contain references or
  // pointers.
  // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
  // of stuff: typename, const, *, &, &&, ::, identifiers.

  // Remember where we are so that every exit path below can rewind the token
  // source before the real parse starts.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *NextToken = Tokens->getNextToken();
  int Lookahead = 0;
  auto PeekNext = [&Lookahead, &NextToken, this] {
    ++Lookahead;
    NextToken = Tokens->getNextToken();
  };

  bool FoundType = false;
  bool LastWasColonColon = false;
  int OpenAngles = 0;

  // Inspect at most 50 tokens; past that we give up and assume a clause.
  for (; Lookahead < 50; PeekNext()) {
    switch (NextToken->Tok.getKind()) {
    case tok::kw_volatile:
    case tok::kw_const:
    case tok::comma:
      FormatTok = Tokens->setPosition(StoredPosition);
      parseRequiresExpression(RequiresToken);
      return false;
    case tok::r_paren:
    case tok::pipepipe:
      FormatTok = Tokens->setPosition(StoredPosition);
      parseRequiresClause(RequiresToken);
      return true;
    case tok::eof:
      // Break out of the loop.
      Lookahead = 50;
      break;
    case tok::coloncolon:
      LastWasColonColon = true;
      break;
    case tok::identifier:
      // A second identifier that is not part of a qualified name and not
      // inside angle brackets is taken as "TYPE NAME", i.e. a parameter.
      if (FoundType && !LastWasColonColon && OpenAngles == 0) {
        FormatTok = Tokens->setPosition(StoredPosition);
        parseRequiresExpression(RequiresToken);
        return false;
      }
      FoundType = true;
      LastWasColonColon = false;
      break;
    case tok::less:
      ++OpenAngles;
      break;
    case tok::greater:
      --OpenAngles;
      break;
    default:
      if (NextToken->isSimpleTypeSpecifier()) {
        FormatTok = Tokens->setPosition(StoredPosition);
        parseRequiresExpression(RequiresToken);
        return false;
      }
      break;
    }
  }
  // This seems to be a complicated expression, just assume it's a clause.
  FormatTok = Tokens->setPosition(StoredPosition);
  parseRequiresClause(RequiresToken);
  return true;
}

/// \brief Parses a requires clause.
/// \param RequiresToken The requires keyword token, which starts this clause.
/// \pre We need to be on the next token after the requires keyword.
/// \sa parseRequiresExpression
///
/// Returns if it either has finished parsing the clause, or it detects, that
/// the clause is incorrect.
void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
  assert(FormatTok->getPreviousNonComment() == RequiresToken);
  assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");

  // If there is no previous token, we are within a requires expression,
  // otherwise we will always have the template or function declaration in front
  // of it.
  bool InRequiresExpression =
      !RequiresToken->Previous ||
      RequiresToken->Previous->is(TT_RequiresExpressionLBrace);

  RequiresToken->setFinalizedType(InRequiresExpression
                                      ? TT_RequiresClauseInARequiresExpression
                                      : TT_RequiresClause);

  // NOTE: parseConstraintExpression is only ever called from this function.
  // It could be inlined into here.
  parseConstraintExpression();

  // Flag the last token consumed as part of the constraint expression.
  if (!InRequiresExpression)
    FormatTok->Previous->ClosesRequiresClause = true;
}

/// \brief Parses a requires expression.
/// \param RequiresToken The requires keyword token, which starts this clause.
/// \pre We need to be on the next token after the requires keyword.
/// \sa parseRequiresClause
///
/// Returns if it either has finished parsing the expression, or it detects,
/// that the expression is incorrect.
void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
  assert(FormatTok->getPreviousNonComment() == RequiresToken);
  assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");

  RequiresToken->setFinalizedType(TT_RequiresExpression);

  // Optional parameter list.
  if (FormatTok->is(tok::l_paren)) {
    FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
    parseParens();
  }

  // Requirement body.
  if (FormatTok->is(tok::l_brace)) {
    FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
    parseChildBlock(/*CanContainBracedList=*/false,
                    /*NextLBracesType=*/TT_CompoundRequirementLBrace);
  }
}

/// \brief Parses a constraint expression.
///
/// This is the body of a requires clause. It returns, when the parsing is
/// complete, or the expression is incorrect.
void UnwrappedLineParser::parseConstraintExpression() {
  // The special handling for lambdas is needed since tryToParseLambda() eats a
  // token and if a requires expression is the last part of a requires clause
  // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
  // not set on the correct token. Thus we need to be aware if we even expect a
  // lambda to be possible.
  // template <typename T> requires requires { ... } [[nodiscard]] ...;
  bool LambdaNextTimeAllowed = true;

  // Within lambda declarations, it is permitted to put a requires clause after
  // its template parameter list, which would place the requires clause right
  // before the parentheses of the parameters of the lambda declaration. Thus,
  // we track if we expect to see grouping parentheses at all.
  // Without this check, `requires foo<T> (T t)` in the below example would be
  // seen as the whole requires clause, accidentally eating the parameters of
  // the lambda.
  // [&]<typename T> requires foo<T> (T t) { ... };
  bool TopLevelParensAllowed = true;

  do {
    // Read the flag for this iteration and reset it for the next one; cases
    // below re-enable it where a lambda may follow.
    bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);

    switch (FormatTok->Tok.getKind()) {
    case tok::kw_requires: {
      auto RequiresToken = FormatTok;
      nextToken();
      parseRequiresExpression(RequiresToken);
      break;
    }

    case tok::l_paren:
      if (!TopLevelParensAllowed)
        return;
      parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
      TopLevelParensAllowed = false;
      break;

    case tok::l_square:
      if (!LambdaThisTimeAllowed || !tryToParseLambda())
        return;
      break;

    case tok::kw_const:
    case tok::semi:
    case tok::kw_class:
    case tok::kw_struct:
    case tok::kw_union:
      return;

    case tok::l_brace:
      // Potential function body.
      return;

    case tok::ampamp:
    case tok::pipepipe:
      FormatTok->setFinalizedType(TT_BinaryOperator);
      nextToken();
      LambdaNextTimeAllowed = true;
      TopLevelParensAllowed = true;
      break;

    case tok::comma:
    case tok::comment:
      // These do not change whether a lambda may follow.
      LambdaNextTimeAllowed = LambdaThisTimeAllowed;
      nextToken();
      break;

    case tok::kw_sizeof:
    case tok::greater:
    case tok::greaterequal:
    case tok::greatergreater:
    case tok::less:
    case tok::lessequal:
    case tok::lessless:
    case tok::equalequal:
    case tok::exclaim:
    case tok::exclaimequal:
    case tok::plus:
    case tok::minus:
    case tok::star:
    case tok::slash:
      LambdaNextTimeAllowed = true;
      TopLevelParensAllowed = true;
      // Just eat them.
      nextToken();
      break;

    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_true:
    case tok::kw_false:
      TopLevelParensAllowed = false;
      // Just eat them.
      nextToken();
      break;

    case tok::kw_static_cast:
    case tok::kw_const_cast:
    case tok::kw_reinterpret_cast:
    case tok::kw_dynamic_cast:
      nextToken();
      if (!FormatTok->is(tok::less))
        return;

      // Parse the template argument of the cast up to the closing '>'.
      nextToken();
      parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                      /*ClosingBraceKind=*/tok::greater);
      break;

    default:
      if (!FormatTok->Tok.getIdentifierInfo()) {
        // Identifiers are part of the default case, we check for more than
        // tok::identifier to handle builtin type traits.
        return;
      }

      // We need to differentiate identifiers for a template deduction guide,
      // variables, or function return types (the constraint expression has
      // ended before that), and basically all other cases. But it's easier to
      // check the other way around.
      assert(FormatTok->Previous);
      switch (FormatTok->Previous->Tok.getKind()) {
      case tok::coloncolon:  // Nested identifier.
      case tok::ampamp:      // Start of a function or variable for the
      case tok::pipepipe:    // constraint expression. (binary)
      case tok::exclaim:     // The same as above, but unary.
      case tok::kw_requires: // Initial identifier of a requires clause.
      case tok::equal:       // Initial identifier of a concept declaration.
        break;
      default:
        return;
      }

      // Read identifier with optional template declaration.
      nextToken();
      if (FormatTok->is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      TopLevelParensAllowed = false;
      break;
    }
  } while (!eof());
}

/// Parses an enum declaration or definition.
///
/// \returns false when the tokens turn out not to be an enum: in JavaScript
/// when 'enum' is followed by ':' or '?', in protobuf when it is followed by
/// '=', and in C++ when two identifiers appear in a row. Returns true in all
/// other cases, including a declaration without a body.
bool UnwrappedLineParser::parseEnum() {
  const FormatToken &InitialToken = *FormatTok;

  // Won't be 'enum' for NS_ENUMs.
  if (FormatTok->is(tok::kw_enum))
    nextToken();

  // In TypeScript, "enum" can also be used as property name, e.g. in interface
  // declarations. An "enum" keyword followed by a colon would be a syntax
  // error and thus assume it is just an identifier.
  if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
    return false;

  // In protobuf, "enum" can be used as a field name.
  if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
    return false;

  // Eat up enum class ...
  if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
    nextToken();

  // Consume the enum name, base type and anything else in the header.
  while (FormatTok->Tok.getIdentifierInfo() ||
         FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
                            tok::greater, tok::comma, tok::question,
                            tok::l_square, tok::r_square)) {
    if (Style.isVerilog()) {
      FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
      nextToken();
      // In Verilog the base type can have dimensions.
      while (FormatTok->is(tok::l_square))
        parseSquare();
    } else {
      nextToken();
    }
    // We can have macros or attributes in between 'enum' and the enum name.
    if (FormatTok->is(tok::l_paren))
      parseParens();
    if (FormatTok->is(TT_AttributeSquare)) {
      parseSquare();
      // Consume the closing TT_AttributeSquare.
      if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
        nextToken();
    }
    if (FormatTok->is(tok::identifier)) {
      nextToken();
      // If there are two identifiers in a row, this is likely an elaborate
      // return type. In Java, this can be "implements", etc.
      if (Style.isCpp() && FormatTok->is(tok::identifier))
        return false;
    }
  }

  // Just a declaration or something is wrong.
  if (FormatTok->isNot(tok::l_brace))
    return true;
  FormatTok->setFinalizedType(TT_EnumLBrace);
  FormatTok->setBlockKind(BK_Block);

  if (Style.Language == FormatStyle::LK_Java) {
    // Java enums are different.
    parseJavaEnumBody();
    return true;
  }
  if (Style.Language == FormatStyle::LK_Proto) {
    parseBlock(/*MustBeDeclaration=*/true);
    return true;
  }

  if (!Style.AllowShortEnumsOnASingleLine &&
      ShouldBreakBeforeBrace(Style, InitialToken)) {
    addUnwrappedLine();
  }
  // Parse enum body.
  nextToken();
  if (!Style.AllowShortEnumsOnASingleLine) {
    // Put the enumerators on their own lines, one level deeper.
    addUnwrappedLine();
    Line->Level += 1;
  }
  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
                                   /*IsEnum=*/true);
  if (!Style.AllowShortEnumsOnASingleLine)
    Line->Level -= 1;
  if (HasError) {
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }
  return true;

  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "enum A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}

/// Parses a record via parseRecord() and, for Java, JavaScript and C#, also
/// finishes the line.
///
/// \returns true if the unwrapped line was ended here (Java, JavaScript, C#),
/// false if the caller still has to continue the current line.
bool UnwrappedLineParser::parseStructLike() {
  // parseRecord falls through and does not yet add an unwrapped line as a
  // record declaration or definition can start a structural element.
  parseRecord();
  // This does not apply to Java, JavaScript and C#.
  if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
      Style.isCSharp()) {
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
    return true;
  }
  return false;
}

namespace {
// A class used to set and restore the Token position when peeking
// ahead in the token source.
class ScopedTokenPosition {
  unsigned StoredPosition;
  FormatTokenSource *Tokens;

public:
  // Records the current position of Tokens, which must not be null.
  ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
    assert(Tokens && "Tokens expected to not be null");
    StoredPosition = Tokens->getPosition();
  }

  // Rewinds the token source to the recorded position.
  ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
};
} // namespace

// Look ahead to see if we have a simple [[...]] attribute that is not directly
// followed by a semicolon. The token source is rewound to the original
// position on every return path (by AutoPosition's destructor), whether or
// not it's a match.
bool UnwrappedLineParser::tryToParseSimpleAttribute() {
  ScopedTokenPosition AutoPosition(Tokens);
  FormatToken *Tok = Tokens->getNextToken();
  // We already read the first [ check for the second.
  if (!Tok->is(tok::l_square))
    return false;
  // Double check that the attribute is just something
  // fairly simple.
  while (Tok->isNot(tok::eof)) {
    if (Tok->is(tok::r_square))
      break;
    Tok = Tokens->getNextToken();
  }
  if (Tok->is(tok::eof))
    return false;
  // The first ']' must be immediately followed by a second one.
  Tok = Tokens->getNextToken();
  if (!Tok->is(tok::r_square))
    return false;
  Tok = Tokens->getNextToken();
  if (Tok->is(tok::semi))
    return false;
  return true;
}

/// Parses the body of a Java enum.
/// \pre The current token is the opening '{' of the enum body.
///
/// A "simple" body is parsed as a braced list. Otherwise the enum constants
/// are put on lines of their own, followed by the class body (if any) that
/// comes after the constants' terminating ';'.
void UnwrappedLineParser::parseJavaEnumBody() {
  assert(FormatTok->is(tok::l_brace));
  const FormatToken *OpeningBrace = FormatTok;

  // Determine whether the enum is simple, i.e. does not have a semicolon or
  // constants with class bodies. Simple enums can be formatted like braced
  // lists, contracted to a single line, etc.
  unsigned StoredPosition = Tokens->getPosition();
  bool IsSimple = true;
  FormatToken *Tok = Tokens->getNextToken();
  while (!Tok->is(tok::eof)) {
    if (Tok->is(tok::r_brace))
      break;
    if (Tok->isOneOf(tok::l_brace, tok::semi)) {
      IsSimple = false;
      break;
    }
    // FIXME: This will also mark enums with braces in the arguments to enum
    // constants as "not simple". This is probably fine in practice, though.
    Tok = Tokens->getNextToken();
  }
  // Rewind: the scan above was only a peek.
  FormatTok = Tokens->setPosition(StoredPosition);

  if (IsSimple) {
    nextToken();
    parseBracedList();
    addUnwrappedLine();
    return;
  }

  // Parse the body of a more complex enum.
  // First add a line for everything up to the "{".
  nextToken();
  addUnwrappedLine();
  ++Line->Level;

  // Parse the enum constants.
  while (!eof()) {
    if (FormatTok->is(tok::l_brace)) {
      // Parse the constant's class body.
      parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
    } else if (FormatTok->is(tok::l_paren)) {
      parseParens();
    } else if (FormatTok->is(tok::comma)) {
      nextToken();
      addUnwrappedLine();
    } else if (FormatTok->is(tok::semi)) {
      nextToken();
      addUnwrappedLine();
      break;
    } else if (FormatTok->is(tok::r_brace)) {
      addUnwrappedLine();
      break;
    } else {
      nextToken();
    }
  }

  // Parse the class body after the enum's ";" if any.
  parseLevel(OpeningBrace);
  nextToken();
  --Line->Level;
  addUnwrappedLine();
}

/// Parses a record (class, struct, union or a similar construct such as an
/// interface): the introducing keyword, the name with any attributes or
/// macros, an optional base/template part, and the body if one is present.
///
/// \param ParseAsExpr When true, the body is parsed with parseChildBlock()
/// instead of as a regular block.
///
/// No unwrapped line is added after the body; see the comment at the end of
/// the function.
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw___attribute, tok::kw___declspec,
                            tok::kw_alignas, tok::l_square) ||
         ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.isJavaScript() &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    if (FormatTok->is(tok::l_square) && handleCppAttributes())
      continue;
    // An identifier that is not written entirely in upper case is assumed not
    // to be a macro, so a '(' after it is not parsed as macro arguments.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros in between 'class' and the class name.
    if (!IsNonMacroIdentifier && FormatTok->is(tok::l_paren))
      parseParens();
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  //   class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro. If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    do {
      if (FormatTok->is(tok::l_brace)) {
        calculateBraceTypes(/*ExpectClassBody=*/true);
        // A '{' that is not a braced list ends the header scan; it is handled
        // as the record body below.
        if (!tryToParseBracedList())
          break;
      }
      if (FormatTok->is(tok::l_square)) {
        FormatToken *Previous = FormatTok->Previous;
        if (!Previous ||
            !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
          // Don't try parsing a lambda if we had a closing parenthesis before,
          // it was probably a pointer to an array: int (*)[].
          if (!tryToParseLambda())
            continue;
        } else {
          parseSquare();
          continue;
        }
      }
      if (FormatTok->is(tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    } while (!eof());
  }

  // Maps the introducing keyword to the token type used for the body's '{'.
  auto GetBraceType = [](const FormatToken &RecordTok) {
    switch (RecordTok.Tok.getKind()) {
    case tok::kw_class:
      return TT_ClassLBrace;
    case tok::kw_struct:
      return TT_StructLBrace;
    case tok::kw_union:
      return TT_UnionLBrace;
    default:
      // Useful for e.g. interface.
      return TT_RecordLBrace;
    }
  };
  if (FormatTok->is(tok::l_brace)) {
    FormatTok->setFinalizedType(GetBraceType(InitialToken));
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
      parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
    }
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}

/// Parses an Objective-C method: consumes tokens up to and including a ';'
/// (declaration) or a '{...}' body (definition), then ends the line. Stops
/// without ending the line if the end of input is reached first.
void UnwrappedLineParser::parseObjCMethod() {
  assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
         "'(' or identifier expected.");
  do {
    if (FormatTok->is(tok::semi)) {
      nextToken();
      addUnwrappedLine();
      return;
    } else if (FormatTok->is(tok::l_brace)) {
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseBlock();
      addUnwrappedLine();
      return;
    } else {
      nextToken();
    }
  } while (!eof());
}

/// Parses an Objective-C protocol list '<...>'.
/// \pre The current token is the opening '<'.
///
/// Returns early, without consuming the offending token, if a ';', '{' or
/// '@end' is found before the closing '>'.
void UnwrappedLineParser::parseObjCProtocolList() {
  assert(FormatTok->is(tok::less) && "'<' expected.");
  do {
    nextToken();
    // Early exit in case someone forgot a close angle.
    if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
        FormatTok->isObjCAtKeyword(tok::objc_end)) {
      return;
    }
  } while (!eof() && FormatTok->isNot(tok::greater));
  nextToken(); // Skip '>'.
}

/// Parses the contents of an Objective-C @interface/@implementation/@protocol
/// up to and including the terminating '@end', or up to the end of input.
void UnwrappedLineParser::parseObjCUntilAtEnd() {
  do {
    if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
      nextToken();
      addUnwrappedLine();
      break;
    }
    if (FormatTok->is(tok::l_brace)) {
      parseBlock();
      // In ObjC interfaces, nothing should be following the "}".
      addUnwrappedLine();
    } else if (FormatTok->is(tok::r_brace)) {
      // Ignore stray "}". parseStructuralElement doesn't consume them.
      nextToken();
      addUnwrappedLine();
    } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
      // '-' or '+' introduces a method.
      nextToken();
      parseObjCMethod();
    } else {
      parseStructuralElement();
    }
  } while (!eof());
}

/// Parses an Objective-C @interface or @implementation: the header (name,
/// optional generics, base class or category, protocol list), an optional
/// instance variable block, and then everything up to '@end'.
void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
         FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
  nextToken();
  nextToken(); // interface name

  // @interface can be followed by a lightweight generic
  // specialization list, then either a base class or a category.
  if (FormatTok->is(tok::less))
    parseObjCLightweightGenerics();
  if (FormatTok->is(tok::colon)) {
    nextToken();
    nextToken(); // base class name
    // The base class can also have lightweight generics applied to it.
    if (FormatTok->is(tok::less))
      parseObjCLightweightGenerics();
  } else if (FormatTok->is(tok::l_paren)) {
    // Skip category, if present.
    parseParens();
  }

  if (FormatTok->is(tok::less))
    parseObjCProtocolList();

  if (FormatTok->is(tok::l_brace)) {
    if (Style.BraceWrapping.AfterObjCDeclaration)
      addUnwrappedLine();
    parseBlock(/*MustBeDeclaration=*/true);
  }

  // With instance variables, this puts '}' on its own line.  Without instance
  // variables, this ends the @interface line.
  addUnwrappedLine();

  parseObjCUntilAtEnd();
}

/// Parses an Objective-C lightweight generics list '<...>', which may contain
/// nested angle brackets.
/// \pre The current token is the opening '<'.
void UnwrappedLineParser::parseObjCLightweightGenerics() {
  assert(FormatTok->is(tok::less));
  // Unlike protocol lists, generic parameterizations support
  // nested angles:
  //
  // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
  //     NSObject <NSCopying, NSSecureCoding>
  //
  // so we need to count how many open angles we have left.
  unsigned NumOpenAngles = 1;
  do {
    nextToken();
    // Early exit in case someone forgot a close angle.
    // NOTE(review): unlike parseObjCProtocolList(), this exits with 'break',
    // so the nextToken() after the loop also consumes the ';', '{' or '@end'
    // that triggered the exit - confirm that this is intended.
    if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
        FormatTok->isObjCAtKeyword(tok::objc_end)) {
      break;
    }
    if (FormatTok->is(tok::less)) {
      ++NumOpenAngles;
    } else if (FormatTok->is(tok::greater)) {
      assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
      --NumOpenAngles;
    }
  } while (!eof() && NumOpenAngles != 0);
  nextToken(); // Skip '>'.
}

// Returns true for the declaration/definition form of @protocol,
// false for the expression form.
bool UnwrappedLineParser::parseObjCProtocol() {
  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
  nextToken();

  if (FormatTok->is(tok::l_paren)) {
    // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
    return false;
  }

  // The definition/declaration form,
  // @protocol Foo
  // - (int)someMethod;
  // @end

  nextToken(); // protocol name

  if (FormatTok->is(tok::less))
    parseObjCProtocolList();

  // Check for protocol declaration.
  if (FormatTok->is(tok::semi)) {
    nextToken();
    addUnwrappedLine();
    return true;
  }

  // Protocol definition: end the header line and parse the body up to '@end'.
  addUnwrappedLine();
  parseObjCUntilAtEnd();
  return true;
}

/// Parses the beginning of a JavaScript ES6 'import' or 'export' statement.
///
/// For imports, `export *`, `export {...}` and similar forms, the tokens up to
/// (but not including) the terminating ';' are consumed. For other exports the
/// function returns early so that the caller parses the exported declaration
/// or expression as a regular structural element.
void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
  bool IsImport = FormatTok->is(Keywords.kw_import);
  assert(IsImport || FormatTok->is(tok::kw_export));
  nextToken();

  // Consume the "default" in "export default class/function".
  if (FormatTok->is(tok::kw_default))
    nextToken();

  // Consume "async function", "function" and "default function", so that these
  // get parsed as free-standing JS functions, i.e. do not require a trailing
  // semicolon.
  if (FormatTok->is(Keywords.kw_async))
    nextToken();
  if (FormatTok->is(Keywords.kw_function)) {
    nextToken();
    return;
  }

  // For imports, `export *`, `export {...}`, consume the rest of the line up
  // to the terminating `;`. For everything else, just return and continue
  // parsing the structural element, i.e. the declaration or expression for
  // `export default`.
  if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
      !FormatTok->isStringLiteral() &&
      !(FormatTok->is(Keywords.kw_type) &&
        Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
    return;
  }

  while (!eof()) {
    if (FormatTok->is(tok::semi))
      return;
    if (Line->Tokens.empty()) {
      // Common issue: Automatic Semicolon Insertion wrapped the line, so the
      // import statement should terminate.
      return;
    }
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setBlockKind(BK_Block);
      nextToken();
      parseBracedList();
    } else {
      nextToken();
    }
  }
}

/// Parses a statement macro: the macro name, an optional parenthesized
/// argument list and an optional trailing ';', then ends the line.
void UnwrappedLineParser::parseStatementMacro() {
  nextToken();
  if (FormatTok->is(tok::l_paren))
    parseParens();
  if (FormatTok->is(tok::semi))
    nextToken();
  addUnwrappedLine();
}

/// Consumes a Verilog hierarchical identifier, stopping at the first token
/// that cannot be part of one.
void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
  // consume things like a::`b.c[d:e] or a::*
  while (true) {
    if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
                           tok::coloncolon, tok::hash) ||
        Keywords.isVerilogIdentifier(*FormatTok)) {
      nextToken();
    } else if (FormatTok->is(tok::l_square)) {
      parseSquare();
    } else {
      break;
    }
  }
}

/// Parses a Verilog event control ('@' or '@@' followed by '*', a
/// parenthesized list, or a hierarchical identifier). Does nothing if the
/// current token is not '@'.
void UnwrappedLineParser::parseVerilogSensitivityList() {
  if (!FormatTok->is(tok::at))
    return;
  nextToken();
  // A block event expression has 2 at signs.
  if (FormatTok->is(tok::at))
    nextToken();
  switch (FormatTok->Tok.getKind()) {
  case tok::star:
    nextToken();
    break;
  case tok::l_paren:
    parseParens();
    break;
  default:
    parseVerilogHierarchyIdentifier();
    break;
  }
}

unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
  unsigned AddLevels = 0;

  if (FormatTok->is(Keywords.kw_clocking)) {
    nextToken();
    if (Keywords.isVerilogIdentifier(*FormatTok))
      nextToken();
    parseVerilogSensitivityList();
    if (FormatTok->is(tok::semi))
      nextToken();
  } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
                                Keywords.kw_casez, Keywords.kw_randcase,
                                Keywords.kw_randsequence)) {
    if (Style.IndentCaseLabels)
      AddLevels++;
    nextToken();
    if (FormatTok->is(tok::l_paren)) {
      FormatTok->setFinalizedType(TT_ConditionLParen);
      parseParens();
    }
    if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
      nextToken();
    // The case header has no semicolon.
  } else {
    // "module" etc.
4231 nextToken(); 4232 // all the words like the name of the module and specifiers like 4233 // "automatic" and the width of function return type 4234 while (true) { 4235 if (FormatTok->is(tok::l_square)) { 4236 auto Prev = FormatTok->getPreviousNonComment(); 4237 if (Prev && Keywords.isVerilogIdentifier(*Prev)) 4238 Prev->setFinalizedType(TT_VerilogDimensionedTypeName); 4239 parseSquare(); 4240 } else if (Keywords.isVerilogIdentifier(*FormatTok) || 4241 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) { 4242 nextToken(); 4243 } else { 4244 break; 4245 } 4246 } 4247 4248 auto NewLine = [this]() { 4249 addUnwrappedLine(); 4250 Line->IsContinuation = true; 4251 }; 4252 4253 // package imports 4254 while (FormatTok->is(Keywords.kw_import)) { 4255 NewLine(); 4256 nextToken(); 4257 parseVerilogHierarchyIdentifier(); 4258 if (FormatTok->is(tok::semi)) 4259 nextToken(); 4260 } 4261 4262 // parameters and ports 4263 if (FormatTok->is(Keywords.kw_verilogHash)) { 4264 NewLine(); 4265 nextToken(); 4266 if (FormatTok->is(tok::l_paren)) { 4267 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen); 4268 parseParens(); 4269 } 4270 } 4271 if (FormatTok->is(tok::l_paren)) { 4272 NewLine(); 4273 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen); 4274 parseParens(); 4275 } 4276 4277 // extends and implements 4278 if (FormatTok->is(Keywords.kw_extends)) { 4279 NewLine(); 4280 nextToken(); 4281 parseVerilogHierarchyIdentifier(); 4282 if (FormatTok->is(tok::l_paren)) 4283 parseParens(); 4284 } 4285 if (FormatTok->is(Keywords.kw_implements)) { 4286 NewLine(); 4287 do { 4288 nextToken(); 4289 parseVerilogHierarchyIdentifier(); 4290 } while (FormatTok->is(tok::comma)); 4291 } 4292 4293 // Coverage event for cover groups. 
4294 if (FormatTok->is(tok::at)) { 4295 NewLine(); 4296 parseVerilogSensitivityList(); 4297 } 4298 4299 if (FormatTok->is(tok::semi)) 4300 nextToken(/*LevelDifference=*/1); 4301 addUnwrappedLine(); 4302 } 4303 4304 return AddLevels; 4305 } 4306 4307 void UnwrappedLineParser::parseVerilogTable() { 4308 assert(FormatTok->is(Keywords.kw_table)); 4309 nextToken(/*LevelDifference=*/1); 4310 addUnwrappedLine(); 4311 4312 auto InitialLevel = Line->Level++; 4313 while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) { 4314 FormatToken *Tok = FormatTok; 4315 nextToken(); 4316 if (Tok->is(tok::semi)) 4317 addUnwrappedLine(); 4318 else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus)) 4319 Tok->setFinalizedType(TT_VerilogTableItem); 4320 } 4321 Line->Level = InitialLevel; 4322 nextToken(/*LevelDifference=*/-1); 4323 addUnwrappedLine(); 4324 } 4325 4326 void UnwrappedLineParser::parseVerilogCaseLabel() { 4327 // The label will get unindented in AnnotatingParser. If there are no leading 4328 // spaces, indent the rest here so that things inside the block will be 4329 // indented relative to things outside. We don't use parseLabel because we 4330 // don't know whether this colon is a label or a ternary expression at this 4331 // point. 4332 auto OrigLevel = Line->Level; 4333 auto FirstLine = CurrentLines->size(); 4334 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1)) 4335 ++Line->Level; 4336 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok)) 4337 --Line->Level; 4338 parseStructuralElement(); 4339 // Restore the indentation in both the new line and the line that has the 4340 // label. 
4341 if (CurrentLines->size() > FirstLine) 4342 (*CurrentLines)[FirstLine].Level = OrigLevel; 4343 Line->Level = OrigLevel; 4344 } 4345 4346 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const { 4347 for (const auto &N : Line.Tokens) { 4348 if (N.Tok->MacroCtx) 4349 return true; 4350 for (const UnwrappedLine &Child : N.Children) 4351 if (containsExpansion(Child)) 4352 return true; 4353 } 4354 return false; 4355 } 4356 4357 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { 4358 if (Line->Tokens.empty()) 4359 return; 4360 LLVM_DEBUG({ 4361 if (!parsingPPDirective()) { 4362 llvm::dbgs() << "Adding unwrapped line:\n"; 4363 printDebugInfo(*Line); 4364 } 4365 }); 4366 4367 // If this line closes a block when in Whitesmiths mode, remember that 4368 // information so that the level can be decreased after the line is added. 4369 // This has to happen after the addition of the line since the line itself 4370 // needs to be indented. 4371 bool ClosesWhitesmithsBlock = 4372 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && 4373 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 4374 4375 // If the current line was expanded from a macro call, we use it to 4376 // reconstruct an unwrapped line from the structure of the expanded unwrapped 4377 // line and the unexpanded token stream. 4378 if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) { 4379 if (!Reconstruct) 4380 Reconstruct.emplace(Line->Level, Unexpanded); 4381 Reconstruct->addLine(*Line); 4382 4383 // While the reconstructed unexpanded lines are stored in the normal 4384 // flow of lines, the expanded lines are stored on the side to be analyzed 4385 // in an extra step. 
4386 CurrentExpandedLines.push_back(std::move(*Line)); 4387 4388 if (Reconstruct->finished()) { 4389 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult(); 4390 assert(!Reconstructed.Tokens.empty() && 4391 "Reconstructed must at least contain the macro identifier."); 4392 assert(!parsingPPDirective()); 4393 LLVM_DEBUG({ 4394 llvm::dbgs() << "Adding unexpanded line:\n"; 4395 printDebugInfo(Reconstructed); 4396 }); 4397 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines; 4398 Lines.push_back(std::move(Reconstructed)); 4399 CurrentExpandedLines.clear(); 4400 Reconstruct.reset(); 4401 } 4402 } else { 4403 // At the top level we only get here when no unexpansion is going on, or 4404 // when conditional formatting led to unfinished macro reconstructions. 4405 assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0); 4406 CurrentLines->push_back(std::move(*Line)); 4407 } 4408 Line->Tokens.clear(); 4409 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 4410 Line->FirstStartColumn = 0; 4411 Line->IsContinuation = false; 4412 Line->SeenDecltypeAuto = false; 4413 4414 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) 4415 --Line->Level; 4416 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) { 4417 CurrentLines->append( 4418 std::make_move_iterator(PreprocessorDirectives.begin()), 4419 std::make_move_iterator(PreprocessorDirectives.end())); 4420 PreprocessorDirectives.clear(); 4421 } 4422 // Disconnect the current token from the last token on the previous line. 4423 FormatTok->Previous = nullptr; 4424 } 4425 4426 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); } 4427 4428 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 4429 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 4430 FormatTok.NewlinesBefore > 0; 4431 } 4432 4433 // Checks if \p FormatTok is a line comment that continues the line comment 4434 // section on \p Line. 
4435 static bool 4436 continuesLineCommentSection(const FormatToken &FormatTok, 4437 const UnwrappedLine &Line, 4438 const llvm::Regex &CommentPragmasRegex) { 4439 if (Line.Tokens.empty()) 4440 return false; 4441 4442 StringRef IndentContent = FormatTok.TokenText; 4443 if (FormatTok.TokenText.startswith("//") || 4444 FormatTok.TokenText.startswith("/*")) { 4445 IndentContent = FormatTok.TokenText.substr(2); 4446 } 4447 if (CommentPragmasRegex.match(IndentContent)) 4448 return false; 4449 4450 // If Line starts with a line comment, then FormatTok continues the comment 4451 // section if its original column is greater or equal to the original start 4452 // column of the line. 4453 // 4454 // Define the min column token of a line as follows: if a line ends in '{' or 4455 // contains a '{' followed by a line comment, then the min column token is 4456 // that '{'. Otherwise, the min column token of the line is the first token of 4457 // the line. 4458 // 4459 // If Line starts with a token other than a line comment, then FormatTok 4460 // continues the comment section if its original column is greater than the 4461 // original start column of the min column token of the line. 
4462 // 4463 // For example, the second line comment continues the first in these cases: 4464 // 4465 // // first line 4466 // // second line 4467 // 4468 // and: 4469 // 4470 // // first line 4471 // // second line 4472 // 4473 // and: 4474 // 4475 // int i; // first line 4476 // // second line 4477 // 4478 // and: 4479 // 4480 // do { // first line 4481 // // second line 4482 // int i; 4483 // } while (true); 4484 // 4485 // and: 4486 // 4487 // enum { 4488 // a, // first line 4489 // // second line 4490 // b 4491 // }; 4492 // 4493 // The second line comment doesn't continue the first in these cases: 4494 // 4495 // // first line 4496 // // second line 4497 // 4498 // and: 4499 // 4500 // int i; // first line 4501 // // second line 4502 // 4503 // and: 4504 // 4505 // do { // first line 4506 // // second line 4507 // int i; 4508 // } while (true); 4509 // 4510 // and: 4511 // 4512 // enum { 4513 // a, // first line 4514 // // second line 4515 // }; 4516 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 4517 4518 // Scan for '{//'. If found, use the column of '{' as a min column for line 4519 // comment section continuation. 4520 const FormatToken *PreviousToken = nullptr; 4521 for (const UnwrappedLineNode &Node : Line.Tokens) { 4522 if (PreviousToken && PreviousToken->is(tok::l_brace) && 4523 isLineComment(*Node.Tok)) { 4524 MinColumnToken = PreviousToken; 4525 break; 4526 } 4527 PreviousToken = Node.Tok; 4528 4529 // Grab the last newline preceding a token in this unwrapped line. 
4530 if (Node.Tok->NewlinesBefore > 0) 4531 MinColumnToken = Node.Tok; 4532 } 4533 if (PreviousToken && PreviousToken->is(tok::l_brace)) 4534 MinColumnToken = PreviousToken; 4535 4536 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 4537 MinColumnToken); 4538 } 4539 4540 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 4541 bool JustComments = Line->Tokens.empty(); 4542 for (FormatToken *Tok : CommentsBeforeNextToken) { 4543 // Line comments that belong to the same line comment section are put on the 4544 // same line since later we might want to reflow content between them. 4545 // Additional fine-grained breaking of line comment sections is controlled 4546 // by the class BreakableLineCommentSection in case it is desirable to keep 4547 // several line comment sections in the same unwrapped line. 4548 // 4549 // FIXME: Consider putting separate line comment sections as children to the 4550 // unwrapped line instead. 4551 Tok->ContinuesLineCommentSection = 4552 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex); 4553 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection) 4554 addUnwrappedLine(); 4555 pushToken(Tok); 4556 } 4557 if (NewlineBeforeNext && JustComments) 4558 addUnwrappedLine(); 4559 CommentsBeforeNextToken.clear(); 4560 } 4561 4562 void UnwrappedLineParser::nextToken(int LevelDifference) { 4563 if (eof()) 4564 return; 4565 flushComments(isOnNewLine(*FormatTok)); 4566 pushToken(FormatTok); 4567 FormatToken *Previous = FormatTok; 4568 if (!Style.isJavaScript()) 4569 readToken(LevelDifference); 4570 else 4571 readTokenWithJavaScriptASI(); 4572 FormatTok->Previous = Previous; 4573 if (Style.isVerilog()) { 4574 // Blocks in Verilog can have `begin` and `end` instead of braces. For 4575 // keywords like `begin`, we can't treat them the same as left braces 4576 // because some contexts require one of them. 
For example structs use 4577 // braces and if blocks use keywords, and a left brace can occur in an if 4578 // statement, but it is not a block. For keywords like `end`, we simply 4579 // treat them the same as right braces. 4580 if (Keywords.isVerilogEnd(*FormatTok)) 4581 FormatTok->Tok.setKind(tok::r_brace); 4582 } 4583 } 4584 4585 void UnwrappedLineParser::distributeComments( 4586 const SmallVectorImpl<FormatToken *> &Comments, 4587 const FormatToken *NextTok) { 4588 // Whether or not a line comment token continues a line is controlled by 4589 // the method continuesLineCommentSection, with the following caveat: 4590 // 4591 // Define a trail of Comments to be a nonempty proper postfix of Comments such 4592 // that each comment line from the trail is aligned with the next token, if 4593 // the next token exists. If a trail exists, the beginning of the maximal 4594 // trail is marked as a start of a new comment section. 4595 // 4596 // For example in this code: 4597 // 4598 // int a; // line about a 4599 // // line 1 about b 4600 // // line 2 about b 4601 // int b; 4602 // 4603 // the two lines about b form a maximal trail, so there are two sections, the 4604 // first one consisting of the single comment "// line about a" and the 4605 // second one consisting of the next two comments. 4606 if (Comments.empty()) 4607 return; 4608 bool ShouldPushCommentsInCurrentLine = true; 4609 bool HasTrailAlignedWithNextToken = false; 4610 unsigned StartOfTrailAlignedWithNextToken = 0; 4611 if (NextTok) { 4612 // We are skipping the first element intentionally. 
4613 for (unsigned i = Comments.size() - 1; i > 0; --i) { 4614 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 4615 HasTrailAlignedWithNextToken = true; 4616 StartOfTrailAlignedWithNextToken = i; 4617 } 4618 } 4619 } 4620 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 4621 FormatToken *FormatTok = Comments[i]; 4622 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 4623 FormatTok->ContinuesLineCommentSection = false; 4624 } else { 4625 FormatTok->ContinuesLineCommentSection = 4626 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 4627 } 4628 if (!FormatTok->ContinuesLineCommentSection && 4629 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 4630 ShouldPushCommentsInCurrentLine = false; 4631 } 4632 if (ShouldPushCommentsInCurrentLine) 4633 pushToken(FormatTok); 4634 else 4635 CommentsBeforeNextToken.push_back(FormatTok); 4636 } 4637 } 4638 4639 void UnwrappedLineParser::readToken(int LevelDifference) { 4640 SmallVector<FormatToken *, 1> Comments; 4641 bool PreviousWasComment = false; 4642 bool FirstNonCommentOnLine = false; 4643 do { 4644 FormatTok = Tokens->getNextToken(); 4645 assert(FormatTok); 4646 while (FormatTok->getType() == TT_ConflictStart || 4647 FormatTok->getType() == TT_ConflictEnd || 4648 FormatTok->getType() == TT_ConflictAlternative) { 4649 if (FormatTok->getType() == TT_ConflictStart) 4650 conditionalCompilationStart(/*Unreachable=*/false); 4651 else if (FormatTok->getType() == TT_ConflictAlternative) 4652 conditionalCompilationAlternative(); 4653 else if (FormatTok->getType() == TT_ConflictEnd) 4654 conditionalCompilationEnd(); 4655 FormatTok = Tokens->getNextToken(); 4656 FormatTok->MustBreakBefore = true; 4657 } 4658 4659 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine, 4660 const FormatToken &Tok, 4661 bool PreviousWasComment) { 4662 auto IsFirstOnLine = [](const FormatToken &Tok) { 4663 return Tok.HasUnescapedNewline || Tok.IsFirst; 4664 }; 4665 4666 // Consider 
preprocessor directives preceded by block comments as first 4667 // on line. 4668 if (PreviousWasComment) 4669 return FirstNonCommentOnLine || IsFirstOnLine(Tok); 4670 return IsFirstOnLine(Tok); 4671 }; 4672 4673 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4674 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4675 PreviousWasComment = FormatTok->is(tok::comment); 4676 4677 while (!Line->InPPDirective && FormatTok->is(tok::hash) && 4678 (!Style.isVerilog() || 4679 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) && 4680 FirstNonCommentOnLine) { 4681 distributeComments(Comments, FormatTok); 4682 Comments.clear(); 4683 // If there is an unfinished unwrapped line, we flush the preprocessor 4684 // directives only after that unwrapped line was finished later. 4685 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 4686 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 4687 assert((LevelDifference >= 0 || 4688 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 4689 "LevelDifference makes Line->Level negative"); 4690 Line->Level += LevelDifference; 4691 // Comments stored before the preprocessor directive need to be output 4692 // before the preprocessor directive, at the same level as the 4693 // preprocessor directive, as we consider them to apply to the directive. 4694 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 4695 PPBranchLevel > 0) { 4696 Line->Level += PPBranchLevel; 4697 } 4698 flushComments(isOnNewLine(*FormatTok)); 4699 parsePPDirective(); 4700 PreviousWasComment = FormatTok->is(tok::comment); 4701 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4702 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4703 } 4704 4705 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 4706 !Line->InPPDirective) { 4707 continue; 4708 } 4709 4710 if (FormatTok->is(tok::identifier) && 4711 Macros.defined(FormatTok->TokenText) && 4712 // FIXME: Allow expanding macros in preprocessor directives. 
4713 !Line->InPPDirective) { 4714 FormatToken *ID = FormatTok; 4715 unsigned Position = Tokens->getPosition(); 4716 4717 // To correctly parse the code, we need to replace the tokens of the macro 4718 // call with its expansion. 4719 auto PreCall = std::move(Line); 4720 Line.reset(new UnwrappedLine); 4721 bool OldInExpansion = InExpansion; 4722 InExpansion = true; 4723 // We parse the macro call into a new line. 4724 auto Args = parseMacroCall(); 4725 InExpansion = OldInExpansion; 4726 assert(Line->Tokens.front().Tok == ID); 4727 // And remember the unexpanded macro call tokens. 4728 auto UnexpandedLine = std::move(Line); 4729 // Reset to the old line. 4730 Line = std::move(PreCall); 4731 4732 LLVM_DEBUG({ 4733 llvm::dbgs() << "Macro call: " << ID->TokenText << "("; 4734 if (Args) { 4735 llvm::dbgs() << "("; 4736 for (const auto &Arg : Args.value()) 4737 for (const auto &T : Arg) 4738 llvm::dbgs() << T->TokenText << " "; 4739 llvm::dbgs() << ")"; 4740 } 4741 llvm::dbgs() << "\n"; 4742 }); 4743 if (Macros.objectLike(ID->TokenText) && Args && 4744 !Macros.hasArity(ID->TokenText, Args->size())) { 4745 // The macro is either 4746 // - object-like, but we got argumnets, or 4747 // - overloaded to be both object-like and function-like, but none of 4748 // the function-like arities match the number of arguments. 4749 // Thus, expand as object-like macro. 4750 LLVM_DEBUG(llvm::dbgs() 4751 << "Macro \"" << ID->TokenText 4752 << "\" not overloaded for arity " << Args->size() 4753 << "or not function-like, using object-like overload."); 4754 Args.reset(); 4755 UnexpandedLine->Tokens.resize(1); 4756 Tokens->setPosition(Position); 4757 nextToken(); 4758 assert(!Args && Macros.objectLike(ID->TokenText)); 4759 } 4760 if ((!Args && Macros.objectLike(ID->TokenText)) || 4761 (Args && Macros.hasArity(ID->TokenText, Args->size()))) { 4762 // Next, we insert the expanded tokens in the token stream at the 4763 // current position, and continue parsing. 
4764 Unexpanded[ID] = std::move(UnexpandedLine); 4765 SmallVector<FormatToken *, 8> Expansion = 4766 Macros.expand(ID, std::move(Args)); 4767 if (!Expansion.empty()) 4768 FormatTok = Tokens->insertTokens(Expansion); 4769 4770 LLVM_DEBUG({ 4771 llvm::dbgs() << "Expanded: "; 4772 for (const auto &T : Expansion) 4773 llvm::dbgs() << T->TokenText << " "; 4774 llvm::dbgs() << "\n"; 4775 }); 4776 } else { 4777 LLVM_DEBUG({ 4778 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText 4779 << "\", because it was used "; 4780 if (Args) 4781 llvm::dbgs() << "with " << Args->size(); 4782 else 4783 llvm::dbgs() << "without"; 4784 llvm::dbgs() << " arguments, which doesn't match any definition.\n"; 4785 }); 4786 Tokens->setPosition(Position); 4787 FormatTok = ID; 4788 } 4789 } 4790 4791 if (!FormatTok->is(tok::comment)) { 4792 distributeComments(Comments, FormatTok); 4793 Comments.clear(); 4794 return; 4795 } 4796 4797 Comments.push_back(FormatTok); 4798 } while (!eof()); 4799 4800 distributeComments(Comments, nullptr); 4801 Comments.clear(); 4802 } 4803 4804 namespace { 4805 template <typename Iterator> 4806 void pushTokens(Iterator Begin, Iterator End, 4807 llvm::SmallVectorImpl<FormatToken *> &Into) { 4808 for (auto I = Begin; I != End; ++I) { 4809 Into.push_back(I->Tok); 4810 for (const auto &Child : I->Children) 4811 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into); 4812 } 4813 } 4814 } // namespace 4815 4816 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> 4817 UnwrappedLineParser::parseMacroCall() { 4818 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args; 4819 assert(Line->Tokens.empty()); 4820 nextToken(); 4821 if (!FormatTok->is(tok::l_paren)) 4822 return Args; 4823 unsigned Position = Tokens->getPosition(); 4824 FormatToken *Tok = FormatTok; 4825 nextToken(); 4826 Args.emplace(); 4827 auto ArgStart = std::prev(Line->Tokens.end()); 4828 4829 int Parens = 0; 4830 do { 4831 switch (FormatTok->Tok.getKind()) 
{ 4832 case tok::l_paren: 4833 ++Parens; 4834 nextToken(); 4835 break; 4836 case tok::r_paren: { 4837 if (Parens > 0) { 4838 --Parens; 4839 nextToken(); 4840 break; 4841 } 4842 Args->push_back({}); 4843 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); 4844 nextToken(); 4845 return Args; 4846 } 4847 case tok::comma: { 4848 if (Parens > 0) { 4849 nextToken(); 4850 break; 4851 } 4852 Args->push_back({}); 4853 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); 4854 nextToken(); 4855 ArgStart = std::prev(Line->Tokens.end()); 4856 break; 4857 } 4858 default: 4859 nextToken(); 4860 break; 4861 } 4862 } while (!eof()); 4863 Line->Tokens.resize(1); 4864 Tokens->setPosition(Position); 4865 FormatTok = Tok; 4866 return {}; 4867 } 4868 4869 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 4870 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 4871 if (MustBreakBeforeNextToken) { 4872 Line->Tokens.back().Tok->MustBreakBefore = true; 4873 MustBreakBeforeNextToken = false; 4874 } 4875 } 4876 4877 } // end namespace format 4878 } // end namespace clang 4879