1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "FormatToken.h" 17 #include "FormatTokenLexer.h" 18 #include "FormatTokenSource.h" 19 #include "Macros.h" 20 #include "TokenAnnotator.h" 21 #include "clang/Basic/TokenKinds.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/StringRef.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/raw_os_ostream.h" 26 #include "llvm/Support/raw_ostream.h" 27 28 #include <algorithm> 29 #include <utility> 30 31 #define DEBUG_TYPE "format-parser" 32 33 namespace clang { 34 namespace format { 35 36 namespace { 37 38 void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line, 39 StringRef Prefix = "", bool PrintText = false) { 40 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn 41 << ")" << (Line.InPPDirective ? 
" MACRO" : "") << ": "; 42 bool NewLine = false; 43 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 44 E = Line.Tokens.end(); 45 I != E; ++I) { 46 if (NewLine) { 47 OS << Prefix; 48 NewLine = false; 49 } 50 OS << I->Tok->Tok.getName() << "[" 51 << "T=" << (unsigned)I->Tok->getType() 52 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText 53 << "\"] "; 54 for (SmallVectorImpl<UnwrappedLine>::const_iterator 55 CI = I->Children.begin(), 56 CE = I->Children.end(); 57 CI != CE; ++CI) { 58 OS << "\n"; 59 printLine(OS, *CI, (Prefix + " ").str()); 60 NewLine = true; 61 } 62 } 63 if (!NewLine) 64 OS << "\n"; 65 } 66 67 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) { 68 printLine(llvm::dbgs(), Line); 69 } 70 71 class ScopedDeclarationState { 72 public: 73 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack, 74 bool MustBeDeclaration) 75 : Line(Line), Stack(Stack) { 76 Line.MustBeDeclaration = MustBeDeclaration; 77 Stack.push_back(MustBeDeclaration); 78 } 79 ~ScopedDeclarationState() { 80 Stack.pop_back(); 81 if (!Stack.empty()) 82 Line.MustBeDeclaration = Stack.back(); 83 else 84 Line.MustBeDeclaration = true; 85 } 86 87 private: 88 UnwrappedLine &Line; 89 llvm::BitVector &Stack; 90 }; 91 92 } // end anonymous namespace 93 94 class ScopedLineState { 95 public: 96 ScopedLineState(UnwrappedLineParser &Parser, 97 bool SwitchToPreprocessorLines = false) 98 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 99 if (SwitchToPreprocessorLines) 100 Parser.CurrentLines = &Parser.PreprocessorDirectives; 101 else if (!Parser.Line->Tokens.empty()) 102 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 103 PreBlockLine = std::move(Parser.Line); 104 Parser.Line = std::make_unique<UnwrappedLine>(); 105 Parser.Line->Level = PreBlockLine->Level; 106 Parser.Line->PPLevel = PreBlockLine->PPLevel; 107 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 108 Parser.Line->InMacroBody = 
PreBlockLine->InMacroBody; 109 } 110 111 ~ScopedLineState() { 112 if (!Parser.Line->Tokens.empty()) 113 Parser.addUnwrappedLine(); 114 assert(Parser.Line->Tokens.empty()); 115 Parser.Line = std::move(PreBlockLine); 116 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 117 Parser.MustBreakBeforeNextToken = true; 118 Parser.CurrentLines = OriginalLines; 119 } 120 121 private: 122 UnwrappedLineParser &Parser; 123 124 std::unique_ptr<UnwrappedLine> PreBlockLine; 125 SmallVectorImpl<UnwrappedLine> *OriginalLines; 126 }; 127 128 class CompoundStatementIndenter { 129 public: 130 CompoundStatementIndenter(UnwrappedLineParser *Parser, 131 const FormatStyle &Style, unsigned &LineLevel) 132 : CompoundStatementIndenter(Parser, LineLevel, 133 Style.BraceWrapping.AfterControlStatement, 134 Style.BraceWrapping.IndentBraces) {} 135 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, 136 bool WrapBrace, bool IndentBrace) 137 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 138 if (WrapBrace) 139 Parser->addUnwrappedLine(); 140 if (IndentBrace) 141 ++LineLevel; 142 } 143 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 144 145 private: 146 unsigned &LineLevel; 147 unsigned OldLineLevel; 148 }; 149 150 UnwrappedLineParser::UnwrappedLineParser( 151 SourceManager &SourceMgr, const FormatStyle &Style, 152 const AdditionalKeywords &Keywords, unsigned FirstStartColumn, 153 ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback, 154 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator, 155 IdentifierTable &IdentTable) 156 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 157 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 158 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 159 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 160 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 161 ? 
IG_Rejected 162 : IG_Inited), 163 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn), 164 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {} 165 166 void UnwrappedLineParser::reset() { 167 PPBranchLevel = -1; 168 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 169 ? IG_Rejected 170 : IG_Inited; 171 IncludeGuardToken = nullptr; 172 Line.reset(new UnwrappedLine); 173 CommentsBeforeNextToken.clear(); 174 FormatTok = nullptr; 175 MustBreakBeforeNextToken = false; 176 PreprocessorDirectives.clear(); 177 CurrentLines = &Lines; 178 DeclarationScopeStack.clear(); 179 NestedTooDeep.clear(); 180 PPStack.clear(); 181 Line->FirstStartColumn = FirstStartColumn; 182 183 if (!Unexpanded.empty()) 184 for (FormatToken *Token : AllTokens) 185 Token->MacroCtx.reset(); 186 CurrentExpandedLines.clear(); 187 ExpandedLines.clear(); 188 Unexpanded.clear(); 189 InExpansion = false; 190 Reconstruct.reset(); 191 } 192 193 void UnwrappedLineParser::parse() { 194 IndexedTokenSource TokenSource(AllTokens); 195 Line->FirstStartColumn = FirstStartColumn; 196 do { 197 LLVM_DEBUG(llvm::dbgs() << "----\n"); 198 reset(); 199 Tokens = &TokenSource; 200 TokenSource.reset(); 201 202 readToken(); 203 parseFile(); 204 205 // If we found an include guard then all preprocessor directives (other than 206 // the guard) are over-indented by one. 207 if (IncludeGuard == IG_Found) { 208 for (auto &Line : Lines) 209 if (Line.InPPDirective && Line.Level > 0) 210 --Line.Level; 211 } 212 213 // Create line with eof token. 214 assert(FormatTok->is(tok::eof)); 215 pushToken(FormatTok); 216 addUnwrappedLine(); 217 218 // In a first run, format everything with the lines containing macro calls 219 // replaced by the expansion. 
220 if (!ExpandedLines.empty()) { 221 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n"); 222 for (const auto &Line : Lines) { 223 if (!Line.Tokens.empty()) { 224 auto it = ExpandedLines.find(Line.Tokens.begin()->Tok); 225 if (it != ExpandedLines.end()) { 226 for (const auto &Expanded : it->second) { 227 LLVM_DEBUG(printDebugInfo(Expanded)); 228 Callback.consumeUnwrappedLine(Expanded); 229 } 230 continue; 231 } 232 } 233 LLVM_DEBUG(printDebugInfo(Line)); 234 Callback.consumeUnwrappedLine(Line); 235 } 236 Callback.finishRun(); 237 } 238 239 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n"); 240 for (const UnwrappedLine &Line : Lines) { 241 LLVM_DEBUG(printDebugInfo(Line)); 242 Callback.consumeUnwrappedLine(Line); 243 } 244 Callback.finishRun(); 245 Lines.clear(); 246 while (!PPLevelBranchIndex.empty() && 247 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 248 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 249 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 250 } 251 if (!PPLevelBranchIndex.empty()) { 252 ++PPLevelBranchIndex.back(); 253 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 254 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 255 } 256 } while (!PPLevelBranchIndex.empty()); 257 } 258 259 void UnwrappedLineParser::parseFile() { 260 // The top-level context in a file always has declarations, except for pre- 261 // processor directives and JavaScript files. 262 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript(); 263 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 264 MustBeDeclaration); 265 if (Style.Language == FormatStyle::LK_TextProto) 266 parseBracedList(); 267 else 268 parseLevel(); 269 // Make sure to format the remaining tokens. 270 // 271 // LK_TextProto is special since its top-level is parsed as the body of a 272 // braced list, which does not necessarily have natural line separators such 273 // as a semicolon. 
Comments after the last entry that have been determined to 274 // not belong to that line, as in: 275 // key: value 276 // // endfile comment 277 // do not have a chance to be put on a line of their own until this point. 278 // Here we add this newline before end-of-file comments. 279 if (Style.Language == FormatStyle::LK_TextProto && 280 !CommentsBeforeNextToken.empty()) { 281 addUnwrappedLine(); 282 } 283 flushComments(true); 284 addUnwrappedLine(); 285 } 286 287 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() { 288 do { 289 switch (FormatTok->Tok.getKind()) { 290 case tok::l_brace: 291 return; 292 default: 293 if (FormatTok->is(Keywords.kw_where)) { 294 addUnwrappedLine(); 295 nextToken(); 296 parseCSharpGenericTypeConstraint(); 297 break; 298 } 299 nextToken(); 300 break; 301 } 302 } while (!eof()); 303 } 304 305 void UnwrappedLineParser::parseCSharpAttribute() { 306 int UnpairedSquareBrackets = 1; 307 do { 308 switch (FormatTok->Tok.getKind()) { 309 case tok::r_square: 310 nextToken(); 311 --UnpairedSquareBrackets; 312 if (UnpairedSquareBrackets == 0) { 313 addUnwrappedLine(); 314 return; 315 } 316 break; 317 case tok::l_square: 318 ++UnpairedSquareBrackets; 319 nextToken(); 320 break; 321 default: 322 nextToken(); 323 break; 324 } 325 } while (!eof()); 326 } 327 328 bool UnwrappedLineParser::precededByCommentOrPPDirective() const { 329 if (!Lines.empty() && Lines.back().InPPDirective) 330 return true; 331 332 const FormatToken *Previous = Tokens->getPreviousToken(); 333 return Previous && Previous->is(tok::comment) && 334 (Previous->IsMultiline || Previous->NewlinesBefore > 0); 335 } 336 337 /// \brief Parses a level, that is ???. 338 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level 339 /// \param CanContainBracedList If the content can contain (at any level) a 340 /// braced list. 341 /// \param NextLBracesType The type for left brace found in this level. 342 /// \param IfKind The \p if statement kind in the level. 
343 /// \param IfLeftBrace The left brace of the \p if block in the level. 344 /// \returns true if a simple block of if/else/for/while, or false otherwise. 345 /// (A simple block has a single statement.) 346 bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace, 347 bool CanContainBracedList, 348 TokenType NextLBracesType, 349 IfStmtKind *IfKind, 350 FormatToken **IfLeftBrace) { 351 auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace 352 ? TT_BracedListLBrace 353 : TT_Unknown; 354 const bool IsPrecededByCommentOrPPDirective = 355 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective(); 356 FormatToken *IfLBrace = nullptr; 357 bool HasDoWhile = false; 358 bool HasLabel = false; 359 unsigned StatementCount = 0; 360 bool SwitchLabelEncountered = false; 361 362 do { 363 if (FormatTok->getType() == TT_AttributeMacro) { 364 nextToken(); 365 continue; 366 } 367 tok::TokenKind kind = FormatTok->Tok.getKind(); 368 if (FormatTok->getType() == TT_MacroBlockBegin) 369 kind = tok::l_brace; 370 else if (FormatTok->getType() == TT_MacroBlockEnd) 371 kind = tok::r_brace; 372 373 auto ParseDefault = [this, OpeningBrace, NextLevelLBracesType, IfKind, 374 &IfLBrace, &HasDoWhile, &HasLabel, &StatementCount] { 375 parseStructuralElement(!OpeningBrace, NextLevelLBracesType, IfKind, 376 &IfLBrace, HasDoWhile ? nullptr : &HasDoWhile, 377 HasLabel ? nullptr : &HasLabel); 378 ++StatementCount; 379 assert(StatementCount > 0 && "StatementCount overflow!"); 380 }; 381 382 switch (kind) { 383 case tok::comment: 384 nextToken(); 385 addUnwrappedLine(); 386 break; 387 case tok::l_brace: 388 if (NextLBracesType != TT_Unknown) { 389 FormatTok->setFinalizedType(NextLBracesType); 390 } else if (FormatTok->Previous && 391 FormatTok->Previous->ClosesRequiresClause) { 392 // We need the 'default' case here to correctly parse a function 393 // l_brace. 
394 ParseDefault(); 395 continue; 396 } 397 if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) && 398 tryToParseBracedList()) { 399 continue; 400 } 401 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 402 /*MunchSemi=*/true, /*KeepBraces=*/true, /*IfKind=*/nullptr, 403 /*UnindentWhitesmithsBraces=*/false, CanContainBracedList, 404 NextLBracesType); 405 ++StatementCount; 406 assert(StatementCount > 0 && "StatementCount overflow!"); 407 addUnwrappedLine(); 408 break; 409 case tok::r_brace: 410 if (OpeningBrace) { 411 if (!Style.RemoveBracesLLVM || Line->InPPDirective || 412 !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) { 413 return false; 414 } 415 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel || 416 HasDoWhile || IsPrecededByCommentOrPPDirective || 417 precededByCommentOrPPDirective()) { 418 return false; 419 } 420 const FormatToken *Next = Tokens->peekNextToken(); 421 if (Next->is(tok::comment) && Next->NewlinesBefore == 0) 422 return false; 423 if (IfLeftBrace) 424 *IfLeftBrace = IfLBrace; 425 return true; 426 } 427 nextToken(); 428 addUnwrappedLine(); 429 break; 430 case tok::kw_default: { 431 unsigned StoredPosition = Tokens->getPosition(); 432 FormatToken *Next; 433 do { 434 Next = Tokens->getNextToken(); 435 assert(Next); 436 } while (Next->is(tok::comment)); 437 FormatTok = Tokens->setPosition(StoredPosition); 438 if (Next->isNot(tok::colon)) { 439 // default not followed by ':' is not a case label; treat it like 440 // an identifier. 441 parseStructuralElement(); 442 break; 443 } 444 // Else, if it is 'default:', fall through to the case handling. 445 [[fallthrough]]; 446 } 447 case tok::kw_case: 448 if (Style.isProto() || Style.isVerilog() || 449 (Style.isJavaScript() && Line->MustBeDeclaration)) { 450 // Proto: there are no switch/case statements 451 // Verilog: Case labels don't have this word. We handle case 452 // labels including default in TokenAnnotator. 
453 // JavaScript: A 'case: string' style field declaration. 454 ParseDefault(); 455 break; 456 } 457 if (!SwitchLabelEncountered && 458 (Style.IndentCaseLabels || 459 (Line->InPPDirective && Line->Level == 1))) { 460 ++Line->Level; 461 } 462 SwitchLabelEncountered = true; 463 parseStructuralElement(); 464 break; 465 case tok::l_square: 466 if (Style.isCSharp()) { 467 nextToken(); 468 parseCSharpAttribute(); 469 break; 470 } 471 if (handleCppAttributes()) 472 break; 473 [[fallthrough]]; 474 default: 475 ParseDefault(); 476 break; 477 } 478 } while (!eof()); 479 480 return false; 481 } 482 483 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 484 // We'll parse forward through the tokens until we hit 485 // a closing brace or eof - note that getNextToken() will 486 // parse macros, so this will magically work inside macro 487 // definitions, too. 488 unsigned StoredPosition = Tokens->getPosition(); 489 FormatToken *Tok = FormatTok; 490 const FormatToken *PrevTok = Tok->Previous; 491 // Keep a stack of positions of lbrace tokens. We will 492 // update information about whether an lbrace starts a 493 // braced init list or a different block during the loop. 494 struct StackEntry { 495 FormatToken *Tok; 496 const FormatToken *PrevTok; 497 }; 498 SmallVector<StackEntry, 8> LBraceStack; 499 assert(Tok->is(tok::l_brace)); 500 do { 501 // Get next non-comment token. 502 FormatToken *NextTok; 503 do { 504 NextTok = Tokens->getNextToken(); 505 } while (NextTok->is(tok::comment)); 506 507 switch (Tok->Tok.getKind()) { 508 case tok::l_brace: 509 if (Style.isJavaScript() && PrevTok) { 510 if (PrevTok->isOneOf(tok::colon, tok::less)) { 511 // A ':' indicates this code is in a type, or a braced list 512 // following a label in an object literal ({a: {b: 1}}). 513 // A '<' could be an object used in a comparison, but that is nonsense 514 // code (can never return true), so more likely it is a generic type 515 // argument (`X<{a: string; b: number}>`). 
516 // The code below could be confused by semicolons between the 517 // individual members in a type member list, which would normally 518 // trigger BK_Block. In both cases, this must be parsed as an inline 519 // braced init. 520 Tok->setBlockKind(BK_BracedInit); 521 } else if (PrevTok->is(tok::r_paren)) { 522 // `) { }` can only occur in function or method declarations in JS. 523 Tok->setBlockKind(BK_Block); 524 } 525 } else { 526 Tok->setBlockKind(BK_Unknown); 527 } 528 LBraceStack.push_back({Tok, PrevTok}); 529 break; 530 case tok::r_brace: 531 if (LBraceStack.empty()) 532 break; 533 if (LBraceStack.back().Tok->is(BK_Unknown)) { 534 bool ProbablyBracedList = false; 535 if (Style.Language == FormatStyle::LK_Proto) { 536 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 537 } else { 538 // Skip NextTok over preprocessor lines, otherwise we may not 539 // properly diagnose the block as a braced intializer 540 // if the comma separator appears after the pp directive. 541 while (NextTok->is(tok::hash)) { 542 ScopedMacroState MacroState(*Line, Tokens, NextTok); 543 do { 544 NextTok = Tokens->getNextToken(); 545 } while (NextTok->isNot(tok::eof)); 546 } 547 548 // Using OriginalColumn to distinguish between ObjC methods and 549 // binary operators is a bit hacky. 550 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 551 NextTok->OriginalColumn == 0; 552 553 // Try to detect a braced list. Note that regardless how we mark inner 554 // braces here, we will overwrite the BlockKind later if we parse a 555 // braced list (where all blocks inside are by default braced lists), 556 // or when we explicitly detect blocks (for example while parsing 557 // lambdas). 558 559 // If we already marked the opening brace as braced list, the closing 560 // must also be part of it. 
561 ProbablyBracedList = LBraceStack.back().Tok->is(TT_BracedListLBrace); 562 563 ProbablyBracedList = ProbablyBracedList || 564 (Style.isJavaScript() && 565 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 566 Keywords.kw_as)); 567 ProbablyBracedList = ProbablyBracedList || 568 (Style.isCpp() && NextTok->is(tok::l_paren)); 569 570 // If there is a comma, semicolon or right paren after the closing 571 // brace, we assume this is a braced initializer list. 572 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 573 // braced list in JS. 574 ProbablyBracedList = 575 ProbablyBracedList || 576 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 577 tok::r_paren, tok::r_square, tok::ellipsis); 578 579 // Distinguish between braced list in a constructor initializer list 580 // followed by constructor body, or just adjacent blocks. 581 ProbablyBracedList = 582 ProbablyBracedList || 583 (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok && 584 LBraceStack.back().PrevTok->isOneOf(tok::identifier, 585 tok::greater)); 586 587 ProbablyBracedList = 588 ProbablyBracedList || 589 (NextTok->is(tok::identifier) && 590 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)); 591 592 ProbablyBracedList = ProbablyBracedList || 593 (NextTok->is(tok::semi) && 594 (!ExpectClassBody || LBraceStack.size() != 1)); 595 596 ProbablyBracedList = 597 ProbablyBracedList || 598 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 599 600 if (!Style.isCSharp() && NextTok->is(tok::l_square)) { 601 // We can have an array subscript after a braced init 602 // list, but C++11 attributes are expected after blocks. 
603 NextTok = Tokens->getNextToken(); 604 ProbablyBracedList = NextTok->isNot(tok::l_square); 605 } 606 } 607 if (ProbablyBracedList) { 608 Tok->setBlockKind(BK_BracedInit); 609 LBraceStack.back().Tok->setBlockKind(BK_BracedInit); 610 } else { 611 Tok->setBlockKind(BK_Block); 612 LBraceStack.back().Tok->setBlockKind(BK_Block); 613 } 614 } 615 LBraceStack.pop_back(); 616 break; 617 case tok::identifier: 618 if (!Tok->is(TT_StatementMacro)) 619 break; 620 [[fallthrough]]; 621 case tok::at: 622 case tok::semi: 623 case tok::kw_if: 624 case tok::kw_while: 625 case tok::kw_for: 626 case tok::kw_switch: 627 case tok::kw_try: 628 case tok::kw___try: 629 if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown)) 630 LBraceStack.back().Tok->setBlockKind(BK_Block); 631 break; 632 default: 633 break; 634 } 635 PrevTok = Tok; 636 Tok = NextTok; 637 } while (Tok->isNot(tok::eof) && !LBraceStack.empty()); 638 639 // Assume other blocks for all unclosed opening braces. 640 for (const auto &Entry : LBraceStack) 641 if (Entry.Tok->is(BK_Unknown)) 642 Entry.Tok->setBlockKind(BK_Block); 643 644 FormatTok = Tokens->setPosition(StoredPosition); 645 } 646 647 template <class T> 648 static inline void hash_combine(std::size_t &seed, const T &v) { 649 std::hash<T> hasher; 650 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 651 } 652 653 size_t UnwrappedLineParser::computePPHash() const { 654 size_t h = 0; 655 for (const auto &i : PPStack) { 656 hash_combine(h, size_t(i.Kind)); 657 hash_combine(h, i.Line); 658 } 659 return h; 660 } 661 662 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace 663 // is not null, subtracts its length (plus the preceding space) when computing 664 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before 665 // running the token annotator on it so that we can restore them afterward. 
666 bool UnwrappedLineParser::mightFitOnOneLine( 667 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const { 668 const auto ColumnLimit = Style.ColumnLimit; 669 if (ColumnLimit == 0) 670 return true; 671 672 auto &Tokens = ParsedLine.Tokens; 673 assert(!Tokens.empty()); 674 675 const auto *LastToken = Tokens.back().Tok; 676 assert(LastToken); 677 678 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size()); 679 680 int Index = 0; 681 for (const auto &Token : Tokens) { 682 assert(Token.Tok); 683 auto &SavedToken = SavedTokens[Index++]; 684 SavedToken.Tok = new FormatToken; 685 SavedToken.Tok->copyFrom(*Token.Tok); 686 SavedToken.Children = std::move(Token.Children); 687 } 688 689 AnnotatedLine Line(ParsedLine); 690 assert(Line.Last == LastToken); 691 692 TokenAnnotator Annotator(Style, Keywords); 693 Annotator.annotate(Line); 694 Annotator.calculateFormattingInformation(Line); 695 696 auto Length = LastToken->TotalLength; 697 if (OpeningBrace) { 698 assert(OpeningBrace != Tokens.front().Tok); 699 if (auto Prev = OpeningBrace->Previous; 700 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) { 701 Length -= ColumnLimit; 702 } 703 Length -= OpeningBrace->TokenText.size() + 1; 704 } 705 706 if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) { 707 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace)); 708 Length -= FirstToken->TokenText.size() + 1; 709 } 710 711 Index = 0; 712 for (auto &Token : Tokens) { 713 const auto &SavedToken = SavedTokens[Index++]; 714 Token.Tok->copyFrom(*SavedToken.Tok); 715 Token.Children = std::move(SavedToken.Children); 716 delete SavedToken.Tok; 717 } 718 719 // If these change PPLevel needs to be used for get correct indentation. 
720 assert(!Line.InMacroBody); 721 assert(!Line.InPPDirective); 722 return Line.Level * Style.IndentWidth + Length <= ColumnLimit; 723 } 724 725 FormatToken *UnwrappedLineParser::parseBlock( 726 bool MustBeDeclaration, unsigned AddLevels, bool MunchSemi, bool KeepBraces, 727 IfStmtKind *IfKind, bool UnindentWhitesmithsBraces, 728 bool CanContainBracedList, TokenType NextLBracesType) { 729 auto HandleVerilogBlockLabel = [this]() { 730 // ":" name 731 if (Style.isVerilog() && FormatTok->is(tok::colon)) { 732 nextToken(); 733 if (Keywords.isVerilogIdentifier(*FormatTok)) 734 nextToken(); 735 } 736 }; 737 738 // Whether this is a Verilog-specific block that has a special header like a 739 // module. 740 const bool VerilogHierarchy = 741 Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok); 742 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) || 743 (Style.isVerilog() && 744 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) && 745 "'{' or macro block token expected"); 746 FormatToken *Tok = FormatTok; 747 const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment); 748 auto Index = CurrentLines->size(); 749 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 750 FormatTok->setBlockKind(BK_Block); 751 752 // For Whitesmiths mode, jump to the next level prior to skipping over the 753 // braces. 754 if (!VerilogHierarchy && AddLevels > 0 && 755 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 756 ++Line->Level; 757 } 758 759 size_t PPStartHash = computePPHash(); 760 761 const unsigned InitialLevel = Line->Level; 762 if (VerilogHierarchy) { 763 AddLevels += parseVerilogHierarchyHeader(); 764 } else { 765 nextToken(/*LevelDifference=*/AddLevels); 766 HandleVerilogBlockLabel(); 767 } 768 769 // Bail out if there are too many levels. Otherwise, the stack might overflow. 
770 if (Line->Level > 300) 771 return nullptr; 772 773 if (MacroBlock && FormatTok->is(tok::l_paren)) 774 parseParens(); 775 776 size_t NbPreprocessorDirectives = 777 !parsingPPDirective() ? PreprocessorDirectives.size() : 0; 778 addUnwrappedLine(); 779 size_t OpeningLineIndex = 780 CurrentLines->empty() 781 ? (UnwrappedLine::kInvalidIndex) 782 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 783 784 // Whitesmiths is weird here. The brace needs to be indented for the namespace 785 // block, but the block itself may not be indented depending on the style 786 // settings. This allows the format to back up one level in those cases. 787 if (UnindentWhitesmithsBraces) 788 --Line->Level; 789 790 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 791 MustBeDeclaration); 792 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths) 793 Line->Level += AddLevels; 794 795 FormatToken *IfLBrace = nullptr; 796 const bool SimpleBlock = 797 parseLevel(Tok, CanContainBracedList, NextLBracesType, IfKind, &IfLBrace); 798 799 if (eof()) 800 return IfLBrace; 801 802 if (MacroBlock ? 
!FormatTok->is(TT_MacroBlockEnd) 803 : !FormatTok->is(tok::r_brace)) { 804 Line->Level = InitialLevel; 805 FormatTok->setBlockKind(BK_Block); 806 return IfLBrace; 807 } 808 809 const bool IsFunctionRBrace = 810 FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace); 811 812 auto RemoveBraces = [=]() mutable { 813 if (!SimpleBlock) 814 return false; 815 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)); 816 assert(FormatTok->is(tok::r_brace)); 817 const bool WrappedOpeningBrace = !Tok->Previous; 818 if (WrappedOpeningBrace && FollowedByComment) 819 return false; 820 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional; 821 if (KeepBraces && !HasRequiredIfBraces) 822 return false; 823 if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) { 824 const FormatToken *Previous = Tokens->getPreviousToken(); 825 assert(Previous); 826 if (Previous->is(tok::r_brace) && !Previous->Optional) 827 return false; 828 } 829 assert(!CurrentLines->empty()); 830 auto &LastLine = CurrentLines->back(); 831 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine)) 832 return false; 833 if (Tok->is(TT_ElseLBrace)) 834 return true; 835 if (WrappedOpeningBrace) { 836 assert(Index > 0); 837 --Index; // The line above the wrapped l_brace. 838 Tok = nullptr; 839 } 840 return mightFitOnOneLine((*CurrentLines)[Index], Tok); 841 }; 842 if (RemoveBraces()) { 843 Tok->MatchingParen = FormatTok; 844 FormatTok->MatchingParen = Tok; 845 } 846 847 size_t PPEndHash = computePPHash(); 848 849 // Munch the closing brace. 850 nextToken(/*LevelDifference=*/-AddLevels); 851 852 // When this is a function block and there is an unnecessary semicolon 853 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of 854 // it later). 
855 if (Style.RemoveSemicolon && IsFunctionRBrace) { 856 while (FormatTok->is(tok::semi)) { 857 FormatTok->Optional = true; 858 nextToken(); 859 } 860 } 861 862 HandleVerilogBlockLabel(); 863 864 if (MacroBlock && FormatTok->is(tok::l_paren)) 865 parseParens(); 866 867 Line->Level = InitialLevel; 868 869 if (FormatTok->is(tok::kw_noexcept)) { 870 // A noexcept in a requires expression. 871 nextToken(); 872 } 873 874 if (FormatTok->is(tok::arrow)) { 875 // Following the } or noexcept we can find a trailing return type arrow 876 // as part of an implicit conversion constraint. 877 nextToken(); 878 parseStructuralElement(); 879 } 880 881 if (MunchSemi && FormatTok->is(tok::semi)) 882 nextToken(); 883 884 if (PPStartHash == PPEndHash) { 885 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 886 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 887 // Update the opening line to add the forward reference as well 888 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 889 CurrentLines->size() - 1; 890 } 891 } 892 893 return IfLBrace; 894 } 895 896 static bool isGoogScope(const UnwrappedLine &Line) { 897 // FIXME: Closure-library specific stuff should not be hard-coded but be 898 // configurable. 899 if (Line.Tokens.size() < 4) 900 return false; 901 auto I = Line.Tokens.begin(); 902 if (I->Tok->TokenText != "goog") 903 return false; 904 ++I; 905 if (I->Tok->isNot(tok::period)) 906 return false; 907 ++I; 908 if (I->Tok->TokenText != "scope") 909 return false; 910 ++I; 911 return I->Tok->is(tok::l_paren); 912 } 913 914 static bool isIIFE(const UnwrappedLine &Line, 915 const AdditionalKeywords &Keywords) { 916 // Look for the start of an immediately invoked anonymous function. 917 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 918 // This is commonly done in JavaScript to create a new, anonymous scope. 919 // Example: (function() { ... 
})() 920 if (Line.Tokens.size() < 3) 921 return false; 922 auto I = Line.Tokens.begin(); 923 if (I->Tok->isNot(tok::l_paren)) 924 return false; 925 ++I; 926 if (I->Tok->isNot(Keywords.kw_function)) 927 return false; 928 ++I; 929 return I->Tok->is(tok::l_paren); 930 } 931 932 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 933 const FormatToken &InitialToken) { 934 tok::TokenKind Kind = InitialToken.Tok.getKind(); 935 if (InitialToken.is(TT_NamespaceMacro)) 936 Kind = tok::kw_namespace; 937 938 switch (Kind) { 939 case tok::kw_namespace: 940 return Style.BraceWrapping.AfterNamespace; 941 case tok::kw_class: 942 return Style.BraceWrapping.AfterClass; 943 case tok::kw_union: 944 return Style.BraceWrapping.AfterUnion; 945 case tok::kw_struct: 946 return Style.BraceWrapping.AfterStruct; 947 case tok::kw_enum: 948 return Style.BraceWrapping.AfterEnum; 949 default: 950 return false; 951 } 952 } 953 954 void UnwrappedLineParser::parseChildBlock( 955 bool CanContainBracedList, clang::format::TokenType NextLBracesType) { 956 assert(FormatTok->is(tok::l_brace)); 957 FormatTok->setBlockKind(BK_Block); 958 const FormatToken *OpeningBrace = FormatTok; 959 nextToken(); 960 { 961 bool SkipIndent = (Style.isJavaScript() && 962 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 963 ScopedLineState LineState(*this); 964 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 965 /*MustBeDeclaration=*/false); 966 Line->Level += SkipIndent ? 0 : 1; 967 parseLevel(OpeningBrace, CanContainBracedList, NextLBracesType); 968 flushComments(isOnNewLine(*FormatTok)); 969 Line->Level -= SkipIndent ? 
0 : 1; 970 } 971 nextToken(); 972 } 973 974 void UnwrappedLineParser::parsePPDirective() { 975 assert(FormatTok->is(tok::hash) && "'#' expected"); 976 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 977 978 nextToken(); 979 980 if (!FormatTok->Tok.getIdentifierInfo()) { 981 parsePPUnknown(); 982 return; 983 } 984 985 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 986 case tok::pp_define: 987 parsePPDefine(); 988 return; 989 case tok::pp_if: 990 parsePPIf(/*IfDef=*/false); 991 break; 992 case tok::pp_ifdef: 993 case tok::pp_ifndef: 994 parsePPIf(/*IfDef=*/true); 995 break; 996 case tok::pp_else: 997 case tok::pp_elifdef: 998 case tok::pp_elifndef: 999 case tok::pp_elif: 1000 parsePPElse(); 1001 break; 1002 case tok::pp_endif: 1003 parsePPEndIf(); 1004 break; 1005 case tok::pp_pragma: 1006 parsePPPragma(); 1007 break; 1008 default: 1009 parsePPUnknown(); 1010 break; 1011 } 1012 } 1013 1014 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 1015 size_t Line = CurrentLines->size(); 1016 if (CurrentLines == &PreprocessorDirectives) 1017 Line += Lines.size(); 1018 1019 if (Unreachable || 1020 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) { 1021 PPStack.push_back({PP_Unreachable, Line}); 1022 } else { 1023 PPStack.push_back({PP_Conditional, Line}); 1024 } 1025 } 1026 1027 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 1028 ++PPBranchLevel; 1029 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 1030 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 1031 PPLevelBranchIndex.push_back(0); 1032 PPLevelBranchCount.push_back(0); 1033 } 1034 PPChainBranchIndex.push(Unreachable ? 
-1 : 0); 1035 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 1036 conditionalCompilationCondition(Unreachable || Skip); 1037 } 1038 1039 void UnwrappedLineParser::conditionalCompilationAlternative() { 1040 if (!PPStack.empty()) 1041 PPStack.pop_back(); 1042 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 1043 if (!PPChainBranchIndex.empty()) 1044 ++PPChainBranchIndex.top(); 1045 conditionalCompilationCondition( 1046 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 1047 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 1048 } 1049 1050 void UnwrappedLineParser::conditionalCompilationEnd() { 1051 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 1052 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 1053 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) 1054 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 1055 } 1056 // Guard against #endif's without #if. 1057 if (PPBranchLevel > -1) 1058 --PPBranchLevel; 1059 if (!PPChainBranchIndex.empty()) 1060 PPChainBranchIndex.pop(); 1061 if (!PPStack.empty()) 1062 PPStack.pop_back(); 1063 } 1064 1065 void UnwrappedLineParser::parsePPIf(bool IfDef) { 1066 bool IfNDef = FormatTok->is(tok::pp_ifndef); 1067 nextToken(); 1068 bool Unreachable = false; 1069 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 1070 Unreachable = true; 1071 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 1072 Unreachable = true; 1073 conditionalCompilationStart(Unreachable); 1074 FormatToken *IfCondition = FormatTok; 1075 // If there's a #ifndef on the first line, and the only lines before it are 1076 // comments, it could be an include guard. 
1077 bool MaybeIncludeGuard = IfNDef; 1078 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1079 for (auto &Line : Lines) { 1080 if (!Line.Tokens.front().Tok->is(tok::comment)) { 1081 MaybeIncludeGuard = false; 1082 IncludeGuard = IG_Rejected; 1083 break; 1084 } 1085 } 1086 } 1087 --PPBranchLevel; 1088 parsePPUnknown(); 1089 ++PPBranchLevel; 1090 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1091 IncludeGuard = IG_IfNdefed; 1092 IncludeGuardToken = IfCondition; 1093 } 1094 } 1095 1096 void UnwrappedLineParser::parsePPElse() { 1097 // If a potential include guard has an #else, it's not an include guard. 1098 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 1099 IncludeGuard = IG_Rejected; 1100 // Don't crash when there is an #else without an #if. 1101 assert(PPBranchLevel >= -1); 1102 if (PPBranchLevel == -1) 1103 conditionalCompilationStart(/*Unreachable=*/true); 1104 conditionalCompilationAlternative(); 1105 --PPBranchLevel; 1106 parsePPUnknown(); 1107 ++PPBranchLevel; 1108 } 1109 1110 void UnwrappedLineParser::parsePPEndIf() { 1111 conditionalCompilationEnd(); 1112 parsePPUnknown(); 1113 // If the #endif of a potential include guard is the last thing in the file, 1114 // then we found an include guard. 
1115 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && 1116 Style.IndentPPDirectives != FormatStyle::PPDIS_None) { 1117 IncludeGuard = IG_Found; 1118 } 1119 } 1120 1121 void UnwrappedLineParser::parsePPDefine() { 1122 nextToken(); 1123 1124 if (!FormatTok->Tok.getIdentifierInfo()) { 1125 IncludeGuard = IG_Rejected; 1126 IncludeGuardToken = nullptr; 1127 parsePPUnknown(); 1128 return; 1129 } 1130 1131 if (IncludeGuard == IG_IfNdefed && 1132 IncludeGuardToken->TokenText == FormatTok->TokenText) { 1133 IncludeGuard = IG_Defined; 1134 IncludeGuardToken = nullptr; 1135 for (auto &Line : Lines) { 1136 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 1137 IncludeGuard = IG_Rejected; 1138 break; 1139 } 1140 } 1141 } 1142 1143 // In the context of a define, even keywords should be treated as normal 1144 // identifiers. Setting the kind to identifier is not enough, because we need 1145 // to treat additional keywords like __except as well, which are already 1146 // identifiers. Setting the identifier info to null interferes with include 1147 // guard processing above, and changes preprocessing nesting. 1148 FormatTok->Tok.setKind(tok::identifier); 1149 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define); 1150 nextToken(); 1151 if (FormatTok->Tok.getKind() == tok::l_paren && 1152 !FormatTok->hasWhitespaceBefore()) { 1153 parseParens(); 1154 } 1155 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1156 Line->Level += PPBranchLevel + 1; 1157 addUnwrappedLine(); 1158 ++Line->Level; 1159 1160 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1); 1161 assert((int)Line->PPLevel >= 0); 1162 Line->InMacroBody = true; 1163 1164 // Errors during a preprocessor directive can only affect the layout of the 1165 // preprocessor directive, and thus we ignore them. 
An alternative approach 1166 // would be to use the same approach we use on the file level (no 1167 // re-indentation if there was a structural error) within the macro 1168 // definition. 1169 parseFile(); 1170 } 1171 1172 void UnwrappedLineParser::parsePPPragma() { 1173 Line->InPragmaDirective = true; 1174 parsePPUnknown(); 1175 } 1176 1177 void UnwrappedLineParser::parsePPUnknown() { 1178 do { 1179 nextToken(); 1180 } while (!eof()); 1181 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1182 Line->Level += PPBranchLevel + 1; 1183 addUnwrappedLine(); 1184 } 1185 1186 // Here we exclude certain tokens that are not usually the first token in an 1187 // unwrapped line. This is used in attempt to distinguish macro calls without 1188 // trailing semicolons from other constructs split to several lines. 1189 static bool tokenCanStartNewLine(const FormatToken &Tok) { 1190 // Semicolon can be a null-statement, l_square can be a start of a macro or 1191 // a C++11 attribute, but this doesn't seem to be common. 1192 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 1193 Tok.isNot(TT_AttributeSquare) && 1194 // Tokens that can only be used as binary operators and a part of 1195 // overloaded operator names. 
1196 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 1197 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 1198 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 1199 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 1200 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 1201 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 1202 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 1203 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 1204 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 1205 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 1206 Tok.isNot(tok::lesslessequal) && 1207 // Colon is used in labels, base class lists, initializer lists, 1208 // range-based for loops, ternary operator, but should never be the 1209 // first token in an unwrapped line. 1210 Tok.isNot(tok::colon) && 1211 // 'noexcept' is a trailing annotation. 1212 Tok.isNot(tok::kw_noexcept); 1213 } 1214 1215 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 1216 const FormatToken *FormatTok) { 1217 // FIXME: This returns true for C/C++ keywords like 'struct'. 
1218 return FormatTok->is(tok::identifier) && 1219 (!FormatTok->Tok.getIdentifierInfo() || 1220 !FormatTok->isOneOf( 1221 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 1222 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 1223 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 1224 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 1225 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 1226 Keywords.kw_instanceof, Keywords.kw_interface, 1227 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from)); 1228 } 1229 1230 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 1231 const FormatToken *FormatTok) { 1232 return FormatTok->Tok.isLiteral() || 1233 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 1234 mustBeJSIdent(Keywords, FormatTok); 1235 } 1236 1237 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 1238 // when encountered after a value (see mustBeJSIdentOrValue). 1239 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 1240 const FormatToken *FormatTok) { 1241 return FormatTok->isOneOf( 1242 tok::kw_return, Keywords.kw_yield, 1243 // conditionals 1244 tok::kw_if, tok::kw_else, 1245 // loops 1246 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 1247 // switch/case 1248 tok::kw_switch, tok::kw_case, 1249 // exceptions 1250 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 1251 // declaration 1252 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 1253 Keywords.kw_async, Keywords.kw_function, 1254 // import/export 1255 Keywords.kw_import, tok::kw_export); 1256 } 1257 1258 // Checks whether a token is a type in K&R C (aka C78). 
static bool isC78Type(const FormatToken &Tok) {
  // Plain identifiers are accepted alongside the builtin type keywords
  // (presumably to cover typedef names -- confirm).
  return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
                     tok::kw_unsigned, tok::kw_float, tok::kw_double,
                     tok::identifier);
}

// This function checks whether a token starts the first parameter declaration
// in a K&R C (aka C78) function definition, e.g.:
//   int f(a, b)
//   short a, b;
//   {
//      return a + b;
//   }
//
// \p Tok is the candidate first token of the parameter declaration, \p Next
// the token following it, and \p FuncName the candidate function name. All
// three must be non-null.
static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
                               const FormatToken *FuncName) {
  assert(Tok);
  assert(Next);
  assert(FuncName);

  if (FuncName->isNot(tok::identifier))
    return false;

  // The function name must be preceded by a '*' or a C78 type.
  const FormatToken *Prev = FuncName->Previous;
  if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
    return false;

  // The declaration must start with a C78 type, `register`, `struct` or
  // `union`.
  if (!isC78Type(*Tok) &&
      !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
    return false;
  }

  if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
    return false;

  // Walking backwards from the declaration: the token before it must be ')',
  // preceded by an identifier, preceded in turn by '(' or ','.
  Tok = Tok->Previous;
  if (!Tok || Tok->isNot(tok::r_paren))
    return false;

  Tok = Tok->Previous;
  if (!Tok || Tok->isNot(tok::identifier))
    return false;

  return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
}

// Parses a module `import` declaration. The current token must be `import`.
//
// Returns false, without consuming anything, if the next non-comment token
// has no identifier info and is none of ':', '<' or a string literal.
// Otherwise consumes tokens up to and including the terminating ';' (or until
// EOF), adds an unwrapped line and returns true.
bool UnwrappedLineParser::parseModuleImport() {
  assert(FormatTok->is(Keywords.kw_import) && "'import' expected");

  if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
      !Token->Tok.getIdentifierInfo() &&
      !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
    return false;
  }

  nextToken();
  while (!eof()) {
    if (FormatTok->is(tok::colon)) {
      FormatTok->setFinalizedType(TT_ModulePartitionColon);
    }
    // Handle import <foo/bar.h> as we would an include statement.
    else if (FormatTok->is(tok::less)) {
      nextToken();
      while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
        // Mark tokens up to the trailing line comments as implicit string
        // literals.
        if (FormatTok->isNot(tok::comment) &&
            !FormatTok->TokenText.startswith("//")) {
          FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
        }
        nextToken();
      }
    }
    if (FormatTok->is(tok::semi)) {
      nextToken();
      break;
    }
    nextToken();
  }

  addUnwrappedLine();
  return true;
}

// readTokenWithJavaScriptASI reads the next token and terminates the current
// line if JavaScript Automatic Semicolon Insertion must
// happen between the current token and the next token.
//
// This method is conservative - it cannot cover all edge cases of JavaScript,
// but only aims to correctly handle certain well known cases. It *must not*
// return true in speculative cases.
void UnwrappedLineParser::readTokenWithJavaScriptASI() {
  FormatToken *Previous = FormatTok;
  readToken();
  FormatToken *Next = FormatTok;

  // If comments were collected before the next token, the first of them
  // decides whether a line break follows the previous token.
  bool IsOnSameLine =
      CommentsBeforeNextToken.empty()
          ? Next->NewlinesBefore == 0
          : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
  if (IsOnSameLine)
    return;

  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
  bool PreviousStartsTemplateExpr =
      Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
    // If the line contains an '@' sign, the previous token might be an
    // annotation, which can precede another identifier/value.
    bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
      return LineNode.Tok->is(tok::at);
    });
    if (HasAt)
      return;
  }
  // A value followed by '!' on the next line: end the line before the '!'.
  if (Next->is(tok::exclaim) && PreviousMustBeValue)
    return addUnwrappedLine();
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  bool NextEndsTemplateExpr =
      Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
  // A value on the next line, following a value or one of ']', ')', '++',
  // '--', and not at the edge of a template string expression.
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
      (PreviousMustBeValue ||
       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
                         tok::minusminus))) {
    return addUnwrappedLine();
  }
  // A value or ')' followed by a token that starts a declaration/statement.
  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
      isJSDeclOrStmt(Keywords, Next)) {
    return addUnwrappedLine();
  }
}

// Parses one structural element starting at the current token, adding
// unwrapped lines as constructs are completed.
//
// The first switch handles tokens that are only special at the start of an
// element; the loop after it consumes the rest of the element token by token.
//
// \param IsTopLevel       Required (together with C++) for the K&R C
//                         parameter declaration check after a ')'.
// \param NextLBracesType  If not TT_Unknown, the type given to a '{' met in
//                         the main loop.
// \param IfKind           Forwarded to parseIfThenElse().
// \param IfLeftBrace      If non-null, receives the token returned by
//                         parseIfThenElse() for an `if` statement.
// \param HasDoWhile       If non-null, set to true after a `do` statement is
//                         parsed.
// \param HasLabel         If non-null, set to true after a goto label is
//                         parsed.
void UnwrappedLineParser::parseStructuralElement(
    bool IsTopLevel, TokenType NextLBracesType, IfStmtKind *IfKind,
    FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
  // TableGen: an include directive, optionally followed by a string literal,
  // forms a line of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }

  if (Style.isVerilog()) {
    if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
      parseForOrWhileLoop(/*HasParens=*/false);
      return;
    }
    if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
                           Keywords.kw_assume, Keywords.kw_cover)) {
      parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
      return;
    }

    // Skip things that can exist before keywords like 'if' and 'case'.
    while (true) {
      if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
                             Keywords.kw_unique0)) {
        nextToken();
      } else if (FormatTok->is(tok::l_paren) &&
                 Tokens->peekNextToken()->is(tok::star)) {
        parseParens();
      } else {
        break;
      }
    }
  }

  // Tokens that only make sense at the beginning of a line.
  // A `return` inside this switch means the element has been handled
  // completely; a `break` continues with the token loop below.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setFinalizedType(TT_InlineASMBrace);
      nextToken();
      // Tokens inside the asm block are marked as finalized up to the
      // closing brace.
      while (FormatTok && !eof()) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setFinalizedType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# the keyword is just consumed; otherwise it
    // is parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
        Style.isCSharp()) {
      nextToken();
    } else {
      parseAccessSpecifier();
    }
    return;
  case tok::kw_if: {
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    FormatToken *Tok = parseIfThenElse(IfKind);
    if (IfLeftBrace)
      *IfLeftBrace = Tok;
    return;
  }
  case tok::kw_for:
  case tok::kw_while:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    parseDoWhile();
    if (HasDoWhile)
      *HasDoWhile = true;
    return;
  case tok::kw_switch:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'switch: string' field declaration.
      break;
    }
    parseSwitch();
    return;
  case tok::kw_default:
    // In Verilog default along with other labels are handled in the next loop.
    if (Style.isVerilog())
      break;
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'default: string' field declaration.
      break;
    }
    nextToken();
    if (FormatTok->is(tok::colon)) {
      FormatTok->setFinalizedType(TT_CaseLabelColon);
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    // Proto: there are no switch/case statements.
    if (Style.isProto()) {
      nextToken();
      return;
    }
    if (Style.isVerilog()) {
      parseBlock();
      addUnwrappedLine();
      return;
    }
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'case: string' field declaration.
      nextToken();
      break;
    }
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    if (Style.isVerilog()) {
      // In Verilog and extern module declaration looks like a start of module.
      // But there is no body and endmodule. So we handle it separately.
      if (Keywords.isVerilogHierarchy(*FormatTok)) {
        parseVerilogHierarchyHeader();
        return;
      }
    } else if (FormatTok->is(tok::string_literal)) {
      // `extern "..."`, possibly followed by a braced block.
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock)
          addUnwrappedLine();
        // Either we indent or for backwards compatibility we follow the
        // AfterExternBlock style.
        unsigned AddLevels =
            (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
                    (Style.BraceWrapping.AfterExternBlock &&
                     Style.IndentExternBlock ==
                         FormatStyle::IEBS_AfterExternBlock)
                ? 1u
                : 0u;
        parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.isJavaScript()) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (Style.isCpp()) {
      // `export namespace ...` and `export import ...`.
      nextToken();
      if (FormatTok->is(tok::kw_namespace)) {
        parseNamespace();
        return;
      }
      if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
        return;
    }
    break;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.isJavaScript()) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        // Proto: `import`, an optional `public`, a string literal and an
        // optional ';'.
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      if (Style.isCpp() && parseModuleImport())
        return;
    }
    // `signals:` / `slots:` style section labels (and their Q_ variants).
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Main loop: consume the element one token (or nested construct) at a time
  // until a case returns or the end of input is reached.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      // Otherwise dispatch on the Objective-C keyword following the '@'.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always) {
            addUnwrappedLine();
          }
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->is(tok::l_paren)) {
          // Skip synchronization object
          parseParens();
        }
        if (FormatTok->is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always) {
            addUnwrappedLine();
          }
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_requires: {
      if (Style.isCpp()) {
        bool ParsedClause = parseRequires();
        if (ParsedClause)
          return;
      } else {
        nextToken();
      }
      break;
    }
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies to C++ and Verilog.
      if (!Style.isCpp() && !Style.isVerilog()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // `typedef` followed by one of the NS_/CF_ enum macros is parsed as an
      // enum.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM)) {
        parseEnum();
      }
      break;
    case tok::kw_class:
      if (Style.isVerilog()) {
        parseBlock();
        addUnwrappedLine();
        return;
      }
      [[fallthrough]];
    case tok::kw_struct:
    case tok::kw_union:
      if (parseStructLike())
        return;
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class)) {
        nextToken();
      }
      if (Style.isJavaScript() && FormatTok &&
          FormatTok->Tok.getIdentifierInfo()) {
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      }
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The closing brace is not consumed here.
      addUnwrappedLine();
      return;
    case tok::l_paren: {
      parseParens();
      // Break the unwrapped line if a K&R C function definition has a parameter
      // declaration.
      if (!IsTopLevel || !Style.isCpp() || !Previous || eof())
        break;
      if (isC78ParameterDecl(FormatTok,
                             Tokens->peekNextToken(/*SkipComment=*/true),
                             Previous)) {
        addUnwrappedLine();
        return;
      }
      break;
    }
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      nextToken();
      // Block return type.
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier()) {
        nextToken();
        // Return types: pointers are ok too.
        while (FormatTok->is(tok::star))
          nextToken();
      }
      // Block argument list.
      if (FormatTok->is(tok::l_paren))
        parseParens();
      // Block body.
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (NextLBracesType != TT_Unknown)
        FormatTok->setFinalizedType(NextLBracesType);
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.Language == FormatStyle::LK_Java &&
            Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
          // If necessary, we could set the type to something different than
          // TT_FunctionLBrace.
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always) {
            addUnwrappedLine();
          }
        } else if (Style.BraceWrapping.AfterFunction) {
          addUnwrappedLine();
        }
        FormatTok->setFinalizedType(TT_FunctionLBrace);
        parseBlock();
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.isJavaScript()) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very like to be an actual
          // interface declaration.
          // Look one token ahead, then restore the stream position.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (!mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (Style.isVerilog()) {
        if (FormatTok->is(Keywords.kw_table)) {
          parseVerilogTable();
          return;
        }
        if (Keywords.isVerilogBegin(*FormatTok) ||
            Keywords.isVerilogHierarchy(*FormatTok)) {
          parseBlock();
          addUnwrappedLine();
          return;
        }
      }

      if (!Style.isCpp() && FormatTok->is(Keywords.kw_interface)) {
        if (parseStructLike())
          return;
        break;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;

      FormatToken *PreviousToken = FormatTok;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.isJavaScript())
        break;

      // True if the line holds exactly one token, not counting leading
      // comments and, in Verilog, leading '#' tokens.
      auto OneTokenSoFar = [&]() {
        auto I = Line->Tokens.begin(), E = Line->Tokens.end();
        while (I != E && I->Tok->is(tok::comment))
          ++I;
        while (I != E && Style.isVerilog() && I->Tok->is(tok::hash))
          ++I;
        return I != E && (++I == E);
      };
      if (OneTokenSoFar()) {
        // In Verilog labels can be any expression, so we don't do them here.
        if (!Style.isVerilog() && FormatTok->is(tok::colon) &&
            !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          FormatTok->setFinalizedType(TT_GotoLabelColon);
          parseLabel(!Style.IndentGotoLabels);
          if (HasLabel)
            *HasLabel = true;
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Only names that are entirely upper case, and either at least five
        // characters long or followed by parentheses, are treated as macros.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          if (PreviousToken->isNot(TT_UntouchableMacroFunc))
            PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      if ((Style.isJavaScript() || Style.isCSharp()) &&
          FormatTok->is(TT_FatArrow)) {
        tryToParseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->setBlockKind(BK_BracedInit);
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    case tok::kw_case:
      // Proto: there are no switch/case statements.
      if (Style.isProto()) {
        nextToken();
        return;
      }
      // In Verilog switch is called case.
      if (Style.isVerilog()) {
        parseBlock();
        addUnwrappedLine();
        return;
      }
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // 'case: string' field declaration.
        nextToken();
        break;
      }
      parseCaseLabel();
      break;
    case tok::kw_default:
      nextToken();
      if (Style.isVerilog()) {
        if (FormatTok->is(tok::colon)) {
          // The label will be handled in the next iteration.
          break;
        }
        if (FormatTok->is(Keywords.kw_clocking)) {
          // A default clocking block.
          parseBlock();
          addUnwrappedLine();
          return;
        }
        parseVerilogCaseLabel();
        return;
      }
      break;
    case tok::colon:
      nextToken();
      if (Style.isVerilog()) {
        parseVerilogCaseLabel();
        return;
      }
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

// Tries to parse a C# property accessor block starting at the current '{'.
//
// Returns false immediately for languages other than C#, or when the token
// before the '{' is not an identifier. Also returns false, with the token
// stream position restored, if none of `get`, `init` or `set` is found by the
// lookahead below.
bool UnwrappedLineParser::tryToParsePropertyAccessor() {
  assert(FormatTok->is(tok::l_brace));
  if (!Style.isCSharp())
    return false;
  // See if it's a property accessor.
  if (FormatTok->Previous->isNot(tok::identifier))
    return false;

  // See if we are inside a property accessor.
  //
  // Record the current tokenPosition so that we can advance and
  // reset the current token. `Next` is not set yet so we need
  // another way to advance along the token stream.
  unsigned int StoredPosition = Tokens->getPosition();
  FormatToken *Tok = Tokens->getNextToken();

  // A trivial property accessor is of the form:
  // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
  // Track these as they do not require line breaks to be introduced.
  bool HasSpecialAccessor = false;
  bool IsTrivialPropertyAccessor = true;
  // Lookahead only: this loop advances Tok, not FormatTok, and ends at the
  // first token that is not a ';', an access specifier or an accessor
  // keyword.
  while (!eof()) {
    if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
                     tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
                     Keywords.kw_init, Keywords.kw_set)) {
      if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
        HasSpecialAccessor = true;
      Tok = Tokens->getNextToken();
      continue;
    }
    if (Tok->isNot(tok::r_brace))
      IsTrivialPropertyAccessor = false;
    break;
  }

  if (!HasSpecialAccessor) {
    Tokens->setPosition(StoredPosition);
    return false;
  }

  // Try to parse the property accessor:
  // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
  Tokens->setPosition(StoredPosition);
  if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
    addUnwrappedLine();
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::r_brace:
      nextToken();
      // A '=' after the closing brace: consume everything up to and
      // including the next ';'.
      if (FormatTok->is(tok::equal)) {
        while (!eof() && FormatTok->isNot(tok::semi))
          nextToken();
        nextToken();
      }
      addUnwrappedLine();
      return true;
    case tok::l_brace:
      // An accessor body, parsed one level deeper.
      ++Line->Level;
      parseBlock(/*MustBeDeclaration=*/true);
      addUnwrappedLine();
      --Line->Level;
      break;
    case tok::equal:
      // A fat arrow: consume up to and including the next ';' as a line one
      // level deeper.
      if (FormatTok->is(TT_FatArrow)) {
        ++Line->Level;
        do {
          nextToken();
        } while (!eof() && FormatTok->isNot(tok::semi));
        nextToken();
        addUnwrappedLine();
        --Line->Level;
        break;
      }
      nextToken();
      break;
    default:
      if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
                             Keywords.kw_set) &&
          !IsTrivialPropertyAccessor) {
        // Non-trivial get/set needs to be on its own line.
        addUnwrappedLine();
      }
      nextToken();
    }
  } while (!eof());

  // Unreachable for well-formed code (paired '{' and '}').
2140 return true; 2141 } 2142 2143 bool UnwrappedLineParser::tryToParseLambda() { 2144 assert(FormatTok->is(tok::l_square)); 2145 if (!Style.isCpp()) { 2146 nextToken(); 2147 return false; 2148 } 2149 FormatToken &LSquare = *FormatTok; 2150 if (!tryToParseLambdaIntroducer()) 2151 return false; 2152 2153 bool SeenArrow = false; 2154 bool InTemplateParameterList = false; 2155 2156 while (FormatTok->isNot(tok::l_brace)) { 2157 if (FormatTok->isSimpleTypeSpecifier()) { 2158 nextToken(); 2159 continue; 2160 } 2161 switch (FormatTok->Tok.getKind()) { 2162 case tok::l_brace: 2163 break; 2164 case tok::l_paren: 2165 parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference); 2166 break; 2167 case tok::l_square: 2168 parseSquare(); 2169 break; 2170 case tok::less: 2171 assert(FormatTok->Previous); 2172 if (FormatTok->Previous->is(tok::r_square)) 2173 InTemplateParameterList = true; 2174 nextToken(); 2175 break; 2176 case tok::kw_auto: 2177 case tok::kw_class: 2178 case tok::kw_template: 2179 case tok::kw_typename: 2180 case tok::amp: 2181 case tok::star: 2182 case tok::kw_const: 2183 case tok::kw_constexpr: 2184 case tok::kw_consteval: 2185 case tok::comma: 2186 case tok::greater: 2187 case tok::identifier: 2188 case tok::numeric_constant: 2189 case tok::coloncolon: 2190 case tok::kw_mutable: 2191 case tok::kw_noexcept: 2192 case tok::kw_static: 2193 nextToken(); 2194 break; 2195 // Specialization of a template with an integer parameter can contain 2196 // arithmetic, logical, comparison and ternary operators. 2197 // 2198 // FIXME: This also accepts sequences of operators that are not in the scope 2199 // of a template argument list. 2200 // 2201 // In a C++ lambda a template type can only occur after an arrow. We use 2202 // this as an heuristic to distinguish between Objective-C expressions 2203 // followed by an `a->b` expression, such as: 2204 // ([obj func:arg] + a->b) 2205 // Otherwise the code below would parse as a lambda. 
2206 // 2207 // FIXME: This heuristic is incorrect for C++20 generic lambdas with 2208 // explicit template lists: []<bool b = true && false>(U &&u){} 2209 case tok::plus: 2210 case tok::minus: 2211 case tok::exclaim: 2212 case tok::tilde: 2213 case tok::slash: 2214 case tok::percent: 2215 case tok::lessless: 2216 case tok::pipe: 2217 case tok::pipepipe: 2218 case tok::ampamp: 2219 case tok::caret: 2220 case tok::equalequal: 2221 case tok::exclaimequal: 2222 case tok::greaterequal: 2223 case tok::lessequal: 2224 case tok::question: 2225 case tok::colon: 2226 case tok::ellipsis: 2227 case tok::kw_true: 2228 case tok::kw_false: 2229 if (SeenArrow || InTemplateParameterList) { 2230 nextToken(); 2231 break; 2232 } 2233 return true; 2234 case tok::arrow: 2235 // This might or might not actually be a lambda arrow (this could be an 2236 // ObjC method invocation followed by a dereferencing arrow). We might 2237 // reset this back to TT_Unknown in TokenAnnotator. 2238 FormatTok->setFinalizedType(TT_LambdaArrow); 2239 SeenArrow = true; 2240 nextToken(); 2241 break; 2242 case tok::kw_requires: { 2243 auto *RequiresToken = FormatTok; 2244 nextToken(); 2245 parseRequiresClause(RequiresToken); 2246 break; 2247 } 2248 default: 2249 return true; 2250 } 2251 } 2252 FormatTok->setFinalizedType(TT_LambdaLBrace); 2253 LSquare.setFinalizedType(TT_LambdaLSquare); 2254 parseChildBlock(); 2255 return true; 2256 } 2257 2258 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 2259 const FormatToken *Previous = FormatTok->Previous; 2260 const FormatToken *LeftSquare = FormatTok; 2261 nextToken(); 2262 if ((Previous && ((Previous->Tok.getIdentifierInfo() && 2263 !Previous->isOneOf(tok::kw_return, tok::kw_co_await, 2264 tok::kw_co_yield, tok::kw_co_return)) || 2265 Previous->closesScope())) || 2266 LeftSquare->isCppStructuredBinding(Style)) { 2267 return false; 2268 } 2269 if (FormatTok->is(tok::l_square)) 2270 return false; 2271 if (FormatTok->is(tok::r_square)) { 2272 const FormatToken 
*Next = Tokens->peekNextToken(/*SkipComment=*/true); 2273 if (Next->is(tok::greater)) 2274 return false; 2275 } 2276 parseSquare(/*LambdaIntroducer=*/true); 2277 return true; 2278 } 2279 2280 void UnwrappedLineParser::tryToParseJSFunction() { 2281 assert(FormatTok->is(Keywords.kw_function) || 2282 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 2283 if (FormatTok->is(Keywords.kw_async)) 2284 nextToken(); 2285 // Consume "function". 2286 nextToken(); 2287 2288 // Consume * (generator function). Treat it like C++'s overloaded operators. 2289 if (FormatTok->is(tok::star)) { 2290 FormatTok->setFinalizedType(TT_OverloadedOperator); 2291 nextToken(); 2292 } 2293 2294 // Consume function name. 2295 if (FormatTok->is(tok::identifier)) 2296 nextToken(); 2297 2298 if (FormatTok->isNot(tok::l_paren)) 2299 return; 2300 2301 // Parse formal parameter list. 2302 parseParens(); 2303 2304 if (FormatTok->is(tok::colon)) { 2305 // Parse a type definition. 2306 nextToken(); 2307 2308 // Eat the type declaration. For braced inline object types, balance braces, 2309 // otherwise just parse until finding an l_brace for the function body. 2310 if (FormatTok->is(tok::l_brace)) 2311 tryToParseBracedList(); 2312 else 2313 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 2314 nextToken(); 2315 } 2316 2317 if (FormatTok->is(tok::semi)) 2318 return; 2319 2320 parseChildBlock(); 2321 } 2322 2323 bool UnwrappedLineParser::tryToParseBracedList() { 2324 if (FormatTok->is(BK_Unknown)) 2325 calculateBraceTypes(); 2326 assert(FormatTok->isNot(BK_Unknown)); 2327 if (FormatTok->is(BK_Block)) 2328 return false; 2329 nextToken(); 2330 parseBracedList(); 2331 return true; 2332 } 2333 2334 bool UnwrappedLineParser::tryToParseChildBlock() { 2335 assert(Style.isJavaScript() || Style.isCSharp()); 2336 assert(FormatTok->is(TT_FatArrow)); 2337 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow. 
2338 // They always start an expression or a child block if followed by a curly 2339 // brace. 2340 nextToken(); 2341 if (FormatTok->isNot(tok::l_brace)) 2342 return false; 2343 parseChildBlock(); 2344 return true; 2345 } 2346 2347 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 2348 bool IsEnum, 2349 tok::TokenKind ClosingBraceKind) { 2350 bool HasError = false; 2351 2352 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 2353 // replace this by using parseAssignmentExpression() inside. 2354 do { 2355 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) && 2356 tryToParseChildBlock()) { 2357 continue; 2358 } 2359 if (Style.isJavaScript()) { 2360 if (FormatTok->is(Keywords.kw_function) || 2361 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 2362 tryToParseJSFunction(); 2363 continue; 2364 } 2365 if (FormatTok->is(tok::l_brace)) { 2366 // Could be a method inside of a braced list `{a() { return 1; }}`. 2367 if (tryToParseBracedList()) 2368 continue; 2369 parseChildBlock(); 2370 } 2371 } 2372 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 2373 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2374 addUnwrappedLine(); 2375 nextToken(); 2376 return !HasError; 2377 } 2378 switch (FormatTok->Tok.getKind()) { 2379 case tok::l_square: 2380 if (Style.isCSharp()) 2381 parseSquare(); 2382 else 2383 tryToParseLambda(); 2384 break; 2385 case tok::l_paren: 2386 parseParens(); 2387 // JavaScript can just have free standing methods and getters/setters in 2388 // object literals. Detect them by a "{" following ")". 2389 if (Style.isJavaScript()) { 2390 if (FormatTok->is(tok::l_brace)) 2391 parseChildBlock(); 2392 break; 2393 } 2394 break; 2395 case tok::l_brace: 2396 // Assume there are no blocks inside a braced init list apart 2397 // from the ones we explicitly parse out (like lambdas). 
2398 FormatTok->setBlockKind(BK_BracedInit); 2399 nextToken(); 2400 parseBracedList(); 2401 break; 2402 case tok::less: 2403 if (Style.Language == FormatStyle::LK_Proto || 2404 ClosingBraceKind == tok::greater) { 2405 nextToken(); 2406 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 2407 /*ClosingBraceKind=*/tok::greater); 2408 } else { 2409 nextToken(); 2410 } 2411 break; 2412 case tok::semi: 2413 // JavaScript (or more precisely TypeScript) can have semicolons in braced 2414 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 2415 // used for error recovery if we have otherwise determined that this is 2416 // a braced list. 2417 if (Style.isJavaScript()) { 2418 nextToken(); 2419 break; 2420 } 2421 HasError = true; 2422 if (!ContinueOnSemicolons) 2423 return !HasError; 2424 nextToken(); 2425 break; 2426 case tok::comma: 2427 nextToken(); 2428 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2429 addUnwrappedLine(); 2430 break; 2431 default: 2432 nextToken(); 2433 break; 2434 } 2435 } while (!eof()); 2436 return false; 2437 } 2438 2439 /// \brief Parses a pair of parentheses (and everything between them). 2440 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all 2441 /// double ampersands. This applies for all nested scopes as well. 2442 /// 2443 /// Returns whether there is a `=` token between the parentheses. 
2444 bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { 2445 assert(FormatTok->is(tok::l_paren) && "'(' expected."); 2446 auto *LeftParen = FormatTok; 2447 bool SeenEqual = false; 2448 const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace); 2449 nextToken(); 2450 do { 2451 switch (FormatTok->Tok.getKind()) { 2452 case tok::l_paren: 2453 if (parseParens(AmpAmpTokenType)) 2454 SeenEqual = true; 2455 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 2456 parseChildBlock(); 2457 break; 2458 case tok::r_paren: 2459 if (!MightBeStmtExpr && 2460 Style.RemoveParentheses > FormatStyle::RPS_Leave) { 2461 const auto *Prev = LeftParen->Previous; 2462 const auto *Next = Tokens->peekNextToken(); 2463 const bool DoubleParens = 2464 Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren); 2465 const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr; 2466 const bool Blacklisted = 2467 PrevPrev && 2468 (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) || 2469 (SeenEqual && 2470 (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) || 2471 PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if)))); 2472 const bool ReturnParens = 2473 Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement && 2474 Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next && 2475 Next->is(tok::semi); 2476 if ((DoubleParens && !Blacklisted) || ReturnParens) { 2477 LeftParen->Optional = true; 2478 FormatTok->Optional = true; 2479 } 2480 } 2481 nextToken(); 2482 return SeenEqual; 2483 case tok::r_brace: 2484 // A "}" inside parenthesis is an error if there wasn't a matching "{". 
2485 return SeenEqual; 2486 case tok::l_square: 2487 tryToParseLambda(); 2488 break; 2489 case tok::l_brace: 2490 if (!tryToParseBracedList()) 2491 parseChildBlock(); 2492 break; 2493 case tok::at: 2494 nextToken(); 2495 if (FormatTok->is(tok::l_brace)) { 2496 nextToken(); 2497 parseBracedList(); 2498 } 2499 break; 2500 case tok::equal: 2501 SeenEqual = true; 2502 if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) 2503 tryToParseChildBlock(); 2504 else 2505 nextToken(); 2506 break; 2507 case tok::kw_class: 2508 if (Style.isJavaScript()) 2509 parseRecord(/*ParseAsExpr=*/true); 2510 else 2511 nextToken(); 2512 break; 2513 case tok::identifier: 2514 if (Style.isJavaScript() && 2515 (FormatTok->is(Keywords.kw_function) || 2516 FormatTok->startsSequence(Keywords.kw_async, 2517 Keywords.kw_function))) { 2518 tryToParseJSFunction(); 2519 } else { 2520 nextToken(); 2521 } 2522 break; 2523 case tok::kw_requires: { 2524 auto RequiresToken = FormatTok; 2525 nextToken(); 2526 parseRequiresExpression(RequiresToken); 2527 break; 2528 } 2529 case tok::ampamp: 2530 if (AmpAmpTokenType != TT_Unknown) 2531 FormatTok->setFinalizedType(AmpAmpTokenType); 2532 [[fallthrough]]; 2533 default: 2534 nextToken(); 2535 break; 2536 } 2537 } while (!eof()); 2538 return SeenEqual; 2539 } 2540 2541 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 2542 if (!LambdaIntroducer) { 2543 assert(FormatTok->is(tok::l_square) && "'[' expected."); 2544 if (tryToParseLambda()) 2545 return; 2546 } 2547 do { 2548 switch (FormatTok->Tok.getKind()) { 2549 case tok::l_paren: 2550 parseParens(); 2551 break; 2552 case tok::r_square: 2553 nextToken(); 2554 return; 2555 case tok::r_brace: 2556 // A "}" inside parenthesis is an error if there wasn't a matching "{". 
2557 return; 2558 case tok::l_square: 2559 parseSquare(); 2560 break; 2561 case tok::l_brace: { 2562 if (!tryToParseBracedList()) 2563 parseChildBlock(); 2564 break; 2565 } 2566 case tok::at: 2567 nextToken(); 2568 if (FormatTok->is(tok::l_brace)) { 2569 nextToken(); 2570 parseBracedList(); 2571 } 2572 break; 2573 default: 2574 nextToken(); 2575 break; 2576 } 2577 } while (!eof()); 2578 } 2579 2580 void UnwrappedLineParser::keepAncestorBraces() { 2581 if (!Style.RemoveBracesLLVM) 2582 return; 2583 2584 const int MaxNestingLevels = 2; 2585 const int Size = NestedTooDeep.size(); 2586 if (Size >= MaxNestingLevels) 2587 NestedTooDeep[Size - MaxNestingLevels] = true; 2588 NestedTooDeep.push_back(false); 2589 } 2590 2591 static FormatToken *getLastNonComment(const UnwrappedLine &Line) { 2592 for (const auto &Token : llvm::reverse(Line.Tokens)) 2593 if (Token.Tok->isNot(tok::comment)) 2594 return Token.Tok; 2595 2596 return nullptr; 2597 } 2598 2599 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) { 2600 FormatToken *Tok = nullptr; 2601 2602 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() && 2603 PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) { 2604 Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never 2605 ? 
getLastNonComment(*Line) 2606 : Line->Tokens.back().Tok; 2607 assert(Tok); 2608 if (Tok->BraceCount < 0) { 2609 assert(Tok->BraceCount == -1); 2610 Tok = nullptr; 2611 } else { 2612 Tok->BraceCount = -1; 2613 } 2614 } 2615 2616 addUnwrappedLine(); 2617 ++Line->Level; 2618 parseStructuralElement(); 2619 2620 if (Tok) { 2621 assert(!Line->InPPDirective); 2622 Tok = nullptr; 2623 for (const auto &L : llvm::reverse(*CurrentLines)) { 2624 if (!L.InPPDirective && getLastNonComment(L)) { 2625 Tok = L.Tokens.back().Tok; 2626 break; 2627 } 2628 } 2629 assert(Tok); 2630 ++Tok->BraceCount; 2631 } 2632 2633 if (CheckEOF && eof()) 2634 addUnwrappedLine(); 2635 2636 --Line->Level; 2637 } 2638 2639 static void markOptionalBraces(FormatToken *LeftBrace) { 2640 if (!LeftBrace) 2641 return; 2642 2643 assert(LeftBrace->is(tok::l_brace)); 2644 2645 FormatToken *RightBrace = LeftBrace->MatchingParen; 2646 if (!RightBrace) { 2647 assert(!LeftBrace->Optional); 2648 return; 2649 } 2650 2651 assert(RightBrace->is(tok::r_brace)); 2652 assert(RightBrace->MatchingParen == LeftBrace); 2653 assert(LeftBrace->Optional == RightBrace->Optional); 2654 2655 LeftBrace->Optional = true; 2656 RightBrace->Optional = true; 2657 } 2658 2659 void UnwrappedLineParser::handleAttributes() { 2660 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`. 2661 if (FormatTok->is(TT_AttributeMacro)) 2662 nextToken(); 2663 if (FormatTok->is(tok::l_square)) 2664 handleCppAttributes(); 2665 } 2666 2667 bool UnwrappedLineParser::handleCppAttributes() { 2668 // Handle [[likely]] / [[unlikely]] attributes. 2669 assert(FormatTok->is(tok::l_square)); 2670 if (!tryToParseSimpleAttribute()) 2671 return false; 2672 parseSquare(); 2673 return true; 2674 } 2675 2676 /// Returns whether \c Tok begins a block. 2677 bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const { 2678 // FIXME: rename the function or make 2679 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work. 2680 return Style.isVerilog() ? 
Keywords.isVerilogBegin(Tok) 2681 : Tok.is(tok::l_brace); 2682 } 2683 2684 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind, 2685 bool KeepBraces, 2686 bool IsVerilogAssert) { 2687 assert((FormatTok->is(tok::kw_if) || 2688 (Style.isVerilog() && 2689 FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert, 2690 Keywords.kw_assume, Keywords.kw_cover))) && 2691 "'if' expected"); 2692 nextToken(); 2693 2694 if (IsVerilogAssert) { 2695 // Handle `assert #0` and `assert final`. 2696 if (FormatTok->is(Keywords.kw_verilogHash)) { 2697 nextToken(); 2698 if (FormatTok->is(tok::numeric_constant)) 2699 nextToken(); 2700 } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property, 2701 Keywords.kw_sequence)) { 2702 nextToken(); 2703 } 2704 } 2705 2706 // Handle `if !consteval`. 2707 if (FormatTok->is(tok::exclaim)) 2708 nextToken(); 2709 2710 bool KeepIfBraces = true; 2711 if (FormatTok->is(tok::kw_consteval)) { 2712 nextToken(); 2713 } else { 2714 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces; 2715 if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier)) 2716 nextToken(); 2717 if (FormatTok->is(tok::l_paren)) { 2718 FormatTok->setFinalizedType(TT_ConditionLParen); 2719 parseParens(); 2720 } 2721 } 2722 handleAttributes(); 2723 // The then action is optional in Verilog assert statements. 
2724 if (IsVerilogAssert && FormatTok->is(tok::semi)) { 2725 nextToken(); 2726 addUnwrappedLine(); 2727 return nullptr; 2728 } 2729 2730 bool NeedsUnwrappedLine = false; 2731 keepAncestorBraces(); 2732 2733 FormatToken *IfLeftBrace = nullptr; 2734 IfStmtKind IfBlockKind = IfStmtKind::NotIf; 2735 2736 if (isBlockBegin(*FormatTok)) { 2737 FormatTok->setFinalizedType(TT_ControlStatementLBrace); 2738 IfLeftBrace = FormatTok; 2739 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2740 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2741 /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind); 2742 if (Style.BraceWrapping.BeforeElse) 2743 addUnwrappedLine(); 2744 else 2745 NeedsUnwrappedLine = true; 2746 } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) { 2747 addUnwrappedLine(); 2748 } else { 2749 parseUnbracedBody(); 2750 } 2751 2752 if (Style.RemoveBracesLLVM) { 2753 assert(!NestedTooDeep.empty()); 2754 KeepIfBraces = KeepIfBraces || 2755 (IfLeftBrace && !IfLeftBrace->MatchingParen) || 2756 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly || 2757 IfBlockKind == IfStmtKind::IfElseIf; 2758 } 2759 2760 bool KeepElseBraces = KeepIfBraces; 2761 FormatToken *ElseLeftBrace = nullptr; 2762 IfStmtKind Kind = IfStmtKind::IfOnly; 2763 2764 if (FormatTok->is(tok::kw_else)) { 2765 if (Style.RemoveBracesLLVM) { 2766 NestedTooDeep.back() = false; 2767 Kind = IfStmtKind::IfElse; 2768 } 2769 nextToken(); 2770 handleAttributes(); 2771 if (isBlockBegin(*FormatTok)) { 2772 const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if); 2773 FormatTok->setFinalizedType(TT_ElseLBrace); 2774 ElseLeftBrace = FormatTok; 2775 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2776 IfStmtKind ElseBlockKind = IfStmtKind::NotIf; 2777 FormatToken *IfLBrace = 2778 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2779 /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind); 2780 if (FormatTok->is(tok::kw_else)) { 2781 KeepElseBraces = KeepElseBraces 
|| 2782 ElseBlockKind == IfStmtKind::IfOnly || 2783 ElseBlockKind == IfStmtKind::IfElseIf; 2784 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) { 2785 KeepElseBraces = true; 2786 assert(ElseLeftBrace->MatchingParen); 2787 markOptionalBraces(ElseLeftBrace); 2788 } 2789 addUnwrappedLine(); 2790 } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) { 2791 const FormatToken *Previous = Tokens->getPreviousToken(); 2792 assert(Previous); 2793 const bool IsPrecededByComment = Previous->is(tok::comment); 2794 if (IsPrecededByComment) { 2795 addUnwrappedLine(); 2796 ++Line->Level; 2797 } 2798 bool TooDeep = true; 2799 if (Style.RemoveBracesLLVM) { 2800 Kind = IfStmtKind::IfElseIf; 2801 TooDeep = NestedTooDeep.pop_back_val(); 2802 } 2803 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces); 2804 if (Style.RemoveBracesLLVM) 2805 NestedTooDeep.push_back(TooDeep); 2806 if (IsPrecededByComment) 2807 --Line->Level; 2808 } else { 2809 parseUnbracedBody(/*CheckEOF=*/true); 2810 } 2811 } else { 2812 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse; 2813 if (NeedsUnwrappedLine) 2814 addUnwrappedLine(); 2815 } 2816 2817 if (!Style.RemoveBracesLLVM) 2818 return nullptr; 2819 2820 assert(!NestedTooDeep.empty()); 2821 KeepElseBraces = KeepElseBraces || 2822 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || 2823 NestedTooDeep.back(); 2824 2825 NestedTooDeep.pop_back(); 2826 2827 if (!KeepIfBraces && !KeepElseBraces) { 2828 markOptionalBraces(IfLeftBrace); 2829 markOptionalBraces(ElseLeftBrace); 2830 } else if (IfLeftBrace) { 2831 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen; 2832 if (IfRightBrace) { 2833 assert(IfRightBrace->MatchingParen == IfLeftBrace); 2834 assert(!IfLeftBrace->Optional); 2835 assert(!IfRightBrace->Optional); 2836 IfLeftBrace->MatchingParen = nullptr; 2837 IfRightBrace->MatchingParen = nullptr; 2838 } 2839 } 2840 2841 if (IfKind) 2842 *IfKind = Kind; 2843 2844 return IfLeftBrace; 2845 } 2846 2847 void 
UnwrappedLineParser::parseTryCatch() { 2848 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 2849 nextToken(); 2850 bool NeedsUnwrappedLine = false; 2851 if (FormatTok->is(tok::colon)) { 2852 // We are in a function try block, what comes is an initializer list. 2853 nextToken(); 2854 2855 // In case identifiers were removed by clang-tidy, what might follow is 2856 // multiple commas in sequence - before the first identifier. 2857 while (FormatTok->is(tok::comma)) 2858 nextToken(); 2859 2860 while (FormatTok->is(tok::identifier)) { 2861 nextToken(); 2862 if (FormatTok->is(tok::l_paren)) 2863 parseParens(); 2864 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) && 2865 FormatTok->is(tok::l_brace)) { 2866 do { 2867 nextToken(); 2868 } while (!FormatTok->is(tok::r_brace)); 2869 nextToken(); 2870 } 2871 2872 // In case identifiers were removed by clang-tidy, what might follow is 2873 // multiple commas in sequence - after the first identifier. 2874 while (FormatTok->is(tok::comma)) 2875 nextToken(); 2876 } 2877 } 2878 // Parse try with resource. 2879 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) 2880 parseParens(); 2881 2882 keepAncestorBraces(); 2883 2884 if (FormatTok->is(tok::l_brace)) { 2885 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2886 parseBlock(); 2887 if (Style.BraceWrapping.BeforeCatch) 2888 addUnwrappedLine(); 2889 else 2890 NeedsUnwrappedLine = true; 2891 } else if (!FormatTok->is(tok::kw_catch)) { 2892 // The C++ standard requires a compound-statement after a try. 2893 // If there's none, we try to assume there's a structuralElement 2894 // and try to continue. 
2895 addUnwrappedLine(); 2896 ++Line->Level; 2897 parseStructuralElement(); 2898 --Line->Level; 2899 } 2900 while (true) { 2901 if (FormatTok->is(tok::at)) 2902 nextToken(); 2903 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 2904 tok::kw___finally) || 2905 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 2906 FormatTok->is(Keywords.kw_finally)) || 2907 (FormatTok->isObjCAtKeyword(tok::objc_catch) || 2908 FormatTok->isObjCAtKeyword(tok::objc_finally)))) { 2909 break; 2910 } 2911 nextToken(); 2912 while (FormatTok->isNot(tok::l_brace)) { 2913 if (FormatTok->is(tok::l_paren)) { 2914 parseParens(); 2915 continue; 2916 } 2917 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) { 2918 if (Style.RemoveBracesLLVM) 2919 NestedTooDeep.pop_back(); 2920 return; 2921 } 2922 nextToken(); 2923 } 2924 NeedsUnwrappedLine = false; 2925 Line->MustBeDeclaration = false; 2926 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2927 parseBlock(); 2928 if (Style.BraceWrapping.BeforeCatch) 2929 addUnwrappedLine(); 2930 else 2931 NeedsUnwrappedLine = true; 2932 } 2933 2934 if (Style.RemoveBracesLLVM) 2935 NestedTooDeep.pop_back(); 2936 2937 if (NeedsUnwrappedLine) 2938 addUnwrappedLine(); 2939 } 2940 2941 void UnwrappedLineParser::parseNamespace() { 2942 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) && 2943 "'namespace' expected"); 2944 2945 const FormatToken &InitialToken = *FormatTok; 2946 nextToken(); 2947 if (InitialToken.is(TT_NamespaceMacro)) { 2948 parseParens(); 2949 } else { 2950 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline, 2951 tok::l_square, tok::period, tok::l_paren) || 2952 (Style.isCSharp() && FormatTok->is(tok::kw_union))) { 2953 if (FormatTok->is(tok::l_square)) 2954 parseSquare(); 2955 else if (FormatTok->is(tok::l_paren)) 2956 parseParens(); 2957 else 2958 nextToken(); 2959 } 2960 } 2961 if (FormatTok->is(tok::l_brace)) { 2962 if (ShouldBreakBeforeBrace(Style, 
InitialToken)) 2963 addUnwrappedLine(); 2964 2965 unsigned AddLevels = 2966 Style.NamespaceIndentation == FormatStyle::NI_All || 2967 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 2968 DeclarationScopeStack.size() > 1) 2969 ? 1u 2970 : 0u; 2971 bool ManageWhitesmithsBraces = 2972 AddLevels == 0u && 2973 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 2974 2975 // If we're in Whitesmiths mode, indent the brace if we're not indenting 2976 // the whole block. 2977 if (ManageWhitesmithsBraces) 2978 ++Line->Level; 2979 2980 // Munch the semicolon after a namespace. This is more common than one would 2981 // think. Putting the semicolon into its own line is very ugly. 2982 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true, 2983 /*KeepBraces=*/true, /*IfKind=*/nullptr, 2984 ManageWhitesmithsBraces); 2985 2986 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep); 2987 2988 if (ManageWhitesmithsBraces) 2989 --Line->Level; 2990 } 2991 // FIXME: Add error handling. 2992 } 2993 2994 void UnwrappedLineParser::parseNew() { 2995 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 2996 nextToken(); 2997 2998 if (Style.isCSharp()) { 2999 do { 3000 // Handle constructor invocation, e.g. `new(field: value)`. 3001 if (FormatTok->is(tok::l_paren)) 3002 parseParens(); 3003 3004 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`. 3005 if (FormatTok->is(tok::l_brace)) 3006 parseBracedList(); 3007 3008 if (FormatTok->isOneOf(tok::semi, tok::comma)) 3009 return; 3010 3011 nextToken(); 3012 } while (!eof()); 3013 } 3014 3015 if (Style.Language != FormatStyle::LK_Java) 3016 return; 3017 3018 // In Java, we can parse everything up to the parens, which aren't optional. 3019 do { 3020 // There should not be a ;, { or } before the new's open paren. 3021 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 3022 return; 3023 3024 // Consume the parens. 
3025 if (FormatTok->is(tok::l_paren)) { 3026 parseParens(); 3027 3028 // If there is a class body of an anonymous class, consume that as child. 3029 if (FormatTok->is(tok::l_brace)) 3030 parseChildBlock(); 3031 return; 3032 } 3033 nextToken(); 3034 } while (!eof()); 3035 } 3036 3037 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) { 3038 keepAncestorBraces(); 3039 3040 if (isBlockBegin(*FormatTok)) { 3041 if (!KeepBraces) 3042 FormatTok->setFinalizedType(TT_ControlStatementLBrace); 3043 FormatToken *LeftBrace = FormatTok; 3044 CompoundStatementIndenter Indenter(this, Style, Line->Level); 3045 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 3046 /*MunchSemi=*/true, KeepBraces); 3047 if (!KeepBraces) { 3048 assert(!NestedTooDeep.empty()); 3049 if (!NestedTooDeep.back()) 3050 markOptionalBraces(LeftBrace); 3051 } 3052 if (WrapRightBrace) 3053 addUnwrappedLine(); 3054 } else { 3055 parseUnbracedBody(); 3056 } 3057 3058 if (!KeepBraces) 3059 NestedTooDeep.pop_back(); 3060 } 3061 3062 void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) { 3063 assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) || 3064 (Style.isVerilog() && 3065 FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb, 3066 Keywords.kw_always_ff, Keywords.kw_always_latch, 3067 Keywords.kw_final, Keywords.kw_initial, 3068 Keywords.kw_foreach, Keywords.kw_forever, 3069 Keywords.kw_repeat))) && 3070 "'for', 'while' or foreach macro expected"); 3071 const bool KeepBraces = !Style.RemoveBracesLLVM || 3072 !FormatTok->isOneOf(tok::kw_for, tok::kw_while); 3073 3074 nextToken(); 3075 // JS' for await ( ... 3076 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await)) 3077 nextToken(); 3078 if (Style.isCpp() && FormatTok->is(tok::kw_co_await)) 3079 nextToken(); 3080 if (HasParens && FormatTok->is(tok::l_paren)) { 3081 // The type is only set for Verilog basically because we were afraid to 3082 // change the existing behavior for loops. 
See the discussion on D121756 for 3083 // details. 3084 if (Style.isVerilog()) 3085 FormatTok->setFinalizedType(TT_ConditionLParen); 3086 parseParens(); 3087 } 3088 // Event control. 3089 if (Style.isVerilog()) 3090 parseVerilogSensitivityList(); 3091 3092 handleAttributes(); 3093 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true); 3094 } 3095 3096 void UnwrappedLineParser::parseDoWhile() { 3097 assert(FormatTok->is(tok::kw_do) && "'do' expected"); 3098 nextToken(); 3099 3100 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile); 3101 3102 // FIXME: Add error handling. 3103 if (!FormatTok->is(tok::kw_while)) { 3104 addUnwrappedLine(); 3105 return; 3106 } 3107 3108 // If in Whitesmiths mode, the line with the while() needs to be indented 3109 // to the same level as the block. 3110 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 3111 ++Line->Level; 3112 3113 nextToken(); 3114 parseStructuralElement(); 3115 } 3116 3117 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { 3118 nextToken(); 3119 unsigned OldLineLevel = Line->Level; 3120 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 3121 --Line->Level; 3122 if (LeftAlignLabel) 3123 Line->Level = 0; 3124 3125 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && 3126 FormatTok->is(tok::l_brace)) { 3127 3128 CompoundStatementIndenter Indenter(this, Line->Level, 3129 Style.BraceWrapping.AfterCaseLabel, 3130 Style.BraceWrapping.IndentBraces); 3131 parseBlock(); 3132 if (FormatTok->is(tok::kw_break)) { 3133 if (Style.BraceWrapping.AfterControlStatement == 3134 FormatStyle::BWACS_Always) { 3135 addUnwrappedLine(); 3136 if (!Style.IndentCaseBlocks && 3137 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 3138 ++Line->Level; 3139 } 3140 } 3141 parseStructuralElement(); 3142 } 3143 addUnwrappedLine(); 3144 } else { 3145 if (FormatTok->is(tok::semi)) 3146 nextToken(); 3147 addUnwrappedLine(); 3148 } 3149 Line->Level = OldLineLevel; 3150 if 
(FormatTok->isNot(tok::l_brace)) { 3151 parseStructuralElement(); 3152 addUnwrappedLine(); 3153 } 3154 } 3155 3156 void UnwrappedLineParser::parseCaseLabel() { 3157 assert(FormatTok->is(tok::kw_case) && "'case' expected"); 3158 3159 // FIXME: fix handling of complex expressions here. 3160 do { 3161 nextToken(); 3162 if (FormatTok->is(tok::colon)) { 3163 FormatTok->setFinalizedType(TT_CaseLabelColon); 3164 break; 3165 } 3166 } while (!eof()); 3167 parseLabel(); 3168 } 3169 3170 void UnwrappedLineParser::parseSwitch() { 3171 assert(FormatTok->is(tok::kw_switch) && "'switch' expected"); 3172 nextToken(); 3173 if (FormatTok->is(tok::l_paren)) 3174 parseParens(); 3175 3176 keepAncestorBraces(); 3177 3178 if (FormatTok->is(tok::l_brace)) { 3179 CompoundStatementIndenter Indenter(this, Style, Line->Level); 3180 parseBlock(); 3181 addUnwrappedLine(); 3182 } else { 3183 addUnwrappedLine(); 3184 ++Line->Level; 3185 parseStructuralElement(); 3186 --Line->Level; 3187 } 3188 3189 if (Style.RemoveBracesLLVM) 3190 NestedTooDeep.pop_back(); 3191 } 3192 3193 // Operators that can follow a C variable. 
3194 static bool isCOperatorFollowingVar(tok::TokenKind kind) { 3195 switch (kind) { 3196 case tok::ampamp: 3197 case tok::ampequal: 3198 case tok::arrow: 3199 case tok::caret: 3200 case tok::caretequal: 3201 case tok::comma: 3202 case tok::ellipsis: 3203 case tok::equal: 3204 case tok::equalequal: 3205 case tok::exclaim: 3206 case tok::exclaimequal: 3207 case tok::greater: 3208 case tok::greaterequal: 3209 case tok::greatergreater: 3210 case tok::greatergreaterequal: 3211 case tok::l_paren: 3212 case tok::l_square: 3213 case tok::less: 3214 case tok::lessequal: 3215 case tok::lessless: 3216 case tok::lesslessequal: 3217 case tok::minus: 3218 case tok::minusequal: 3219 case tok::minusminus: 3220 case tok::percent: 3221 case tok::percentequal: 3222 case tok::period: 3223 case tok::pipe: 3224 case tok::pipeequal: 3225 case tok::pipepipe: 3226 case tok::plus: 3227 case tok::plusequal: 3228 case tok::plusplus: 3229 case tok::question: 3230 case tok::r_brace: 3231 case tok::r_paren: 3232 case tok::r_square: 3233 case tok::semi: 3234 case tok::slash: 3235 case tok::slashequal: 3236 case tok::star: 3237 case tok::starequal: 3238 return true; 3239 default: 3240 return false; 3241 } 3242 } 3243 3244 void UnwrappedLineParser::parseAccessSpecifier() { 3245 FormatToken *AccessSpecifierCandidate = FormatTok; 3246 nextToken(); 3247 // Understand Qt's slots. 3248 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 3249 nextToken(); 3250 // Otherwise, we don't know what it is, and we'd better keep the next token. 3251 if (FormatTok->is(tok::colon)) { 3252 nextToken(); 3253 addUnwrappedLine(); 3254 } else if (!FormatTok->is(tok::coloncolon) && 3255 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) { 3256 // Not a variable name nor namespace name. 3257 addUnwrappedLine(); 3258 } else if (AccessSpecifierCandidate) { 3259 // Consider the access specifier to be a C identifier. 
3260 AccessSpecifierCandidate->Tok.setKind(tok::identifier); 3261 } 3262 } 3263 3264 /// \brief Parses a requires, decides if it is a clause or an expression. 3265 /// \pre The current token has to be the requires keyword. 3266 /// \returns true if it parsed a clause. 3267 bool clang::format::UnwrappedLineParser::parseRequires() { 3268 assert(FormatTok->is(tok::kw_requires) && "'requires' expected"); 3269 auto RequiresToken = FormatTok; 3270 3271 // We try to guess if it is a requires clause, or a requires expression. For 3272 // that we first consume the keyword and check the next token. 3273 nextToken(); 3274 3275 switch (FormatTok->Tok.getKind()) { 3276 case tok::l_brace: 3277 // This can only be an expression, never a clause. 3278 parseRequiresExpression(RequiresToken); 3279 return false; 3280 case tok::l_paren: 3281 // Clauses and expression can start with a paren, it's unclear what we have. 3282 break; 3283 default: 3284 // All other tokens can only be a clause. 3285 parseRequiresClause(RequiresToken); 3286 return true; 3287 } 3288 3289 // Looking forward we would have to decide if there are function declaration 3290 // like arguments to the requires expression: 3291 // requires (T t) { 3292 // Or there is a constraint expression for the requires clause: 3293 // requires (C<T> && ... 3294 3295 // But first let's look behind. 3296 auto *PreviousNonComment = RequiresToken->getPreviousNonComment(); 3297 3298 if (!PreviousNonComment || 3299 PreviousNonComment->is(TT_RequiresExpressionLBrace)) { 3300 // If there is no token, or an expression left brace, we are a requires 3301 // clause within a requires expression. 3302 parseRequiresClause(RequiresToken); 3303 return true; 3304 } 3305 3306 switch (PreviousNonComment->Tok.getKind()) { 3307 case tok::greater: 3308 case tok::r_paren: 3309 case tok::kw_noexcept: 3310 case tok::kw_const: 3311 // This is a requires clause. 
3312 parseRequiresClause(RequiresToken); 3313 return true; 3314 case tok::amp: 3315 case tok::ampamp: { 3316 // This can be either: 3317 // if (... && requires (T t) ...) 3318 // Or 3319 // void member(...) && requires (C<T> ... 3320 // We check the one token before that for a const: 3321 // void member(...) const && requires (C<T> ... 3322 auto PrevPrev = PreviousNonComment->getPreviousNonComment(); 3323 if (PrevPrev && PrevPrev->is(tok::kw_const)) { 3324 parseRequiresClause(RequiresToken); 3325 return true; 3326 } 3327 break; 3328 } 3329 default: 3330 if (PreviousNonComment->isTypeOrIdentifier()) { 3331 // This is a requires clause. 3332 parseRequiresClause(RequiresToken); 3333 return true; 3334 } 3335 // It's an expression. 3336 parseRequiresExpression(RequiresToken); 3337 return false; 3338 } 3339 3340 // Now we look forward and try to check if the paren content is a parameter 3341 // list. The parameters can be cv-qualified and contain references or 3342 // pointers. 3343 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds 3344 // of stuff: typename, const, *, &, &&, ::, identifiers. 3345 3346 unsigned StoredPosition = Tokens->getPosition(); 3347 FormatToken *NextToken = Tokens->getNextToken(); 3348 int Lookahead = 0; 3349 auto PeekNext = [&Lookahead, &NextToken, this] { 3350 ++Lookahead; 3351 NextToken = Tokens->getNextToken(); 3352 }; 3353 3354 bool FoundType = false; 3355 bool LastWasColonColon = false; 3356 int OpenAngles = 0; 3357 3358 for (; Lookahead < 50; PeekNext()) { 3359 switch (NextToken->Tok.getKind()) { 3360 case tok::kw_volatile: 3361 case tok::kw_const: 3362 case tok::comma: 3363 FormatTok = Tokens->setPosition(StoredPosition); 3364 parseRequiresExpression(RequiresToken); 3365 return false; 3366 case tok::r_paren: 3367 case tok::pipepipe: 3368 FormatTok = Tokens->setPosition(StoredPosition); 3369 parseRequiresClause(RequiresToken); 3370 return true; 3371 case tok::eof: 3372 // Break out of the loop. 
3373 Lookahead = 50; 3374 break; 3375 case tok::coloncolon: 3376 LastWasColonColon = true; 3377 break; 3378 case tok::identifier: 3379 if (FoundType && !LastWasColonColon && OpenAngles == 0) { 3380 FormatTok = Tokens->setPosition(StoredPosition); 3381 parseRequiresExpression(RequiresToken); 3382 return false; 3383 } 3384 FoundType = true; 3385 LastWasColonColon = false; 3386 break; 3387 case tok::less: 3388 ++OpenAngles; 3389 break; 3390 case tok::greater: 3391 --OpenAngles; 3392 break; 3393 default: 3394 if (NextToken->isSimpleTypeSpecifier()) { 3395 FormatTok = Tokens->setPosition(StoredPosition); 3396 parseRequiresExpression(RequiresToken); 3397 return false; 3398 } 3399 break; 3400 } 3401 } 3402 // This seems to be a complicated expression, just assume it's a clause. 3403 FormatTok = Tokens->setPosition(StoredPosition); 3404 parseRequiresClause(RequiresToken); 3405 return true; 3406 } 3407 3408 /// \brief Parses a requires clause. 3409 /// \param RequiresToken The requires keyword token, which starts this clause. 3410 /// \pre We need to be on the next token after the requires keyword. 3411 /// \sa parseRequiresExpression 3412 /// 3413 /// Returns if it either has finished parsing the clause, or it detects, that 3414 /// the clause is incorrect. 3415 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) { 3416 assert(FormatTok->getPreviousNonComment() == RequiresToken); 3417 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); 3418 3419 // If there is no previous token, we are within a requires expression, 3420 // otherwise we will always have the template or function declaration in front 3421 // of it. 3422 bool InRequiresExpression = 3423 !RequiresToken->Previous || 3424 RequiresToken->Previous->is(TT_RequiresExpressionLBrace); 3425 3426 RequiresToken->setFinalizedType(InRequiresExpression 3427 ? 
TT_RequiresClauseInARequiresExpression 3428 : TT_RequiresClause); 3429 3430 // NOTE: parseConstraintExpression is only ever called from this function. 3431 // It could be inlined into here. 3432 parseConstraintExpression(); 3433 3434 if (!InRequiresExpression) 3435 FormatTok->Previous->ClosesRequiresClause = true; 3436 } 3437 3438 /// \brief Parses a requires expression. 3439 /// \param RequiresToken The requires keyword token, which starts this clause. 3440 /// \pre We need to be on the next token after the requires keyword. 3441 /// \sa parseRequiresClause 3442 /// 3443 /// Returns if it either has finished parsing the expression, or it detects, 3444 /// that the expression is incorrect. 3445 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) { 3446 assert(FormatTok->getPreviousNonComment() == RequiresToken); 3447 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); 3448 3449 RequiresToken->setFinalizedType(TT_RequiresExpression); 3450 3451 if (FormatTok->is(tok::l_paren)) { 3452 FormatTok->setFinalizedType(TT_RequiresExpressionLParen); 3453 parseParens(); 3454 } 3455 3456 if (FormatTok->is(tok::l_brace)) { 3457 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace); 3458 parseChildBlock(/*CanContainBracedList=*/false, 3459 /*NextLBracesType=*/TT_CompoundRequirementLBrace); 3460 } 3461 } 3462 3463 /// \brief Parses a constraint expression. 3464 /// 3465 /// This is the body of a requires clause. It returns, when the parsing is 3466 /// complete, or the expression is incorrect. 3467 void UnwrappedLineParser::parseConstraintExpression() { 3468 // The special handling for lambdas is needed since tryToParseLambda() eats a 3469 // token and if a requires expression is the last part of a requires clause 3470 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is 3471 // not set on the correct token. Thus we need to be aware if we even expect a 3472 // lambda to be possible. 
3473 // template <typename T> requires requires { ... } [[nodiscard]] ...; 3474 bool LambdaNextTimeAllowed = true; 3475 3476 // Within lambda declarations, it is permitted to put a requires clause after 3477 // its template parameter list, which would place the requires clause right 3478 // before the parentheses of the parameters of the lambda declaration. Thus, 3479 // we track if we expect to see grouping parentheses at all. 3480 // Without this check, `requires foo<T> (T t)` in the below example would be 3481 // seen as the whole requires clause, accidentally eating the parameters of 3482 // the lambda. 3483 // [&]<typename T> requires foo<T> (T t) { ... }; 3484 bool TopLevelParensAllowed = true; 3485 3486 do { 3487 bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false); 3488 3489 switch (FormatTok->Tok.getKind()) { 3490 case tok::kw_requires: { 3491 auto RequiresToken = FormatTok; 3492 nextToken(); 3493 parseRequiresExpression(RequiresToken); 3494 break; 3495 } 3496 3497 case tok::l_paren: 3498 if (!TopLevelParensAllowed) 3499 return; 3500 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator); 3501 TopLevelParensAllowed = false; 3502 break; 3503 3504 case tok::l_square: 3505 if (!LambdaThisTimeAllowed || !tryToParseLambda()) 3506 return; 3507 break; 3508 3509 case tok::kw_const: 3510 case tok::semi: 3511 case tok::kw_class: 3512 case tok::kw_struct: 3513 case tok::kw_union: 3514 return; 3515 3516 case tok::l_brace: 3517 // Potential function body. 
3518 return; 3519 3520 case tok::ampamp: 3521 case tok::pipepipe: 3522 FormatTok->setFinalizedType(TT_BinaryOperator); 3523 nextToken(); 3524 LambdaNextTimeAllowed = true; 3525 TopLevelParensAllowed = true; 3526 break; 3527 3528 case tok::comma: 3529 case tok::comment: 3530 LambdaNextTimeAllowed = LambdaThisTimeAllowed; 3531 nextToken(); 3532 break; 3533 3534 case tok::kw_sizeof: 3535 case tok::greater: 3536 case tok::greaterequal: 3537 case tok::greatergreater: 3538 case tok::less: 3539 case tok::lessequal: 3540 case tok::lessless: 3541 case tok::equalequal: 3542 case tok::exclaim: 3543 case tok::exclaimequal: 3544 case tok::plus: 3545 case tok::minus: 3546 case tok::star: 3547 case tok::slash: 3548 LambdaNextTimeAllowed = true; 3549 TopLevelParensAllowed = true; 3550 // Just eat them. 3551 nextToken(); 3552 break; 3553 3554 case tok::numeric_constant: 3555 case tok::coloncolon: 3556 case tok::kw_true: 3557 case tok::kw_false: 3558 TopLevelParensAllowed = false; 3559 // Just eat them. 3560 nextToken(); 3561 break; 3562 3563 case tok::kw_static_cast: 3564 case tok::kw_const_cast: 3565 case tok::kw_reinterpret_cast: 3566 case tok::kw_dynamic_cast: 3567 nextToken(); 3568 if (!FormatTok->is(tok::less)) 3569 return; 3570 3571 nextToken(); 3572 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 3573 /*ClosingBraceKind=*/tok::greater); 3574 break; 3575 3576 default: 3577 if (!FormatTok->Tok.getIdentifierInfo()) { 3578 // Identifiers are part of the default case, we check for more then 3579 // tok::identifier to handle builtin type traits. 3580 return; 3581 } 3582 3583 // We need to differentiate identifiers for a template deduction guide, 3584 // variables, or function return types (the constraint expression has 3585 // ended before that), and basically all other cases. But it's easier to 3586 // check the other way around. 3587 assert(FormatTok->Previous); 3588 switch (FormatTok->Previous->Tok.getKind()) { 3589 case tok::coloncolon: // Nested identifier. 
3590 case tok::ampamp: // Start of a function or variable for the 3591 case tok::pipepipe: // constraint expression. (binary) 3592 case tok::exclaim: // The same as above, but unary. 3593 case tok::kw_requires: // Initial identifier of a requires clause. 3594 case tok::equal: // Initial identifier of a concept declaration. 3595 break; 3596 default: 3597 return; 3598 } 3599 3600 // Read identifier with optional template declaration. 3601 nextToken(); 3602 if (FormatTok->is(tok::less)) { 3603 nextToken(); 3604 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 3605 /*ClosingBraceKind=*/tok::greater); 3606 } 3607 TopLevelParensAllowed = false; 3608 break; 3609 } 3610 } while (!eof()); 3611 } 3612 3613 bool UnwrappedLineParser::parseEnum() { 3614 const FormatToken &InitialToken = *FormatTok; 3615 3616 // Won't be 'enum' for NS_ENUMs. 3617 if (FormatTok->is(tok::kw_enum)) 3618 nextToken(); 3619 3620 // In TypeScript, "enum" can also be used as property name, e.g. in interface 3621 // declarations. An "enum" keyword followed by a colon would be a syntax 3622 // error and thus assume it is just an identifier. 3623 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question)) 3624 return false; 3625 3626 // In protobuf, "enum" can be used as a field name. 3627 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) 3628 return false; 3629 3630 // Eat up enum class ... 3631 if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct)) 3632 nextToken(); 3633 3634 while (FormatTok->Tok.getIdentifierInfo() || 3635 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 3636 tok::greater, tok::comma, tok::question, 3637 tok::l_square, tok::r_square)) { 3638 if (Style.isVerilog()) { 3639 FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName); 3640 nextToken(); 3641 // In Verilog the base type can have dimensions. 
3642 while (FormatTok->is(tok::l_square)) 3643 parseSquare(); 3644 } else { 3645 nextToken(); 3646 } 3647 // We can have macros or attributes in between 'enum' and the enum name. 3648 if (FormatTok->is(tok::l_paren)) 3649 parseParens(); 3650 if (FormatTok->is(TT_AttributeSquare)) { 3651 parseSquare(); 3652 // Consume the closing TT_AttributeSquare. 3653 if (FormatTok->Next && FormatTok->is(TT_AttributeSquare)) 3654 nextToken(); 3655 } 3656 if (FormatTok->is(tok::identifier)) { 3657 nextToken(); 3658 // If there are two identifiers in a row, this is likely an elaborate 3659 // return type. In Java, this can be "implements", etc. 3660 if (Style.isCpp() && FormatTok->is(tok::identifier)) 3661 return false; 3662 } 3663 } 3664 3665 // Just a declaration or something is wrong. 3666 if (FormatTok->isNot(tok::l_brace)) 3667 return true; 3668 FormatTok->setFinalizedType(TT_EnumLBrace); 3669 FormatTok->setBlockKind(BK_Block); 3670 3671 if (Style.Language == FormatStyle::LK_Java) { 3672 // Java enums are different. 3673 parseJavaEnumBody(); 3674 return true; 3675 } 3676 if (Style.Language == FormatStyle::LK_Proto) { 3677 parseBlock(/*MustBeDeclaration=*/true); 3678 return true; 3679 } 3680 3681 if (!Style.AllowShortEnumsOnASingleLine && 3682 ShouldBreakBeforeBrace(Style, InitialToken)) { 3683 addUnwrappedLine(); 3684 } 3685 // Parse enum body. 3686 nextToken(); 3687 if (!Style.AllowShortEnumsOnASingleLine) { 3688 addUnwrappedLine(); 3689 Line->Level += 1; 3690 } 3691 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true, 3692 /*IsEnum=*/true); 3693 if (!Style.AllowShortEnumsOnASingleLine) 3694 Line->Level -= 1; 3695 if (HasError) { 3696 if (FormatTok->is(tok::semi)) 3697 nextToken(); 3698 addUnwrappedLine(); 3699 } 3700 return true; 3701 3702 // There is no addUnwrappedLine() here so that we fall through to parsing a 3703 // structural element afterwards. Thus, in "enum A {} n, m;", 3704 // "} n, m;" will end up in one unwrapped line. 
3705 } 3706 3707 bool UnwrappedLineParser::parseStructLike() { 3708 // parseRecord falls through and does not yet add an unwrapped line as a 3709 // record declaration or definition can start a structural element. 3710 parseRecord(); 3711 // This does not apply to Java, JavaScript and C#. 3712 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 3713 Style.isCSharp()) { 3714 if (FormatTok->is(tok::semi)) 3715 nextToken(); 3716 addUnwrappedLine(); 3717 return true; 3718 } 3719 return false; 3720 } 3721 3722 namespace { 3723 // A class used to set and restore the Token position when peeking 3724 // ahead in the token source. 3725 class ScopedTokenPosition { 3726 unsigned StoredPosition; 3727 FormatTokenSource *Tokens; 3728 3729 public: 3730 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) { 3731 assert(Tokens && "Tokens expected to not be null"); 3732 StoredPosition = Tokens->getPosition(); 3733 } 3734 3735 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); } 3736 }; 3737 } // namespace 3738 3739 // Look to see if we have [[ by looking ahead, if 3740 // its not then rewind to the original position. 3741 bool UnwrappedLineParser::tryToParseSimpleAttribute() { 3742 ScopedTokenPosition AutoPosition(Tokens); 3743 FormatToken *Tok = Tokens->getNextToken(); 3744 // We already read the first [ check for the second. 3745 if (!Tok->is(tok::l_square)) 3746 return false; 3747 // Double check that the attribute is just something 3748 // fairly simple. 
3749 while (Tok->isNot(tok::eof)) { 3750 if (Tok->is(tok::r_square)) 3751 break; 3752 Tok = Tokens->getNextToken(); 3753 } 3754 if (Tok->is(tok::eof)) 3755 return false; 3756 Tok = Tokens->getNextToken(); 3757 if (!Tok->is(tok::r_square)) 3758 return false; 3759 Tok = Tokens->getNextToken(); 3760 if (Tok->is(tok::semi)) 3761 return false; 3762 return true; 3763 } 3764 3765 void UnwrappedLineParser::parseJavaEnumBody() { 3766 assert(FormatTok->is(tok::l_brace)); 3767 const FormatToken *OpeningBrace = FormatTok; 3768 3769 // Determine whether the enum is simple, i.e. does not have a semicolon or 3770 // constants with class bodies. Simple enums can be formatted like braced 3771 // lists, contracted to a single line, etc. 3772 unsigned StoredPosition = Tokens->getPosition(); 3773 bool IsSimple = true; 3774 FormatToken *Tok = Tokens->getNextToken(); 3775 while (!Tok->is(tok::eof)) { 3776 if (Tok->is(tok::r_brace)) 3777 break; 3778 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 3779 IsSimple = false; 3780 break; 3781 } 3782 // FIXME: This will also mark enums with braces in the arguments to enum 3783 // constants as "not simple". This is probably fine in practice, though. 3784 Tok = Tokens->getNextToken(); 3785 } 3786 FormatTok = Tokens->setPosition(StoredPosition); 3787 3788 if (IsSimple) { 3789 nextToken(); 3790 parseBracedList(); 3791 addUnwrappedLine(); 3792 return; 3793 } 3794 3795 // Parse the body of a more complex enum. 3796 // First add a line for everything up to the "{". 3797 nextToken(); 3798 addUnwrappedLine(); 3799 ++Line->Level; 3800 3801 // Parse the enum constants. 3802 while (!eof()) { 3803 if (FormatTok->is(tok::l_brace)) { 3804 // Parse the constant's class body. 
3805 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u, 3806 /*MunchSemi=*/false); 3807 } else if (FormatTok->is(tok::l_paren)) { 3808 parseParens(); 3809 } else if (FormatTok->is(tok::comma)) { 3810 nextToken(); 3811 addUnwrappedLine(); 3812 } else if (FormatTok->is(tok::semi)) { 3813 nextToken(); 3814 addUnwrappedLine(); 3815 break; 3816 } else if (FormatTok->is(tok::r_brace)) { 3817 addUnwrappedLine(); 3818 break; 3819 } else { 3820 nextToken(); 3821 } 3822 } 3823 3824 // Parse the class body after the enum's ";" if any. 3825 parseLevel(OpeningBrace); 3826 nextToken(); 3827 --Line->Level; 3828 addUnwrappedLine(); 3829 } 3830 3831 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 3832 const FormatToken &InitialToken = *FormatTok; 3833 nextToken(); 3834 3835 // The actual identifier can be a nested name specifier, and in macros 3836 // it is often token-pasted. 3837 // An [[attribute]] can be before the identifier. 3838 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 3839 tok::kw___attribute, tok::kw___declspec, 3840 tok::kw_alignas, tok::l_square) || 3841 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 3842 FormatTok->isOneOf(tok::period, tok::comma))) { 3843 if (Style.isJavaScript() && 3844 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 3845 // JavaScript/TypeScript supports inline object types in 3846 // extends/implements positions: 3847 // class Foo implements {bar: number} { } 3848 nextToken(); 3849 if (FormatTok->is(tok::l_brace)) { 3850 tryToParseBracedList(); 3851 continue; 3852 } 3853 } 3854 if (FormatTok->is(tok::l_square) && handleCppAttributes()) 3855 continue; 3856 bool IsNonMacroIdentifier = 3857 FormatTok->is(tok::identifier) && 3858 FormatTok->TokenText != FormatTok->TokenText.upper(); 3859 nextToken(); 3860 // We can have macros in between 'class' and the class name. 
3861 if (!IsNonMacroIdentifier && FormatTok->is(tok::l_paren)) 3862 parseParens(); 3863 } 3864 3865 // Note that parsing away template declarations here leads to incorrectly 3866 // accepting function declarations as record declarations. 3867 // In general, we cannot solve this problem. Consider: 3868 // class A<int> B() {} 3869 // which can be a function definition or a class definition when B() is a 3870 // macro. If we find enough real-world cases where this is a problem, we 3871 // can parse for the 'template' keyword in the beginning of the statement, 3872 // and thus rule out the record production in case there is no template 3873 // (this would still leave us with an ambiguity between template function 3874 // and class declarations). 3875 if (FormatTok->isOneOf(tok::colon, tok::less)) { 3876 do { 3877 if (FormatTok->is(tok::l_brace)) { 3878 calculateBraceTypes(/*ExpectClassBody=*/true); 3879 if (!tryToParseBracedList()) 3880 break; 3881 } 3882 if (FormatTok->is(tok::l_square)) { 3883 FormatToken *Previous = FormatTok->Previous; 3884 if (!Previous || 3885 !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) { 3886 // Don't try parsing a lambda if we had a closing parenthesis before, 3887 // it was probably a pointer to an array: int (*)[]. 3888 if (!tryToParseLambda()) 3889 continue; 3890 } else { 3891 parseSquare(); 3892 continue; 3893 } 3894 } 3895 if (FormatTok->is(tok::semi)) 3896 return; 3897 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) { 3898 addUnwrappedLine(); 3899 nextToken(); 3900 parseCSharpGenericTypeConstraint(); 3901 break; 3902 } 3903 nextToken(); 3904 } while (!eof()); 3905 } 3906 3907 auto GetBraceType = [](const FormatToken &RecordTok) { 3908 switch (RecordTok.Tok.getKind()) { 3909 case tok::kw_class: 3910 return TT_ClassLBrace; 3911 case tok::kw_struct: 3912 return TT_StructLBrace; 3913 case tok::kw_union: 3914 return TT_UnionLBrace; 3915 default: 3916 // Useful for e.g. interface. 
3917 return TT_RecordLBrace; 3918 } 3919 }; 3920 if (FormatTok->is(tok::l_brace)) { 3921 FormatTok->setFinalizedType(GetBraceType(InitialToken)); 3922 if (ParseAsExpr) { 3923 parseChildBlock(); 3924 } else { 3925 if (ShouldBreakBeforeBrace(Style, InitialToken)) 3926 addUnwrappedLine(); 3927 3928 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u; 3929 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false); 3930 } 3931 } 3932 // There is no addUnwrappedLine() here so that we fall through to parsing a 3933 // structural element afterwards. Thus, in "class A {} n, m;", 3934 // "} n, m;" will end up in one unwrapped line. 3935 } 3936 3937 void UnwrappedLineParser::parseObjCMethod() { 3938 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) && 3939 "'(' or identifier expected."); 3940 do { 3941 if (FormatTok->is(tok::semi)) { 3942 nextToken(); 3943 addUnwrappedLine(); 3944 return; 3945 } else if (FormatTok->is(tok::l_brace)) { 3946 if (Style.BraceWrapping.AfterFunction) 3947 addUnwrappedLine(); 3948 parseBlock(); 3949 addUnwrappedLine(); 3950 return; 3951 } else { 3952 nextToken(); 3953 } 3954 } while (!eof()); 3955 } 3956 3957 void UnwrappedLineParser::parseObjCProtocolList() { 3958 assert(FormatTok->is(tok::less) && "'<' expected."); 3959 do { 3960 nextToken(); 3961 // Early exit in case someone forgot a close angle. 3962 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 3963 FormatTok->isObjCAtKeyword(tok::objc_end)) { 3964 return; 3965 } 3966 } while (!eof() && FormatTok->isNot(tok::greater)); 3967 nextToken(); // Skip '>'. 3968 } 3969 3970 void UnwrappedLineParser::parseObjCUntilAtEnd() { 3971 do { 3972 if (FormatTok->isObjCAtKeyword(tok::objc_end)) { 3973 nextToken(); 3974 addUnwrappedLine(); 3975 break; 3976 } 3977 if (FormatTok->is(tok::l_brace)) { 3978 parseBlock(); 3979 // In ObjC interfaces, nothing should be following the "}". 3980 addUnwrappedLine(); 3981 } else if (FormatTok->is(tok::r_brace)) { 3982 // Ignore stray "}". 
parseStructuralElement doesn't consume them. 3983 nextToken(); 3984 addUnwrappedLine(); 3985 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 3986 nextToken(); 3987 parseObjCMethod(); 3988 } else { 3989 parseStructuralElement(); 3990 } 3991 } while (!eof()); 3992 } 3993 3994 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 3995 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 3996 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 3997 nextToken(); 3998 nextToken(); // interface name 3999 4000 // @interface can be followed by a lightweight generic 4001 // specialization list, then either a base class or a category. 4002 if (FormatTok->is(tok::less)) 4003 parseObjCLightweightGenerics(); 4004 if (FormatTok->is(tok::colon)) { 4005 nextToken(); 4006 nextToken(); // base class name 4007 // The base class can also have lightweight generics applied to it. 4008 if (FormatTok->is(tok::less)) 4009 parseObjCLightweightGenerics(); 4010 } else if (FormatTok->is(tok::l_paren)) { 4011 // Skip category, if present. 4012 parseParens(); 4013 } 4014 4015 if (FormatTok->is(tok::less)) 4016 parseObjCProtocolList(); 4017 4018 if (FormatTok->is(tok::l_brace)) { 4019 if (Style.BraceWrapping.AfterObjCDeclaration) 4020 addUnwrappedLine(); 4021 parseBlock(/*MustBeDeclaration=*/true); 4022 } 4023 4024 // With instance variables, this puts '}' on its own line. Without instance 4025 // variables, this ends the @interface line. 4026 addUnwrappedLine(); 4027 4028 parseObjCUntilAtEnd(); 4029 } 4030 4031 void UnwrappedLineParser::parseObjCLightweightGenerics() { 4032 assert(FormatTok->is(tok::less)); 4033 // Unlike protocol lists, generic parameterizations support 4034 // nested angles: 4035 // 4036 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 4037 // NSObject <NSCopying, NSSecureCoding> 4038 // 4039 // so we need to count how many open angles we have left. 
4040 unsigned NumOpenAngles = 1; 4041 do { 4042 nextToken(); 4043 // Early exit in case someone forgot a close angle. 4044 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 4045 FormatTok->isObjCAtKeyword(tok::objc_end)) { 4046 break; 4047 } 4048 if (FormatTok->is(tok::less)) { 4049 ++NumOpenAngles; 4050 } else if (FormatTok->is(tok::greater)) { 4051 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 4052 --NumOpenAngles; 4053 } 4054 } while (!eof() && NumOpenAngles != 0); 4055 nextToken(); // Skip '>'. 4056 } 4057 4058 // Returns true for the declaration/definition form of @protocol, 4059 // false for the expression form. 4060 bool UnwrappedLineParser::parseObjCProtocol() { 4061 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 4062 nextToken(); 4063 4064 if (FormatTok->is(tok::l_paren)) { 4065 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 4066 return false; 4067 } 4068 4069 // The definition/declaration form, 4070 // @protocol Foo 4071 // - (int)someMethod; 4072 // @end 4073 4074 nextToken(); // protocol name 4075 4076 if (FormatTok->is(tok::less)) 4077 parseObjCProtocolList(); 4078 4079 // Check for protocol declaration. 4080 if (FormatTok->is(tok::semi)) { 4081 nextToken(); 4082 addUnwrappedLine(); 4083 return true; 4084 } 4085 4086 addUnwrappedLine(); 4087 parseObjCUntilAtEnd(); 4088 return true; 4089 } 4090 4091 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 4092 bool IsImport = FormatTok->is(Keywords.kw_import); 4093 assert(IsImport || FormatTok->is(tok::kw_export)); 4094 nextToken(); 4095 4096 // Consume the "default" in "export default class/function". 4097 if (FormatTok->is(tok::kw_default)) 4098 nextToken(); 4099 4100 // Consume "async function", "function" and "default function", so that these 4101 // get parsed as free-standing JS functions, i.e. do not require a trailing 4102 // semicolon. 
4103 if (FormatTok->is(Keywords.kw_async)) 4104 nextToken(); 4105 if (FormatTok->is(Keywords.kw_function)) { 4106 nextToken(); 4107 return; 4108 } 4109 4110 // For imports, `export *`, `export {...}`, consume the rest of the line up 4111 // to the terminating `;`. For everything else, just return and continue 4112 // parsing the structural element, i.e. the declaration or expression for 4113 // `export default`. 4114 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 4115 !FormatTok->isStringLiteral() && 4116 !(FormatTok->is(Keywords.kw_type) && 4117 Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) { 4118 return; 4119 } 4120 4121 while (!eof()) { 4122 if (FormatTok->is(tok::semi)) 4123 return; 4124 if (Line->Tokens.empty()) { 4125 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 4126 // import statement should terminate. 4127 return; 4128 } 4129 if (FormatTok->is(tok::l_brace)) { 4130 FormatTok->setBlockKind(BK_Block); 4131 nextToken(); 4132 parseBracedList(); 4133 } else { 4134 nextToken(); 4135 } 4136 } 4137 } 4138 4139 void UnwrappedLineParser::parseStatementMacro() { 4140 nextToken(); 4141 if (FormatTok->is(tok::l_paren)) 4142 parseParens(); 4143 if (FormatTok->is(tok::semi)) 4144 nextToken(); 4145 addUnwrappedLine(); 4146 } 4147 4148 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() { 4149 // consume things like a::`b.c[d:e] or a::* 4150 while (true) { 4151 if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar, 4152 tok::coloncolon, tok::hash) || 4153 Keywords.isVerilogIdentifier(*FormatTok)) { 4154 nextToken(); 4155 } else if (FormatTok->is(tok::l_square)) { 4156 parseSquare(); 4157 } else { 4158 break; 4159 } 4160 } 4161 } 4162 4163 void UnwrappedLineParser::parseVerilogSensitivityList() { 4164 if (!FormatTok->is(tok::at)) 4165 return; 4166 nextToken(); 4167 // A block event expression has 2 at signs. 
4168 if (FormatTok->is(tok::at)) 4169 nextToken(); 4170 switch (FormatTok->Tok.getKind()) { 4171 case tok::star: 4172 nextToken(); 4173 break; 4174 case tok::l_paren: 4175 parseParens(); 4176 break; 4177 default: 4178 parseVerilogHierarchyIdentifier(); 4179 break; 4180 } 4181 } 4182 4183 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() { 4184 unsigned AddLevels = 0; 4185 4186 if (FormatTok->is(Keywords.kw_clocking)) { 4187 nextToken(); 4188 if (Keywords.isVerilogIdentifier(*FormatTok)) 4189 nextToken(); 4190 parseVerilogSensitivityList(); 4191 if (FormatTok->is(tok::semi)) 4192 nextToken(); 4193 } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex, 4194 Keywords.kw_casez, Keywords.kw_randcase, 4195 Keywords.kw_randsequence)) { 4196 if (Style.IndentCaseLabels) 4197 AddLevels++; 4198 nextToken(); 4199 if (FormatTok->is(tok::l_paren)) { 4200 FormatTok->setFinalizedType(TT_ConditionLParen); 4201 parseParens(); 4202 } 4203 if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches)) 4204 nextToken(); 4205 // The case header has no semicolon. 4206 } else { 4207 // "module" etc. 
4208 nextToken(); 4209 // all the words like the name of the module and specifiers like 4210 // "automatic" and the width of function return type 4211 while (true) { 4212 if (FormatTok->is(tok::l_square)) { 4213 auto Prev = FormatTok->getPreviousNonComment(); 4214 if (Prev && Keywords.isVerilogIdentifier(*Prev)) 4215 Prev->setFinalizedType(TT_VerilogDimensionedTypeName); 4216 parseSquare(); 4217 } else if (Keywords.isVerilogIdentifier(*FormatTok) || 4218 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) { 4219 nextToken(); 4220 } else { 4221 break; 4222 } 4223 } 4224 4225 auto NewLine = [this]() { 4226 addUnwrappedLine(); 4227 Line->IsContinuation = true; 4228 }; 4229 4230 // package imports 4231 while (FormatTok->is(Keywords.kw_import)) { 4232 NewLine(); 4233 nextToken(); 4234 parseVerilogHierarchyIdentifier(); 4235 if (FormatTok->is(tok::semi)) 4236 nextToken(); 4237 } 4238 4239 // parameters and ports 4240 if (FormatTok->is(Keywords.kw_verilogHash)) { 4241 NewLine(); 4242 nextToken(); 4243 if (FormatTok->is(tok::l_paren)) { 4244 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen); 4245 parseParens(); 4246 } 4247 } 4248 if (FormatTok->is(tok::l_paren)) { 4249 NewLine(); 4250 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen); 4251 parseParens(); 4252 } 4253 4254 // extends and implements 4255 if (FormatTok->is(Keywords.kw_extends)) { 4256 NewLine(); 4257 nextToken(); 4258 parseVerilogHierarchyIdentifier(); 4259 if (FormatTok->is(tok::l_paren)) 4260 parseParens(); 4261 } 4262 if (FormatTok->is(Keywords.kw_implements)) { 4263 NewLine(); 4264 do { 4265 nextToken(); 4266 parseVerilogHierarchyIdentifier(); 4267 } while (FormatTok->is(tok::comma)); 4268 } 4269 4270 // Coverage event for cover groups. 
4271 if (FormatTok->is(tok::at)) { 4272 NewLine(); 4273 parseVerilogSensitivityList(); 4274 } 4275 4276 if (FormatTok->is(tok::semi)) 4277 nextToken(/*LevelDifference=*/1); 4278 addUnwrappedLine(); 4279 } 4280 4281 return AddLevels; 4282 } 4283 4284 void UnwrappedLineParser::parseVerilogTable() { 4285 assert(FormatTok->is(Keywords.kw_table)); 4286 nextToken(/*LevelDifference=*/1); 4287 addUnwrappedLine(); 4288 4289 auto InitialLevel = Line->Level++; 4290 while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) { 4291 FormatToken *Tok = FormatTok; 4292 nextToken(); 4293 if (Tok->is(tok::semi)) 4294 addUnwrappedLine(); 4295 else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus)) 4296 Tok->setFinalizedType(TT_VerilogTableItem); 4297 } 4298 Line->Level = InitialLevel; 4299 nextToken(/*LevelDifference=*/-1); 4300 addUnwrappedLine(); 4301 } 4302 4303 void UnwrappedLineParser::parseVerilogCaseLabel() { 4304 // The label will get unindented in AnnotatingParser. If there are no leading 4305 // spaces, indent the rest here so that things inside the block will be 4306 // indented relative to things outside. We don't use parseLabel because we 4307 // don't know whether this colon is a label or a ternary expression at this 4308 // point. 4309 auto OrigLevel = Line->Level; 4310 auto FirstLine = CurrentLines->size(); 4311 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1)) 4312 ++Line->Level; 4313 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok)) 4314 --Line->Level; 4315 parseStructuralElement(); 4316 // Restore the indentation in both the new line and the line that has the 4317 // label. 
4318 if (CurrentLines->size() > FirstLine) 4319 (*CurrentLines)[FirstLine].Level = OrigLevel; 4320 Line->Level = OrigLevel; 4321 } 4322 4323 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const { 4324 for (const auto &N : Line.Tokens) { 4325 if (N.Tok->MacroCtx) 4326 return true; 4327 for (const UnwrappedLine &Child : N.Children) 4328 if (containsExpansion(Child)) 4329 return true; 4330 } 4331 return false; 4332 } 4333 4334 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { 4335 if (Line->Tokens.empty()) 4336 return; 4337 LLVM_DEBUG({ 4338 if (!parsingPPDirective()) { 4339 llvm::dbgs() << "Adding unwrapped line:\n"; 4340 printDebugInfo(*Line); 4341 } 4342 }); 4343 4344 // If this line closes a block when in Whitesmiths mode, remember that 4345 // information so that the level can be decreased after the line is added. 4346 // This has to happen after the addition of the line since the line itself 4347 // needs to be indented. 4348 bool ClosesWhitesmithsBlock = 4349 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && 4350 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 4351 4352 // If the current line was expanded from a macro call, we use it to 4353 // reconstruct an unwrapped line from the structure of the expanded unwrapped 4354 // line and the unexpanded token stream. 4355 if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) { 4356 if (!Reconstruct) 4357 Reconstruct.emplace(Line->Level, Unexpanded); 4358 Reconstruct->addLine(*Line); 4359 4360 // While the reconstructed unexpanded lines are stored in the normal 4361 // flow of lines, the expanded lines are stored on the side to be analyzed 4362 // in an extra step. 
4363 CurrentExpandedLines.push_back(std::move(*Line)); 4364 4365 if (Reconstruct->finished()) { 4366 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult(); 4367 assert(!Reconstructed.Tokens.empty() && 4368 "Reconstructed must at least contain the macro identifier."); 4369 assert(!parsingPPDirective()); 4370 LLVM_DEBUG({ 4371 llvm::dbgs() << "Adding unexpanded line:\n"; 4372 printDebugInfo(Reconstructed); 4373 }); 4374 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines; 4375 Lines.push_back(std::move(Reconstructed)); 4376 CurrentExpandedLines.clear(); 4377 Reconstruct.reset(); 4378 } 4379 } else { 4380 // At the top level we only get here when no unexpansion is going on, or 4381 // when conditional formatting led to unfinished macro reconstructions. 4382 assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0); 4383 CurrentLines->push_back(std::move(*Line)); 4384 } 4385 Line->Tokens.clear(); 4386 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 4387 Line->FirstStartColumn = 0; 4388 Line->IsContinuation = false; 4389 4390 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) 4391 --Line->Level; 4392 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) { 4393 CurrentLines->append( 4394 std::make_move_iterator(PreprocessorDirectives.begin()), 4395 std::make_move_iterator(PreprocessorDirectives.end())); 4396 PreprocessorDirectives.clear(); 4397 } 4398 // Disconnect the current token from the last token on the previous line. 4399 FormatTok->Previous = nullptr; 4400 } 4401 4402 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); } 4403 4404 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 4405 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 4406 FormatTok.NewlinesBefore > 0; 4407 } 4408 4409 // Checks if \p FormatTok is a line comment that continues the line comment 4410 // section on \p Line. 
4411 static bool 4412 continuesLineCommentSection(const FormatToken &FormatTok, 4413 const UnwrappedLine &Line, 4414 const llvm::Regex &CommentPragmasRegex) { 4415 if (Line.Tokens.empty()) 4416 return false; 4417 4418 StringRef IndentContent = FormatTok.TokenText; 4419 if (FormatTok.TokenText.startswith("//") || 4420 FormatTok.TokenText.startswith("/*")) { 4421 IndentContent = FormatTok.TokenText.substr(2); 4422 } 4423 if (CommentPragmasRegex.match(IndentContent)) 4424 return false; 4425 4426 // If Line starts with a line comment, then FormatTok continues the comment 4427 // section if its original column is greater or equal to the original start 4428 // column of the line. 4429 // 4430 // Define the min column token of a line as follows: if a line ends in '{' or 4431 // contains a '{' followed by a line comment, then the min column token is 4432 // that '{'. Otherwise, the min column token of the line is the first token of 4433 // the line. 4434 // 4435 // If Line starts with a token other than a line comment, then FormatTok 4436 // continues the comment section if its original column is greater than the 4437 // original start column of the min column token of the line. 
4438 // 4439 // For example, the second line comment continues the first in these cases: 4440 // 4441 // // first line 4442 // // second line 4443 // 4444 // and: 4445 // 4446 // // first line 4447 // // second line 4448 // 4449 // and: 4450 // 4451 // int i; // first line 4452 // // second line 4453 // 4454 // and: 4455 // 4456 // do { // first line 4457 // // second line 4458 // int i; 4459 // } while (true); 4460 // 4461 // and: 4462 // 4463 // enum { 4464 // a, // first line 4465 // // second line 4466 // b 4467 // }; 4468 // 4469 // The second line comment doesn't continue the first in these cases: 4470 // 4471 // // first line 4472 // // second line 4473 // 4474 // and: 4475 // 4476 // int i; // first line 4477 // // second line 4478 // 4479 // and: 4480 // 4481 // do { // first line 4482 // // second line 4483 // int i; 4484 // } while (true); 4485 // 4486 // and: 4487 // 4488 // enum { 4489 // a, // first line 4490 // // second line 4491 // }; 4492 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 4493 4494 // Scan for '{//'. If found, use the column of '{' as a min column for line 4495 // comment section continuation. 4496 const FormatToken *PreviousToken = nullptr; 4497 for (const UnwrappedLineNode &Node : Line.Tokens) { 4498 if (PreviousToken && PreviousToken->is(tok::l_brace) && 4499 isLineComment(*Node.Tok)) { 4500 MinColumnToken = PreviousToken; 4501 break; 4502 } 4503 PreviousToken = Node.Tok; 4504 4505 // Grab the last newline preceding a token in this unwrapped line. 
4506 if (Node.Tok->NewlinesBefore > 0) 4507 MinColumnToken = Node.Tok; 4508 } 4509 if (PreviousToken && PreviousToken->is(tok::l_brace)) 4510 MinColumnToken = PreviousToken; 4511 4512 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 4513 MinColumnToken); 4514 } 4515 4516 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 4517 bool JustComments = Line->Tokens.empty(); 4518 for (FormatToken *Tok : CommentsBeforeNextToken) { 4519 // Line comments that belong to the same line comment section are put on the 4520 // same line since later we might want to reflow content between them. 4521 // Additional fine-grained breaking of line comment sections is controlled 4522 // by the class BreakableLineCommentSection in case it is desirable to keep 4523 // several line comment sections in the same unwrapped line. 4524 // 4525 // FIXME: Consider putting separate line comment sections as children to the 4526 // unwrapped line instead. 4527 Tok->ContinuesLineCommentSection = 4528 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex); 4529 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection) 4530 addUnwrappedLine(); 4531 pushToken(Tok); 4532 } 4533 if (NewlineBeforeNext && JustComments) 4534 addUnwrappedLine(); 4535 CommentsBeforeNextToken.clear(); 4536 } 4537 4538 void UnwrappedLineParser::nextToken(int LevelDifference) { 4539 if (eof()) 4540 return; 4541 flushComments(isOnNewLine(*FormatTok)); 4542 pushToken(FormatTok); 4543 FormatToken *Previous = FormatTok; 4544 if (!Style.isJavaScript()) 4545 readToken(LevelDifference); 4546 else 4547 readTokenWithJavaScriptASI(); 4548 FormatTok->Previous = Previous; 4549 if (Style.isVerilog()) { 4550 // Blocks in Verilog can have `begin` and `end` instead of braces. For 4551 // keywords like `begin`, we can't treat them the same as left braces 4552 // because some contexts require one of them. 
For example structs use 4553 // braces and if blocks use keywords, and a left brace can occur in an if 4554 // statement, but it is not a block. For keywords like `end`, we simply 4555 // treat them the same as right braces. 4556 if (Keywords.isVerilogEnd(*FormatTok)) 4557 FormatTok->Tok.setKind(tok::r_brace); 4558 } 4559 } 4560 4561 void UnwrappedLineParser::distributeComments( 4562 const SmallVectorImpl<FormatToken *> &Comments, 4563 const FormatToken *NextTok) { 4564 // Whether or not a line comment token continues a line is controlled by 4565 // the method continuesLineCommentSection, with the following caveat: 4566 // 4567 // Define a trail of Comments to be a nonempty proper postfix of Comments such 4568 // that each comment line from the trail is aligned with the next token, if 4569 // the next token exists. If a trail exists, the beginning of the maximal 4570 // trail is marked as a start of a new comment section. 4571 // 4572 // For example in this code: 4573 // 4574 // int a; // line about a 4575 // // line 1 about b 4576 // // line 2 about b 4577 // int b; 4578 // 4579 // the two lines about b form a maximal trail, so there are two sections, the 4580 // first one consisting of the single comment "// line about a" and the 4581 // second one consisting of the next two comments. 4582 if (Comments.empty()) 4583 return; 4584 bool ShouldPushCommentsInCurrentLine = true; 4585 bool HasTrailAlignedWithNextToken = false; 4586 unsigned StartOfTrailAlignedWithNextToken = 0; 4587 if (NextTok) { 4588 // We are skipping the first element intentionally. 
4589 for (unsigned i = Comments.size() - 1; i > 0; --i) { 4590 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 4591 HasTrailAlignedWithNextToken = true; 4592 StartOfTrailAlignedWithNextToken = i; 4593 } 4594 } 4595 } 4596 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 4597 FormatToken *FormatTok = Comments[i]; 4598 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 4599 FormatTok->ContinuesLineCommentSection = false; 4600 } else { 4601 FormatTok->ContinuesLineCommentSection = 4602 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 4603 } 4604 if (!FormatTok->ContinuesLineCommentSection && 4605 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 4606 ShouldPushCommentsInCurrentLine = false; 4607 } 4608 if (ShouldPushCommentsInCurrentLine) 4609 pushToken(FormatTok); 4610 else 4611 CommentsBeforeNextToken.push_back(FormatTok); 4612 } 4613 } 4614 4615 void UnwrappedLineParser::readToken(int LevelDifference) { 4616 SmallVector<FormatToken *, 1> Comments; 4617 bool PreviousWasComment = false; 4618 bool FirstNonCommentOnLine = false; 4619 do { 4620 FormatTok = Tokens->getNextToken(); 4621 assert(FormatTok); 4622 while (FormatTok->getType() == TT_ConflictStart || 4623 FormatTok->getType() == TT_ConflictEnd || 4624 FormatTok->getType() == TT_ConflictAlternative) { 4625 if (FormatTok->getType() == TT_ConflictStart) 4626 conditionalCompilationStart(/*Unreachable=*/false); 4627 else if (FormatTok->getType() == TT_ConflictAlternative) 4628 conditionalCompilationAlternative(); 4629 else if (FormatTok->getType() == TT_ConflictEnd) 4630 conditionalCompilationEnd(); 4631 FormatTok = Tokens->getNextToken(); 4632 FormatTok->MustBreakBefore = true; 4633 } 4634 4635 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine, 4636 const FormatToken &Tok, 4637 bool PreviousWasComment) { 4638 auto IsFirstOnLine = [](const FormatToken &Tok) { 4639 return Tok.HasUnescapedNewline || Tok.IsFirst; 4640 }; 4641 4642 // Consider 
preprocessor directives preceded by block comments as first 4643 // on line. 4644 if (PreviousWasComment) 4645 return FirstNonCommentOnLine || IsFirstOnLine(Tok); 4646 return IsFirstOnLine(Tok); 4647 }; 4648 4649 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4650 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4651 PreviousWasComment = FormatTok->is(tok::comment); 4652 4653 while (!Line->InPPDirective && FormatTok->is(tok::hash) && 4654 (!Style.isVerilog() || 4655 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) && 4656 FirstNonCommentOnLine) { 4657 distributeComments(Comments, FormatTok); 4658 Comments.clear(); 4659 // If there is an unfinished unwrapped line, we flush the preprocessor 4660 // directives only after that unwrapped line was finished later. 4661 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 4662 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 4663 assert((LevelDifference >= 0 || 4664 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 4665 "LevelDifference makes Line->Level negative"); 4666 Line->Level += LevelDifference; 4667 // Comments stored before the preprocessor directive need to be output 4668 // before the preprocessor directive, at the same level as the 4669 // preprocessor directive, as we consider them to apply to the directive. 4670 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 4671 PPBranchLevel > 0) { 4672 Line->Level += PPBranchLevel; 4673 } 4674 flushComments(isOnNewLine(*FormatTok)); 4675 parsePPDirective(); 4676 PreviousWasComment = FormatTok->is(tok::comment); 4677 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4678 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4679 } 4680 4681 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 4682 !Line->InPPDirective) { 4683 continue; 4684 } 4685 4686 if (FormatTok->is(tok::identifier) && 4687 Macros.defined(FormatTok->TokenText) && 4688 // FIXME: Allow expanding macros in preprocessor directives. 
4689 !Line->InPPDirective) { 4690 FormatToken *ID = FormatTok; 4691 unsigned Position = Tokens->getPosition(); 4692 4693 // To correctly parse the code, we need to replace the tokens of the macro 4694 // call with its expansion. 4695 auto PreCall = std::move(Line); 4696 Line.reset(new UnwrappedLine); 4697 bool OldInExpansion = InExpansion; 4698 InExpansion = true; 4699 // We parse the macro call into a new line. 4700 auto Args = parseMacroCall(); 4701 InExpansion = OldInExpansion; 4702 assert(Line->Tokens.front().Tok == ID); 4703 // And remember the unexpanded macro call tokens. 4704 auto UnexpandedLine = std::move(Line); 4705 // Reset to the old line. 4706 Line = std::move(PreCall); 4707 4708 LLVM_DEBUG({ 4709 llvm::dbgs() << "Macro call: " << ID->TokenText << "("; 4710 if (Args) { 4711 llvm::dbgs() << "("; 4712 for (const auto &Arg : Args.value()) 4713 for (const auto &T : Arg) 4714 llvm::dbgs() << T->TokenText << " "; 4715 llvm::dbgs() << ")"; 4716 } 4717 llvm::dbgs() << "\n"; 4718 }); 4719 if (Macros.objectLike(ID->TokenText) && Args && 4720 !Macros.hasArity(ID->TokenText, Args->size())) { 4721 // The macro is either 4722 // - object-like, but we got argumnets, or 4723 // - overloaded to be both object-like and function-like, but none of 4724 // the function-like arities match the number of arguments. 4725 // Thus, expand as object-like macro. 4726 LLVM_DEBUG(llvm::dbgs() 4727 << "Macro \"" << ID->TokenText 4728 << "\" not overloaded for arity " << Args->size() 4729 << "or not function-like, using object-like overload."); 4730 Args.reset(); 4731 UnexpandedLine->Tokens.resize(1); 4732 Tokens->setPosition(Position); 4733 nextToken(); 4734 assert(!Args && Macros.objectLike(ID->TokenText)); 4735 } 4736 if ((!Args && Macros.objectLike(ID->TokenText)) || 4737 (Args && Macros.hasArity(ID->TokenText, Args->size()))) { 4738 // Next, we insert the expanded tokens in the token stream at the 4739 // current position, and continue parsing. 
4740 Unexpanded[ID] = std::move(UnexpandedLine); 4741 SmallVector<FormatToken *, 8> Expansion = 4742 Macros.expand(ID, std::move(Args)); 4743 if (!Expansion.empty()) 4744 FormatTok = Tokens->insertTokens(Expansion); 4745 4746 LLVM_DEBUG({ 4747 llvm::dbgs() << "Expanded: "; 4748 for (const auto &T : Expansion) 4749 llvm::dbgs() << T->TokenText << " "; 4750 llvm::dbgs() << "\n"; 4751 }); 4752 } else { 4753 LLVM_DEBUG({ 4754 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText 4755 << "\", because it was used "; 4756 if (Args) 4757 llvm::dbgs() << "with " << Args->size(); 4758 else 4759 llvm::dbgs() << "without"; 4760 llvm::dbgs() << " arguments, which doesn't match any definition.\n"; 4761 }); 4762 Tokens->setPosition(Position); 4763 FormatTok = ID; 4764 } 4765 } 4766 4767 if (!FormatTok->is(tok::comment)) { 4768 distributeComments(Comments, FormatTok); 4769 Comments.clear(); 4770 return; 4771 } 4772 4773 Comments.push_back(FormatTok); 4774 } while (!eof()); 4775 4776 distributeComments(Comments, nullptr); 4777 Comments.clear(); 4778 } 4779 4780 namespace { 4781 template <typename Iterator> 4782 void pushTokens(Iterator Begin, Iterator End, 4783 llvm::SmallVectorImpl<FormatToken *> &Into) { 4784 for (auto I = Begin; I != End; ++I) { 4785 Into.push_back(I->Tok); 4786 for (const auto &Child : I->Children) 4787 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into); 4788 } 4789 } 4790 } // namespace 4791 4792 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> 4793 UnwrappedLineParser::parseMacroCall() { 4794 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args; 4795 assert(Line->Tokens.empty()); 4796 nextToken(); 4797 if (!FormatTok->is(tok::l_paren)) 4798 return Args; 4799 unsigned Position = Tokens->getPosition(); 4800 FormatToken *Tok = FormatTok; 4801 nextToken(); 4802 Args.emplace(); 4803 auto ArgStart = std::prev(Line->Tokens.end()); 4804 4805 int Parens = 0; 4806 do { 4807 switch (FormatTok->Tok.getKind()) 
{ 4808 case tok::l_paren: 4809 ++Parens; 4810 nextToken(); 4811 break; 4812 case tok::r_paren: { 4813 if (Parens > 0) { 4814 --Parens; 4815 nextToken(); 4816 break; 4817 } 4818 Args->push_back({}); 4819 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); 4820 nextToken(); 4821 return Args; 4822 } 4823 case tok::comma: { 4824 if (Parens > 0) { 4825 nextToken(); 4826 break; 4827 } 4828 Args->push_back({}); 4829 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); 4830 nextToken(); 4831 ArgStart = std::prev(Line->Tokens.end()); 4832 break; 4833 } 4834 default: 4835 nextToken(); 4836 break; 4837 } 4838 } while (!eof()); 4839 Line->Tokens.resize(1); 4840 Tokens->setPosition(Position); 4841 FormatTok = Tok; 4842 return {}; 4843 } 4844 4845 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 4846 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 4847 if (MustBreakBeforeNextToken) { 4848 Line->Tokens.back().Tok->MustBreakBefore = true; 4849 MustBreakBeforeNextToken = false; 4850 } 4851 } 4852 4853 } // end namespace format 4854 } // end namespace clang 4855