//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the implementation of the UnwrappedLineParser,
/// which turns a stream of tokens into UnwrappedLines.
///
//===----------------------------------------------------------------------===//

#include "UnwrappedLineParser.h"
#include "FormatToken.h"
#include "FormatTokenLexer.h"
#include "FormatTokenSource.h"
#include "Macros.h"
#include "TokenAnnotator.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Support/raw_ostream.h"

#include <algorithm>
#include <utility>

#define DEBUG_TYPE "format-parser"

namespace clang {
namespace format {

namespace {

/// Writes a debug representation of \p Line to \p OS.
///
/// The output starts with \p Prefix, the line's level and first start column
/// (followed by " MACRO" if the line is in a preprocessor directive). Then,
/// for every token, its name, numeric type, original column and text are
/// printed. Child lines attached to a token are printed recursively, each on
/// its own output line with an extended prefix.
///
/// \param OS Stream to write to.
/// \param Line The unwrapped line to print.
/// \param Prefix Text emitted at the start of every output line.
/// \param PrintText Not read by this function.
void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
               StringRef Prefix = "", bool PrintText = false) {
  OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
     << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
  // Set after children were printed: the next token (if any) then has to start
  // with the prefix again, and no extra trailing newline is needed.
  bool NewLine = false;
  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
                                                    E = Line.Tokens.end();
       I != E; ++I) {
    if (NewLine) {
      OS << Prefix;
      NewLine = false;
    }
    OS << I->Tok->Tok.getName() << "[" << "T=" << (unsigned)I->Tok->getType()
       << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
       << "\"] ";
    for (SmallVectorImpl<UnwrappedLine>::const_iterator
             CI = I->Children.begin(),
             CE = I->Children.end();
         CI != CE; ++CI) {
      OS << "\n";
      printLine(OS, *CI, (Prefix + " ").str());
      NewLine = true;
    }
  }
  if (!NewLine)
    OS << "\n";
}

/// Prints \p Line to the LLVM debug stream. Only referenced from LLVM_DEBUG
/// statements in this file, hence the unused attribute.
LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
  printLine(llvm::dbgs(), Line);
}

/// Scope guard for the "must be declaration" state.
///
/// On construction, sets \c Line.MustBeDeclaration to the given value and
/// pushes that value onto \p Stack. On destruction, pops the stack and restores
/// \c Line.MustBeDeclaration from the new top of the stack, or sets it to
/// \c true if the stack became empty.
class ScopedDeclarationState {
public:
  ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
                         bool MustBeDeclaration)
      : Line(Line), Stack(Stack) {
    Line.MustBeDeclaration = MustBeDeclaration;
    Stack.push_back(MustBeDeclaration);
  }
  ~ScopedDeclarationState() {
    Stack.pop_back();
    if (!Stack.empty())
      Line.MustBeDeclaration = Stack.back();
    else
      Line.MustBeDeclaration = true;
  }

private:
  UnwrappedLine &Line;
  llvm::BitVector &Stack;
};

} // end anonymous namespace

/// Scope guard that temporarily replaces the parser's current line.
///
/// On construction, the parser's current line is stashed away and replaced by
/// a fresh UnwrappedLine that inherits Level, PPLevel, InPPDirective and
/// InMacroBody from the stashed line. The list that finished lines are added
/// to (\c Parser.CurrentLines) is redirected either to the preprocessor
/// directive list (if \p SwitchToPreprocessorLines) or, if the stashed line
/// has tokens, to the children of its last token.
///
/// On destruction, a non-empty current line is flushed, and the stashed line
/// and the original line list are restored.
class ScopedLineState {
public:
  ScopedLineState(UnwrappedLineParser &Parser,
                  bool SwitchToPreprocessorLines = false)
      : Parser(Parser), OriginalLines(Parser.CurrentLines) {
    if (SwitchToPreprocessorLines)
      Parser.CurrentLines = &Parser.PreprocessorDirectives;
    else if (!Parser.Line->Tokens.empty())
      Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
    PreBlockLine = std::move(Parser.Line);
    Parser.Line = std::make_unique<UnwrappedLine>();
    Parser.Line->Level = PreBlockLine->Level;
    Parser.Line->PPLevel = PreBlockLine->PPLevel;
    Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
    Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
  }

  ~ScopedLineState() {
    if (!Parser.Line->Tokens.empty())
      Parser.addUnwrappedLine();
    assert(Parser.Line->Tokens.empty());
    Parser.Line = std::move(PreBlockLine);
    // If lines were collected as preprocessor directives, request a break
    // before the next token.
    if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
      Parser.MustBreakBeforeNextToken = true;
    Parser.CurrentLines = OriginalLines;
  }

private:
  UnwrappedLineParser &Parser;

  // The line that was current when this guard was created.
  std::unique_ptr<UnwrappedLine> PreBlockLine;
  // The line list that was current when this guard was created.
  SmallVectorImpl<UnwrappedLine> *OriginalLines;
};

/// Scope guard applying brace wrapping/indentation for a compound statement.
///
/// On construction, optionally finishes the current unwrapped line (if the
/// brace is to be wrapped) and optionally increments \p LineLevel (if the
/// brace is to be indented). On destruction, \p LineLevel is reset to the
/// value it had at construction.
class CompoundStatementIndenter {
public:
  /// Uses \c Style.BraceWrapping.AfterControlStatement as the wrap flag and
  /// \c Style.BraceWrapping.IndentBraces as the indent flag.
  CompoundStatementIndenter(UnwrappedLineParser *Parser,
                            const FormatStyle &Style, unsigned &LineLevel)
      : CompoundStatementIndenter(Parser, LineLevel,
                                  Style.BraceWrapping.AfterControlStatement,
                                  Style.BraceWrapping.IndentBraces) {}
  CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
                            bool WrapBrace, bool IndentBrace)
      : LineLevel(LineLevel), OldLineLevel(LineLevel) {
    if (WrapBrace)
      Parser->addUnwrappedLine();
    if (IndentBrace)
      ++LineLevel;
  }
  ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }

private:
  unsigned &LineLevel;
  unsigned OldLineLevel;
};

/// Constructs a parser over \p Tokens that reports lines to \p Callback.
///
/// Include guard detection starts out rejected when
/// \c Style.IndentPPDirectives is \c PPDIS_None, and in its initial state
/// otherwise.
UnwrappedLineParser::UnwrappedLineParser(
    SourceManager &SourceMgr, const FormatStyle &Style,
    const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
    ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
    llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
    IdentifierTable &IdentTable)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Style(Style), Keywords(Keywords),
      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
      IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
                       ? IG_Rejected
                       : IG_Inited),
      IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
      Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}

/// Resets the per-run parser state; called at the start of every run in
/// parse().
///
/// This function does not touch \c Lines or the \c PPLevelBranchIndex /
/// \c PPLevelBranchCount vectors.
void UnwrappedLineParser::reset() {
  PPBranchLevel = -1;
  IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
                     ? IG_Rejected
                     : IG_Inited;
  IncludeGuardToken = nullptr;
  Line.reset(new UnwrappedLine);
  CommentsBeforeNextToken.clear();
  FormatTok = nullptr;
  MustBreakBeforeNextToken = false;
  IsDecltypeAutoFunction = false;
  PreprocessorDirectives.clear();
  CurrentLines = &Lines;
  DeclarationScopeStack.clear();
  NestedTooDeep.clear();
  NestedLambdas.clear();
  PPStack.clear();
  Line->FirstStartColumn = FirstStartColumn;

  // Only if something was recorded in Unexpanded during the previous run, the
  // macro context of every token is reset.
  if (!Unexpanded.empty())
    for (FormatToken *Token : AllTokens)
      Token->MacroCtx.reset();
  CurrentExpandedLines.clear();
  ExpandedLines.clear();
  Unexpanded.clear();
  InExpansion = false;
  Reconstruct.reset();
}

/// Parses all tokens and hands the resulting unwrapped lines to the callback.
///
/// The whole token stream is parsed repeatedly: each iteration resets the
/// parser, parses the file, reports the lines and calls
/// \c Callback.finishRun(). After an iteration, trailing entries of
/// \c PPLevelBranchIndex whose index has reached the last counted branch are
/// dropped and the innermost remaining index is advanced; the loop ends once
/// \c PPLevelBranchIndex is empty.
void UnwrappedLineParser::parse() {
  IndexedTokenSource TokenSource(AllTokens);
  Line->FirstStartColumn = FirstStartColumn;
  do {
    LLVM_DEBUG(llvm::dbgs() << "----\n");
    reset();
    Tokens = &TokenSource;
    TokenSource.reset();

    readToken();
    parseFile();

    // If we found an include guard then all preprocessor directives (other than
    // the guard) are over-indented by one.
    if (IncludeGuard == IG_Found) {
      for (auto &Line : Lines)
        if (Line.InPPDirective && Line.Level > 0)
          --Line.Level;
    }

    // Create line with eof token.
    assert(eof());
    pushToken(FormatTok);
    addUnwrappedLine();

    // In a first run, format everything with the lines containing macro calls
    // replaced by the expansion.
    if (!ExpandedLines.empty()) {
      LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
      for (const auto &Line : Lines) {
        if (!Line.Tokens.empty()) {
          // ExpandedLines is looked up by the first token of the line.
          auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
          if (it != ExpandedLines.end()) {
            for (const auto &Expanded : it->second) {
              LLVM_DEBUG(printDebugInfo(Expanded));
              Callback.consumeUnwrappedLine(Expanded);
            }
            continue;
          }
        }
        LLVM_DEBUG(printDebugInfo(Line));
        Callback.consumeUnwrappedLine(Line);
      }
      Callback.finishRun();
    }

    LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
    for (const UnwrappedLine &Line : Lines) {
      LLVM_DEBUG(printDebugInfo(Line));
      Callback.consumeUnwrappedLine(Line);
    }
    Callback.finishRun();
    Lines.clear();
    // Drop all trailing levels whose current branch index is already the last
    // counted branch, then move the innermost remaining level to its next
    // branch.
    while (!PPLevelBranchIndex.empty() &&
           PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
      PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
      PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
    }
    if (!PPLevelBranchIndex.empty()) {
      ++PPLevelBranchIndex.back();
      assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
      assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
    }
  } while (!PPLevelBranchIndex.empty());
}

/// Parses the top level of a file.
///
/// For \c LK_TextProto the top level is parsed as the body of a braced list;
/// for every other language it is parsed with parseLevel(). Afterwards,
/// pending comments are flushed and the current line is finished.
void UnwrappedLineParser::parseFile() {
  // The top-level context in a file always has declarations, except for pre-
  // processor directives and JavaScript files.
  bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (Style.Language == FormatStyle::LK_TextProto)
    parseBracedList();
  else
    parseLevel();
  // Make sure to format the remaining tokens.
  //
  // LK_TextProto is special since its top-level is parsed as the body of a
  // braced list, which does not necessarily have natural line separators such
  // as a semicolon. Comments after the last entry that have been determined to
  // not belong to that line, as in:
  //   key: value
  //   // endfile comment
  // do not have a chance to be put on a line of their own until this point.
  // Here we add this newline before end-of-file comments.
  if (Style.Language == FormatStyle::LK_TextProto &&
      !CommentsBeforeNextToken.empty()) {
    addUnwrappedLine();
  }
  flushComments(true);
  addUnwrappedLine();
}

/// Consumes tokens up to (but not including) the next '{' or until end of
/// file. Whenever a \c where keyword is seen, the current unwrapped line is
/// finished first and parsing continues recursively after the keyword.
void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      return;
    default:
      if (FormatTok->is(Keywords.kw_where)) {
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
      break;
    }
  } while (!eof());
}

/// Consumes tokens until the square bracket that balances the already
/// consumed opening '[' is found, then finishes the current unwrapped line.
///
/// The counter starts at 1 because the caller has consumed the opening '['
/// (see the \c tok::l_square case in parseLevel()). Stops at end of file if no
/// balancing ']' exists.
void UnwrappedLineParser::parseCSharpAttribute() {
  int UnpairedSquareBrackets = 1;
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::r_square:
      nextToken();
      --UnpairedSquareBrackets;
      if (UnpairedSquareBrackets == 0) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::l_square:
      ++UnpairedSquareBrackets;
      nextToken();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

/// Returns true if the last line in \c Lines is in a preprocessor directive,
/// or if the previous token is a comment that is multiline or preceded by at
/// least one newline.
bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
  if (!Lines.empty() && Lines.back().InPPDirective)
    return true;

  const FormatToken *Previous = Tokens->getPreviousToken();
  return Previous && Previous->is(tok::comment) &&
         (Previous->IsMultiline || Previous->NewlinesBefore > 0);
}

/// \brief Parses a level, that is, a sequence of structural elements that is
/// consumed until the closing brace matching \p OpeningBrace (if any) or the
/// end of the file is reached.
/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
/// \param IfKind The \p if statement kind in the level.
/// \param IfLeftBrace The left brace of the \p if block in the level.
/// \returns true if a simple block of if/else/for/while, or false otherwise.
/// (A simple block has a single statement.)
bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
                                     IfStmtKind *IfKind,
                                     FormatToken **IfLeftBrace) {
  const bool InRequiresExpression =
      OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
  // Evaluated once on entry; always true when RemoveBracesLLVM is off, which
  // makes the "simple block" check below fail.
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  FormatToken *IfLBrace = nullptr;
  bool HasDoWhile = false;
  bool HasLabel = false;
  // Number of statements/blocks parsed directly in this level.
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;

  do {
    if (FormatTok->isAttribute()) {
      nextToken();
      continue;
    }
    // Macro block begin/end tokens are dispatched like braces.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin)
      kind = tok::l_brace;
    else if (FormatTok->getType() == TT_MacroBlockEnd)
      kind = tok::r_brace;

    // Parses one structural element and counts it. Once HasDoWhile/HasLabel
    // have been set, nullptr is passed instead of their address so that later
    // elements cannot reset them.
    auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
                         &HasLabel, &StatementCount] {
      parseStructuralElement(OpeningBrace, IfKind, &IfLBrace,
                             HasDoWhile ? nullptr : &HasDoWhile,
                             HasLabel ? nullptr : &HasLabel);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
    };

    switch (kind) {
    case tok::comment:
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      if (InRequiresExpression) {
        FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
      } else if (FormatTok->Previous &&
                 FormatTok->Previous->ClosesRequiresClause) {
        // We need the 'default' case here to correctly parse a function
        // l_brace.
        ParseDefault();
        continue;
      }
      if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin) &&
          tryToParseBracedList()) {
        continue;
      }
      parseBlock();
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (OpeningBrace) {
        // This closing brace ends the level. The return value tells the
        // caller whether this was a simple block; the brace itself is not
        // consumed here.
        if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
            !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
          return false;
        }
        // `kind` may be r_brace because of a TT_MacroBlockEnd token, so the
        // actual token kind is checked again here.
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
            HasDoWhile || IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective()) {
          return false;
        }
        // A comment on the same line as the closing brace rules out a simple
        // block.
        const FormatToken *Next = Tokens->peekNextToken();
        if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
          return false;
        if (IfLeftBrace)
          *IfLeftBrace = IfLBrace;
        return true;
      }
      // Closing brace without an opening brace for this level: consume it and
      // put it on its own line.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Look ahead past comments to see whether a ':' follows, then rewind.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
        assert(Next);
      } while (Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      [[fallthrough]];
    }
    case tok::kw_case:
      if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
          (Style.isJavaScript() && Line->MustBeDeclaration)) {
        // Proto: there are no switch/case statements
        // Verilog: Case labels don't have this word. We handle case
        // labels including default in TokenAnnotator.
        // JavaScript: A 'case: string' style field declaration.
        ParseDefault();
        break;
      }
      // Only the first label encountered in this level can increase the
      // level.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels ||
           (Line->InPPDirective && Line->Level == 1))) {
        ++Line->Level;
      }
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        nextToken();
        parseCSharpAttribute();
        break;
      }
      if (handleCppAttributes())
        break;
      [[fallthrough]];
    default:
      ParseDefault();
      break;
    }
  } while (!eof());

  return false;
}

/// Looks ahead from the current '{' and assigns a block kind (\c BK_Block or
/// \c BK_BracedInit) to it and to the braces nested in it.
///
/// The scan stops once the brace the parser is positioned at has been closed
/// or end of file is reached. Any opening brace still unclassified at that
/// point is marked \c BK_Block. The token position is restored before
/// returning.
///
/// \param ExpectClassBody If true, a ';' after the closing brace of the
/// outermost brace pair does not count as evidence for a braced list.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  struct StackEntry {
    FormatToken *Tok;
    const FormatToken *PrevTok;
  };
  SmallVector<StackEntry, 8> LBraceStack;
  assert(Tok->is(tok::l_brace));
  do {
    // Get next non-comment, non-preprocessor token.
    FormatToken *NextTok;
    do {
      NextTok = Tokens->getNextToken();
    } while (NextTok->is(tok::comment));
    // Outside of macro bodies, skip a preprocessor directive: advance until a
    // non-comment token is found that is preceded by a newline (or is eof).
    while (NextTok->is(tok::hash) && !Line->InMacroBody) {
      NextTok = Tokens->getNextToken();
      do {
        NextTok = Tokens->getNextToken();
      } while (NextTok->is(tok::comment) ||
               (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof)));
    }

    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.isJavaScript() && PrevTok) {
        if (PrevTok->isOneOf(tok::colon, tok::less)) {
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->setBlockKind(BK_BracedInit);
        } else if (PrevTok->is(tok::r_paren)) {
          // `) { }` can only occur in function or method declarations in JS.
          Tok->setBlockKind(BK_Block);
        }
      } else {
        Tok->setBlockKind(BK_Unknown);
      }
      LBraceStack.push_back({Tok, PrevTok});
      break;
    case tok::r_brace:
      if (LBraceStack.empty())
        break;
      if (LBraceStack.back().Tok->is(BK_Unknown)) {
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else {
          // Skip NextTok over preprocessor lines, otherwise we may not
          // properly diagnose the block as a braced initializer
          // if the comma separator appears after the pp directive.
          while (NextTok->is(tok::hash)) {
            ScopedMacroState MacroState(*Line, Tokens, NextTok);
            do {
              NextTok = Tokens->getNextToken();
            } while (NextTok->isNot(tok::eof));
          }

          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // Try to detect a braced list. Note that regardless how we mark inner
          // braces here, we will overwrite the BlockKind later if we parse a
          // braced list (where all blocks inside are by default braced lists),
          // or when we explicitly detect blocks (for example while parsing
          // lambdas).

          // If we already marked the opening brace as braced list, the closing
          // must also be part of it.
          ProbablyBracedList = LBraceStack.back().Tok->is(TT_BracedListLBrace);

          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isJavaScript() &&
                                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                                 Keywords.kw_as));
          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isCpp() && NextTok->is(tok::l_paren));

          // If there is a comma, period, colon, right paren, right square
          // bracket or ellipsis after the closing brace, we assume this is a
          // braced initializer list.
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              ProbablyBracedList ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::ellipsis);

          // Distinguish between braced list in a constructor initializer list
          // followed by constructor body, or just adjacent blocks.
          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
               LBraceStack.back().PrevTok->isOneOf(tok::identifier,
                                                   tok::greater));

          // An identifier after '}' counts as evidence unless the token
          // before the '}' is ';', '}' or '{'.
          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));

          // A ';' after '}' counts as evidence, except for the outermost
          // brace pair when a class body is expected.
          ProbablyBracedList = ProbablyBracedList ||
                               (NextTok->is(tok::semi) &&
                                (!ExpectClassBody || LBraceStack.size() != 1));

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);

          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            NextTok = Tokens->getNextToken();
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }
        }
        // Mark both the closing brace and its opening brace.
        if (ProbablyBracedList) {
          Tok->setBlockKind(BK_BracedInit);
          LBraceStack.back().Tok->setBlockKind(BK_BracedInit);
        } else {
          Tok->setBlockKind(BK_Block);
          LBraceStack.back().Tok->setBlockKind(BK_Block);
        }
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      if (Tok->isNot(TT_StatementMacro))
        break;
      [[fallthrough]];
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      // Any of these tokens inside a still unclassified brace marks that
      // brace as a block.
      if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
        LBraceStack.back().Tok->setBlockKind(BK_Block);
      break;
    default:
      break;
    }
    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (const auto &Entry : LBraceStack)
    if (Entry.Tok->is(BK_Unknown))
      Entry.Tok->setBlockKind(BK_Block);

  FormatTok = Tokens->setPosition(StoredPosition);
}

// Sets the token type of the directly previous right brace.
// Does nothing if the previous non-comment token is absent or not a '}'.
void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
  if (auto Prev = FormatTok->getPreviousNonComment();
      Prev && Prev->is(tok::r_brace)) {
    Prev->setFinalizedType(Type);
  }
}

/// Mixes the \c std::hash of \p v into \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  std::hash<T> hasher;
  seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}

/// Computes a hash over the kind and line of every entry in \c PPStack, in
/// stack order. Returns 0 for an empty stack.
size_t UnwrappedLineParser::computePPHash() const {
  size_t h = 0;
  for (const auto &i : PPStack) {
    hash_combine(h, size_t(i.Kind));
    hash_combine(h, i.Line);
  }
  return h;
}

// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
// is not null, subtracts its length (plus the preceding space) when computing
// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
// running the token annotator on it so that we can restore them afterward.
bool UnwrappedLineParser::mightFitOnOneLine(
    UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
  // A column limit of 0 means there is nothing to exceed.
  const auto ColumnLimit = Style.ColumnLimit;
  if (ColumnLimit == 0)
    return true;

  auto &Tokens = ParsedLine.Tokens;
  assert(!Tokens.empty());

  const auto *LastToken = Tokens.back().Tok;
  assert(LastToken);

  // Snapshot of every token of the line, taken before annotation.
  SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());

  int Index = 0;
  for (const auto &Token : Tokens) {
    assert(Token.Tok);
    auto &SavedToken = SavedTokens[Index++];
    // Heap-allocated copy; deleted in the restore loop below.
    SavedToken.Tok = new FormatToken;
    SavedToken.Tok->copyFrom(*Token.Tok);
    // NOTE(review): Token is bound by const reference, so std::move() yields
    // a const rvalue here; presumably this copies the children rather than
    // moving them, leaving Token.Children intact for the annotator - confirm.
    SavedToken.Children = std::move(Token.Children);
  }

  AnnotatedLine Line(ParsedLine);
  assert(Line.Last == LastToken);

  TokenAnnotator Annotator(Style, Keywords);
  Annotator.annotate(Line);
  Annotator.calculateFormattingInformation(Line);

  auto Length = LastToken->TotalLength;
  if (OpeningBrace) {
    assert(OpeningBrace != Tokens.front().Tok);
    // If the brace's total length exceeds its predecessor's by exactly
    // ColumnLimit, take that amount out again.
    // NOTE(review): presumably this undoes a penalty the annotator adds for a
    // forced break before the brace - confirm against TokenAnnotator.
    if (auto Prev = OpeningBrace->Previous;
        Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
      Length -= ColumnLimit;
    }
    Length -= OpeningBrace->TokenText.size() + 1;
  }

  // A leading '}' (plus one more column) is not counted either.
  if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
    assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
    Length -= FirstToken->TokenText.size() + 1;
  }

  // Restore the tokens from the snapshot and free the copies.
  Index = 0;
  for (auto &Token : Tokens) {
    const auto &SavedToken = SavedTokens[Index++];
    Token.Tok->copyFrom(*SavedToken.Tok);
    // NOTE(review): SavedToken is a const reference as well, so this
    // std::move() presumably also results in a copy - confirm.
    Token.Children = std::move(SavedToken.Children);
    delete SavedToken.Tok;
  }

  // If these change, PPLevel needs to be used to get the correct indentation.
  assert(!Line.InMacroBody);
  assert(!Line.InPPDirective);
  return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
}

/// Parses a block that starts at the current token.
///
/// The current token must be a '{', a macro block begin token, or (for
/// Verilog) a begin keyword or hierarchy header, as asserted below. The block
/// content is parsed with parseLevel().
///
/// \param MustBeDeclaration Passed to the declaration state that is active
/// while the block content is parsed.
/// \param AddLevels Number of levels added to the line level for the block
/// content.
/// \param MunchSemi If true, a ';' directly after the block is consumed.
/// \param KeepBraces If true, the braces are only considered for removal when
/// the nested \c if has a non-optional left brace (see the RemoveBraces lambda
/// below).
/// \param IfKind Forwarded to parseLevel().
/// \param UnindentWhitesmithsBraces If true, the line level is decremented by
/// one after the line with the opening brace has been added.
/// \returns The left brace of the \c if block reported by parseLevel(), or
/// \c nullptr (also returned when the nesting is too deep).
FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
                                             unsigned AddLevels, bool MunchSemi,
                                             bool KeepBraces,
                                             IfStmtKind *IfKind,
                                             bool UnindentWhitesmithsBraces) {
  auto HandleVerilogBlockLabel = [this]() {
    // ":" name
    if (Style.isVerilog() && FormatTok->is(tok::colon)) {
      nextToken();
      if (Keywords.isVerilogIdentifier(*FormatTok))
        nextToken();
    }
  };

  // Whether this is a Verilog-specific block that has a special header like a
  // module.
  const bool VerilogHierarchy =
      Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
  assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
          (Style.isVerilog() &&
           (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
         "'{' or macro block token expected");
  // The token that opens the block.
  FormatToken *Tok = FormatTok;
  const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
  // Number of finished lines before the block; used by RemoveBraces below to
  // find the line that contains the opening brace.
  auto Index = CurrentLines->size();
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->setBlockKind(BK_Block);

  // For Whitesmiths mode, jump to the next level prior to skipping over the
  // braces.
  if (!VerilogHierarchy && AddLevels > 0 &&
      Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
    ++Line->Level;
  }

  size_t PPStartHash = computePPHash();

  const unsigned InitialLevel = Line->Level;
  if (VerilogHierarchy) {
    AddLevels += parseVerilogHierarchyHeader();
  } else {
    nextToken(/*LevelDifference=*/AddLevels);
    HandleVerilogBlockLabel();
  }

  // Bail out if there are too many levels. Otherwise, the stack might overflow.
  if (Line->Level > 300)
    return nullptr;

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  size_t NbPreprocessorDirectives =
      !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  // Index of the line that opens the block, not counting the preprocessor
  // directives recorded above.
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  // Whitesmiths is weird here. The brace needs to be indented for the namespace
  // block, but the block itself may not be indented depending on the style
  // settings. This allows the format to back up one level in those cases.
  if (UnindentWhitesmithsBraces)
    --Line->Level;

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
    Line->Level += AddLevels;

  FormatToken *IfLBrace = nullptr;
  const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace);

  if (eof())
    return IfLBrace;

  // The level did not end at the expected closing token: restore the level
  // and give up on this block.
  if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd)
                 : FormatTok->isNot(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->setBlockKind(BK_Block);
    return IfLBrace;
  }

  if (FormatTok->is(tok::r_brace) && Tok->is(TT_NamespaceLBrace))
    FormatTok->setFinalizedType(TT_NamespaceRBrace);

  const bool IsFunctionRBrace =
      FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);

  // Decides whether the braces of this block are candidates for removal.
  // Captures by copy and is mutable because it modifies its own copies of
  // Index and Tok.
  auto RemoveBraces = [=]() mutable {
    if (!SimpleBlock)
      return false;
    assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
    assert(FormatTok->is(tok::r_brace));
    // The opening brace has no previous token.
    const bool WrappedOpeningBrace = !Tok->Previous;
    if (WrappedOpeningBrace && FollowedByComment)
      return false;
    const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
    if (KeepBraces && !HasRequiredIfBraces)
      return false;
    if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      if (Previous->is(tok::r_brace) && !Previous->Optional)
        return false;
    }
    assert(!CurrentLines->empty());
    auto &LastLine = CurrentLines->back();
    if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
      return false;
    if (Tok->is(TT_ElseLBrace))
      return true;
    if (WrappedOpeningBrace) {
      assert(Index > 0);
      --Index; // The line above the wrapped l_brace.
      Tok = nullptr;
    }
    return mightFitOnOneLine((*CurrentLines)[Index], Tok);
  };
  // Removable braces are recorded by linking the two brace tokens to each
  // other.
  if (RemoveBraces()) {
    Tok->MatchingParen = FormatTok;
    FormatTok->MatchingParen = Tok;
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  nextToken(/*LevelDifference=*/-AddLevels);

  // When this is a function block and there is an unnecessary semicolon
  // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
  // it later).
  if (Style.RemoveSemicolon && IsFunctionRBrace) {
    while (FormatTok->is(tok::semi)) {
      FormatTok->Optional = true;
      nextToken();
    }
  }

  HandleVerilogBlockLabel();

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  Line->Level = InitialLevel;

  if (FormatTok->is(tok::kw_noexcept)) {
    // A noexcept in a requires expression.
    nextToken();
  }

  if (FormatTok->is(tok::arrow)) {
    // Following the } or noexcept we can find a trailing return type arrow
    // as part of an implicit conversion constraint.
    nextToken();
    parseStructuralElement();
  }

  if (MunchSemi && FormatTok->is(tok::semi))
    nextToken();

  // Only link opening and closing lines if the preprocessor stack is the same
  // at both ends of the block.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }

  return IfLBrace;
}

/// Returns true if \p Line has at least four tokens and starts with the
/// tokens `goog`, `.`, `scope`, `(`.
static bool isGoogScope(const UnwrappedLine &Line) {
  // FIXME: Closure-library specific stuff should not be hard-coded but be
  // configurable.
  if (Line.Tokens.size() < 4)
    return false;
  auto I = Line.Tokens.begin();
  if (I->Tok->TokenText != "goog")
    return false;
  ++I;
  if (I->Tok->isNot(tok::period))
    return false;
  ++I;
  if (I->Tok->TokenText != "scope")
    return false;
  ++I;
  return I->Tok->is(tok::l_paren);
}

/// Returns true if \p Line has at least three tokens and starts with '(',
/// the \c function keyword and '('.
static bool isIIFE(const UnwrappedLine &Line,
                   const AdditionalKeywords &Keywords) {
  // Look for the start of an immediately invoked anonymous function.
  // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
  // This is commonly done in JavaScript to create a new, anonymous scope.
  // Example: (function() { ... })()
  if (Line.Tokens.size() < 3)
    return false;
  auto I = Line.Tokens.begin();
  if (I->Tok->isNot(tok::l_paren))
    return false;
  ++I;
  if (I->Tok->isNot(Keywords.kw_function))
    return false;
  ++I;
  return I->Tok->is(tok::l_paren);
}

/// Returns the \c Style.BraceWrapping setting that corresponds to the kind of
/// \p InitialToken (namespace, class, union, struct or enum), or false for
/// any other kind. A token of type \c TT_NamespaceMacro is treated like the
/// \c namespace keyword.
static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
                                   const FormatToken &InitialToken) {
  tok::TokenKind Kind = InitialToken.Tok.getKind();
  if (InitialToken.is(TT_NamespaceMacro))
    Kind = tok::kw_namespace;

  switch (Kind) {
  case tok::kw_namespace:
    return Style.BraceWrapping.AfterNamespace;
  case tok::kw_class:
    return Style.BraceWrapping.AfterClass;
  case tok::kw_union:
    return Style.BraceWrapping.AfterUnion;
  case tok::kw_struct:
    return Style.BraceWrapping.AfterStruct;
  case tok::kw_enum:
    return Style.BraceWrapping.AfterEnum;
  default:
    return false;
  }
}

/// Parses a braced block whose lines become children of the current line.
///
/// The current token must be a '{'. The block content is parsed one level
/// deeper than the current line, except for JavaScript lines that start a
/// `goog.scope(` call or an immediately invoked function expression, whose
/// content keeps the current level.
void UnwrappedLineParser::parseChildBlock() {
  assert(FormatTok->is(tok::l_brace));
  FormatTok->setBlockKind(BK_Block);
  const FormatToken *OpeningBrace = FormatTok;
  nextToken();
  {
    bool SkipIndent = (Style.isJavaScript() &&
                       (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
    // While LineState is alive, finished lines are added as children of the
    // last token of the enclosing line (see ScopedLineState).
    ScopedLineState LineState(*this);
    ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                            /*MustBeDeclaration=*/false);
    Line->Level += SkipIndent ? 0 : 1;
    parseLevel(OpeningBrace);
    flushComments(isOnNewLine(*FormatTok));
    Line->Level -= SkipIndent ? 0 : 1;
  }
  // Consume the token parseLevel() stopped at.
  nextToken();
}

/// Parses a preprocessor directive starting at the current '#' token by
/// dispatching on the directive keyword. A token after '#' without
/// identifier info, or any directive not listed below, is handled by
/// parsePPUnknown().
void UnwrappedLineParser::parsePPDirective() {
  assert(FormatTok->is(tok::hash) && "'#' expected");
  ScopedMacroState MacroState(*Line, Tokens, FormatTok);

  nextToken();

  if (!FormatTok->Tok.getIdentifierInfo()) {
    parsePPUnknown();
    return;
  }

  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
  case tok::pp_define:
    parsePPDefine();
    return;
  case tok::pp_if:
    parsePPIf(/*IfDef=*/false);
    break;
  case tok::pp_ifdef:
  case tok::pp_ifndef:
    parsePPIf(/*IfDef=*/true);
    break;
  case tok::pp_else:
  case tok::pp_elifdef:
  case tok::pp_elifndef:
  case tok::pp_elif:
    parsePPElse();
    break;
  case tok::pp_endif:
    parsePPEndIf();
    break;
  case tok::pp_pragma:
    parsePPPragma();
    break;
  default:
    parsePPUnknown();
    break;
  }
}

/// Pushes a new entry onto \c PPStack.
///
/// The entry is \c PP_Unreachable if \p Unreachable is set or if the current
/// top of the stack is already unreachable, and \c PP_Conditional otherwise.
/// The recorded line is the size of the current line list, plus the size of
/// \c Lines when preprocessor directives are currently being collected.
void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
  size_t Line = CurrentLines->size();
  if (CurrentLines == &PreprocessorDirectives)
    Line += Lines.size();

  if (Unreachable ||
      (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
    PPStack.push_back({PP_Unreachable, Line});
  } else {
    PPStack.push_back({PP_Conditional, Line});
  }
}

/// Enters a new level of conditional compilation.
///
/// Grows the per-level branch index/count vectors when this nesting level is
/// seen for the first time. The first branch is treated as skipped when the
/// branch index selected for this level is greater than 0.
void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
  ++PPBranchLevel;
  assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
  if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
    PPLevelBranchIndex.push_back(0);
    PPLevelBranchCount.push_back(0);
  }
  // An unreachable first branch starts the chain index at -1 instead of 0.
  PPChainBranchIndex.push(Unreachable ? -1 : 0);
  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
  conditionalCompilationCondition(Unreachable || Skip);
}

/// Switches to the next branch of the current conditional.
///
/// Replaces the top \c PPStack entry; the new branch is marked unreachable
/// unless its chain index equals the branch index selected for this level.
void UnwrappedLineParser::conditionalCompilationAlternative() {
  if (!PPStack.empty())
    PPStack.pop_back();
  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
  if (!PPChainBranchIndex.empty())
    ++PPChainBranchIndex.top();
  conditionalCompilationCondition(
      PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
      PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
}

/// Leaves the current level of conditional compilation.
///
/// Raises the branch count recorded for this level to the number of branches
/// seen in the chain if that is larger, then pops the level, the chain index
/// and the \c PPStack entry. Each pop is guarded so that an unmatched #endif
/// does not underflow.
void UnwrappedLineParser::conditionalCompilationEnd() {
  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
    if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
      PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
  }
  // Guard against #endif's without #if.
  if (PPBranchLevel > -1)
    --PPBranchLevel;
  if (!PPChainBranchIndex.empty())
    PPChainBranchIndex.pop();
  if (!PPStack.empty())
    PPStack.pop_back();
}

void UnwrappedLineParser::parsePPIf(bool IfDef) {
  bool IfNDef = FormatTok->is(tok::pp_ifndef);
  nextToken();
  bool Unreachable = false;
  if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
    Unreachable = true;
  if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
    Unreachable = true;
  conditionalCompilationStart(Unreachable);
  FormatToken *IfCondition = FormatTok;
  // If there's a #ifndef on the first line, and the only lines before it are
  // comments, it could be an include guard.
1086 bool MaybeIncludeGuard = IfNDef; 1087 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1088 for (auto &Line : Lines) { 1089 if (Line.Tokens.front().Tok->isNot(tok::comment)) { 1090 MaybeIncludeGuard = false; 1091 IncludeGuard = IG_Rejected; 1092 break; 1093 } 1094 } 1095 } 1096 --PPBranchLevel; 1097 parsePPUnknown(); 1098 ++PPBranchLevel; 1099 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1100 IncludeGuard = IG_IfNdefed; 1101 IncludeGuardToken = IfCondition; 1102 } 1103 } 1104 1105 void UnwrappedLineParser::parsePPElse() { 1106 // If a potential include guard has an #else, it's not an include guard. 1107 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 1108 IncludeGuard = IG_Rejected; 1109 // Don't crash when there is an #else without an #if. 1110 assert(PPBranchLevel >= -1); 1111 if (PPBranchLevel == -1) 1112 conditionalCompilationStart(/*Unreachable=*/true); 1113 conditionalCompilationAlternative(); 1114 --PPBranchLevel; 1115 parsePPUnknown(); 1116 ++PPBranchLevel; 1117 } 1118 1119 void UnwrappedLineParser::parsePPEndIf() { 1120 conditionalCompilationEnd(); 1121 parsePPUnknown(); 1122 // If the #endif of a potential include guard is the last thing in the file, 1123 // then we found an include guard. 
1124 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && 1125 Style.IndentPPDirectives != FormatStyle::PPDIS_None) { 1126 IncludeGuard = IG_Found; 1127 } 1128 } 1129 1130 void UnwrappedLineParser::parsePPDefine() { 1131 nextToken(); 1132 1133 if (!FormatTok->Tok.getIdentifierInfo()) { 1134 IncludeGuard = IG_Rejected; 1135 IncludeGuardToken = nullptr; 1136 parsePPUnknown(); 1137 return; 1138 } 1139 1140 if (IncludeGuard == IG_IfNdefed && 1141 IncludeGuardToken->TokenText == FormatTok->TokenText) { 1142 IncludeGuard = IG_Defined; 1143 IncludeGuardToken = nullptr; 1144 for (auto &Line : Lines) { 1145 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 1146 IncludeGuard = IG_Rejected; 1147 break; 1148 } 1149 } 1150 } 1151 1152 // In the context of a define, even keywords should be treated as normal 1153 // identifiers. Setting the kind to identifier is not enough, because we need 1154 // to treat additional keywords like __except as well, which are already 1155 // identifiers. Setting the identifier info to null interferes with include 1156 // guard processing above, and changes preprocessing nesting. 1157 FormatTok->Tok.setKind(tok::identifier); 1158 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define); 1159 nextToken(); 1160 if (FormatTok->Tok.getKind() == tok::l_paren && 1161 !FormatTok->hasWhitespaceBefore()) { 1162 parseParens(); 1163 } 1164 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1165 Line->Level += PPBranchLevel + 1; 1166 addUnwrappedLine(); 1167 ++Line->Level; 1168 1169 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 
0 : 1); 1170 assert((int)Line->PPLevel >= 0); 1171 Line->InMacroBody = true; 1172 1173 if (Style.SkipMacroDefinitionBody) { 1174 do { 1175 FormatTok->Finalized = true; 1176 nextToken(); 1177 } while (!eof()); 1178 addUnwrappedLine(); 1179 return; 1180 } 1181 1182 if (FormatTok->is(tok::identifier) && 1183 Tokens->peekNextToken()->is(tok::colon)) { 1184 nextToken(); 1185 nextToken(); 1186 } 1187 1188 // Errors during a preprocessor directive can only affect the layout of the 1189 // preprocessor directive, and thus we ignore them. An alternative approach 1190 // would be to use the same approach we use on the file level (no 1191 // re-indentation if there was a structural error) within the macro 1192 // definition. 1193 parseFile(); 1194 } 1195 1196 void UnwrappedLineParser::parsePPPragma() { 1197 Line->InPragmaDirective = true; 1198 parsePPUnknown(); 1199 } 1200 1201 void UnwrappedLineParser::parsePPUnknown() { 1202 do { 1203 nextToken(); 1204 } while (!eof()); 1205 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1206 Line->Level += PPBranchLevel + 1; 1207 addUnwrappedLine(); 1208 } 1209 1210 // Here we exclude certain tokens that are not usually the first token in an 1211 // unwrapped line. This is used in attempt to distinguish macro calls without 1212 // trailing semicolons from other constructs split to several lines. 1213 static bool tokenCanStartNewLine(const FormatToken &Tok) { 1214 // Semicolon can be a null-statement, l_square can be a start of a macro or 1215 // a C++11 attribute, but this doesn't seem to be common. 1216 assert(Tok.isNot(TT_AttributeSquare)); 1217 return !Tok.isOneOf(tok::semi, tok::l_brace, 1218 // Tokens that can only be used as binary operators and a 1219 // part of overloaded operator names. 
1220 tok::period, tok::periodstar, tok::arrow, tok::arrowstar, 1221 tok::less, tok::greater, tok::slash, tok::percent, 1222 tok::lessless, tok::greatergreater, tok::equal, 1223 tok::plusequal, tok::minusequal, tok::starequal, 1224 tok::slashequal, tok::percentequal, tok::ampequal, 1225 tok::pipeequal, tok::caretequal, tok::greatergreaterequal, 1226 tok::lesslessequal, 1227 // Colon is used in labels, base class lists, initializer 1228 // lists, range-based for loops, ternary operator, but 1229 // should never be the first token in an unwrapped line. 1230 tok::colon, 1231 // 'noexcept' is a trailing annotation. 1232 tok::kw_noexcept); 1233 } 1234 1235 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 1236 const FormatToken *FormatTok) { 1237 // FIXME: This returns true for C/C++ keywords like 'struct'. 1238 return FormatTok->is(tok::identifier) && 1239 (!FormatTok->Tok.getIdentifierInfo() || 1240 !FormatTok->isOneOf( 1241 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 1242 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 1243 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 1244 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 1245 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 1246 Keywords.kw_instanceof, Keywords.kw_interface, 1247 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from)); 1248 } 1249 1250 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 1251 const FormatToken *FormatTok) { 1252 return FormatTok->Tok.isLiteral() || 1253 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 1254 mustBeJSIdent(Keywords, FormatTok); 1255 } 1256 1257 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 1258 // when encountered after a value (see mustBeJSIdentOrValue). 
1259 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 1260 const FormatToken *FormatTok) { 1261 return FormatTok->isOneOf( 1262 tok::kw_return, Keywords.kw_yield, 1263 // conditionals 1264 tok::kw_if, tok::kw_else, 1265 // loops 1266 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 1267 // switch/case 1268 tok::kw_switch, tok::kw_case, 1269 // exceptions 1270 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 1271 // declaration 1272 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 1273 Keywords.kw_async, Keywords.kw_function, 1274 // import/export 1275 Keywords.kw_import, tok::kw_export); 1276 } 1277 1278 // Checks whether a token is a type in K&R C (aka C78). 1279 static bool isC78Type(const FormatToken &Tok) { 1280 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long, 1281 tok::kw_unsigned, tok::kw_float, tok::kw_double, 1282 tok::identifier); 1283 } 1284 1285 // This function checks whether a token starts the first parameter declaration 1286 // in a K&R C (aka C78) function definition, e.g.: 1287 // int f(a, b) 1288 // short a, b; 1289 // { 1290 // return a + b; 1291 // } 1292 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, 1293 const FormatToken *FuncName) { 1294 assert(Tok); 1295 assert(Next); 1296 assert(FuncName); 1297 1298 if (FuncName->isNot(tok::identifier)) 1299 return false; 1300 1301 const FormatToken *Prev = FuncName->Previous; 1302 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev))) 1303 return false; 1304 1305 if (!isC78Type(*Tok) && 1306 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) { 1307 return false; 1308 } 1309 1310 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo()) 1311 return false; 1312 1313 Tok = Tok->Previous; 1314 if (!Tok || Tok->isNot(tok::r_paren)) 1315 return false; 1316 1317 Tok = Tok->Previous; 1318 if (!Tok || Tok->isNot(tok::identifier)) 1319 return false; 1320 1321 return 
Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma); 1322 } 1323 1324 bool UnwrappedLineParser::parseModuleImport() { 1325 assert(FormatTok->is(Keywords.kw_import) && "'import' expected"); 1326 1327 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true); 1328 !Token->Tok.getIdentifierInfo() && 1329 !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) { 1330 return false; 1331 } 1332 1333 nextToken(); 1334 while (!eof()) { 1335 if (FormatTok->is(tok::colon)) { 1336 FormatTok->setFinalizedType(TT_ModulePartitionColon); 1337 } 1338 // Handle import <foo/bar.h> as we would an include statement. 1339 else if (FormatTok->is(tok::less)) { 1340 nextToken(); 1341 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) { 1342 // Mark tokens up to the trailing line comments as implicit string 1343 // literals. 1344 if (FormatTok->isNot(tok::comment) && 1345 !FormatTok->TokenText.starts_with("//")) { 1346 FormatTok->setFinalizedType(TT_ImplicitStringLiteral); 1347 } 1348 nextToken(); 1349 } 1350 } 1351 if (FormatTok->is(tok::semi)) { 1352 nextToken(); 1353 break; 1354 } 1355 nextToken(); 1356 } 1357 1358 addUnwrappedLine(); 1359 return true; 1360 } 1361 1362 // readTokenWithJavaScriptASI reads the next token and terminates the current 1363 // line if JavaScript Automatic Semicolon Insertion must 1364 // happen between the current token and the next token. 1365 // 1366 // This method is conservative - it cannot cover all edge cases of JavaScript, 1367 // but only aims to correctly handle certain well known cases. It *must not* 1368 // return true in speculative cases. 1369 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 1370 FormatToken *Previous = FormatTok; 1371 readToken(); 1372 FormatToken *Next = FormatTok; 1373 1374 bool IsOnSameLine = 1375 CommentsBeforeNextToken.empty() 1376 ? 
Next->NewlinesBefore == 0
          : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
  // Nothing to do unless a newline separates the two tokens.
  if (IsOnSameLine)
    return;

  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
  bool PreviousStartsTemplateExpr =
      Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${");
  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
    // If the line contains an '@' sign, the previous token might be an
    // annotation, which can precede another identifier/value.
    bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
      return LineNode.Tok->is(tok::at);
    });
    if (HasAt)
      return;
  }
  // A value followed by '!' on the next line: end the line before the '!'.
  if (Next->is(tok::exclaim) && PreviousMustBeValue)
    return addUnwrappedLine();
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  bool NextEndsTemplateExpr =
      Next->is(TT_TemplateString) && Next->TokenText.starts_with("}");
  // Two values in a row (outside of a template expression boundary), or a
  // value after ']', ')', '++' or '--'.
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
      (PreviousMustBeValue ||
       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
                         tok::minusminus))) {
    return addUnwrappedLine();
  }
  // A value or ')' followed by a token that starts a declaration/statement.
  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
      isJSDeclOrStmt(Keywords, Next)) {
    return addUnwrappedLine();
  }
}

/// Parses one structural element starting at the current token.
///
/// The function works in two phases. The first `switch` handles tokens that
/// only make sense at the beginning of a line and, for most of them, hands
/// off to a dedicated parse routine and returns. The following `do`/`while`
/// loop then consumes tokens one construct at a time until a case emits the
/// line and returns, or until eof() reports true.
///
/// \param OpeningBrace if non-null and typed TT_RequiresExpressionLBrace,
///        braces met in the loop are typed as braced-list braces; a non-null
///        value also disables the K&R parameter-declaration check.
/// \param IfKind forwarded to parseIfThenElse().
/// \param IfLeftBrace if non-null, receives the value returned by
///        parseIfThenElse() for an `if` statement.
/// \param HasDoWhile if non-null, set to true after a do-while was parsed.
/// \param HasLabel if non-null, set to true after a goto label was parsed.
void UnwrappedLineParser::parseStructuralElement(
    const FormatToken *OpeningBrace, IfStmtKind *IfKind,
    FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }

  if (Style.isCpp()) {
    // Consume leading '[' constructs for as long as handleCppAttributes()
    // accepts them.
    while (FormatTok->is(tok::l_square) && handleCppAttributes()) {
    }
  } else if (Style.isVerilog()) {
    if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
      parseForOrWhileLoop(/*HasParens=*/false);
      return;
    }
    if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
                           Keywords.kw_assume, Keywords.kw_cover)) {
      parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
      return;
    }

    // Skip things that can exist before keywords like 'if' and 'case'.
    while (true) {
      if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
                             Keywords.kw_unique0)) {
        nextToken();
      } else if (FormatTok->is(tok::l_paren) &&
                 Tokens->peekNextToken()->is(tok::star)) {
        parseParens();
      } else {
        break;
      }
    }
  }

  // Tokens that only make sense at the beginning of a line.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setFinalizedType(TT_InlineASMBrace);
      nextToken();
      // Everything up to the closing brace is marked as finalized.
      while (FormatTok && !eof()) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setFinalizedType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
        Style.isCSharp()) {
      nextToken();
    } else {
      parseAccessSpecifier();
    }
    return;
  case tok::kw_if: {
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    FormatToken *Tok = parseIfThenElse(IfKind);
    if (IfLeftBrace)
      *IfLeftBrace = Tok;
    return;
  }
  case tok::kw_for:
  case tok::kw_while:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    parseDoWhile();
    if (HasDoWhile)
      *HasDoWhile = true;
    return;
  case tok::kw_switch:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'switch: string' field declaration.
      break;
    }
    parseSwitch();
    return;
  case tok::kw_default:
    // In Verilog default along with other labels are handled in the next loop.
    if (Style.isVerilog())
      break;
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'default: string' field declaration.
      break;
    }
    nextToken();
    if (FormatTok->is(tok::colon)) {
      FormatTok->setFinalizedType(TT_CaseLabelColon);
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    // Proto: there are no switch/case statements.
    if (Style.Language == FormatStyle::LK_Proto) {
      nextToken();
      return;
    }
    if (Style.isVerilog()) {
      parseBlock();
      addUnwrappedLine();
      return;
    }
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'case: string' field declaration.
      nextToken();
      break;
    }
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    if (Style.isVerilog()) {
      // In Verilog an extern module declaration looks like the start of a
      // module. But there is no body and endmodule. So we handle it
      // separately.
      if (Keywords.isVerilogHierarchy(*FormatTok)) {
        parseVerilogHierarchyHeader();
        return;
      }
    } else if (FormatTok->is(tok::string_literal)) {
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock)
          addUnwrappedLine();
        // Either we indent or for backwards compatibility we follow the
        // AfterExternBlock style.
        unsigned AddLevels =
            (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
                    (Style.BraceWrapping.AfterExternBlock &&
                     Style.IndentExternBlock ==
                         FormatStyle::IEBS_AfterExternBlock)
                ? 1u
                : 0u;
        parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.isJavaScript()) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (Style.isCpp()) {
      nextToken();
      if (FormatTok->is(tok::kw_namespace)) {
        parseNamespace();
        return;
      }
      if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
        return;
    }
    break;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.isJavaScript()) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (FormatTok->isNot(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      if (Style.isCpp() && parseModuleImport())
        return;
    }
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In Verilog labels can be any expression, so we don't do them here.
    // JS doesn't have macros, and within classes colons indicate fields, not
    // labels.
    // TableGen doesn't have labels.
    if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() &&
        Tokens->peekNextToken()->is(tok::colon) && !Line->MustBeDeclaration) {
      nextToken();
      Line->Tokens.begin()->Tok->MustBreakBefore = true;
      FormatTok->setFinalizedType(TT_GotoLabelColon);
      parseLabel(!Style.IndentGotoLabels);
      if (HasLabel)
        *HasLabel = true;
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }

  const bool InRequiresExpression =
      OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
  // Main loop: each iteration dispatches on the current token's kind.
  do {
    // Token preceding the one being dispatched on; captured before any case
    // advances the stream.
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always) {
            addUnwrappedLine();
          }
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->is(tok::l_paren)) {
          // Skip synchronization object
          parseParens();
        }
        if (FormatTok->is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always) {
            addUnwrappedLine();
          }
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_requires: {
      if (Style.isCpp()) {
        bool ParsedClause = parseRequires();
        if (ParsedClause)
          return;
      } else {
        nextToken();
      }
      break;
    }
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies to C++ and Verilog.
      if (!Style.isCpp() && !Style.isVerilog()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM)) {
        parseEnum();
      }
      break;
    case tok::kw_class:
      if (Style.isVerilog()) {
        parseBlock();
        addUnwrappedLine();
        return;
      }
      if (Style.isTableGen()) {
        // Do nothing special. In this case the l_brace becomes FunctionLBrace.
        // This is same as def and so on.
        nextToken();
        break;
      }
      [[fallthrough]];
    case tok::kw_struct:
    case tok::kw_union:
      if (parseStructLike())
        return;
      break;
    case tok::kw_decltype:
      nextToken();
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
        assert(FormatTok->Previous);
        // Remember `decltype(auto)` on the line.
        if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto,
                                              tok::l_paren)) {
          Line->SeenDecltypeAuto = true;
        }
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class)) {
        nextToken();
      }
      if (Style.isJavaScript() && FormatTok &&
          FormatTok->Tok.getIdentifierInfo()) {
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      }
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      addUnwrappedLine();
      return;
    case tok::l_paren: {
      parseParens();
      // Break the unwrapped line if a K&R C function definition has a parameter
      // declaration.
      if (OpeningBrace || !Style.isCpp() || !Previous || eof())
        break;
      if (isC78ParameterDecl(FormatTok,
                             Tokens->peekNextToken(/*SkipComment=*/true),
                             Previous)) {
        addUnwrappedLine();
        return;
      }
      break;
    }
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      nextToken();
      // Block return type.
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier()) {
        nextToken();
        // Return types: pointers are ok too.
        while (FormatTok->is(tok::star))
          nextToken();
      }
      // Block argument list.
      if (FormatTok->is(tok::l_paren))
        parseParens();
      // Block body.
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (InRequiresExpression)
        FormatTok->setFinalizedType(TT_BracedListLBrace);
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.Language == FormatStyle::LK_Java &&
            Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
          // If necessary, we could set the type to something different than
          // TT_FunctionLBrace.
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always) {
            addUnwrappedLine();
          }
        } else if (Style.BraceWrapping.AfterFunction) {
          addUnwrappedLine();
        }
        FormatTok->setFinalizedType(TT_FunctionLBrace);
        parseBlock();
        IsDecltypeAutoFunction = false;
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 ||
           (TokenCount == 1 &&
            Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.isJavaScript()) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (!mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (Style.isVerilog()) {
        if (FormatTok->is(Keywords.kw_table)) {
          parseVerilogTable();
          return;
        }
        if (Keywords.isVerilogBegin(*FormatTok) ||
            Keywords.isVerilogHierarchy(*FormatTok)) {
          parseBlock();
          addUnwrappedLine();
          return;
        }
      }

      if (!Style.isCpp() && FormatTok->is(Keywords.kw_interface)) {
        if (parseStructLike())
          return;
        break;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;

      FormatToken *PreviousToken = FormatTok;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.isJavaScript())
        break;

      // True if, after skipping leading comments (and, for Verilog, leading
      // '#' tokens), the line holds exactly one token.
      auto OneTokenSoFar = [&]() {
        auto I = Line->Tokens.begin(), E = Line->Tokens.end();
        while (I != E && I->Tok->is(tok::comment))
          ++I;
        if (Style.isVerilog())
          while (I != E && I->Tok->is(tok::hash))
            ++I;
        return I != E && (++I == E);
      };
      if (OneTokenSoFar()) {
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // The name has to be all upper case and either at least five
        // characters long or followed by a parenthesized list.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          if (PreviousToken->isNot(TT_UntouchableMacroFunc))
            PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      if ((Style.isJavaScript() || Style.isCSharp()) &&
          FormatTok->is(TT_FatArrow)) {
        tryToParseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->setBlockKind(BK_BracedInit);
        // TableGen's defset statement has syntax of the form,
        // `defset <type> <name> = { <statement>... }`
        if (Style.isTableGen() &&
            Line->Tokens.begin()->Tok->is(Keywords.kw_defset)) {
          FormatTok->setFinalizedType(TT_FunctionLBrace);
          parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                     /*MunchSemi=*/false);
          addUnwrappedLine();
          break;
        }
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->is(tok::less)) {
        nextToken();
        parseBracedList(/*IsAngleBracket=*/true);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    case tok::kw_case:
      // Proto: there are no switch/case statements.
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        return;
      }
      // In Verilog switch is called case.
      if (Style.isVerilog()) {
        parseBlock();
        addUnwrappedLine();
        return;
      }
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // 'case: string' field declaration.
        nextToken();
        break;
      }
      parseCaseLabel();
      break;
    case tok::kw_default:
      nextToken();
      if (Style.isVerilog()) {
        if (FormatTok->is(tok::colon)) {
          // The label will be handled in the next iteration.
          break;
        }
        if (FormatTok->is(Keywords.kw_clocking)) {
          // A default clocking block.
          parseBlock();
          addUnwrappedLine();
          return;
        }
        parseVerilogCaseLabel();
        return;
      }
      break;
    case tok::colon:
      nextToken();
      if (Style.isVerilog()) {
        parseVerilogCaseLabel();
        return;
      }
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

bool UnwrappedLineParser::tryToParsePropertyAccessor() {
  assert(FormatTok->is(tok::l_brace));
  if (!Style.isCSharp())
    return false;
  // See if it's a property accessor.
2117 if (FormatTok->Previous->isNot(tok::identifier)) 2118 return false; 2119 2120 // See if we are inside a property accessor. 2121 // 2122 // Record the current tokenPosition so that we can advance and 2123 // reset the current token. `Next` is not set yet so we need 2124 // another way to advance along the token stream. 2125 unsigned int StoredPosition = Tokens->getPosition(); 2126 FormatToken *Tok = Tokens->getNextToken(); 2127 2128 // A trivial property accessor is of the form: 2129 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] } 2130 // Track these as they do not require line breaks to be introduced. 2131 bool HasSpecialAccessor = false; 2132 bool IsTrivialPropertyAccessor = true; 2133 while (!eof()) { 2134 if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private, 2135 tok::kw_protected, Keywords.kw_internal, Keywords.kw_get, 2136 Keywords.kw_init, Keywords.kw_set)) { 2137 if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set)) 2138 HasSpecialAccessor = true; 2139 Tok = Tokens->getNextToken(); 2140 continue; 2141 } 2142 if (Tok->isNot(tok::r_brace)) 2143 IsTrivialPropertyAccessor = false; 2144 break; 2145 } 2146 2147 if (!HasSpecialAccessor) { 2148 Tokens->setPosition(StoredPosition); 2149 return false; 2150 } 2151 2152 // Try to parse the property accessor: 2153 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties 2154 Tokens->setPosition(StoredPosition); 2155 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction) 2156 addUnwrappedLine(); 2157 nextToken(); 2158 do { 2159 switch (FormatTok->Tok.getKind()) { 2160 case tok::r_brace: 2161 nextToken(); 2162 if (FormatTok->is(tok::equal)) { 2163 while (!eof() && FormatTok->isNot(tok::semi)) 2164 nextToken(); 2165 nextToken(); 2166 } 2167 addUnwrappedLine(); 2168 return true; 2169 case tok::l_brace: 2170 ++Line->Level; 2171 parseBlock(/*MustBeDeclaration=*/true); 2172 addUnwrappedLine(); 2173 --Line->Level; 2174 break; 2175 
case tok::equal: 2176 if (FormatTok->is(TT_FatArrow)) { 2177 ++Line->Level; 2178 do { 2179 nextToken(); 2180 } while (!eof() && FormatTok->isNot(tok::semi)); 2181 nextToken(); 2182 addUnwrappedLine(); 2183 --Line->Level; 2184 break; 2185 } 2186 nextToken(); 2187 break; 2188 default: 2189 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init, 2190 Keywords.kw_set) && 2191 !IsTrivialPropertyAccessor) { 2192 // Non-trivial get/set needs to be on its own line. 2193 addUnwrappedLine(); 2194 } 2195 nextToken(); 2196 } 2197 } while (!eof()); 2198 2199 // Unreachable for well-formed code (paired '{' and '}'). 2200 return true; 2201 } 2202 2203 bool UnwrappedLineParser::tryToParseLambda() { 2204 assert(FormatTok->is(tok::l_square)); 2205 if (!Style.isCpp()) { 2206 nextToken(); 2207 return false; 2208 } 2209 FormatToken &LSquare = *FormatTok; 2210 if (!tryToParseLambdaIntroducer()) 2211 return false; 2212 2213 bool SeenArrow = false; 2214 bool InTemplateParameterList = false; 2215 2216 while (FormatTok->isNot(tok::l_brace)) { 2217 if (FormatTok->isSimpleTypeSpecifier()) { 2218 nextToken(); 2219 continue; 2220 } 2221 switch (FormatTok->Tok.getKind()) { 2222 case tok::l_brace: 2223 break; 2224 case tok::l_paren: 2225 parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference); 2226 break; 2227 case tok::l_square: 2228 parseSquare(); 2229 break; 2230 case tok::less: 2231 assert(FormatTok->Previous); 2232 if (FormatTok->Previous->is(tok::r_square)) 2233 InTemplateParameterList = true; 2234 nextToken(); 2235 break; 2236 case tok::kw_auto: 2237 case tok::kw_class: 2238 case tok::kw_template: 2239 case tok::kw_typename: 2240 case tok::amp: 2241 case tok::star: 2242 case tok::kw_const: 2243 case tok::kw_constexpr: 2244 case tok::kw_consteval: 2245 case tok::comma: 2246 case tok::greater: 2247 case tok::identifier: 2248 case tok::numeric_constant: 2249 case tok::coloncolon: 2250 case tok::kw_mutable: 2251 case tok::kw_noexcept: 2252 case tok::kw_static: 2253 nextToken(); 2254 break; 
2255 // Specialization of a template with an integer parameter can contain 2256 // arithmetic, logical, comparison and ternary operators. 2257 // 2258 // FIXME: This also accepts sequences of operators that are not in the scope 2259 // of a template argument list. 2260 // 2261 // In a C++ lambda a template type can only occur after an arrow. We use 2262 // this as an heuristic to distinguish between Objective-C expressions 2263 // followed by an `a->b` expression, such as: 2264 // ([obj func:arg] + a->b) 2265 // Otherwise the code below would parse as a lambda. 2266 case tok::plus: 2267 case tok::minus: 2268 case tok::exclaim: 2269 case tok::tilde: 2270 case tok::slash: 2271 case tok::percent: 2272 case tok::lessless: 2273 case tok::pipe: 2274 case tok::pipepipe: 2275 case tok::ampamp: 2276 case tok::caret: 2277 case tok::equalequal: 2278 case tok::exclaimequal: 2279 case tok::greaterequal: 2280 case tok::lessequal: 2281 case tok::question: 2282 case tok::colon: 2283 case tok::ellipsis: 2284 case tok::kw_true: 2285 case tok::kw_false: 2286 if (SeenArrow || InTemplateParameterList) { 2287 nextToken(); 2288 break; 2289 } 2290 return true; 2291 case tok::arrow: 2292 // This might or might not actually be a lambda arrow (this could be an 2293 // ObjC method invocation followed by a dereferencing arrow). We might 2294 // reset this back to TT_Unknown in TokenAnnotator. 
2295 FormatTok->setFinalizedType(TT_TrailingReturnArrow); 2296 SeenArrow = true; 2297 nextToken(); 2298 break; 2299 case tok::kw_requires: { 2300 auto *RequiresToken = FormatTok; 2301 nextToken(); 2302 parseRequiresClause(RequiresToken); 2303 break; 2304 } 2305 case tok::equal: 2306 if (!InTemplateParameterList) 2307 return true; 2308 nextToken(); 2309 break; 2310 default: 2311 return true; 2312 } 2313 } 2314 2315 FormatTok->setFinalizedType(TT_LambdaLBrace); 2316 LSquare.setFinalizedType(TT_LambdaLSquare); 2317 2318 NestedLambdas.push_back(Line->SeenDecltypeAuto); 2319 parseChildBlock(); 2320 assert(!NestedLambdas.empty()); 2321 NestedLambdas.pop_back(); 2322 2323 return true; 2324 } 2325 2326 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 2327 const FormatToken *Previous = FormatTok->Previous; 2328 const FormatToken *LeftSquare = FormatTok; 2329 nextToken(); 2330 if ((Previous && ((Previous->Tok.getIdentifierInfo() && 2331 !Previous->isOneOf(tok::kw_return, tok::kw_co_await, 2332 tok::kw_co_yield, tok::kw_co_return)) || 2333 Previous->closesScope())) || 2334 LeftSquare->isCppStructuredBinding(Style)) { 2335 return false; 2336 } 2337 if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind())) 2338 return false; 2339 if (FormatTok->is(tok::r_square)) { 2340 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true); 2341 if (Next->is(tok::greater)) 2342 return false; 2343 } 2344 parseSquare(/*LambdaIntroducer=*/true); 2345 return true; 2346 } 2347 2348 void UnwrappedLineParser::tryToParseJSFunction() { 2349 assert(FormatTok->is(Keywords.kw_function)); 2350 if (FormatTok->is(Keywords.kw_async)) 2351 nextToken(); 2352 // Consume "function". 2353 nextToken(); 2354 2355 // Consume * (generator function). Treat it like C++'s overloaded operators. 2356 if (FormatTok->is(tok::star)) { 2357 FormatTok->setFinalizedType(TT_OverloadedOperator); 2358 nextToken(); 2359 } 2360 2361 // Consume function name. 
2362 if (FormatTok->is(tok::identifier)) 2363 nextToken(); 2364 2365 if (FormatTok->isNot(tok::l_paren)) 2366 return; 2367 2368 // Parse formal parameter list. 2369 parseParens(); 2370 2371 if (FormatTok->is(tok::colon)) { 2372 // Parse a type definition. 2373 nextToken(); 2374 2375 // Eat the type declaration. For braced inline object types, balance braces, 2376 // otherwise just parse until finding an l_brace for the function body. 2377 if (FormatTok->is(tok::l_brace)) 2378 tryToParseBracedList(); 2379 else 2380 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 2381 nextToken(); 2382 } 2383 2384 if (FormatTok->is(tok::semi)) 2385 return; 2386 2387 parseChildBlock(); 2388 } 2389 2390 bool UnwrappedLineParser::tryToParseBracedList() { 2391 if (FormatTok->is(BK_Unknown)) 2392 calculateBraceTypes(); 2393 assert(FormatTok->isNot(BK_Unknown)); 2394 if (FormatTok->is(BK_Block)) 2395 return false; 2396 nextToken(); 2397 parseBracedList(); 2398 return true; 2399 } 2400 2401 bool UnwrappedLineParser::tryToParseChildBlock() { 2402 assert(Style.isJavaScript() || Style.isCSharp()); 2403 assert(FormatTok->is(TT_FatArrow)); 2404 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow. 2405 // They always start an expression or a child block if followed by a curly 2406 // brace. 2407 nextToken(); 2408 if (FormatTok->isNot(tok::l_brace)) 2409 return false; 2410 parseChildBlock(); 2411 return true; 2412 } 2413 2414 bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) { 2415 bool HasError = false; 2416 2417 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 2418 // replace this by using parseAssignmentExpression() inside. 
2419 do { 2420 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) && 2421 tryToParseChildBlock()) { 2422 continue; 2423 } 2424 if (Style.isJavaScript()) { 2425 if (FormatTok->is(Keywords.kw_function)) { 2426 tryToParseJSFunction(); 2427 continue; 2428 } 2429 if (FormatTok->is(tok::l_brace)) { 2430 // Could be a method inside of a braced list `{a() { return 1; }}`. 2431 if (tryToParseBracedList()) 2432 continue; 2433 parseChildBlock(); 2434 } 2435 } 2436 if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) { 2437 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2438 addUnwrappedLine(); 2439 nextToken(); 2440 return !HasError; 2441 } 2442 switch (FormatTok->Tok.getKind()) { 2443 case tok::l_square: 2444 if (Style.isCSharp()) 2445 parseSquare(); 2446 else 2447 tryToParseLambda(); 2448 break; 2449 case tok::l_paren: 2450 parseParens(); 2451 // JavaScript can just have free standing methods and getters/setters in 2452 // object literals. Detect them by a "{" following ")". 2453 if (Style.isJavaScript()) { 2454 if (FormatTok->is(tok::l_brace)) 2455 parseChildBlock(); 2456 break; 2457 } 2458 break; 2459 case tok::l_brace: 2460 // Assume there are no blocks inside a braced init list apart 2461 // from the ones we explicitly parse out (like lambdas). 2462 FormatTok->setBlockKind(BK_BracedInit); 2463 nextToken(); 2464 parseBracedList(); 2465 break; 2466 case tok::less: 2467 nextToken(); 2468 if (IsAngleBracket) 2469 parseBracedList(/*IsAngleBracket=*/true); 2470 break; 2471 case tok::semi: 2472 // JavaScript (or more precisely TypeScript) can have semicolons in braced 2473 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 2474 // used for error recovery if we have otherwise determined that this is 2475 // a braced list. 
2476 if (Style.isJavaScript()) { 2477 nextToken(); 2478 break; 2479 } 2480 HasError = true; 2481 if (!IsEnum) 2482 return false; 2483 nextToken(); 2484 break; 2485 case tok::comma: 2486 nextToken(); 2487 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2488 addUnwrappedLine(); 2489 break; 2490 default: 2491 nextToken(); 2492 break; 2493 } 2494 } while (!eof()); 2495 return false; 2496 } 2497 2498 /// \brief Parses a pair of parentheses (and everything between them). 2499 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all 2500 /// double ampersands. This applies for all nested scopes as well. 2501 /// 2502 /// Returns whether there is a `=` token between the parentheses. 2503 bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { 2504 assert(FormatTok->is(tok::l_paren) && "'(' expected."); 2505 auto *LeftParen = FormatTok; 2506 bool SeenEqual = false; 2507 const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace); 2508 nextToken(); 2509 do { 2510 switch (FormatTok->Tok.getKind()) { 2511 case tok::l_paren: 2512 if (parseParens(AmpAmpTokenType)) 2513 SeenEqual = true; 2514 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 2515 parseChildBlock(); 2516 break; 2517 case tok::r_paren: 2518 if (!MightBeStmtExpr && !Line->InMacroBody && 2519 Style.RemoveParentheses > FormatStyle::RPS_Leave) { 2520 const auto *Prev = LeftParen->Previous; 2521 const auto *Next = Tokens->peekNextToken(); 2522 const bool DoubleParens = 2523 Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren); 2524 const auto *PrevPrev = Prev ? 
Prev->getPreviousNonComment() : nullptr; 2525 const bool Blacklisted = 2526 PrevPrev && 2527 (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) || 2528 (SeenEqual && 2529 (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) || 2530 PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if)))); 2531 const bool ReturnParens = 2532 Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement && 2533 ((NestedLambdas.empty() && !IsDecltypeAutoFunction) || 2534 (!NestedLambdas.empty() && !NestedLambdas.back())) && 2535 Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next && 2536 Next->is(tok::semi); 2537 if ((DoubleParens && !Blacklisted) || ReturnParens) { 2538 LeftParen->Optional = true; 2539 FormatTok->Optional = true; 2540 } 2541 } 2542 nextToken(); 2543 return SeenEqual; 2544 case tok::r_brace: 2545 // A "}" inside parenthesis is an error if there wasn't a matching "{". 2546 return SeenEqual; 2547 case tok::l_square: 2548 tryToParseLambda(); 2549 break; 2550 case tok::l_brace: 2551 if (!tryToParseBracedList()) 2552 parseChildBlock(); 2553 break; 2554 case tok::at: 2555 nextToken(); 2556 if (FormatTok->is(tok::l_brace)) { 2557 nextToken(); 2558 parseBracedList(); 2559 } 2560 break; 2561 case tok::equal: 2562 SeenEqual = true; 2563 if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) 2564 tryToParseChildBlock(); 2565 else 2566 nextToken(); 2567 break; 2568 case tok::kw_class: 2569 if (Style.isJavaScript()) 2570 parseRecord(/*ParseAsExpr=*/true); 2571 else 2572 nextToken(); 2573 break; 2574 case tok::identifier: 2575 if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function))) 2576 tryToParseJSFunction(); 2577 else 2578 nextToken(); 2579 break; 2580 case tok::kw_requires: { 2581 auto RequiresToken = FormatTok; 2582 nextToken(); 2583 parseRequiresExpression(RequiresToken); 2584 break; 2585 } 2586 case tok::ampamp: 2587 if (AmpAmpTokenType != TT_Unknown) 2588 FormatTok->setFinalizedType(AmpAmpTokenType); 2589 [[fallthrough]]; 2590 default: 2591 nextToken(); 
2592 break; 2593 } 2594 } while (!eof()); 2595 return SeenEqual; 2596 } 2597 2598 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 2599 if (!LambdaIntroducer) { 2600 assert(FormatTok->is(tok::l_square) && "'[' expected."); 2601 if (tryToParseLambda()) 2602 return; 2603 } 2604 do { 2605 switch (FormatTok->Tok.getKind()) { 2606 case tok::l_paren: 2607 parseParens(); 2608 break; 2609 case tok::r_square: 2610 nextToken(); 2611 return; 2612 case tok::r_brace: 2613 // A "}" inside parenthesis is an error if there wasn't a matching "{". 2614 return; 2615 case tok::l_square: 2616 parseSquare(); 2617 break; 2618 case tok::l_brace: { 2619 if (!tryToParseBracedList()) 2620 parseChildBlock(); 2621 break; 2622 } 2623 case tok::at: 2624 nextToken(); 2625 if (FormatTok->is(tok::l_brace)) { 2626 nextToken(); 2627 parseBracedList(); 2628 } 2629 break; 2630 default: 2631 nextToken(); 2632 break; 2633 } 2634 } while (!eof()); 2635 } 2636 2637 void UnwrappedLineParser::keepAncestorBraces() { 2638 if (!Style.RemoveBracesLLVM) 2639 return; 2640 2641 const int MaxNestingLevels = 2; 2642 const int Size = NestedTooDeep.size(); 2643 if (Size >= MaxNestingLevels) 2644 NestedTooDeep[Size - MaxNestingLevels] = true; 2645 NestedTooDeep.push_back(false); 2646 } 2647 2648 static FormatToken *getLastNonComment(const UnwrappedLine &Line) { 2649 for (const auto &Token : llvm::reverse(Line.Tokens)) 2650 if (Token.Tok->isNot(tok::comment)) 2651 return Token.Tok; 2652 2653 return nullptr; 2654 } 2655 2656 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) { 2657 FormatToken *Tok = nullptr; 2658 2659 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() && 2660 PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) { 2661 Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never 2662 ? 
getLastNonComment(*Line) 2663 : Line->Tokens.back().Tok; 2664 assert(Tok); 2665 if (Tok->BraceCount < 0) { 2666 assert(Tok->BraceCount == -1); 2667 Tok = nullptr; 2668 } else { 2669 Tok->BraceCount = -1; 2670 } 2671 } 2672 2673 addUnwrappedLine(); 2674 ++Line->Level; 2675 parseStructuralElement(); 2676 2677 if (Tok) { 2678 assert(!Line->InPPDirective); 2679 Tok = nullptr; 2680 for (const auto &L : llvm::reverse(*CurrentLines)) { 2681 if (!L.InPPDirective && getLastNonComment(L)) { 2682 Tok = L.Tokens.back().Tok; 2683 break; 2684 } 2685 } 2686 assert(Tok); 2687 ++Tok->BraceCount; 2688 } 2689 2690 if (CheckEOF && eof()) 2691 addUnwrappedLine(); 2692 2693 --Line->Level; 2694 } 2695 2696 static void markOptionalBraces(FormatToken *LeftBrace) { 2697 if (!LeftBrace) 2698 return; 2699 2700 assert(LeftBrace->is(tok::l_brace)); 2701 2702 FormatToken *RightBrace = LeftBrace->MatchingParen; 2703 if (!RightBrace) { 2704 assert(!LeftBrace->Optional); 2705 return; 2706 } 2707 2708 assert(RightBrace->is(tok::r_brace)); 2709 assert(RightBrace->MatchingParen == LeftBrace); 2710 assert(LeftBrace->Optional == RightBrace->Optional); 2711 2712 LeftBrace->Optional = true; 2713 RightBrace->Optional = true; 2714 } 2715 2716 void UnwrappedLineParser::handleAttributes() { 2717 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`. 2718 if (FormatTok->isAttribute()) 2719 nextToken(); 2720 else if (FormatTok->is(tok::l_square)) 2721 handleCppAttributes(); 2722 } 2723 2724 bool UnwrappedLineParser::handleCppAttributes() { 2725 // Handle [[likely]] / [[unlikely]] attributes. 2726 assert(FormatTok->is(tok::l_square)); 2727 if (!tryToParseSimpleAttribute()) 2728 return false; 2729 parseSquare(); 2730 return true; 2731 } 2732 2733 /// Returns whether \c Tok begins a block. 2734 bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const { 2735 // FIXME: rename the function or make 2736 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work. 2737 return Style.isVerilog() ? 
Keywords.isVerilogBegin(Tok) 2738 : Tok.is(tok::l_brace); 2739 } 2740 2741 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind, 2742 bool KeepBraces, 2743 bool IsVerilogAssert) { 2744 assert((FormatTok->is(tok::kw_if) || 2745 (Style.isVerilog() && 2746 FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert, 2747 Keywords.kw_assume, Keywords.kw_cover))) && 2748 "'if' expected"); 2749 nextToken(); 2750 2751 if (IsVerilogAssert) { 2752 // Handle `assert #0` and `assert final`. 2753 if (FormatTok->is(Keywords.kw_verilogHash)) { 2754 nextToken(); 2755 if (FormatTok->is(tok::numeric_constant)) 2756 nextToken(); 2757 } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property, 2758 Keywords.kw_sequence)) { 2759 nextToken(); 2760 } 2761 } 2762 2763 // TableGen's if statement has the form of `if <cond> then { ... }`. 2764 if (Style.isTableGen()) { 2765 while (!eof() && FormatTok->isNot(Keywords.kw_then)) { 2766 // Simply skip until then. This range only contains a value. 2767 nextToken(); 2768 } 2769 } 2770 2771 // Handle `if !consteval`. 2772 if (FormatTok->is(tok::exclaim)) 2773 nextToken(); 2774 2775 bool KeepIfBraces = true; 2776 if (FormatTok->is(tok::kw_consteval)) { 2777 nextToken(); 2778 } else { 2779 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces; 2780 if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier)) 2781 nextToken(); 2782 if (FormatTok->is(tok::l_paren)) { 2783 FormatTok->setFinalizedType(TT_ConditionLParen); 2784 parseParens(); 2785 } 2786 } 2787 handleAttributes(); 2788 // The then action is optional in Verilog assert statements. 
2789 if (IsVerilogAssert && FormatTok->is(tok::semi)) { 2790 nextToken(); 2791 addUnwrappedLine(); 2792 return nullptr; 2793 } 2794 2795 bool NeedsUnwrappedLine = false; 2796 keepAncestorBraces(); 2797 2798 FormatToken *IfLeftBrace = nullptr; 2799 IfStmtKind IfBlockKind = IfStmtKind::NotIf; 2800 2801 if (isBlockBegin(*FormatTok)) { 2802 FormatTok->setFinalizedType(TT_ControlStatementLBrace); 2803 IfLeftBrace = FormatTok; 2804 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2805 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2806 /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind); 2807 setPreviousRBraceType(TT_ControlStatementRBrace); 2808 if (Style.BraceWrapping.BeforeElse) 2809 addUnwrappedLine(); 2810 else 2811 NeedsUnwrappedLine = true; 2812 } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) { 2813 addUnwrappedLine(); 2814 } else { 2815 parseUnbracedBody(); 2816 } 2817 2818 if (Style.RemoveBracesLLVM) { 2819 assert(!NestedTooDeep.empty()); 2820 KeepIfBraces = KeepIfBraces || 2821 (IfLeftBrace && !IfLeftBrace->MatchingParen) || 2822 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly || 2823 IfBlockKind == IfStmtKind::IfElseIf; 2824 } 2825 2826 bool KeepElseBraces = KeepIfBraces; 2827 FormatToken *ElseLeftBrace = nullptr; 2828 IfStmtKind Kind = IfStmtKind::IfOnly; 2829 2830 if (FormatTok->is(tok::kw_else)) { 2831 if (Style.RemoveBracesLLVM) { 2832 NestedTooDeep.back() = false; 2833 Kind = IfStmtKind::IfElse; 2834 } 2835 nextToken(); 2836 handleAttributes(); 2837 if (isBlockBegin(*FormatTok)) { 2838 const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if); 2839 FormatTok->setFinalizedType(TT_ElseLBrace); 2840 ElseLeftBrace = FormatTok; 2841 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2842 IfStmtKind ElseBlockKind = IfStmtKind::NotIf; 2843 FormatToken *IfLBrace = 2844 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2845 /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind); 2846 
setPreviousRBraceType(TT_ElseRBrace); 2847 if (FormatTok->is(tok::kw_else)) { 2848 KeepElseBraces = KeepElseBraces || 2849 ElseBlockKind == IfStmtKind::IfOnly || 2850 ElseBlockKind == IfStmtKind::IfElseIf; 2851 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) { 2852 KeepElseBraces = true; 2853 assert(ElseLeftBrace->MatchingParen); 2854 markOptionalBraces(ElseLeftBrace); 2855 } 2856 addUnwrappedLine(); 2857 } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) { 2858 const FormatToken *Previous = Tokens->getPreviousToken(); 2859 assert(Previous); 2860 const bool IsPrecededByComment = Previous->is(tok::comment); 2861 if (IsPrecededByComment) { 2862 addUnwrappedLine(); 2863 ++Line->Level; 2864 } 2865 bool TooDeep = true; 2866 if (Style.RemoveBracesLLVM) { 2867 Kind = IfStmtKind::IfElseIf; 2868 TooDeep = NestedTooDeep.pop_back_val(); 2869 } 2870 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces); 2871 if (Style.RemoveBracesLLVM) 2872 NestedTooDeep.push_back(TooDeep); 2873 if (IsPrecededByComment) 2874 --Line->Level; 2875 } else { 2876 parseUnbracedBody(/*CheckEOF=*/true); 2877 } 2878 } else { 2879 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse; 2880 if (NeedsUnwrappedLine) 2881 addUnwrappedLine(); 2882 } 2883 2884 if (!Style.RemoveBracesLLVM) 2885 return nullptr; 2886 2887 assert(!NestedTooDeep.empty()); 2888 KeepElseBraces = KeepElseBraces || 2889 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || 2890 NestedTooDeep.back(); 2891 2892 NestedTooDeep.pop_back(); 2893 2894 if (!KeepIfBraces && !KeepElseBraces) { 2895 markOptionalBraces(IfLeftBrace); 2896 markOptionalBraces(ElseLeftBrace); 2897 } else if (IfLeftBrace) { 2898 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen; 2899 if (IfRightBrace) { 2900 assert(IfRightBrace->MatchingParen == IfLeftBrace); 2901 assert(!IfLeftBrace->Optional); 2902 assert(!IfRightBrace->Optional); 2903 IfLeftBrace->MatchingParen = nullptr; 2904 IfRightBrace->MatchingParen = nullptr; 2905 
} 2906 } 2907 2908 if (IfKind) 2909 *IfKind = Kind; 2910 2911 return IfLeftBrace; 2912 } 2913 2914 void UnwrappedLineParser::parseTryCatch() { 2915 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 2916 nextToken(); 2917 bool NeedsUnwrappedLine = false; 2918 if (FormatTok->is(tok::colon)) { 2919 // We are in a function try block, what comes is an initializer list. 2920 nextToken(); 2921 2922 // In case identifiers were removed by clang-tidy, what might follow is 2923 // multiple commas in sequence - before the first identifier. 2924 while (FormatTok->is(tok::comma)) 2925 nextToken(); 2926 2927 while (FormatTok->is(tok::identifier)) { 2928 nextToken(); 2929 if (FormatTok->is(tok::l_paren)) 2930 parseParens(); 2931 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) && 2932 FormatTok->is(tok::l_brace)) { 2933 do { 2934 nextToken(); 2935 } while (FormatTok->isNot(tok::r_brace)); 2936 nextToken(); 2937 } 2938 2939 // In case identifiers were removed by clang-tidy, what might follow is 2940 // multiple commas in sequence - after the first identifier. 2941 while (FormatTok->is(tok::comma)) 2942 nextToken(); 2943 } 2944 } 2945 // Parse try with resource. 2946 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) 2947 parseParens(); 2948 2949 keepAncestorBraces(); 2950 2951 if (FormatTok->is(tok::l_brace)) { 2952 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2953 parseBlock(); 2954 if (Style.BraceWrapping.BeforeCatch) 2955 addUnwrappedLine(); 2956 else 2957 NeedsUnwrappedLine = true; 2958 } else if (FormatTok->isNot(tok::kw_catch)) { 2959 // The C++ standard requires a compound-statement after a try. 2960 // If there's none, we try to assume there's a structuralElement 2961 // and try to continue. 
2962 addUnwrappedLine(); 2963 ++Line->Level; 2964 parseStructuralElement(); 2965 --Line->Level; 2966 } 2967 while (true) { 2968 if (FormatTok->is(tok::at)) 2969 nextToken(); 2970 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 2971 tok::kw___finally) || 2972 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 2973 FormatTok->is(Keywords.kw_finally)) || 2974 (FormatTok->isObjCAtKeyword(tok::objc_catch) || 2975 FormatTok->isObjCAtKeyword(tok::objc_finally)))) { 2976 break; 2977 } 2978 nextToken(); 2979 while (FormatTok->isNot(tok::l_brace)) { 2980 if (FormatTok->is(tok::l_paren)) { 2981 parseParens(); 2982 continue; 2983 } 2984 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) { 2985 if (Style.RemoveBracesLLVM) 2986 NestedTooDeep.pop_back(); 2987 return; 2988 } 2989 nextToken(); 2990 } 2991 NeedsUnwrappedLine = false; 2992 Line->MustBeDeclaration = false; 2993 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2994 parseBlock(); 2995 if (Style.BraceWrapping.BeforeCatch) 2996 addUnwrappedLine(); 2997 else 2998 NeedsUnwrappedLine = true; 2999 } 3000 3001 if (Style.RemoveBracesLLVM) 3002 NestedTooDeep.pop_back(); 3003 3004 if (NeedsUnwrappedLine) 3005 addUnwrappedLine(); 3006 } 3007 3008 void UnwrappedLineParser::parseNamespace() { 3009 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) && 3010 "'namespace' expected"); 3011 3012 const FormatToken &InitialToken = *FormatTok; 3013 nextToken(); 3014 if (InitialToken.is(TT_NamespaceMacro)) { 3015 parseParens(); 3016 } else { 3017 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline, 3018 tok::l_square, tok::period, tok::l_paren) || 3019 (Style.isCSharp() && FormatTok->is(tok::kw_union))) { 3020 if (FormatTok->is(tok::l_square)) 3021 parseSquare(); 3022 else if (FormatTok->is(tok::l_paren)) 3023 parseParens(); 3024 else 3025 nextToken(); 3026 } 3027 } 3028 if (FormatTok->is(tok::l_brace)) { 3029 
FormatTok->setFinalizedType(TT_NamespaceLBrace); 3030 3031 if (ShouldBreakBeforeBrace(Style, InitialToken)) 3032 addUnwrappedLine(); 3033 3034 unsigned AddLevels = 3035 Style.NamespaceIndentation == FormatStyle::NI_All || 3036 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 3037 DeclarationScopeStack.size() > 1) 3038 ? 1u 3039 : 0u; 3040 bool ManageWhitesmithsBraces = 3041 AddLevels == 0u && 3042 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 3043 3044 // If we're in Whitesmiths mode, indent the brace if we're not indenting 3045 // the whole block. 3046 if (ManageWhitesmithsBraces) 3047 ++Line->Level; 3048 3049 // Munch the semicolon after a namespace. This is more common than one would 3050 // think. Putting the semicolon into its own line is very ugly. 3051 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true, 3052 /*KeepBraces=*/true, /*IfKind=*/nullptr, 3053 ManageWhitesmithsBraces); 3054 3055 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep); 3056 3057 if (ManageWhitesmithsBraces) 3058 --Line->Level; 3059 } 3060 // FIXME: Add error handling. 3061 } 3062 3063 void UnwrappedLineParser::parseNew() { 3064 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 3065 nextToken(); 3066 3067 if (Style.isCSharp()) { 3068 do { 3069 // Handle constructor invocation, e.g. `new(field: value)`. 3070 if (FormatTok->is(tok::l_paren)) 3071 parseParens(); 3072 3073 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`. 3074 if (FormatTok->is(tok::l_brace)) 3075 parseBracedList(); 3076 3077 if (FormatTok->isOneOf(tok::semi, tok::comma)) 3078 return; 3079 3080 nextToken(); 3081 } while (!eof()); 3082 } 3083 3084 if (Style.Language != FormatStyle::LK_Java) 3085 return; 3086 3087 // In Java, we can parse everything up to the parens, which aren't optional. 3088 do { 3089 // There should not be a ;, { or } before the new's open paren. 
3090 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 3091 return; 3092 3093 // Consume the parens. 3094 if (FormatTok->is(tok::l_paren)) { 3095 parseParens(); 3096 3097 // If there is a class body of an anonymous class, consume that as child. 3098 if (FormatTok->is(tok::l_brace)) 3099 parseChildBlock(); 3100 return; 3101 } 3102 nextToken(); 3103 } while (!eof()); 3104 } 3105 3106 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) { 3107 keepAncestorBraces(); 3108 3109 if (isBlockBegin(*FormatTok)) { 3110 FormatTok->setFinalizedType(TT_ControlStatementLBrace); 3111 FormatToken *LeftBrace = FormatTok; 3112 CompoundStatementIndenter Indenter(this, Style, Line->Level); 3113 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 3114 /*MunchSemi=*/true, KeepBraces); 3115 setPreviousRBraceType(TT_ControlStatementRBrace); 3116 if (!KeepBraces) { 3117 assert(!NestedTooDeep.empty()); 3118 if (!NestedTooDeep.back()) 3119 markOptionalBraces(LeftBrace); 3120 } 3121 if (WrapRightBrace) 3122 addUnwrappedLine(); 3123 } else { 3124 parseUnbracedBody(); 3125 } 3126 3127 if (!KeepBraces) 3128 NestedTooDeep.pop_back(); 3129 } 3130 3131 void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) { 3132 assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) || 3133 (Style.isVerilog() && 3134 FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb, 3135 Keywords.kw_always_ff, Keywords.kw_always_latch, 3136 Keywords.kw_final, Keywords.kw_initial, 3137 Keywords.kw_foreach, Keywords.kw_forever, 3138 Keywords.kw_repeat))) && 3139 "'for', 'while' or foreach macro expected"); 3140 const bool KeepBraces = !Style.RemoveBracesLLVM || 3141 !FormatTok->isOneOf(tok::kw_for, tok::kw_while); 3142 3143 nextToken(); 3144 // JS' for await ( ... 
3145 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await)) 3146 nextToken(); 3147 if (Style.isCpp() && FormatTok->is(tok::kw_co_await)) 3148 nextToken(); 3149 if (HasParens && FormatTok->is(tok::l_paren)) { 3150 // The type is only set for Verilog basically because we were afraid to 3151 // change the existing behavior for loops. See the discussion on D121756 for 3152 // details. 3153 if (Style.isVerilog()) 3154 FormatTok->setFinalizedType(TT_ConditionLParen); 3155 parseParens(); 3156 } 3157 3158 if (Style.isVerilog()) { 3159 // Event control. 3160 parseVerilogSensitivityList(); 3161 } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) && 3162 Tokens->getPreviousToken()->is(tok::r_paren)) { 3163 nextToken(); 3164 addUnwrappedLine(); 3165 return; 3166 } 3167 3168 handleAttributes(); 3169 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true); 3170 } 3171 3172 void UnwrappedLineParser::parseDoWhile() { 3173 assert(FormatTok->is(tok::kw_do) && "'do' expected"); 3174 nextToken(); 3175 3176 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile); 3177 3178 // FIXME: Add error handling. 3179 if (FormatTok->isNot(tok::kw_while)) { 3180 addUnwrappedLine(); 3181 return; 3182 } 3183 3184 FormatTok->setFinalizedType(TT_DoWhile); 3185 3186 // If in Whitesmiths mode, the line with the while() needs to be indented 3187 // to the same level as the block. 
3188 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 3189 ++Line->Level; 3190 3191 nextToken(); 3192 parseStructuralElement(); 3193 } 3194 3195 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { 3196 nextToken(); 3197 unsigned OldLineLevel = Line->Level; 3198 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 3199 --Line->Level; 3200 if (LeftAlignLabel) 3201 Line->Level = 0; 3202 3203 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && 3204 FormatTok->is(tok::l_brace)) { 3205 3206 CompoundStatementIndenter Indenter(this, Line->Level, 3207 Style.BraceWrapping.AfterCaseLabel, 3208 Style.BraceWrapping.IndentBraces); 3209 parseBlock(); 3210 if (FormatTok->is(tok::kw_break)) { 3211 if (Style.BraceWrapping.AfterControlStatement == 3212 FormatStyle::BWACS_Always) { 3213 addUnwrappedLine(); 3214 if (!Style.IndentCaseBlocks && 3215 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 3216 ++Line->Level; 3217 } 3218 } 3219 parseStructuralElement(); 3220 } 3221 addUnwrappedLine(); 3222 } else { 3223 if (FormatTok->is(tok::semi)) 3224 nextToken(); 3225 addUnwrappedLine(); 3226 } 3227 Line->Level = OldLineLevel; 3228 if (FormatTok->isNot(tok::l_brace)) { 3229 parseStructuralElement(); 3230 addUnwrappedLine(); 3231 } 3232 } 3233 3234 void UnwrappedLineParser::parseCaseLabel() { 3235 assert(FormatTok->is(tok::kw_case) && "'case' expected"); 3236 3237 // FIXME: fix handling of complex expressions here. 
3238 do { 3239 nextToken(); 3240 if (FormatTok->is(tok::colon)) { 3241 FormatTok->setFinalizedType(TT_CaseLabelColon); 3242 break; 3243 } 3244 } while (!eof()); 3245 parseLabel(); 3246 } 3247 3248 void UnwrappedLineParser::parseSwitch() { 3249 assert(FormatTok->is(tok::kw_switch) && "'switch' expected"); 3250 nextToken(); 3251 if (FormatTok->is(tok::l_paren)) 3252 parseParens(); 3253 3254 keepAncestorBraces(); 3255 3256 if (FormatTok->is(tok::l_brace)) { 3257 CompoundStatementIndenter Indenter(this, Style, Line->Level); 3258 FormatTok->setFinalizedType(TT_ControlStatementLBrace); 3259 parseBlock(); 3260 setPreviousRBraceType(TT_ControlStatementRBrace); 3261 addUnwrappedLine(); 3262 } else { 3263 addUnwrappedLine(); 3264 ++Line->Level; 3265 parseStructuralElement(); 3266 --Line->Level; 3267 } 3268 3269 if (Style.RemoveBracesLLVM) 3270 NestedTooDeep.pop_back(); 3271 } 3272 3273 // Operators that can follow a C variable. 3274 static bool isCOperatorFollowingVar(tok::TokenKind kind) { 3275 switch (kind) { 3276 case tok::ampamp: 3277 case tok::ampequal: 3278 case tok::arrow: 3279 case tok::caret: 3280 case tok::caretequal: 3281 case tok::comma: 3282 case tok::ellipsis: 3283 case tok::equal: 3284 case tok::equalequal: 3285 case tok::exclaim: 3286 case tok::exclaimequal: 3287 case tok::greater: 3288 case tok::greaterequal: 3289 case tok::greatergreater: 3290 case tok::greatergreaterequal: 3291 case tok::l_paren: 3292 case tok::l_square: 3293 case tok::less: 3294 case tok::lessequal: 3295 case tok::lessless: 3296 case tok::lesslessequal: 3297 case tok::minus: 3298 case tok::minusequal: 3299 case tok::minusminus: 3300 case tok::percent: 3301 case tok::percentequal: 3302 case tok::period: 3303 case tok::pipe: 3304 case tok::pipeequal: 3305 case tok::pipepipe: 3306 case tok::plus: 3307 case tok::plusequal: 3308 case tok::plusplus: 3309 case tok::question: 3310 case tok::r_brace: 3311 case tok::r_paren: 3312 case tok::r_square: 3313 case tok::semi: 3314 case tok::slash: 3315 
case tok::slashequal: 3316 case tok::star: 3317 case tok::starequal: 3318 return true; 3319 default: 3320 return false; 3321 } 3322 } 3323 3324 void UnwrappedLineParser::parseAccessSpecifier() { 3325 FormatToken *AccessSpecifierCandidate = FormatTok; 3326 nextToken(); 3327 // Understand Qt's slots. 3328 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 3329 nextToken(); 3330 // Otherwise, we don't know what it is, and we'd better keep the next token. 3331 if (FormatTok->is(tok::colon)) { 3332 nextToken(); 3333 addUnwrappedLine(); 3334 } else if (FormatTok->isNot(tok::coloncolon) && 3335 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) { 3336 // Not a variable name nor namespace name. 3337 addUnwrappedLine(); 3338 } else if (AccessSpecifierCandidate) { 3339 // Consider the access specifier to be a C identifier. 3340 AccessSpecifierCandidate->Tok.setKind(tok::identifier); 3341 } 3342 } 3343 3344 /// \brief Parses a requires, decides if it is a clause or an expression. 3345 /// \pre The current token has to be the requires keyword. 3346 /// \returns true if it parsed a clause. 3347 bool clang::format::UnwrappedLineParser::parseRequires() { 3348 assert(FormatTok->is(tok::kw_requires) && "'requires' expected"); 3349 auto RequiresToken = FormatTok; 3350 3351 // We try to guess if it is a requires clause, or a requires expression. For 3352 // that we first consume the keyword and check the next token. 3353 nextToken(); 3354 3355 switch (FormatTok->Tok.getKind()) { 3356 case tok::l_brace: 3357 // This can only be an expression, never a clause. 3358 parseRequiresExpression(RequiresToken); 3359 return false; 3360 case tok::l_paren: 3361 // Clauses and expression can start with a paren, it's unclear what we have. 3362 break; 3363 default: 3364 // All other tokens can only be a clause. 
3365 parseRequiresClause(RequiresToken); 3366 return true; 3367 } 3368 3369 // Looking forward we would have to decide if there are function declaration 3370 // like arguments to the requires expression: 3371 // requires (T t) { 3372 // Or there is a constraint expression for the requires clause: 3373 // requires (C<T> && ... 3374 3375 // But first let's look behind. 3376 auto *PreviousNonComment = RequiresToken->getPreviousNonComment(); 3377 3378 if (!PreviousNonComment || 3379 PreviousNonComment->is(TT_RequiresExpressionLBrace)) { 3380 // If there is no token, or an expression left brace, we are a requires 3381 // clause within a requires expression. 3382 parseRequiresClause(RequiresToken); 3383 return true; 3384 } 3385 3386 switch (PreviousNonComment->Tok.getKind()) { 3387 case tok::greater: 3388 case tok::r_paren: 3389 case tok::kw_noexcept: 3390 case tok::kw_const: 3391 // This is a requires clause. 3392 parseRequiresClause(RequiresToken); 3393 return true; 3394 case tok::amp: 3395 case tok::ampamp: { 3396 // This can be either: 3397 // if (... && requires (T t) ...) 3398 // Or 3399 // void member(...) && requires (C<T> ... 3400 // We check the one token before that for a const: 3401 // void member(...) const && requires (C<T> ... 3402 auto PrevPrev = PreviousNonComment->getPreviousNonComment(); 3403 if (PrevPrev && PrevPrev->is(tok::kw_const)) { 3404 parseRequiresClause(RequiresToken); 3405 return true; 3406 } 3407 break; 3408 } 3409 default: 3410 if (PreviousNonComment->isTypeOrIdentifier()) { 3411 // This is a requires clause. 3412 parseRequiresClause(RequiresToken); 3413 return true; 3414 } 3415 // It's an expression. 3416 parseRequiresExpression(RequiresToken); 3417 return false; 3418 } 3419 3420 // Now we look forward and try to check if the paren content is a parameter 3421 // list. The parameters can be cv-qualified and contain references or 3422 // pointers. 
3423 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds 3424 // of stuff: typename, const, *, &, &&, ::, identifiers. 3425 3426 unsigned StoredPosition = Tokens->getPosition(); 3427 FormatToken *NextToken = Tokens->getNextToken(); 3428 int Lookahead = 0; 3429 auto PeekNext = [&Lookahead, &NextToken, this] { 3430 ++Lookahead; 3431 NextToken = Tokens->getNextToken(); 3432 }; 3433 3434 bool FoundType = false; 3435 bool LastWasColonColon = false; 3436 int OpenAngles = 0; 3437 3438 for (; Lookahead < 50; PeekNext()) { 3439 switch (NextToken->Tok.getKind()) { 3440 case tok::kw_volatile: 3441 case tok::kw_const: 3442 case tok::comma: 3443 if (OpenAngles == 0) { 3444 FormatTok = Tokens->setPosition(StoredPosition); 3445 parseRequiresExpression(RequiresToken); 3446 return false; 3447 } 3448 break; 3449 case tok::r_paren: 3450 case tok::pipepipe: 3451 FormatTok = Tokens->setPosition(StoredPosition); 3452 parseRequiresClause(RequiresToken); 3453 return true; 3454 case tok::eof: 3455 // Break out of the loop. 3456 Lookahead = 50; 3457 break; 3458 case tok::coloncolon: 3459 LastWasColonColon = true; 3460 break; 3461 case tok::identifier: 3462 if (FoundType && !LastWasColonColon && OpenAngles == 0) { 3463 FormatTok = Tokens->setPosition(StoredPosition); 3464 parseRequiresExpression(RequiresToken); 3465 return false; 3466 } 3467 FoundType = true; 3468 LastWasColonColon = false; 3469 break; 3470 case tok::less: 3471 ++OpenAngles; 3472 break; 3473 case tok::greater: 3474 --OpenAngles; 3475 break; 3476 default: 3477 if (NextToken->isSimpleTypeSpecifier()) { 3478 FormatTok = Tokens->setPosition(StoredPosition); 3479 parseRequiresExpression(RequiresToken); 3480 return false; 3481 } 3482 break; 3483 } 3484 } 3485 // This seems to be a complicated expression, just assume it's a clause. 3486 FormatTok = Tokens->setPosition(StoredPosition); 3487 parseRequiresClause(RequiresToken); 3488 return true; 3489 } 3490 3491 /// \brief Parses a requires clause. 
3492 /// \param RequiresToken The requires keyword token, which starts this clause. 3493 /// \pre We need to be on the next token after the requires keyword. 3494 /// \sa parseRequiresExpression 3495 /// 3496 /// Returns if it either has finished parsing the clause, or it detects, that 3497 /// the clause is incorrect. 3498 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) { 3499 assert(FormatTok->getPreviousNonComment() == RequiresToken); 3500 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); 3501 3502 // If there is no previous token, we are within a requires expression, 3503 // otherwise we will always have the template or function declaration in front 3504 // of it. 3505 bool InRequiresExpression = 3506 !RequiresToken->Previous || 3507 RequiresToken->Previous->is(TT_RequiresExpressionLBrace); 3508 3509 RequiresToken->setFinalizedType(InRequiresExpression 3510 ? TT_RequiresClauseInARequiresExpression 3511 : TT_RequiresClause); 3512 3513 // NOTE: parseConstraintExpression is only ever called from this function. 3514 // It could be inlined into here. 3515 parseConstraintExpression(); 3516 3517 if (!InRequiresExpression) 3518 FormatTok->Previous->ClosesRequiresClause = true; 3519 } 3520 3521 /// \brief Parses a requires expression. 3522 /// \param RequiresToken The requires keyword token, which starts this clause. 3523 /// \pre We need to be on the next token after the requires keyword. 3524 /// \sa parseRequiresClause 3525 /// 3526 /// Returns if it either has finished parsing the expression, or it detects, 3527 /// that the expression is incorrect. 
3528 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) { 3529 assert(FormatTok->getPreviousNonComment() == RequiresToken); 3530 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); 3531 3532 RequiresToken->setFinalizedType(TT_RequiresExpression); 3533 3534 if (FormatTok->is(tok::l_paren)) { 3535 FormatTok->setFinalizedType(TT_RequiresExpressionLParen); 3536 parseParens(); 3537 } 3538 3539 if (FormatTok->is(tok::l_brace)) { 3540 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace); 3541 parseChildBlock(); 3542 } 3543 } 3544 3545 /// \brief Parses a constraint expression. 3546 /// 3547 /// This is the body of a requires clause. It returns, when the parsing is 3548 /// complete, or the expression is incorrect. 3549 void UnwrappedLineParser::parseConstraintExpression() { 3550 // The special handling for lambdas is needed since tryToParseLambda() eats a 3551 // token and if a requires expression is the last part of a requires clause 3552 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is 3553 // not set on the correct token. Thus we need to be aware if we even expect a 3554 // lambda to be possible. 3555 // template <typename T> requires requires { ... } [[nodiscard]] ...; 3556 bool LambdaNextTimeAllowed = true; 3557 3558 // Within lambda declarations, it is permitted to put a requires clause after 3559 // its template parameter list, which would place the requires clause right 3560 // before the parentheses of the parameters of the lambda declaration. Thus, 3561 // we track if we expect to see grouping parentheses at all. 3562 // Without this check, `requires foo<T> (T t)` in the below example would be 3563 // seen as the whole requires clause, accidentally eating the parameters of 3564 // the lambda. 3565 // [&]<typename T> requires foo<T> (T t) { ... 
}; 3566 bool TopLevelParensAllowed = true; 3567 3568 do { 3569 bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false); 3570 3571 switch (FormatTok->Tok.getKind()) { 3572 case tok::kw_requires: { 3573 auto RequiresToken = FormatTok; 3574 nextToken(); 3575 parseRequiresExpression(RequiresToken); 3576 break; 3577 } 3578 3579 case tok::l_paren: 3580 if (!TopLevelParensAllowed) 3581 return; 3582 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator); 3583 TopLevelParensAllowed = false; 3584 break; 3585 3586 case tok::l_square: 3587 if (!LambdaThisTimeAllowed || !tryToParseLambda()) 3588 return; 3589 break; 3590 3591 case tok::kw_const: 3592 case tok::semi: 3593 case tok::kw_class: 3594 case tok::kw_struct: 3595 case tok::kw_union: 3596 return; 3597 3598 case tok::l_brace: 3599 // Potential function body. 3600 return; 3601 3602 case tok::ampamp: 3603 case tok::pipepipe: 3604 FormatTok->setFinalizedType(TT_BinaryOperator); 3605 nextToken(); 3606 LambdaNextTimeAllowed = true; 3607 TopLevelParensAllowed = true; 3608 break; 3609 3610 case tok::comma: 3611 case tok::comment: 3612 LambdaNextTimeAllowed = LambdaThisTimeAllowed; 3613 nextToken(); 3614 break; 3615 3616 case tok::kw_sizeof: 3617 case tok::greater: 3618 case tok::greaterequal: 3619 case tok::greatergreater: 3620 case tok::less: 3621 case tok::lessequal: 3622 case tok::lessless: 3623 case tok::equalequal: 3624 case tok::exclaim: 3625 case tok::exclaimequal: 3626 case tok::plus: 3627 case tok::minus: 3628 case tok::star: 3629 case tok::slash: 3630 LambdaNextTimeAllowed = true; 3631 TopLevelParensAllowed = true; 3632 // Just eat them. 3633 nextToken(); 3634 break; 3635 3636 case tok::numeric_constant: 3637 case tok::coloncolon: 3638 case tok::kw_true: 3639 case tok::kw_false: 3640 TopLevelParensAllowed = false; 3641 // Just eat them. 
3642 nextToken(); 3643 break; 3644 3645 case tok::kw_static_cast: 3646 case tok::kw_const_cast: 3647 case tok::kw_reinterpret_cast: 3648 case tok::kw_dynamic_cast: 3649 nextToken(); 3650 if (FormatTok->isNot(tok::less)) 3651 return; 3652 3653 nextToken(); 3654 parseBracedList(/*IsAngleBracket=*/true); 3655 break; 3656 3657 default: 3658 if (!FormatTok->Tok.getIdentifierInfo()) { 3659 // Identifiers are part of the default case, we check for more then 3660 // tok::identifier to handle builtin type traits. 3661 return; 3662 } 3663 3664 // We need to differentiate identifiers for a template deduction guide, 3665 // variables, or function return types (the constraint expression has 3666 // ended before that), and basically all other cases. But it's easier to 3667 // check the other way around. 3668 assert(FormatTok->Previous); 3669 switch (FormatTok->Previous->Tok.getKind()) { 3670 case tok::coloncolon: // Nested identifier. 3671 case tok::ampamp: // Start of a function or variable for the 3672 case tok::pipepipe: // constraint expression. (binary) 3673 case tok::exclaim: // The same as above, but unary. 3674 case tok::kw_requires: // Initial identifier of a requires clause. 3675 case tok::equal: // Initial identifier of a concept declaration. 3676 break; 3677 default: 3678 return; 3679 } 3680 3681 // Read identifier with optional template declaration. 3682 nextToken(); 3683 if (FormatTok->is(tok::less)) { 3684 nextToken(); 3685 parseBracedList(/*IsAngleBracket=*/true); 3686 } 3687 TopLevelParensAllowed = false; 3688 break; 3689 } 3690 } while (!eof()); 3691 } 3692 3693 bool UnwrappedLineParser::parseEnum() { 3694 const FormatToken &InitialToken = *FormatTok; 3695 3696 // Won't be 'enum' for NS_ENUMs. 3697 if (FormatTok->is(tok::kw_enum)) 3698 nextToken(); 3699 3700 // In TypeScript, "enum" can also be used as property name, e.g. in interface 3701 // declarations. 
An "enum" keyword followed by a colon would be a syntax 3702 // error and thus assume it is just an identifier. 3703 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question)) 3704 return false; 3705 3706 // In protobuf, "enum" can be used as a field name. 3707 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) 3708 return false; 3709 3710 // Eat up enum class ... 3711 if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct)) 3712 nextToken(); 3713 3714 while (FormatTok->Tok.getIdentifierInfo() || 3715 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 3716 tok::greater, tok::comma, tok::question, 3717 tok::l_square, tok::r_square)) { 3718 if (Style.isVerilog()) { 3719 FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName); 3720 nextToken(); 3721 // In Verilog the base type can have dimensions. 3722 while (FormatTok->is(tok::l_square)) 3723 parseSquare(); 3724 } else { 3725 nextToken(); 3726 } 3727 // We can have macros or attributes in between 'enum' and the enum name. 3728 if (FormatTok->is(tok::l_paren)) 3729 parseParens(); 3730 assert(FormatTok->isNot(TT_AttributeSquare)); 3731 if (FormatTok->is(tok::identifier)) { 3732 nextToken(); 3733 // If there are two identifiers in a row, this is likely an elaborate 3734 // return type. In Java, this can be "implements", etc. 3735 if (Style.isCpp() && FormatTok->is(tok::identifier)) 3736 return false; 3737 } 3738 } 3739 3740 // Just a declaration or something is wrong. 3741 if (FormatTok->isNot(tok::l_brace)) 3742 return true; 3743 FormatTok->setFinalizedType(TT_EnumLBrace); 3744 FormatTok->setBlockKind(BK_Block); 3745 3746 if (Style.Language == FormatStyle::LK_Java) { 3747 // Java enums are different. 
3748 parseJavaEnumBody(); 3749 return true; 3750 } 3751 if (Style.Language == FormatStyle::LK_Proto) { 3752 parseBlock(/*MustBeDeclaration=*/true); 3753 return true; 3754 } 3755 3756 if (!Style.AllowShortEnumsOnASingleLine && 3757 ShouldBreakBeforeBrace(Style, InitialToken)) { 3758 addUnwrappedLine(); 3759 } 3760 // Parse enum body. 3761 nextToken(); 3762 if (!Style.AllowShortEnumsOnASingleLine) { 3763 addUnwrappedLine(); 3764 Line->Level += 1; 3765 } 3766 bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true); 3767 if (!Style.AllowShortEnumsOnASingleLine) 3768 Line->Level -= 1; 3769 if (HasError) { 3770 if (FormatTok->is(tok::semi)) 3771 nextToken(); 3772 addUnwrappedLine(); 3773 } 3774 setPreviousRBraceType(TT_EnumRBrace); 3775 return true; 3776 3777 // There is no addUnwrappedLine() here so that we fall through to parsing a 3778 // structural element afterwards. Thus, in "enum A {} n, m;", 3779 // "} n, m;" will end up in one unwrapped line. 3780 } 3781 3782 bool UnwrappedLineParser::parseStructLike() { 3783 // parseRecord falls through and does not yet add an unwrapped line as a 3784 // record declaration or definition can start a structural element. 3785 parseRecord(); 3786 // This does not apply to Java, JavaScript and C#. 3787 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 3788 Style.isCSharp()) { 3789 if (FormatTok->is(tok::semi)) 3790 nextToken(); 3791 addUnwrappedLine(); 3792 return true; 3793 } 3794 return false; 3795 } 3796 3797 namespace { 3798 // A class used to set and restore the Token position when peeking 3799 // ahead in the token source. 
3800 class ScopedTokenPosition { 3801 unsigned StoredPosition; 3802 FormatTokenSource *Tokens; 3803 3804 public: 3805 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) { 3806 assert(Tokens && "Tokens expected to not be null"); 3807 StoredPosition = Tokens->getPosition(); 3808 } 3809 3810 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); } 3811 }; 3812 } // namespace 3813 3814 // Look to see if we have [[ by looking ahead, if 3815 // its not then rewind to the original position. 3816 bool UnwrappedLineParser::tryToParseSimpleAttribute() { 3817 ScopedTokenPosition AutoPosition(Tokens); 3818 FormatToken *Tok = Tokens->getNextToken(); 3819 // We already read the first [ check for the second. 3820 if (Tok->isNot(tok::l_square)) 3821 return false; 3822 // Double check that the attribute is just something 3823 // fairly simple. 3824 while (Tok->isNot(tok::eof)) { 3825 if (Tok->is(tok::r_square)) 3826 break; 3827 Tok = Tokens->getNextToken(); 3828 } 3829 if (Tok->is(tok::eof)) 3830 return false; 3831 Tok = Tokens->getNextToken(); 3832 if (Tok->isNot(tok::r_square)) 3833 return false; 3834 Tok = Tokens->getNextToken(); 3835 if (Tok->is(tok::semi)) 3836 return false; 3837 return true; 3838 } 3839 3840 void UnwrappedLineParser::parseJavaEnumBody() { 3841 assert(FormatTok->is(tok::l_brace)); 3842 const FormatToken *OpeningBrace = FormatTok; 3843 3844 // Determine whether the enum is simple, i.e. does not have a semicolon or 3845 // constants with class bodies. Simple enums can be formatted like braced 3846 // lists, contracted to a single line, etc. 
3847 unsigned StoredPosition = Tokens->getPosition(); 3848 bool IsSimple = true; 3849 FormatToken *Tok = Tokens->getNextToken(); 3850 while (Tok->isNot(tok::eof)) { 3851 if (Tok->is(tok::r_brace)) 3852 break; 3853 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 3854 IsSimple = false; 3855 break; 3856 } 3857 // FIXME: This will also mark enums with braces in the arguments to enum 3858 // constants as "not simple". This is probably fine in practice, though. 3859 Tok = Tokens->getNextToken(); 3860 } 3861 FormatTok = Tokens->setPosition(StoredPosition); 3862 3863 if (IsSimple) { 3864 nextToken(); 3865 parseBracedList(); 3866 addUnwrappedLine(); 3867 return; 3868 } 3869 3870 // Parse the body of a more complex enum. 3871 // First add a line for everything up to the "{". 3872 nextToken(); 3873 addUnwrappedLine(); 3874 ++Line->Level; 3875 3876 // Parse the enum constants. 3877 while (!eof()) { 3878 if (FormatTok->is(tok::l_brace)) { 3879 // Parse the constant's class body. 3880 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u, 3881 /*MunchSemi=*/false); 3882 } else if (FormatTok->is(tok::l_paren)) { 3883 parseParens(); 3884 } else if (FormatTok->is(tok::comma)) { 3885 nextToken(); 3886 addUnwrappedLine(); 3887 } else if (FormatTok->is(tok::semi)) { 3888 nextToken(); 3889 addUnwrappedLine(); 3890 break; 3891 } else if (FormatTok->is(tok::r_brace)) { 3892 addUnwrappedLine(); 3893 break; 3894 } else { 3895 nextToken(); 3896 } 3897 } 3898 3899 // Parse the class body after the enum's ";" if any. 
3900 parseLevel(OpeningBrace); 3901 nextToken(); 3902 --Line->Level; 3903 addUnwrappedLine(); 3904 } 3905 3906 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 3907 const FormatToken &InitialToken = *FormatTok; 3908 nextToken(); 3909 3910 auto IsNonMacroIdentifier = [](const FormatToken *Tok) { 3911 return Tok->is(tok::identifier) && Tok->TokenText != Tok->TokenText.upper(); 3912 }; 3913 // The actual identifier can be a nested name specifier, and in macros 3914 // it is often token-pasted. 3915 // An [[attribute]] can be before the identifier. 3916 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 3917 tok::kw_alignas, tok::l_square) || 3918 FormatTok->isAttribute() || 3919 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 3920 FormatTok->isOneOf(tok::period, tok::comma))) { 3921 if (Style.isJavaScript() && 3922 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 3923 // JavaScript/TypeScript supports inline object types in 3924 // extends/implements positions: 3925 // class Foo implements {bar: number} { } 3926 nextToken(); 3927 if (FormatTok->is(tok::l_brace)) { 3928 tryToParseBracedList(); 3929 continue; 3930 } 3931 } 3932 if (FormatTok->is(tok::l_square) && handleCppAttributes()) 3933 continue; 3934 nextToken(); 3935 // We can have macros in between 'class' and the class name. 
3936 if (!IsNonMacroIdentifier(FormatTok->Previous) && 3937 FormatTok->is(tok::l_paren)) { 3938 parseParens(); 3939 } 3940 } 3941 3942 if (FormatTok->isOneOf(tok::colon, tok::less)) { 3943 int AngleNestingLevel = 0; 3944 do { 3945 if (FormatTok->is(tok::less)) 3946 ++AngleNestingLevel; 3947 else if (FormatTok->is(tok::greater)) 3948 --AngleNestingLevel; 3949 3950 if (AngleNestingLevel == 0 && FormatTok->is(tok::l_paren) && 3951 IsNonMacroIdentifier(FormatTok->Previous)) { 3952 break; 3953 } 3954 if (FormatTok->is(tok::l_brace)) { 3955 calculateBraceTypes(/*ExpectClassBody=*/true); 3956 if (!tryToParseBracedList()) 3957 break; 3958 } 3959 if (FormatTok->is(tok::l_square)) { 3960 FormatToken *Previous = FormatTok->Previous; 3961 if (!Previous || 3962 !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) { 3963 // Don't try parsing a lambda if we had a closing parenthesis before, 3964 // it was probably a pointer to an array: int (*)[]. 3965 if (!tryToParseLambda()) 3966 continue; 3967 } else { 3968 parseSquare(); 3969 continue; 3970 } 3971 } 3972 if (FormatTok->is(tok::semi)) 3973 return; 3974 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) { 3975 addUnwrappedLine(); 3976 nextToken(); 3977 parseCSharpGenericTypeConstraint(); 3978 break; 3979 } 3980 nextToken(); 3981 } while (!eof()); 3982 } 3983 3984 auto GetBraceTypes = 3985 [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> { 3986 switch (RecordTok.Tok.getKind()) { 3987 case tok::kw_class: 3988 return {TT_ClassLBrace, TT_ClassRBrace}; 3989 case tok::kw_struct: 3990 return {TT_StructLBrace, TT_StructRBrace}; 3991 case tok::kw_union: 3992 return {TT_UnionLBrace, TT_UnionRBrace}; 3993 default: 3994 // Useful for e.g. interface. 
3995 return {TT_RecordLBrace, TT_RecordRBrace}; 3996 } 3997 }; 3998 if (FormatTok->is(tok::l_brace)) { 3999 auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken); 4000 FormatTok->setFinalizedType(OpenBraceType); 4001 if (ParseAsExpr) { 4002 parseChildBlock(); 4003 } else { 4004 if (ShouldBreakBeforeBrace(Style, InitialToken)) 4005 addUnwrappedLine(); 4006 4007 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u; 4008 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false); 4009 } 4010 setPreviousRBraceType(ClosingBraceType); 4011 } 4012 // There is no addUnwrappedLine() here so that we fall through to parsing a 4013 // structural element afterwards. Thus, in "class A {} n, m;", 4014 // "} n, m;" will end up in one unwrapped line. 4015 } 4016 4017 void UnwrappedLineParser::parseObjCMethod() { 4018 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) && 4019 "'(' or identifier expected."); 4020 do { 4021 if (FormatTok->is(tok::semi)) { 4022 nextToken(); 4023 addUnwrappedLine(); 4024 return; 4025 } else if (FormatTok->is(tok::l_brace)) { 4026 if (Style.BraceWrapping.AfterFunction) 4027 addUnwrappedLine(); 4028 parseBlock(); 4029 addUnwrappedLine(); 4030 return; 4031 } else { 4032 nextToken(); 4033 } 4034 } while (!eof()); 4035 } 4036 4037 void UnwrappedLineParser::parseObjCProtocolList() { 4038 assert(FormatTok->is(tok::less) && "'<' expected."); 4039 do { 4040 nextToken(); 4041 // Early exit in case someone forgot a close angle. 4042 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 4043 FormatTok->isObjCAtKeyword(tok::objc_end)) { 4044 return; 4045 } 4046 } while (!eof() && FormatTok->isNot(tok::greater)); 4047 nextToken(); // Skip '>'. 
4048 } 4049 4050 void UnwrappedLineParser::parseObjCUntilAtEnd() { 4051 do { 4052 if (FormatTok->isObjCAtKeyword(tok::objc_end)) { 4053 nextToken(); 4054 addUnwrappedLine(); 4055 break; 4056 } 4057 if (FormatTok->is(tok::l_brace)) { 4058 parseBlock(); 4059 // In ObjC interfaces, nothing should be following the "}". 4060 addUnwrappedLine(); 4061 } else if (FormatTok->is(tok::r_brace)) { 4062 // Ignore stray "}". parseStructuralElement doesn't consume them. 4063 nextToken(); 4064 addUnwrappedLine(); 4065 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 4066 nextToken(); 4067 parseObjCMethod(); 4068 } else { 4069 parseStructuralElement(); 4070 } 4071 } while (!eof()); 4072 } 4073 4074 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 4075 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 4076 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 4077 nextToken(); 4078 nextToken(); // interface name 4079 4080 // @interface can be followed by a lightweight generic 4081 // specialization list, then either a base class or a category. 4082 if (FormatTok->is(tok::less)) 4083 parseObjCLightweightGenerics(); 4084 if (FormatTok->is(tok::colon)) { 4085 nextToken(); 4086 nextToken(); // base class name 4087 // The base class can also have lightweight generics applied to it. 4088 if (FormatTok->is(tok::less)) 4089 parseObjCLightweightGenerics(); 4090 } else if (FormatTok->is(tok::l_paren)) { 4091 // Skip category, if present. 4092 parseParens(); 4093 } 4094 4095 if (FormatTok->is(tok::less)) 4096 parseObjCProtocolList(); 4097 4098 if (FormatTok->is(tok::l_brace)) { 4099 if (Style.BraceWrapping.AfterObjCDeclaration) 4100 addUnwrappedLine(); 4101 parseBlock(/*MustBeDeclaration=*/true); 4102 } 4103 4104 // With instance variables, this puts '}' on its own line. Without instance 4105 // variables, this ends the @interface line. 
4106 addUnwrappedLine(); 4107 4108 parseObjCUntilAtEnd(); 4109 } 4110 4111 void UnwrappedLineParser::parseObjCLightweightGenerics() { 4112 assert(FormatTok->is(tok::less)); 4113 // Unlike protocol lists, generic parameterizations support 4114 // nested angles: 4115 // 4116 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 4117 // NSObject <NSCopying, NSSecureCoding> 4118 // 4119 // so we need to count how many open angles we have left. 4120 unsigned NumOpenAngles = 1; 4121 do { 4122 nextToken(); 4123 // Early exit in case someone forgot a close angle. 4124 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 4125 FormatTok->isObjCAtKeyword(tok::objc_end)) { 4126 break; 4127 } 4128 if (FormatTok->is(tok::less)) { 4129 ++NumOpenAngles; 4130 } else if (FormatTok->is(tok::greater)) { 4131 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 4132 --NumOpenAngles; 4133 } 4134 } while (!eof() && NumOpenAngles != 0); 4135 nextToken(); // Skip '>'. 4136 } 4137 4138 // Returns true for the declaration/definition form of @protocol, 4139 // false for the expression form. 4140 bool UnwrappedLineParser::parseObjCProtocol() { 4141 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 4142 nextToken(); 4143 4144 if (FormatTok->is(tok::l_paren)) { 4145 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 4146 return false; 4147 } 4148 4149 // The definition/declaration form, 4150 // @protocol Foo 4151 // - (int)someMethod; 4152 // @end 4153 4154 nextToken(); // protocol name 4155 4156 if (FormatTok->is(tok::less)) 4157 parseObjCProtocolList(); 4158 4159 // Check for protocol declaration. 
4160 if (FormatTok->is(tok::semi)) { 4161 nextToken(); 4162 addUnwrappedLine(); 4163 return true; 4164 } 4165 4166 addUnwrappedLine(); 4167 parseObjCUntilAtEnd(); 4168 return true; 4169 } 4170 4171 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 4172 bool IsImport = FormatTok->is(Keywords.kw_import); 4173 assert(IsImport || FormatTok->is(tok::kw_export)); 4174 nextToken(); 4175 4176 // Consume the "default" in "export default class/function". 4177 if (FormatTok->is(tok::kw_default)) 4178 nextToken(); 4179 4180 // Consume "async function", "function" and "default function", so that these 4181 // get parsed as free-standing JS functions, i.e. do not require a trailing 4182 // semicolon. 4183 if (FormatTok->is(Keywords.kw_async)) 4184 nextToken(); 4185 if (FormatTok->is(Keywords.kw_function)) { 4186 nextToken(); 4187 return; 4188 } 4189 4190 // For imports, `export *`, `export {...}`, consume the rest of the line up 4191 // to the terminating `;`. For everything else, just return and continue 4192 // parsing the structural element, i.e. the declaration or expression for 4193 // `export default`. 4194 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 4195 !FormatTok->isStringLiteral() && 4196 !(FormatTok->is(Keywords.kw_type) && 4197 Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) { 4198 return; 4199 } 4200 4201 while (!eof()) { 4202 if (FormatTok->is(tok::semi)) 4203 return; 4204 if (Line->Tokens.empty()) { 4205 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 4206 // import statement should terminate. 
4207 return; 4208 } 4209 if (FormatTok->is(tok::l_brace)) { 4210 FormatTok->setBlockKind(BK_Block); 4211 nextToken(); 4212 parseBracedList(); 4213 } else { 4214 nextToken(); 4215 } 4216 } 4217 } 4218 4219 void UnwrappedLineParser::parseStatementMacro() { 4220 nextToken(); 4221 if (FormatTok->is(tok::l_paren)) 4222 parseParens(); 4223 if (FormatTok->is(tok::semi)) 4224 nextToken(); 4225 addUnwrappedLine(); 4226 } 4227 4228 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() { 4229 // consume things like a::`b.c[d:e] or a::* 4230 while (true) { 4231 if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar, 4232 tok::coloncolon, tok::hash) || 4233 Keywords.isVerilogIdentifier(*FormatTok)) { 4234 nextToken(); 4235 } else if (FormatTok->is(tok::l_square)) { 4236 parseSquare(); 4237 } else { 4238 break; 4239 } 4240 } 4241 } 4242 4243 void UnwrappedLineParser::parseVerilogSensitivityList() { 4244 if (FormatTok->isNot(tok::at)) 4245 return; 4246 nextToken(); 4247 // A block event expression has 2 at signs. 
4248 if (FormatTok->is(tok::at)) 4249 nextToken(); 4250 switch (FormatTok->Tok.getKind()) { 4251 case tok::star: 4252 nextToken(); 4253 break; 4254 case tok::l_paren: 4255 parseParens(); 4256 break; 4257 default: 4258 parseVerilogHierarchyIdentifier(); 4259 break; 4260 } 4261 } 4262 4263 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() { 4264 unsigned AddLevels = 0; 4265 4266 if (FormatTok->is(Keywords.kw_clocking)) { 4267 nextToken(); 4268 if (Keywords.isVerilogIdentifier(*FormatTok)) 4269 nextToken(); 4270 parseVerilogSensitivityList(); 4271 if (FormatTok->is(tok::semi)) 4272 nextToken(); 4273 } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex, 4274 Keywords.kw_casez, Keywords.kw_randcase, 4275 Keywords.kw_randsequence)) { 4276 if (Style.IndentCaseLabels) 4277 AddLevels++; 4278 nextToken(); 4279 if (FormatTok->is(tok::l_paren)) { 4280 FormatTok->setFinalizedType(TT_ConditionLParen); 4281 parseParens(); 4282 } 4283 if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches)) 4284 nextToken(); 4285 // The case header has no semicolon. 4286 } else { 4287 // "module" etc. 
4288 nextToken(); 4289 // all the words like the name of the module and specifiers like 4290 // "automatic" and the width of function return type 4291 while (true) { 4292 if (FormatTok->is(tok::l_square)) { 4293 auto Prev = FormatTok->getPreviousNonComment(); 4294 if (Prev && Keywords.isVerilogIdentifier(*Prev)) 4295 Prev->setFinalizedType(TT_VerilogDimensionedTypeName); 4296 parseSquare(); 4297 } else if (Keywords.isVerilogIdentifier(*FormatTok) || 4298 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) { 4299 nextToken(); 4300 } else { 4301 break; 4302 } 4303 } 4304 4305 auto NewLine = [this]() { 4306 addUnwrappedLine(); 4307 Line->IsContinuation = true; 4308 }; 4309 4310 // package imports 4311 while (FormatTok->is(Keywords.kw_import)) { 4312 NewLine(); 4313 nextToken(); 4314 parseVerilogHierarchyIdentifier(); 4315 if (FormatTok->is(tok::semi)) 4316 nextToken(); 4317 } 4318 4319 // parameters and ports 4320 if (FormatTok->is(Keywords.kw_verilogHash)) { 4321 NewLine(); 4322 nextToken(); 4323 if (FormatTok->is(tok::l_paren)) { 4324 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen); 4325 parseParens(); 4326 } 4327 } 4328 if (FormatTok->is(tok::l_paren)) { 4329 NewLine(); 4330 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen); 4331 parseParens(); 4332 } 4333 4334 // extends and implements 4335 if (FormatTok->is(Keywords.kw_extends)) { 4336 NewLine(); 4337 nextToken(); 4338 parseVerilogHierarchyIdentifier(); 4339 if (FormatTok->is(tok::l_paren)) 4340 parseParens(); 4341 } 4342 if (FormatTok->is(Keywords.kw_implements)) { 4343 NewLine(); 4344 do { 4345 nextToken(); 4346 parseVerilogHierarchyIdentifier(); 4347 } while (FormatTok->is(tok::comma)); 4348 } 4349 4350 // Coverage event for cover groups. 
4351 if (FormatTok->is(tok::at)) { 4352 NewLine(); 4353 parseVerilogSensitivityList(); 4354 } 4355 4356 if (FormatTok->is(tok::semi)) 4357 nextToken(/*LevelDifference=*/1); 4358 addUnwrappedLine(); 4359 } 4360 4361 return AddLevels; 4362 } 4363 4364 void UnwrappedLineParser::parseVerilogTable() { 4365 assert(FormatTok->is(Keywords.kw_table)); 4366 nextToken(/*LevelDifference=*/1); 4367 addUnwrappedLine(); 4368 4369 auto InitialLevel = Line->Level++; 4370 while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) { 4371 FormatToken *Tok = FormatTok; 4372 nextToken(); 4373 if (Tok->is(tok::semi)) 4374 addUnwrappedLine(); 4375 else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus)) 4376 Tok->setFinalizedType(TT_VerilogTableItem); 4377 } 4378 Line->Level = InitialLevel; 4379 nextToken(/*LevelDifference=*/-1); 4380 addUnwrappedLine(); 4381 } 4382 4383 void UnwrappedLineParser::parseVerilogCaseLabel() { 4384 // The label will get unindented in AnnotatingParser. If there are no leading 4385 // spaces, indent the rest here so that things inside the block will be 4386 // indented relative to things outside. We don't use parseLabel because we 4387 // don't know whether this colon is a label or a ternary expression at this 4388 // point. 4389 auto OrigLevel = Line->Level; 4390 auto FirstLine = CurrentLines->size(); 4391 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1)) 4392 ++Line->Level; 4393 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok)) 4394 --Line->Level; 4395 parseStructuralElement(); 4396 // Restore the indentation in both the new line and the line that has the 4397 // label. 
4398 if (CurrentLines->size() > FirstLine) 4399 (*CurrentLines)[FirstLine].Level = OrigLevel; 4400 Line->Level = OrigLevel; 4401 } 4402 4403 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const { 4404 for (const auto &N : Line.Tokens) { 4405 if (N.Tok->MacroCtx) 4406 return true; 4407 for (const UnwrappedLine &Child : N.Children) 4408 if (containsExpansion(Child)) 4409 return true; 4410 } 4411 return false; 4412 } 4413 4414 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { 4415 if (Line->Tokens.empty()) 4416 return; 4417 LLVM_DEBUG({ 4418 if (!parsingPPDirective()) { 4419 llvm::dbgs() << "Adding unwrapped line:\n"; 4420 printDebugInfo(*Line); 4421 } 4422 }); 4423 4424 // If this line closes a block when in Whitesmiths mode, remember that 4425 // information so that the level can be decreased after the line is added. 4426 // This has to happen after the addition of the line since the line itself 4427 // needs to be indented. 4428 bool ClosesWhitesmithsBlock = 4429 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && 4430 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 4431 4432 // If the current line was expanded from a macro call, we use it to 4433 // reconstruct an unwrapped line from the structure of the expanded unwrapped 4434 // line and the unexpanded token stream. 4435 if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) { 4436 if (!Reconstruct) 4437 Reconstruct.emplace(Line->Level, Unexpanded); 4438 Reconstruct->addLine(*Line); 4439 4440 // While the reconstructed unexpanded lines are stored in the normal 4441 // flow of lines, the expanded lines are stored on the side to be analyzed 4442 // in an extra step. 
4443 CurrentExpandedLines.push_back(std::move(*Line)); 4444 4445 if (Reconstruct->finished()) { 4446 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult(); 4447 assert(!Reconstructed.Tokens.empty() && 4448 "Reconstructed must at least contain the macro identifier."); 4449 assert(!parsingPPDirective()); 4450 LLVM_DEBUG({ 4451 llvm::dbgs() << "Adding unexpanded line:\n"; 4452 printDebugInfo(Reconstructed); 4453 }); 4454 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines; 4455 Lines.push_back(std::move(Reconstructed)); 4456 CurrentExpandedLines.clear(); 4457 Reconstruct.reset(); 4458 } 4459 } else { 4460 // At the top level we only get here when no unexpansion is going on, or 4461 // when conditional formatting led to unfinished macro reconstructions. 4462 assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0); 4463 CurrentLines->push_back(std::move(*Line)); 4464 } 4465 Line->Tokens.clear(); 4466 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 4467 Line->FirstStartColumn = 0; 4468 Line->IsContinuation = false; 4469 Line->SeenDecltypeAuto = false; 4470 4471 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) 4472 --Line->Level; 4473 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) { 4474 CurrentLines->append( 4475 std::make_move_iterator(PreprocessorDirectives.begin()), 4476 std::make_move_iterator(PreprocessorDirectives.end())); 4477 PreprocessorDirectives.clear(); 4478 } 4479 // Disconnect the current token from the last token on the previous line. 4480 FormatTok->Previous = nullptr; 4481 } 4482 4483 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); } 4484 4485 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 4486 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 4487 FormatTok.NewlinesBefore > 0; 4488 } 4489 4490 // Checks if \p FormatTok is a line comment that continues the line comment 4491 // section on \p Line. 
4492 static bool 4493 continuesLineCommentSection(const FormatToken &FormatTok, 4494 const UnwrappedLine &Line, 4495 const llvm::Regex &CommentPragmasRegex) { 4496 if (Line.Tokens.empty()) 4497 return false; 4498 4499 StringRef IndentContent = FormatTok.TokenText; 4500 if (FormatTok.TokenText.starts_with("//") || 4501 FormatTok.TokenText.starts_with("/*")) { 4502 IndentContent = FormatTok.TokenText.substr(2); 4503 } 4504 if (CommentPragmasRegex.match(IndentContent)) 4505 return false; 4506 4507 // If Line starts with a line comment, then FormatTok continues the comment 4508 // section if its original column is greater or equal to the original start 4509 // column of the line. 4510 // 4511 // Define the min column token of a line as follows: if a line ends in '{' or 4512 // contains a '{' followed by a line comment, then the min column token is 4513 // that '{'. Otherwise, the min column token of the line is the first token of 4514 // the line. 4515 // 4516 // If Line starts with a token other than a line comment, then FormatTok 4517 // continues the comment section if its original column is greater than the 4518 // original start column of the min column token of the line. 
4519 // 4520 // For example, the second line comment continues the first in these cases: 4521 // 4522 // // first line 4523 // // second line 4524 // 4525 // and: 4526 // 4527 // // first line 4528 // // second line 4529 // 4530 // and: 4531 // 4532 // int i; // first line 4533 // // second line 4534 // 4535 // and: 4536 // 4537 // do { // first line 4538 // // second line 4539 // int i; 4540 // } while (true); 4541 // 4542 // and: 4543 // 4544 // enum { 4545 // a, // first line 4546 // // second line 4547 // b 4548 // }; 4549 // 4550 // The second line comment doesn't continue the first in these cases: 4551 // 4552 // // first line 4553 // // second line 4554 // 4555 // and: 4556 // 4557 // int i; // first line 4558 // // second line 4559 // 4560 // and: 4561 // 4562 // do { // first line 4563 // // second line 4564 // int i; 4565 // } while (true); 4566 // 4567 // and: 4568 // 4569 // enum { 4570 // a, // first line 4571 // // second line 4572 // }; 4573 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 4574 4575 // Scan for '{//'. If found, use the column of '{' as a min column for line 4576 // comment section continuation. 4577 const FormatToken *PreviousToken = nullptr; 4578 for (const UnwrappedLineNode &Node : Line.Tokens) { 4579 if (PreviousToken && PreviousToken->is(tok::l_brace) && 4580 isLineComment(*Node.Tok)) { 4581 MinColumnToken = PreviousToken; 4582 break; 4583 } 4584 PreviousToken = Node.Tok; 4585 4586 // Grab the last newline preceding a token in this unwrapped line. 
4587 if (Node.Tok->NewlinesBefore > 0) 4588 MinColumnToken = Node.Tok; 4589 } 4590 if (PreviousToken && PreviousToken->is(tok::l_brace)) 4591 MinColumnToken = PreviousToken; 4592 4593 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 4594 MinColumnToken); 4595 } 4596 4597 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 4598 bool JustComments = Line->Tokens.empty(); 4599 for (FormatToken *Tok : CommentsBeforeNextToken) { 4600 // Line comments that belong to the same line comment section are put on the 4601 // same line since later we might want to reflow content between them. 4602 // Additional fine-grained breaking of line comment sections is controlled 4603 // by the class BreakableLineCommentSection in case it is desirable to keep 4604 // several line comment sections in the same unwrapped line. 4605 // 4606 // FIXME: Consider putting separate line comment sections as children to the 4607 // unwrapped line instead. 4608 Tok->ContinuesLineCommentSection = 4609 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex); 4610 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection) 4611 addUnwrappedLine(); 4612 pushToken(Tok); 4613 } 4614 if (NewlineBeforeNext && JustComments) 4615 addUnwrappedLine(); 4616 CommentsBeforeNextToken.clear(); 4617 } 4618 4619 void UnwrappedLineParser::nextToken(int LevelDifference) { 4620 if (eof()) 4621 return; 4622 flushComments(isOnNewLine(*FormatTok)); 4623 pushToken(FormatTok); 4624 FormatToken *Previous = FormatTok; 4625 if (!Style.isJavaScript()) 4626 readToken(LevelDifference); 4627 else 4628 readTokenWithJavaScriptASI(); 4629 FormatTok->Previous = Previous; 4630 if (Style.isVerilog()) { 4631 // Blocks in Verilog can have `begin` and `end` instead of braces. For 4632 // keywords like `begin`, we can't treat them the same as left braces 4633 // because some contexts require one of them. 
For example structs use 4634 // braces and if blocks use keywords, and a left brace can occur in an if 4635 // statement, but it is not a block. For keywords like `end`, we simply 4636 // treat them the same as right braces. 4637 if (Keywords.isVerilogEnd(*FormatTok)) 4638 FormatTok->Tok.setKind(tok::r_brace); 4639 } 4640 } 4641 4642 void UnwrappedLineParser::distributeComments( 4643 const SmallVectorImpl<FormatToken *> &Comments, 4644 const FormatToken *NextTok) { 4645 // Whether or not a line comment token continues a line is controlled by 4646 // the method continuesLineCommentSection, with the following caveat: 4647 // 4648 // Define a trail of Comments to be a nonempty proper postfix of Comments such 4649 // that each comment line from the trail is aligned with the next token, if 4650 // the next token exists. If a trail exists, the beginning of the maximal 4651 // trail is marked as a start of a new comment section. 4652 // 4653 // For example in this code: 4654 // 4655 // int a; // line about a 4656 // // line 1 about b 4657 // // line 2 about b 4658 // int b; 4659 // 4660 // the two lines about b form a maximal trail, so there are two sections, the 4661 // first one consisting of the single comment "// line about a" and the 4662 // second one consisting of the next two comments. 4663 if (Comments.empty()) 4664 return; 4665 bool ShouldPushCommentsInCurrentLine = true; 4666 bool HasTrailAlignedWithNextToken = false; 4667 unsigned StartOfTrailAlignedWithNextToken = 0; 4668 if (NextTok) { 4669 // We are skipping the first element intentionally. 
4670 for (unsigned i = Comments.size() - 1; i > 0; --i) { 4671 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 4672 HasTrailAlignedWithNextToken = true; 4673 StartOfTrailAlignedWithNextToken = i; 4674 } 4675 } 4676 } 4677 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 4678 FormatToken *FormatTok = Comments[i]; 4679 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 4680 FormatTok->ContinuesLineCommentSection = false; 4681 } else { 4682 FormatTok->ContinuesLineCommentSection = 4683 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 4684 } 4685 if (!FormatTok->ContinuesLineCommentSection && 4686 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 4687 ShouldPushCommentsInCurrentLine = false; 4688 } 4689 if (ShouldPushCommentsInCurrentLine) 4690 pushToken(FormatTok); 4691 else 4692 CommentsBeforeNextToken.push_back(FormatTok); 4693 } 4694 } 4695 4696 void UnwrappedLineParser::readToken(int LevelDifference) { 4697 SmallVector<FormatToken *, 1> Comments; 4698 bool PreviousWasComment = false; 4699 bool FirstNonCommentOnLine = false; 4700 do { 4701 FormatTok = Tokens->getNextToken(); 4702 assert(FormatTok); 4703 while (FormatTok->getType() == TT_ConflictStart || 4704 FormatTok->getType() == TT_ConflictEnd || 4705 FormatTok->getType() == TT_ConflictAlternative) { 4706 if (FormatTok->getType() == TT_ConflictStart) 4707 conditionalCompilationStart(/*Unreachable=*/false); 4708 else if (FormatTok->getType() == TT_ConflictAlternative) 4709 conditionalCompilationAlternative(); 4710 else if (FormatTok->getType() == TT_ConflictEnd) 4711 conditionalCompilationEnd(); 4712 FormatTok = Tokens->getNextToken(); 4713 FormatTok->MustBreakBefore = true; 4714 FormatTok->MustBreakBeforeFinalized = true; 4715 } 4716 4717 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine, 4718 const FormatToken &Tok, 4719 bool PreviousWasComment) { 4720 auto IsFirstOnLine = [](const FormatToken &Tok) { 4721 return 
Tok.HasUnescapedNewline || Tok.IsFirst; 4722 }; 4723 4724 // Consider preprocessor directives preceded by block comments as first 4725 // on line. 4726 if (PreviousWasComment) 4727 return FirstNonCommentOnLine || IsFirstOnLine(Tok); 4728 return IsFirstOnLine(Tok); 4729 }; 4730 4731 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4732 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4733 PreviousWasComment = FormatTok->is(tok::comment); 4734 4735 while (!Line->InPPDirective && FormatTok->is(tok::hash) && 4736 (!Style.isVerilog() || 4737 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) && 4738 FirstNonCommentOnLine) { 4739 distributeComments(Comments, FormatTok); 4740 Comments.clear(); 4741 // If there is an unfinished unwrapped line, we flush the preprocessor 4742 // directives only after that unwrapped line was finished later. 4743 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 4744 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 4745 assert((LevelDifference >= 0 || 4746 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 4747 "LevelDifference makes Line->Level negative"); 4748 Line->Level += LevelDifference; 4749 // Comments stored before the preprocessor directive need to be output 4750 // before the preprocessor directive, at the same level as the 4751 // preprocessor directive, as we consider them to apply to the directive. 
4752 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 4753 PPBranchLevel > 0) { 4754 Line->Level += PPBranchLevel; 4755 } 4756 flushComments(isOnNewLine(*FormatTok)); 4757 parsePPDirective(); 4758 PreviousWasComment = FormatTok->is(tok::comment); 4759 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4760 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4761 } 4762 4763 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 4764 !Line->InPPDirective) { 4765 continue; 4766 } 4767 4768 if (FormatTok->is(tok::identifier) && 4769 Macros.defined(FormatTok->TokenText) && 4770 // FIXME: Allow expanding macros in preprocessor directives. 4771 !Line->InPPDirective) { 4772 FormatToken *ID = FormatTok; 4773 unsigned Position = Tokens->getPosition(); 4774 4775 // To correctly parse the code, we need to replace the tokens of the macro 4776 // call with its expansion. 4777 auto PreCall = std::move(Line); 4778 Line.reset(new UnwrappedLine); 4779 bool OldInExpansion = InExpansion; 4780 InExpansion = true; 4781 // We parse the macro call into a new line. 4782 auto Args = parseMacroCall(); 4783 InExpansion = OldInExpansion; 4784 assert(Line->Tokens.front().Tok == ID); 4785 // And remember the unexpanded macro call tokens. 4786 auto UnexpandedLine = std::move(Line); 4787 // Reset to the old line. 4788 Line = std::move(PreCall); 4789 4790 LLVM_DEBUG({ 4791 llvm::dbgs() << "Macro call: " << ID->TokenText << "("; 4792 if (Args) { 4793 llvm::dbgs() << "("; 4794 for (const auto &Arg : Args.value()) 4795 for (const auto &T : Arg) 4796 llvm::dbgs() << T->TokenText << " "; 4797 llvm::dbgs() << ")"; 4798 } 4799 llvm::dbgs() << "\n"; 4800 }); 4801 if (Macros.objectLike(ID->TokenText) && Args && 4802 !Macros.hasArity(ID->TokenText, Args->size())) { 4803 // The macro is either 4804 // - object-like, but we got argumnets, or 4805 // - overloaded to be both object-like and function-like, but none of 4806 // the function-like arities match the number of arguments. 
4807 // Thus, expand as object-like macro. 4808 LLVM_DEBUG(llvm::dbgs() 4809 << "Macro \"" << ID->TokenText 4810 << "\" not overloaded for arity " << Args->size() 4811 << "or not function-like, using object-like overload."); 4812 Args.reset(); 4813 UnexpandedLine->Tokens.resize(1); 4814 Tokens->setPosition(Position); 4815 nextToken(); 4816 assert(!Args && Macros.objectLike(ID->TokenText)); 4817 } 4818 if ((!Args && Macros.objectLike(ID->TokenText)) || 4819 (Args && Macros.hasArity(ID->TokenText, Args->size()))) { 4820 // Next, we insert the expanded tokens in the token stream at the 4821 // current position, and continue parsing. 4822 Unexpanded[ID] = std::move(UnexpandedLine); 4823 SmallVector<FormatToken *, 8> Expansion = 4824 Macros.expand(ID, std::move(Args)); 4825 if (!Expansion.empty()) 4826 FormatTok = Tokens->insertTokens(Expansion); 4827 4828 LLVM_DEBUG({ 4829 llvm::dbgs() << "Expanded: "; 4830 for (const auto &T : Expansion) 4831 llvm::dbgs() << T->TokenText << " "; 4832 llvm::dbgs() << "\n"; 4833 }); 4834 } else { 4835 LLVM_DEBUG({ 4836 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText 4837 << "\", because it was used "; 4838 if (Args) 4839 llvm::dbgs() << "with " << Args->size(); 4840 else 4841 llvm::dbgs() << "without"; 4842 llvm::dbgs() << " arguments, which doesn't match any definition.\n"; 4843 }); 4844 Tokens->setPosition(Position); 4845 FormatTok = ID; 4846 } 4847 } 4848 4849 if (FormatTok->isNot(tok::comment)) { 4850 distributeComments(Comments, FormatTok); 4851 Comments.clear(); 4852 return; 4853 } 4854 4855 Comments.push_back(FormatTok); 4856 } while (!eof()); 4857 4858 distributeComments(Comments, nullptr); 4859 Comments.clear(); 4860 } 4861 4862 namespace { 4863 template <typename Iterator> 4864 void pushTokens(Iterator Begin, Iterator End, 4865 llvm::SmallVectorImpl<FormatToken *> &Into) { 4866 for (auto I = Begin; I != End; ++I) { 4867 Into.push_back(I->Tok); 4868 for (const auto &Child : I->Children) 4869 
pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into); 4870 } 4871 } 4872 } // namespace 4873 4874 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> 4875 UnwrappedLineParser::parseMacroCall() { 4876 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args; 4877 assert(Line->Tokens.empty()); 4878 nextToken(); 4879 if (FormatTok->isNot(tok::l_paren)) 4880 return Args; 4881 unsigned Position = Tokens->getPosition(); 4882 FormatToken *Tok = FormatTok; 4883 nextToken(); 4884 Args.emplace(); 4885 auto ArgStart = std::prev(Line->Tokens.end()); 4886 4887 int Parens = 0; 4888 do { 4889 switch (FormatTok->Tok.getKind()) { 4890 case tok::l_paren: 4891 ++Parens; 4892 nextToken(); 4893 break; 4894 case tok::r_paren: { 4895 if (Parens > 0) { 4896 --Parens; 4897 nextToken(); 4898 break; 4899 } 4900 Args->push_back({}); 4901 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); 4902 nextToken(); 4903 return Args; 4904 } 4905 case tok::comma: { 4906 if (Parens > 0) { 4907 nextToken(); 4908 break; 4909 } 4910 Args->push_back({}); 4911 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); 4912 nextToken(); 4913 ArgStart = std::prev(Line->Tokens.end()); 4914 break; 4915 } 4916 default: 4917 nextToken(); 4918 break; 4919 } 4920 } while (!eof()); 4921 Line->Tokens.resize(1); 4922 Tokens->setPosition(Position); 4923 FormatTok = Tok; 4924 return {}; 4925 } 4926 4927 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 4928 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 4929 if (MustBreakBeforeNextToken) { 4930 Line->Tokens.back().Tok->MustBreakBefore = true; 4931 Line->Tokens.back().Tok->MustBreakBeforeFinalized = true; 4932 MustBreakBeforeNextToken = false; 4933 } 4934 } 4935 4936 } // end namespace format 4937 } // end namespace clang 4938