1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "FormatToken.h" 17 #include "TokenAnnotator.h" 18 #include "clang/Basic/TokenKinds.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/Support/Debug.h" 21 #include "llvm/Support/raw_ostream.h" 22 23 #include <algorithm> 24 #include <utility> 25 26 #define DEBUG_TYPE "format-parser" 27 28 namespace clang { 29 namespace format { 30 31 class FormatTokenSource { 32 public: 33 virtual ~FormatTokenSource() {} 34 35 // Returns the next token in the token stream. 36 virtual FormatToken *getNextToken() = 0; 37 38 // Returns the token preceding the token returned by the last call to 39 // getNextToken() in the token stream, or nullptr if no such token exists. 40 virtual FormatToken *getPreviousToken() = 0; 41 42 // Returns the token that would be returned by the next call to 43 // getNextToken(). 44 virtual FormatToken *peekNextToken(bool SkipComment = false) = 0; 45 46 // Returns whether we are at the end of the file. 47 // This can be different from whether getNextToken() returned an eof token 48 // when the FormatTokenSource is a view on a part of the token stream. 49 virtual bool isEOF() = 0; 50 51 // Gets the current position in the token stream, to be used by setPosition(). 52 virtual unsigned getPosition() = 0; 53 54 // Resets the token stream to the state it was in when getPosition() returned 55 // Position, and return the token at that position in the stream. 56 virtual FormatToken *setPosition(unsigned Position) = 0; 57 }; 58 59 namespace { 60 61 void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line, 62 StringRef Prefix = "", bool PrintText = false) { 63 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn 64 << ")" << (Line.InPPDirective ? " MACRO" : "") << ": "; 65 bool NewLine = false; 66 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 67 E = Line.Tokens.end(); 68 I != E; ++I) { 69 if (NewLine) { 70 OS << Prefix; 71 NewLine = false; 72 } 73 OS << I->Tok->Tok.getName() << "[" 74 << "T=" << (unsigned)I->Tok->getType() 75 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText 76 << "\"] "; 77 for (SmallVectorImpl<UnwrappedLine>::const_iterator 78 CI = I->Children.begin(), 79 CE = I->Children.end(); 80 CI != CE; ++CI) { 81 OS << "\n"; 82 printLine(OS, *CI, (Prefix + " ").str()); 83 NewLine = true; 84 } 85 } 86 if (!NewLine) 87 OS << "\n"; 88 } 89 90 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) { 91 printLine(llvm::dbgs(), Line); 92 } 93 94 class ScopedDeclarationState { 95 public: 96 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack, 97 bool MustBeDeclaration) 98 : Line(Line), Stack(Stack) { 99 Line.MustBeDeclaration = MustBeDeclaration; 100 Stack.push_back(MustBeDeclaration); 101 } 102 ~ScopedDeclarationState() { 103 Stack.pop_back(); 104 if (!Stack.empty()) 105 Line.MustBeDeclaration = Stack.back(); 106 else 107 Line.MustBeDeclaration = true; 108 } 109 110 private: 111 UnwrappedLine &Line; 112 llvm::BitVector &Stack; 113 }; 114 115 static bool isLineComment(const FormatToken &FormatTok) { 116 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); 117 } 118 119 // Checks if \p FormatTok is a line comment that continues the line comment 120 // \p Previous. The original column of \p MinColumnToken is used to determine 121 // whether \p FormatTok is indented enough to the right to continue \p Previous. 122 static bool continuesLineComment(const FormatToken &FormatTok, 123 const FormatToken *Previous, 124 const FormatToken *MinColumnToken) { 125 if (!Previous || !MinColumnToken) 126 return false; 127 unsigned MinContinueColumn = 128 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1); 129 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 130 isLineComment(*Previous) && 131 FormatTok.OriginalColumn >= MinContinueColumn; 132 } 133 134 class ScopedMacroState : public FormatTokenSource { 135 public: 136 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 137 FormatToken *&ResetToken) 138 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 139 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 140 Token(nullptr), PreviousToken(nullptr) { 141 FakeEOF.Tok.startToken(); 142 FakeEOF.Tok.setKind(tok::eof); 143 TokenSource = this; 144 Line.Level = 0; 145 Line.InPPDirective = true; 146 // InMacroBody gets set after the `#define x` part. 147 } 148 149 ~ScopedMacroState() override { 150 TokenSource = PreviousTokenSource; 151 ResetToken = Token; 152 Line.InPPDirective = false; 153 Line.InMacroBody = false; 154 Line.Level = PreviousLineLevel; 155 } 156 157 FormatToken *getNextToken() override { 158 // The \c UnwrappedLineParser guards against this by never calling 159 // \c getNextToken() after it has encountered the first eof token. 160 assert(!eof()); 161 PreviousToken = Token; 162 Token = PreviousTokenSource->getNextToken(); 163 if (eof()) 164 return &FakeEOF; 165 return Token; 166 } 167 168 FormatToken *getPreviousToken() override { 169 return PreviousTokenSource->getPreviousToken(); 170 } 171 172 FormatToken *peekNextToken(bool SkipComment) override { 173 if (eof()) 174 return &FakeEOF; 175 return PreviousTokenSource->peekNextToken(SkipComment); 176 } 177 178 bool isEOF() override { return PreviousTokenSource->isEOF(); } 179 180 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 181 182 FormatToken *setPosition(unsigned Position) override { 183 PreviousToken = nullptr; 184 Token = PreviousTokenSource->setPosition(Position); 185 return Token; 186 } 187 188 private: 189 bool eof() { 190 return Token && Token->HasUnescapedNewline && 191 !continuesLineComment(*Token, PreviousToken, 192 /*MinColumnToken=*/PreviousToken); 193 } 194 195 FormatToken FakeEOF; 196 UnwrappedLine &Line; 197 FormatTokenSource *&TokenSource; 198 FormatToken *&ResetToken; 199 unsigned PreviousLineLevel; 200 FormatTokenSource *PreviousTokenSource; 201 202 FormatToken *Token; 203 FormatToken *PreviousToken; 204 }; 205 206 } // end anonymous namespace 207 208 class ScopedLineState { 209 public: 210 ScopedLineState(UnwrappedLineParser &Parser, 211 bool SwitchToPreprocessorLines = false) 212 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 213 if (SwitchToPreprocessorLines) 214 Parser.CurrentLines = &Parser.PreprocessorDirectives; 215 else if (!Parser.Line->Tokens.empty()) 216 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 217 PreBlockLine = std::move(Parser.Line); 218 Parser.Line = std::make_unique<UnwrappedLine>(); 219 Parser.Line->Level = PreBlockLine->Level; 220 Parser.Line->PPLevel = PreBlockLine->PPLevel; 221 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 222 Parser.Line->InMacroBody = PreBlockLine->InMacroBody; 223 } 224 225 ~ScopedLineState() { 226 if (!Parser.Line->Tokens.empty()) 227 Parser.addUnwrappedLine(); 228 assert(Parser.Line->Tokens.empty()); 229 Parser.Line = std::move(PreBlockLine); 230 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 231 Parser.MustBreakBeforeNextToken = true; 232 Parser.CurrentLines = OriginalLines; 233 } 234 235 private: 236 UnwrappedLineParser &Parser; 237 238 std::unique_ptr<UnwrappedLine> PreBlockLine; 239 SmallVectorImpl<UnwrappedLine> *OriginalLines; 240 }; 241 242 class CompoundStatementIndenter { 243 public: 244 CompoundStatementIndenter(UnwrappedLineParser *Parser, 245 const FormatStyle &Style, unsigned &LineLevel) 246 : CompoundStatementIndenter(Parser, LineLevel, 247 Style.BraceWrapping.AfterControlStatement, 248 Style.BraceWrapping.IndentBraces) {} 249 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, 250 bool WrapBrace, bool IndentBrace) 251 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 252 if (WrapBrace) 253 Parser->addUnwrappedLine(); 254 if (IndentBrace) 255 ++LineLevel; 256 } 257 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 258 259 private: 260 unsigned &LineLevel; 261 unsigned OldLineLevel; 262 }; 263 264 namespace { 265 266 class IndexedTokenSource : public FormatTokenSource { 267 public: 268 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 269 : Tokens(Tokens), Position(-1) {} 270 271 FormatToken *getNextToken() override { 272 if (Position >= 0 && isEOF()) { 273 LLVM_DEBUG({ 274 llvm::dbgs() << "Next "; 275 dbgToken(Position); 276 }); 277 return Tokens[Position]; 278 } 279 ++Position; 280 LLVM_DEBUG({ 281 llvm::dbgs() << "Next "; 282 dbgToken(Position); 283 }); 284 return Tokens[Position]; 285 } 286 287 FormatToken *getPreviousToken() override { 288 return Position > 0 ? Tokens[Position - 1] : nullptr; 289 } 290 291 FormatToken *peekNextToken(bool SkipComment) override { 292 int Next = Position + 1; 293 if (SkipComment) 294 while (Tokens[Next]->is(tok::comment)) 295 ++Next; 296 LLVM_DEBUG({ 297 llvm::dbgs() << "Peeking "; 298 dbgToken(Next); 299 }); 300 return Tokens[Next]; 301 } 302 303 bool isEOF() override { return Tokens[Position]->is(tok::eof); } 304 305 unsigned getPosition() override { 306 LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n"); 307 assert(Position >= 0); 308 return Position; 309 } 310 311 FormatToken *setPosition(unsigned P) override { 312 LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n"); 313 Position = P; 314 return Tokens[Position]; 315 } 316 317 void reset() { Position = -1; } 318 319 private: 320 void dbgToken(int Position, llvm::StringRef Indent = "") { 321 FormatToken *Tok = Tokens[Position]; 322 llvm::dbgs() << Indent << "[" << Position 323 << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText 324 << ", Macro: " << !!Tok->MacroCtx << "\n"; 325 } 326 327 ArrayRef<FormatToken *> Tokens; 328 int Position; 329 }; 330 331 } // end anonymous namespace 332 333 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 334 const AdditionalKeywords &Keywords, 335 unsigned FirstStartColumn, 336 ArrayRef<FormatToken *> Tokens, 337 UnwrappedLineConsumer &Callback) 338 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 339 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 340 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 341 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 342 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 343 ? IG_Rejected 344 : IG_Inited), 345 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} 346 347 void UnwrappedLineParser::reset() { 348 PPBranchLevel = -1; 349 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 350 ? IG_Rejected 351 : IG_Inited; 352 IncludeGuardToken = nullptr; 353 Line.reset(new UnwrappedLine); 354 CommentsBeforeNextToken.clear(); 355 FormatTok = nullptr; 356 MustBreakBeforeNextToken = false; 357 PreprocessorDirectives.clear(); 358 CurrentLines = &Lines; 359 DeclarationScopeStack.clear(); 360 NestedTooDeep.clear(); 361 PPStack.clear(); 362 Line->FirstStartColumn = FirstStartColumn; 363 } 364 365 void UnwrappedLineParser::parse() { 366 IndexedTokenSource TokenSource(AllTokens); 367 Line->FirstStartColumn = FirstStartColumn; 368 do { 369 LLVM_DEBUG(llvm::dbgs() << "----\n"); 370 reset(); 371 Tokens = &TokenSource; 372 TokenSource.reset(); 373 374 readToken(); 375 parseFile(); 376 377 // If we found an include guard then all preprocessor directives (other than 378 // the guard) are over-indented by one. 379 if (IncludeGuard == IG_Found) { 380 for (auto &Line : Lines) 381 if (Line.InPPDirective && Line.Level > 0) 382 --Line.Level; 383 } 384 385 // Create line with eof token. 386 pushToken(FormatTok); 387 addUnwrappedLine(); 388 389 for (const UnwrappedLine &Line : Lines) 390 Callback.consumeUnwrappedLine(Line); 391 392 Callback.finishRun(); 393 Lines.clear(); 394 while (!PPLevelBranchIndex.empty() && 395 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 396 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 397 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 398 } 399 if (!PPLevelBranchIndex.empty()) { 400 ++PPLevelBranchIndex.back(); 401 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 402 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 403 } 404 } while (!PPLevelBranchIndex.empty()); 405 } 406 407 void UnwrappedLineParser::parseFile() { 408 // The top-level context in a file always has declarations, except for pre- 409 // processor directives and JavaScript files. 410 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript(); 411 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 412 MustBeDeclaration); 413 if (Style.Language == FormatStyle::LK_TextProto) 414 parseBracedList(); 415 else 416 parseLevel(); 417 // Make sure to format the remaining tokens. 418 // 419 // LK_TextProto is special since its top-level is parsed as the body of a 420 // braced list, which does not necessarily have natural line separators such 421 // as a semicolon. Comments after the last entry that have been determined to 422 // not belong to that line, as in: 423 // key: value 424 // // endfile comment 425 // do not have a chance to be put on a line of their own until this point. 426 // Here we add this newline before end-of-file comments. 427 if (Style.Language == FormatStyle::LK_TextProto && 428 !CommentsBeforeNextToken.empty()) { 429 addUnwrappedLine(); 430 } 431 flushComments(true); 432 addUnwrappedLine(); 433 } 434 435 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() { 436 do { 437 switch (FormatTok->Tok.getKind()) { 438 case tok::l_brace: 439 return; 440 default: 441 if (FormatTok->is(Keywords.kw_where)) { 442 addUnwrappedLine(); 443 nextToken(); 444 parseCSharpGenericTypeConstraint(); 445 break; 446 } 447 nextToken(); 448 break; 449 } 450 } while (!eof()); 451 } 452 453 void UnwrappedLineParser::parseCSharpAttribute() { 454 int UnpairedSquareBrackets = 1; 455 do { 456 switch (FormatTok->Tok.getKind()) { 457 case tok::r_square: 458 nextToken(); 459 --UnpairedSquareBrackets; 460 if (UnpairedSquareBrackets == 0) { 461 addUnwrappedLine(); 462 return; 463 } 464 break; 465 case tok::l_square: 466 ++UnpairedSquareBrackets; 467 nextToken(); 468 break; 469 default: 470 nextToken(); 471 break; 472 } 473 } while (!eof()); 474 } 475 476 bool UnwrappedLineParser::precededByCommentOrPPDirective() const { 477 if (!Lines.empty() && Lines.back().InPPDirective) 478 return true; 479 480 const FormatToken *Previous = Tokens->getPreviousToken(); 481 return Previous && Previous->is(tok::comment) && 482 (Previous->IsMultiline || Previous->NewlinesBefore > 0); 483 } 484 485 /// \brief Parses a level, that is ???. 486 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level 487 /// \param CanContainBracedList If the content can contain (at any level) a 488 /// braced list. 489 /// \param NextLBracesType The type for left brace found in this level. 490 /// \param IfKind The \p if statement kind in the level. 491 /// \param IfLeftBrace The left brace of the \p if block in the level. 492 /// \returns true if a simple block of if/else/for/while, or false otherwise. 493 /// (A simple block has a single statement.) 494 bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace, 495 bool CanContainBracedList, 496 TokenType NextLBracesType, 497 IfStmtKind *IfKind, 498 FormatToken **IfLeftBrace) { 499 auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace 500 ? TT_BracedListLBrace 501 : TT_Unknown; 502 const bool IsPrecededByCommentOrPPDirective = 503 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective(); 504 FormatToken *IfLBrace = nullptr; 505 bool HasDoWhile = false; 506 bool HasLabel = false; 507 unsigned StatementCount = 0; 508 bool SwitchLabelEncountered = false; 509 510 do { 511 if (FormatTok->getType() == TT_AttributeMacro) { 512 nextToken(); 513 continue; 514 } 515 tok::TokenKind kind = FormatTok->Tok.getKind(); 516 if (FormatTok->getType() == TT_MacroBlockBegin) 517 kind = tok::l_brace; 518 else if (FormatTok->getType() == TT_MacroBlockEnd) 519 kind = tok::r_brace; 520 521 auto ParseDefault = [this, OpeningBrace, NextLevelLBracesType, IfKind, 522 &IfLBrace, &HasDoWhile, &HasLabel, &StatementCount] { 523 parseStructuralElement(!OpeningBrace, NextLevelLBracesType, IfKind, 524 &IfLBrace, HasDoWhile ? nullptr : &HasDoWhile, 525 HasLabel ? nullptr : &HasLabel); 526 ++StatementCount; 527 assert(StatementCount > 0 && "StatementCount overflow!"); 528 }; 529 530 switch (kind) { 531 case tok::comment: 532 nextToken(); 533 addUnwrappedLine(); 534 break; 535 case tok::l_brace: 536 if (NextLBracesType != TT_Unknown) { 537 FormatTok->setFinalizedType(NextLBracesType); 538 } else if (FormatTok->Previous && 539 FormatTok->Previous->ClosesRequiresClause) { 540 // We need the 'default' case here to correctly parse a function 541 // l_brace. 542 ParseDefault(); 543 continue; 544 } 545 if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) && 546 tryToParseBracedList()) { 547 continue; 548 } 549 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 550 /*MunchSemi=*/true, /*KeepBraces=*/true, /*IfKind=*/nullptr, 551 /*UnindentWhitesmithsBraces=*/false, CanContainBracedList, 552 NextLBracesType); 553 ++StatementCount; 554 assert(StatementCount > 0 && "StatementCount overflow!"); 555 addUnwrappedLine(); 556 break; 557 case tok::r_brace: 558 if (OpeningBrace) { 559 if (!Style.RemoveBracesLLVM || Line->InPPDirective || 560 !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) { 561 return false; 562 } 563 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel || 564 HasDoWhile || IsPrecededByCommentOrPPDirective || 565 precededByCommentOrPPDirective()) { 566 return false; 567 } 568 const FormatToken *Next = Tokens->peekNextToken(); 569 if (Next->is(tok::comment) && Next->NewlinesBefore == 0) 570 return false; 571 if (IfLeftBrace) 572 *IfLeftBrace = IfLBrace; 573 return true; 574 } 575 nextToken(); 576 addUnwrappedLine(); 577 break; 578 case tok::kw_default: { 579 unsigned StoredPosition = Tokens->getPosition(); 580 FormatToken *Next; 581 do { 582 Next = Tokens->getNextToken(); 583 assert(Next); 584 } while (Next->is(tok::comment)); 585 FormatTok = Tokens->setPosition(StoredPosition); 586 if (Next->isNot(tok::colon)) { 587 // default not followed by ':' is not a case label; treat it like 588 // an identifier. 589 parseStructuralElement(); 590 break; 591 } 592 // Else, if it is 'default:', fall through to the case handling. 593 [[fallthrough]]; 594 } 595 case tok::kw_case: 596 if (Style.isProto() || Style.isVerilog() || 597 (Style.isJavaScript() && Line->MustBeDeclaration)) { 598 // Proto: there are no switch/case statements 599 // Verilog: Case labels don't have this word. We handle case 600 // labels including default in TokenAnnotator. 601 // JavaScript: A 'case: string' style field declaration. 602 ParseDefault(); 603 break; 604 } 605 if (!SwitchLabelEncountered && 606 (Style.IndentCaseLabels || 607 (Line->InPPDirective && Line->Level == 1))) { 608 ++Line->Level; 609 } 610 SwitchLabelEncountered = true; 611 parseStructuralElement(); 612 break; 613 case tok::l_square: 614 if (Style.isCSharp()) { 615 nextToken(); 616 parseCSharpAttribute(); 617 break; 618 } 619 if (handleCppAttributes()) 620 break; 621 [[fallthrough]]; 622 default: 623 ParseDefault(); 624 break; 625 } 626 } while (!eof()); 627 628 return false; 629 } 630 631 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 632 // We'll parse forward through the tokens until we hit 633 // a closing brace or eof - note that getNextToken() will 634 // parse macros, so this will magically work inside macro 635 // definitions, too. 636 unsigned StoredPosition = Tokens->getPosition(); 637 FormatToken *Tok = FormatTok; 638 const FormatToken *PrevTok = Tok->Previous; 639 // Keep a stack of positions of lbrace tokens. We will 640 // update information about whether an lbrace starts a 641 // braced init list or a different block during the loop. 642 SmallVector<FormatToken *, 8> LBraceStack; 643 assert(Tok->is(tok::l_brace)); 644 do { 645 // Get next non-comment token. 646 FormatToken *NextTok; 647 do { 648 NextTok = Tokens->getNextToken(); 649 } while (NextTok->is(tok::comment)); 650 651 switch (Tok->Tok.getKind()) { 652 case tok::l_brace: 653 if (Style.isJavaScript() && PrevTok) { 654 if (PrevTok->isOneOf(tok::colon, tok::less)) { 655 // A ':' indicates this code is in a type, or a braced list 656 // following a label in an object literal ({a: {b: 1}}). 657 // A '<' could be an object used in a comparison, but that is nonsense 658 // code (can never return true), so more likely it is a generic type 659 // argument (`X<{a: string; b: number}>`). 660 // The code below could be confused by semicolons between the 661 // individual members in a type member list, which would normally 662 // trigger BK_Block. In both cases, this must be parsed as an inline 663 // braced init. 664 Tok->setBlockKind(BK_BracedInit); 665 } else if (PrevTok->is(tok::r_paren)) { 666 // `) { }` can only occur in function or method declarations in JS. 667 Tok->setBlockKind(BK_Block); 668 } 669 } else { 670 Tok->setBlockKind(BK_Unknown); 671 } 672 LBraceStack.push_back(Tok); 673 break; 674 case tok::r_brace: 675 if (LBraceStack.empty()) 676 break; 677 if (LBraceStack.back()->is(BK_Unknown)) { 678 bool ProbablyBracedList = false; 679 if (Style.Language == FormatStyle::LK_Proto) { 680 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 681 } else { 682 // Skip NextTok over preprocessor lines, otherwise we may not 683 // properly diagnose the block as a braced intializer 684 // if the comma separator appears after the pp directive. 685 while (NextTok->is(tok::hash)) { 686 ScopedMacroState MacroState(*Line, Tokens, NextTok); 687 do { 688 NextTok = Tokens->getNextToken(); 689 } while (NextTok->isNot(tok::eof)); 690 } 691 692 // Using OriginalColumn to distinguish between ObjC methods and 693 // binary operators is a bit hacky. 694 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 695 NextTok->OriginalColumn == 0; 696 697 // Try to detect a braced list. Note that regardless how we mark inner 698 // braces here, we will overwrite the BlockKind later if we parse a 699 // braced list (where all blocks inside are by default braced lists), 700 // or when we explicitly detect blocks (for example while parsing 701 // lambdas). 702 703 // If we already marked the opening brace as braced list, the closing 704 // must also be part of it. 705 ProbablyBracedList = LBraceStack.back()->is(TT_BracedListLBrace); 706 707 ProbablyBracedList = ProbablyBracedList || 708 (Style.isJavaScript() && 709 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 710 Keywords.kw_as)); 711 ProbablyBracedList = ProbablyBracedList || 712 (Style.isCpp() && NextTok->is(tok::l_paren)); 713 714 // If there is a comma, semicolon or right paren after the closing 715 // brace, we assume this is a braced initializer list. 716 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 717 // braced list in JS. 718 ProbablyBracedList = 719 ProbablyBracedList || 720 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 721 tok::r_paren, tok::r_square, tok::l_brace, 722 tok::ellipsis); 723 724 ProbablyBracedList = 725 ProbablyBracedList || 726 (NextTok->is(tok::identifier) && 727 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)); 728 729 ProbablyBracedList = ProbablyBracedList || 730 (NextTok->is(tok::semi) && 731 (!ExpectClassBody || LBraceStack.size() != 1)); 732 733 ProbablyBracedList = 734 ProbablyBracedList || 735 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 736 737 if (!Style.isCSharp() && NextTok->is(tok::l_square)) { 738 // We can have an array subscript after a braced init 739 // list, but C++11 attributes are expected after blocks. 740 NextTok = Tokens->getNextToken(); 741 ProbablyBracedList = NextTok->isNot(tok::l_square); 742 } 743 } 744 if (ProbablyBracedList) { 745 Tok->setBlockKind(BK_BracedInit); 746 LBraceStack.back()->setBlockKind(BK_BracedInit); 747 } else { 748 Tok->setBlockKind(BK_Block); 749 LBraceStack.back()->setBlockKind(BK_Block); 750 } 751 } 752 LBraceStack.pop_back(); 753 break; 754 case tok::identifier: 755 if (!Tok->is(TT_StatementMacro)) 756 break; 757 [[fallthrough]]; 758 case tok::at: 759 case tok::semi: 760 case tok::kw_if: 761 case tok::kw_while: 762 case tok::kw_for: 763 case tok::kw_switch: 764 case tok::kw_try: 765 case tok::kw___try: 766 if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown)) 767 LBraceStack.back()->setBlockKind(BK_Block); 768 break; 769 default: 770 break; 771 } 772 PrevTok = Tok; 773 Tok = NextTok; 774 } while (Tok->isNot(tok::eof) && !LBraceStack.empty()); 775 776 // Assume other blocks for all unclosed opening braces. 777 for (FormatToken *LBrace : LBraceStack) 778 if (LBrace->is(BK_Unknown)) 779 LBrace->setBlockKind(BK_Block); 780 781 FormatTok = Tokens->setPosition(StoredPosition); 782 } 783 784 template <class T> 785 static inline void hash_combine(std::size_t &seed, const T &v) { 786 std::hash<T> hasher; 787 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 788 } 789 790 size_t UnwrappedLineParser::computePPHash() const { 791 size_t h = 0; 792 for (const auto &i : PPStack) { 793 hash_combine(h, size_t(i.Kind)); 794 hash_combine(h, i.Line); 795 } 796 return h; 797 } 798 799 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace 800 // is not null, subtracts its length (plus the preceding space) when computing 801 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before 802 // running the token annotator on it so that we can restore them afterward. 803 bool UnwrappedLineParser::mightFitOnOneLine( 804 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const { 805 const auto ColumnLimit = Style.ColumnLimit; 806 if (ColumnLimit == 0) 807 return true; 808 809 auto &Tokens = ParsedLine.Tokens; 810 assert(!Tokens.empty()); 811 812 const auto *LastToken = Tokens.back().Tok; 813 assert(LastToken); 814 815 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size()); 816 817 int Index = 0; 818 for (const auto &Token : Tokens) { 819 assert(Token.Tok); 820 auto &SavedToken = SavedTokens[Index++]; 821 SavedToken.Tok = new FormatToken; 822 SavedToken.Tok->copyFrom(*Token.Tok); 823 SavedToken.Children = std::move(Token.Children); 824 } 825 826 AnnotatedLine Line(ParsedLine); 827 assert(Line.Last == LastToken); 828 829 TokenAnnotator Annotator(Style, Keywords); 830 Annotator.annotate(Line); 831 Annotator.calculateFormattingInformation(Line); 832 833 auto Length = LastToken->TotalLength; 834 if (OpeningBrace) { 835 assert(OpeningBrace != Tokens.front().Tok); 836 if (auto Prev = OpeningBrace->Previous; 837 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) { 838 Length -= ColumnLimit; 839 } 840 Length -= OpeningBrace->TokenText.size() + 1; 841 } 842 843 if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) { 844 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace)); 845 Length -= FirstToken->TokenText.size() + 1; 846 } 847 848 Index = 0; 849 for (auto &Token : Tokens) { 850 const auto &SavedToken = SavedTokens[Index++]; 851 Token.Tok->copyFrom(*SavedToken.Tok); 852 Token.Children = std::move(SavedToken.Children); 853 delete SavedToken.Tok; 854 } 855 856 // If these change PPLevel needs to be used for get correct indentation. 857 assert(!Line.InMacroBody); 858 assert(!Line.InPPDirective); 859 return Line.Level * Style.IndentWidth + Length <= ColumnLimit; 860 } 861 862 FormatToken *UnwrappedLineParser::parseBlock( 863 bool MustBeDeclaration, unsigned AddLevels, bool MunchSemi, bool KeepBraces, 864 IfStmtKind *IfKind, bool UnindentWhitesmithsBraces, 865 bool CanContainBracedList, TokenType NextLBracesType) { 866 auto HandleVerilogBlockLabel = [this]() { 867 // ":" name 868 if (Style.isVerilog() && FormatTok->is(tok::colon)) { 869 nextToken(); 870 if (Keywords.isVerilogIdentifier(*FormatTok)) 871 nextToken(); 872 } 873 }; 874 875 // Whether this is a Verilog-specific block that has a special header like a 876 // module. 877 const bool VerilogHierarchy = 878 Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok); 879 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) || 880 (Style.isVerilog() && 881 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) && 882 "'{' or macro block token expected"); 883 FormatToken *Tok = FormatTok; 884 const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment); 885 auto Index = CurrentLines->size(); 886 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 887 FormatTok->setBlockKind(BK_Block); 888 889 // For Whitesmiths mode, jump to the next level prior to skipping over the 890 // braces. 891 if (!VerilogHierarchy && AddLevels > 0 && 892 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 893 ++Line->Level; 894 } 895 896 size_t PPStartHash = computePPHash(); 897 898 const unsigned InitialLevel = Line->Level; 899 if (VerilogHierarchy) { 900 AddLevels += parseVerilogHierarchyHeader(); 901 } else { 902 nextToken(/*LevelDifference=*/AddLevels); 903 HandleVerilogBlockLabel(); 904 } 905 906 // Bail out if there are too many levels. Otherwise, the stack might overflow. 907 if (Line->Level > 300) 908 return nullptr; 909 910 if (MacroBlock && FormatTok->is(tok::l_paren)) 911 parseParens(); 912 913 size_t NbPreprocessorDirectives = 914 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; 915 addUnwrappedLine(); 916 size_t OpeningLineIndex = 917 CurrentLines->empty() 918 ? (UnwrappedLine::kInvalidIndex) 919 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 920 921 // Whitesmiths is weird here. The brace needs to be indented for the namespace 922 // block, but the block itself may not be indented depending on the style 923 // settings. This allows the format to back up one level in those cases. 924 if (UnindentWhitesmithsBraces) 925 --Line->Level; 926 927 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 928 MustBeDeclaration); 929 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths) 930 Line->Level += AddLevels; 931 932 FormatToken *IfLBrace = nullptr; 933 const bool SimpleBlock = 934 parseLevel(Tok, CanContainBracedList, NextLBracesType, IfKind, &IfLBrace); 935 936 if (eof()) 937 return IfLBrace; 938 939 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 940 : !FormatTok->is(tok::r_brace)) { 941 Line->Level = InitialLevel; 942 FormatTok->setBlockKind(BK_Block); 943 return IfLBrace; 944 } 945 946 const bool IsFunctionRBrace = 947 FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace); 948 949 auto RemoveBraces = [=]() mutable { 950 if (!SimpleBlock) 951 return false; 952 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)); 953 assert(FormatTok->is(tok::r_brace)); 954 const bool WrappedOpeningBrace = !Tok->Previous; 955 if (WrappedOpeningBrace && FollowedByComment) 956 return false; 957 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional; 958 if (KeepBraces && !HasRequiredIfBraces) 959 return false; 960 if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) { 961 const FormatToken *Previous = Tokens->getPreviousToken(); 962 assert(Previous); 963 if (Previous->is(tok::r_brace) && !Previous->Optional) 964 return false; 965 } 966 assert(!CurrentLines->empty()); 967 auto &LastLine = CurrentLines->back(); 968 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine)) 969 return false; 970 if (Tok->is(TT_ElseLBrace)) 971 return true; 972 if (WrappedOpeningBrace) { 973 assert(Index > 0); 974 --Index; // The line above the wrapped l_brace. 975 Tok = nullptr; 976 } 977 return mightFitOnOneLine((*CurrentLines)[Index], Tok); 978 }; 979 if (RemoveBraces()) { 980 Tok->MatchingParen = FormatTok; 981 FormatTok->MatchingParen = Tok; 982 } 983 984 size_t PPEndHash = computePPHash(); 985 986 // Munch the closing brace. 987 nextToken(/*LevelDifference=*/-AddLevels); 988 989 // When this is a function block and there is an unnecessary semicolon 990 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of 991 // it later). 992 if (Style.RemoveSemicolon && IsFunctionRBrace) { 993 while (FormatTok->is(tok::semi)) { 994 FormatTok->Optional = true; 995 nextToken(); 996 } 997 } 998 999 HandleVerilogBlockLabel(); 1000 1001 if (MacroBlock && FormatTok->is(tok::l_paren)) 1002 parseParens(); 1003 1004 Line->Level = InitialLevel; 1005 1006 if (FormatTok->is(tok::kw_noexcept)) { 1007 // A noexcept in a requires expression. 1008 nextToken(); 1009 } 1010 1011 if (FormatTok->is(tok::arrow)) { 1012 // Following the } or noexcept we can find a trailing return type arrow 1013 // as part of an implicit conversion constraint. 1014 nextToken(); 1015 parseStructuralElement(); 1016 } 1017 1018 if (MunchSemi && FormatTok->is(tok::semi)) 1019 nextToken(); 1020 1021 if (PPStartHash == PPEndHash) { 1022 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 1023 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 1024 // Update the opening line to add the forward reference as well 1025 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 1026 CurrentLines->size() - 1; 1027 } 1028 } 1029 1030 return IfLBrace; 1031 } 1032 1033 static bool isGoogScope(const UnwrappedLine &Line) { 1034 // FIXME: Closure-library specific stuff should not be hard-coded but be 1035 // configurable. 1036 if (Line.Tokens.size() < 4) 1037 return false; 1038 auto I = Line.Tokens.begin(); 1039 if (I->Tok->TokenText != "goog") 1040 return false; 1041 ++I; 1042 if (I->Tok->isNot(tok::period)) 1043 return false; 1044 ++I; 1045 if (I->Tok->TokenText != "scope") 1046 return false; 1047 ++I; 1048 return I->Tok->is(tok::l_paren); 1049 } 1050 1051 static bool isIIFE(const UnwrappedLine &Line, 1052 const AdditionalKeywords &Keywords) { 1053 // Look for the start of an immediately invoked anonymous function. 1054 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 1055 // This is commonly done in JavaScript to create a new, anonymous scope. 1056 // Example: (function() { ... })() 1057 if (Line.Tokens.size() < 3) 1058 return false; 1059 auto I = Line.Tokens.begin(); 1060 if (I->Tok->isNot(tok::l_paren)) 1061 return false; 1062 ++I; 1063 if (I->Tok->isNot(Keywords.kw_function)) 1064 return false; 1065 ++I; 1066 return I->Tok->is(tok::l_paren); 1067 } 1068 1069 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 1070 const FormatToken &InitialToken) { 1071 tok::TokenKind Kind = InitialToken.Tok.getKind(); 1072 if (InitialToken.is(TT_NamespaceMacro)) 1073 Kind = tok::kw_namespace; 1074 1075 switch (Kind) { 1076 case tok::kw_namespace: 1077 return Style.BraceWrapping.AfterNamespace; 1078 case tok::kw_class: 1079 return Style.BraceWrapping.AfterClass; 1080 case tok::kw_union: 1081 return Style.BraceWrapping.AfterUnion; 1082 case tok::kw_struct: 1083 return Style.BraceWrapping.AfterStruct; 1084 case tok::kw_enum: 1085 return Style.BraceWrapping.AfterEnum; 1086 default: 1087 return false; 1088 } 1089 } 1090 1091 void UnwrappedLineParser::parseChildBlock( 1092 bool CanContainBracedList, clang::format::TokenType NextLBracesType) { 1093 assert(FormatTok->is(tok::l_brace)); 1094 FormatTok->setBlockKind(BK_Block); 1095 const FormatToken *OpeningBrace = FormatTok; 1096 nextToken(); 1097 { 1098 bool SkipIndent = (Style.isJavaScript() && 1099 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 1100 ScopedLineState LineState(*this); 1101 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 1102 /*MustBeDeclaration=*/false); 1103 Line->Level += SkipIndent ? 0 : 1; 1104 parseLevel(OpeningBrace, CanContainBracedList, NextLBracesType); 1105 flushComments(isOnNewLine(*FormatTok)); 1106 Line->Level -= SkipIndent ? 0 : 1; 1107 } 1108 nextToken(); 1109 } 1110 1111 void UnwrappedLineParser::parsePPDirective() { 1112 assert(FormatTok->is(tok::hash) && "'#' expected"); 1113 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 1114 1115 nextToken(); 1116 1117 if (!FormatTok->Tok.getIdentifierInfo()) { 1118 parsePPUnknown(); 1119 return; 1120 } 1121 1122 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 1123 case tok::pp_define: 1124 parsePPDefine(); 1125 return; 1126 case tok::pp_if: 1127 parsePPIf(/*IfDef=*/false); 1128 break; 1129 case tok::pp_ifdef: 1130 case tok::pp_ifndef: 1131 parsePPIf(/*IfDef=*/true); 1132 break; 1133 case tok::pp_else: 1134 case tok::pp_elifdef: 1135 case tok::pp_elifndef: 1136 case tok::pp_elif: 1137 parsePPElse(); 1138 break; 1139 case tok::pp_endif: 1140 parsePPEndIf(); 1141 break; 1142 case tok::pp_pragma: 1143 parsePPPragma(); 1144 break; 1145 default: 1146 parsePPUnknown(); 1147 break; 1148 } 1149 } 1150 1151 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 1152 size_t Line = CurrentLines->size(); 1153 if (CurrentLines == &PreprocessorDirectives) 1154 Line += Lines.size(); 1155 1156 if (Unreachable || 1157 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) { 1158 PPStack.push_back({PP_Unreachable, Line}); 1159 } else { 1160 PPStack.push_back({PP_Conditional, Line}); 1161 } 1162 } 1163 1164 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 1165 ++PPBranchLevel; 1166 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 1167 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 1168 PPLevelBranchIndex.push_back(0); 1169 PPLevelBranchCount.push_back(0); 1170 } 1171 PPChainBranchIndex.push(Unreachable ? -1 : 0); 1172 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 1173 conditionalCompilationCondition(Unreachable || Skip); 1174 } 1175 1176 void UnwrappedLineParser::conditionalCompilationAlternative() { 1177 if (!PPStack.empty()) 1178 PPStack.pop_back(); 1179 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 1180 if (!PPChainBranchIndex.empty()) 1181 ++PPChainBranchIndex.top(); 1182 conditionalCompilationCondition( 1183 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 1184 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 1185 } 1186 1187 void UnwrappedLineParser::conditionalCompilationEnd() { 1188 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 1189 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 1190 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) 1191 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 1192 } 1193 // Guard against #endif's without #if. 1194 if (PPBranchLevel > -1) 1195 --PPBranchLevel; 1196 if (!PPChainBranchIndex.empty()) 1197 PPChainBranchIndex.pop(); 1198 if (!PPStack.empty()) 1199 PPStack.pop_back(); 1200 } 1201 1202 void UnwrappedLineParser::parsePPIf(bool IfDef) { 1203 bool IfNDef = FormatTok->is(tok::pp_ifndef); 1204 nextToken(); 1205 bool Unreachable = false; 1206 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 1207 Unreachable = true; 1208 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 1209 Unreachable = true; 1210 conditionalCompilationStart(Unreachable); 1211 FormatToken *IfCondition = FormatTok; 1212 // If there's a #ifndef on the first line, and the only lines before it are 1213 // comments, it could be an include guard. 1214 bool MaybeIncludeGuard = IfNDef; 1215 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1216 for (auto &Line : Lines) { 1217 if (!Line.Tokens.front().Tok->is(tok::comment)) { 1218 MaybeIncludeGuard = false; 1219 IncludeGuard = IG_Rejected; 1220 break; 1221 } 1222 } 1223 } 1224 --PPBranchLevel; 1225 parsePPUnknown(); 1226 ++PPBranchLevel; 1227 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1228 IncludeGuard = IG_IfNdefed; 1229 IncludeGuardToken = IfCondition; 1230 } 1231 } 1232 1233 void UnwrappedLineParser::parsePPElse() { 1234 // If a potential include guard has an #else, it's not an include guard. 1235 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 1236 IncludeGuard = IG_Rejected; 1237 // Don't crash when there is an #else without an #if. 1238 assert(PPBranchLevel >= -1); 1239 if (PPBranchLevel == -1) 1240 conditionalCompilationStart(/*Unreachable=*/true); 1241 conditionalCompilationAlternative(); 1242 --PPBranchLevel; 1243 parsePPUnknown(); 1244 ++PPBranchLevel; 1245 } 1246 1247 void UnwrappedLineParser::parsePPEndIf() { 1248 conditionalCompilationEnd(); 1249 parsePPUnknown(); 1250 // If the #endif of a potential include guard is the last thing in the file, 1251 // then we found an include guard. 1252 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && 1253 Style.IndentPPDirectives != FormatStyle::PPDIS_None) { 1254 IncludeGuard = IG_Found; 1255 } 1256 } 1257 1258 void UnwrappedLineParser::parsePPDefine() { 1259 nextToken(); 1260 1261 if (!FormatTok->Tok.getIdentifierInfo()) { 1262 IncludeGuard = IG_Rejected; 1263 IncludeGuardToken = nullptr; 1264 parsePPUnknown(); 1265 return; 1266 } 1267 1268 if (IncludeGuard == IG_IfNdefed && 1269 IncludeGuardToken->TokenText == FormatTok->TokenText) { 1270 IncludeGuard = IG_Defined; 1271 IncludeGuardToken = nullptr; 1272 for (auto &Line : Lines) { 1273 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 1274 IncludeGuard = IG_Rejected; 1275 break; 1276 } 1277 } 1278 } 1279 1280 // In the context of a define, even keywords should be treated as normal 1281 // identifiers. Setting the kind to identifier is not enough, because we need 1282 // to treat additional keywords like __except as well, which are already 1283 // identifiers. Setting the identifier info to null interferes with include 1284 // guard processing above, and changes preprocessing nesting. 1285 FormatTok->Tok.setKind(tok::identifier); 1286 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define); 1287 nextToken(); 1288 if (FormatTok->Tok.getKind() == tok::l_paren && 1289 !FormatTok->hasWhitespaceBefore()) { 1290 parseParens(); 1291 } 1292 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1293 Line->Level += PPBranchLevel + 1; 1294 addUnwrappedLine(); 1295 ++Line->Level; 1296 1297 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1); 1298 assert((int)Line->PPLevel >= 0); 1299 Line->InMacroBody = true; 1300 1301 // Errors during a preprocessor directive can only affect the layout of the 1302 // preprocessor directive, and thus we ignore them. An alternative approach 1303 // would be to use the same approach we use on the file level (no 1304 // re-indentation if there was a structural error) within the macro 1305 // definition. 1306 parseFile(); 1307 } 1308 1309 void UnwrappedLineParser::parsePPPragma() { 1310 Line->InPragmaDirective = true; 1311 parsePPUnknown(); 1312 } 1313 1314 void UnwrappedLineParser::parsePPUnknown() { 1315 do { 1316 nextToken(); 1317 } while (!eof()); 1318 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1319 Line->Level += PPBranchLevel + 1; 1320 addUnwrappedLine(); 1321 } 1322 1323 // Here we exclude certain tokens that are not usually the first token in an 1324 // unwrapped line. This is used in attempt to distinguish macro calls without 1325 // trailing semicolons from other constructs split to several lines. 1326 static bool tokenCanStartNewLine(const FormatToken &Tok) { 1327 // Semicolon can be a null-statement, l_square can be a start of a macro or 1328 // a C++11 attribute, but this doesn't seem to be common. 1329 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 1330 Tok.isNot(TT_AttributeSquare) && 1331 // Tokens that can only be used as binary operators and a part of 1332 // overloaded operator names. 1333 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 1334 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 1335 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 1336 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 1337 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 1338 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 1339 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 1340 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 1341 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 1342 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 1343 Tok.isNot(tok::lesslessequal) && 1344 // Colon is used in labels, base class lists, initializer lists, 1345 // range-based for loops, ternary operator, but should never be the 1346 // first token in an unwrapped line. 1347 Tok.isNot(tok::colon) && 1348 // 'noexcept' is a trailing annotation. 1349 Tok.isNot(tok::kw_noexcept); 1350 } 1351 1352 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 1353 const FormatToken *FormatTok) { 1354 // FIXME: This returns true for C/C++ keywords like 'struct'. 1355 return FormatTok->is(tok::identifier) && 1356 (FormatTok->Tok.getIdentifierInfo() == nullptr || 1357 !FormatTok->isOneOf( 1358 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 1359 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 1360 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 1361 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 1362 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 1363 Keywords.kw_instanceof, Keywords.kw_interface, 1364 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from)); 1365 } 1366 1367 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 1368 const FormatToken *FormatTok) { 1369 return FormatTok->Tok.isLiteral() || 1370 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 1371 mustBeJSIdent(Keywords, FormatTok); 1372 } 1373 1374 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 1375 // when encountered after a value (see mustBeJSIdentOrValue). 1376 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 1377 const FormatToken *FormatTok) { 1378 return FormatTok->isOneOf( 1379 tok::kw_return, Keywords.kw_yield, 1380 // conditionals 1381 tok::kw_if, tok::kw_else, 1382 // loops 1383 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 1384 // switch/case 1385 tok::kw_switch, tok::kw_case, 1386 // exceptions 1387 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 1388 // declaration 1389 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 1390 Keywords.kw_async, Keywords.kw_function, 1391 // import/export 1392 Keywords.kw_import, tok::kw_export); 1393 } 1394 1395 // Checks whether a token is a type in K&R C (aka C78). 1396 static bool isC78Type(const FormatToken &Tok) { 1397 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long, 1398 tok::kw_unsigned, tok::kw_float, tok::kw_double, 1399 tok::identifier); 1400 } 1401 1402 // This function checks whether a token starts the first parameter declaration 1403 // in a K&R C (aka C78) function definition, e.g.: 1404 // int f(a, b) 1405 // short a, b; 1406 // { 1407 // return a + b; 1408 // } 1409 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, 1410 const FormatToken *FuncName) { 1411 assert(Tok); 1412 assert(Next); 1413 assert(FuncName); 1414 1415 if (FuncName->isNot(tok::identifier)) 1416 return false; 1417 1418 const FormatToken *Prev = FuncName->Previous; 1419 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev))) 1420 return false; 1421 1422 if (!isC78Type(*Tok) && 1423 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) { 1424 return false; 1425 } 1426 1427 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo()) 1428 return false; 1429 1430 Tok = Tok->Previous; 1431 if (!Tok || Tok->isNot(tok::r_paren)) 1432 return false; 1433 1434 Tok = Tok->Previous; 1435 if (!Tok || Tok->isNot(tok::identifier)) 1436 return false; 1437 1438 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma); 1439 } 1440 1441 bool UnwrappedLineParser::parseModuleImport() { 1442 assert(FormatTok->is(Keywords.kw_import) && "'import' expected"); 1443 1444 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true); 1445 !Token->Tok.getIdentifierInfo() && 1446 !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) { 1447 return false; 1448 } 1449 1450 nextToken(); 1451 while (!eof()) { 1452 if (FormatTok->is(tok::colon)) { 1453 FormatTok->setFinalizedType(TT_ModulePartitionColon); 1454 } 1455 // Handle import <foo/bar.h> as we would an include statement. 1456 else if (FormatTok->is(tok::less)) { 1457 nextToken(); 1458 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) { 1459 // Mark tokens up to the trailing line comments as implicit string 1460 // literals. 1461 if (FormatTok->isNot(tok::comment) && 1462 !FormatTok->TokenText.startswith("//")) { 1463 FormatTok->setFinalizedType(TT_ImplicitStringLiteral); 1464 } 1465 nextToken(); 1466 } 1467 } 1468 if (FormatTok->is(tok::semi)) { 1469 nextToken(); 1470 break; 1471 } 1472 nextToken(); 1473 } 1474 1475 addUnwrappedLine(); 1476 return true; 1477 } 1478 1479 // readTokenWithJavaScriptASI reads the next token and terminates the current 1480 // line if JavaScript Automatic Semicolon Insertion must 1481 // happen between the current token and the next token. 1482 // 1483 // This method is conservative - it cannot cover all edge cases of JavaScript, 1484 // but only aims to correctly handle certain well known cases. It *must not* 1485 // return true in speculative cases. 1486 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 1487 FormatToken *Previous = FormatTok; 1488 readToken(); 1489 FormatToken *Next = FormatTok; 1490 1491 bool IsOnSameLine = 1492 CommentsBeforeNextToken.empty() 1493 ? Next->NewlinesBefore == 0 1494 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 1495 if (IsOnSameLine) 1496 return; 1497 1498 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 1499 bool PreviousStartsTemplateExpr = 1500 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); 1501 if (PreviousMustBeValue || Previous->is(tok::r_paren)) { 1502 // If the line contains an '@' sign, the previous token might be an 1503 // annotation, which can precede another identifier/value. 1504 bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) { 1505 return LineNode.Tok->is(tok::at); 1506 }); 1507 if (HasAt) 1508 return; 1509 } 1510 if (Next->is(tok::exclaim) && PreviousMustBeValue) 1511 return addUnwrappedLine(); 1512 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); 1513 bool NextEndsTemplateExpr = 1514 Next->is(TT_TemplateString) && Next->TokenText.startswith("}"); 1515 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && 1516 (PreviousMustBeValue || 1517 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, 1518 tok::minusminus))) { 1519 return addUnwrappedLine(); 1520 } 1521 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) && 1522 isJSDeclOrStmt(Keywords, Next)) { 1523 return addUnwrappedLine(); 1524 } 1525 } 1526 1527 void UnwrappedLineParser::parseStructuralElement( 1528 bool IsTopLevel, TokenType NextLBracesType, IfStmtKind *IfKind, 1529 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) { 1530 if (Style.Language == FormatStyle::LK_TableGen && 1531 FormatTok->is(tok::pp_include)) { 1532 nextToken(); 1533 if (FormatTok->is(tok::string_literal)) 1534 nextToken(); 1535 addUnwrappedLine(); 1536 return; 1537 } 1538 1539 if (Style.isVerilog()) { 1540 // Skip things that can exist before keywords like 'if' and 'case'. 1541 while (true) { 1542 if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique, 1543 Keywords.kw_unique0)) { 1544 nextToken(); 1545 } else if (FormatTok->is(tok::l_paren) && 1546 Tokens->peekNextToken()->is(tok::star)) { 1547 parseParens(); 1548 } else { 1549 break; 1550 } 1551 } 1552 } 1553 1554 // Tokens that only make sense at the beginning of a line. 1555 switch (FormatTok->Tok.getKind()) { 1556 case tok::kw_asm: 1557 nextToken(); 1558 if (FormatTok->is(tok::l_brace)) { 1559 FormatTok->setFinalizedType(TT_InlineASMBrace); 1560 nextToken(); 1561 while (FormatTok && !eof()) { 1562 if (FormatTok->is(tok::r_brace)) { 1563 FormatTok->setFinalizedType(TT_InlineASMBrace); 1564 nextToken(); 1565 addUnwrappedLine(); 1566 break; 1567 } 1568 FormatTok->Finalized = true; 1569 nextToken(); 1570 } 1571 } 1572 break; 1573 case tok::kw_namespace: 1574 parseNamespace(); 1575 return; 1576 case tok::kw_public: 1577 case tok::kw_protected: 1578 case tok::kw_private: 1579 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 1580 Style.isCSharp()) { 1581 nextToken(); 1582 } else { 1583 parseAccessSpecifier(); 1584 } 1585 return; 1586 case tok::kw_if: { 1587 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1588 // field/method declaration. 1589 break; 1590 } 1591 FormatToken *Tok = parseIfThenElse(IfKind); 1592 if (IfLeftBrace) 1593 *IfLeftBrace = Tok; 1594 return; 1595 } 1596 case tok::kw_for: 1597 case tok::kw_while: 1598 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1599 // field/method declaration. 1600 break; 1601 } 1602 parseForOrWhileLoop(); 1603 return; 1604 case tok::kw_do: 1605 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1606 // field/method declaration. 1607 break; 1608 } 1609 parseDoWhile(); 1610 if (HasDoWhile) 1611 *HasDoWhile = true; 1612 return; 1613 case tok::kw_switch: 1614 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1615 // 'switch: string' field declaration. 1616 break; 1617 } 1618 parseSwitch(); 1619 return; 1620 case tok::kw_default: 1621 // In Verilog default along with other labels are handled in the next loop. 1622 if (Style.isVerilog()) 1623 break; 1624 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1625 // 'default: string' field declaration. 1626 break; 1627 } 1628 nextToken(); 1629 if (FormatTok->is(tok::colon)) { 1630 parseLabel(); 1631 return; 1632 } 1633 // e.g. "default void f() {}" in a Java interface. 1634 break; 1635 case tok::kw_case: 1636 // Proto: there are no switch/case statements. 1637 if (Style.isProto()) { 1638 nextToken(); 1639 return; 1640 } 1641 if (Style.isVerilog()) { 1642 parseBlock(); 1643 addUnwrappedLine(); 1644 return; 1645 } 1646 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1647 // 'case: string' field declaration. 1648 nextToken(); 1649 break; 1650 } 1651 parseCaseLabel(); 1652 return; 1653 case tok::kw_try: 1654 case tok::kw___try: 1655 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1656 // field/method declaration. 1657 break; 1658 } 1659 parseTryCatch(); 1660 return; 1661 case tok::kw_extern: 1662 nextToken(); 1663 if (Style.isVerilog()) { 1664 // In Verilog and extern module declaration looks like a start of module. 1665 // But there is no body and endmodule. So we handle it separately. 1666 if (Keywords.isVerilogHierarchy(*FormatTok)) { 1667 parseVerilogHierarchyHeader(); 1668 return; 1669 } 1670 } else if (FormatTok->is(tok::string_literal)) { 1671 nextToken(); 1672 if (FormatTok->is(tok::l_brace)) { 1673 if (Style.BraceWrapping.AfterExternBlock) 1674 addUnwrappedLine(); 1675 // Either we indent or for backwards compatibility we follow the 1676 // AfterExternBlock style. 1677 unsigned AddLevels = 1678 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) || 1679 (Style.BraceWrapping.AfterExternBlock && 1680 Style.IndentExternBlock == 1681 FormatStyle::IEBS_AfterExternBlock) 1682 ? 1u 1683 : 0u; 1684 parseBlock(/*MustBeDeclaration=*/true, AddLevels); 1685 addUnwrappedLine(); 1686 return; 1687 } 1688 } 1689 break; 1690 case tok::kw_export: 1691 if (Style.isJavaScript()) { 1692 parseJavaScriptEs6ImportExport(); 1693 return; 1694 } 1695 if (Style.isCpp()) { 1696 nextToken(); 1697 if (FormatTok->is(tok::kw_namespace)) { 1698 parseNamespace(); 1699 return; 1700 } 1701 if (FormatTok->is(Keywords.kw_import) && parseModuleImport()) 1702 return; 1703 } 1704 break; 1705 case tok::kw_inline: 1706 nextToken(); 1707 if (FormatTok->is(tok::kw_namespace)) { 1708 parseNamespace(); 1709 return; 1710 } 1711 break; 1712 case tok::identifier: 1713 if (FormatTok->is(TT_ForEachMacro)) { 1714 parseForOrWhileLoop(); 1715 return; 1716 } 1717 if (FormatTok->is(TT_MacroBlockBegin)) { 1718 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 1719 /*MunchSemi=*/false); 1720 return; 1721 } 1722 if (FormatTok->is(Keywords.kw_import)) { 1723 if (Style.isJavaScript()) { 1724 parseJavaScriptEs6ImportExport(); 1725 return; 1726 } 1727 if (Style.Language == FormatStyle::LK_Proto) { 1728 nextToken(); 1729 if (FormatTok->is(tok::kw_public)) 1730 nextToken(); 1731 if (!FormatTok->is(tok::string_literal)) 1732 return; 1733 nextToken(); 1734 if (FormatTok->is(tok::semi)) 1735 nextToken(); 1736 addUnwrappedLine(); 1737 return; 1738 } 1739 if (Style.isCpp() && parseModuleImport()) 1740 return; 1741 } 1742 if (Style.isCpp() && 1743 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, 1744 Keywords.kw_slots, Keywords.kw_qslots)) { 1745 nextToken(); 1746 if (FormatTok->is(tok::colon)) { 1747 nextToken(); 1748 addUnwrappedLine(); 1749 return; 1750 } 1751 } 1752 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 1753 parseStatementMacro(); 1754 return; 1755 } 1756 if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) { 1757 parseNamespace(); 1758 return; 1759 } 1760 // In all other cases, parse the declaration. 1761 break; 1762 default: 1763 break; 1764 } 1765 do { 1766 const FormatToken *Previous = FormatTok->Previous; 1767 switch (FormatTok->Tok.getKind()) { 1768 case tok::at: 1769 nextToken(); 1770 if (FormatTok->is(tok::l_brace)) { 1771 nextToken(); 1772 parseBracedList(); 1773 break; 1774 } else if (Style.Language == FormatStyle::LK_Java && 1775 FormatTok->is(Keywords.kw_interface)) { 1776 nextToken(); 1777 break; 1778 } 1779 switch (FormatTok->Tok.getObjCKeywordID()) { 1780 case tok::objc_public: 1781 case tok::objc_protected: 1782 case tok::objc_package: 1783 case tok::objc_private: 1784 return parseAccessSpecifier(); 1785 case tok::objc_interface: 1786 case tok::objc_implementation: 1787 return parseObjCInterfaceOrImplementation(); 1788 case tok::objc_protocol: 1789 if (parseObjCProtocol()) 1790 return; 1791 break; 1792 case tok::objc_end: 1793 return; // Handled by the caller. 1794 case tok::objc_optional: 1795 case tok::objc_required: 1796 nextToken(); 1797 addUnwrappedLine(); 1798 return; 1799 case tok::objc_autoreleasepool: 1800 nextToken(); 1801 if (FormatTok->is(tok::l_brace)) { 1802 if (Style.BraceWrapping.AfterControlStatement == 1803 FormatStyle::BWACS_Always) { 1804 addUnwrappedLine(); 1805 } 1806 parseBlock(); 1807 } 1808 addUnwrappedLine(); 1809 return; 1810 case tok::objc_synchronized: 1811 nextToken(); 1812 if (FormatTok->is(tok::l_paren)) { 1813 // Skip synchronization object 1814 parseParens(); 1815 } 1816 if (FormatTok->is(tok::l_brace)) { 1817 if (Style.BraceWrapping.AfterControlStatement == 1818 FormatStyle::BWACS_Always) { 1819 addUnwrappedLine(); 1820 } 1821 parseBlock(); 1822 } 1823 addUnwrappedLine(); 1824 return; 1825 case tok::objc_try: 1826 // This branch isn't strictly necessary (the kw_try case below would 1827 // do this too after the tok::at is parsed above). But be explicit. 1828 parseTryCatch(); 1829 return; 1830 default: 1831 break; 1832 } 1833 break; 1834 case tok::kw_requires: { 1835 if (Style.isCpp()) { 1836 bool ParsedClause = parseRequires(); 1837 if (ParsedClause) 1838 return; 1839 } else { 1840 nextToken(); 1841 } 1842 break; 1843 } 1844 case tok::kw_enum: 1845 // Ignore if this is part of "template <enum ...". 1846 if (Previous && Previous->is(tok::less)) { 1847 nextToken(); 1848 break; 1849 } 1850 1851 // parseEnum falls through and does not yet add an unwrapped line as an 1852 // enum definition can start a structural element. 1853 if (!parseEnum()) 1854 break; 1855 // This only applies for C++. 1856 if (!Style.isCpp()) { 1857 addUnwrappedLine(); 1858 return; 1859 } 1860 break; 1861 case tok::kw_typedef: 1862 nextToken(); 1863 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1864 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS, 1865 Keywords.kw_CF_CLOSED_ENUM, 1866 Keywords.kw_NS_CLOSED_ENUM)) { 1867 parseEnum(); 1868 } 1869 break; 1870 case tok::kw_class: 1871 if (Style.isVerilog()) { 1872 parseBlock(); 1873 addUnwrappedLine(); 1874 return; 1875 } 1876 [[fallthrough]]; 1877 case tok::kw_struct: 1878 case tok::kw_union: 1879 if (parseStructLike()) 1880 return; 1881 break; 1882 case tok::period: 1883 nextToken(); 1884 // In Java, classes have an implicit static member "class". 1885 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1886 FormatTok->is(tok::kw_class)) { 1887 nextToken(); 1888 } 1889 if (Style.isJavaScript() && FormatTok && 1890 FormatTok->Tok.getIdentifierInfo()) { 1891 // JavaScript only has pseudo keywords, all keywords are allowed to 1892 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 1893 nextToken(); 1894 } 1895 break; 1896 case tok::semi: 1897 nextToken(); 1898 addUnwrappedLine(); 1899 return; 1900 case tok::r_brace: 1901 addUnwrappedLine(); 1902 return; 1903 case tok::l_paren: { 1904 parseParens(); 1905 // Break the unwrapped line if a K&R C function definition has a parameter 1906 // declaration. 1907 if (!IsTopLevel || !Style.isCpp() || !Previous || eof()) 1908 break; 1909 if (isC78ParameterDecl(FormatTok, 1910 Tokens->peekNextToken(/*SkipComment=*/true), 1911 Previous)) { 1912 addUnwrappedLine(); 1913 return; 1914 } 1915 break; 1916 } 1917 case tok::kw_operator: 1918 nextToken(); 1919 if (FormatTok->isBinaryOperator()) 1920 nextToken(); 1921 break; 1922 case tok::caret: 1923 nextToken(); 1924 if (FormatTok->Tok.isAnyIdentifier() || 1925 FormatTok->isSimpleTypeSpecifier()) { 1926 nextToken(); 1927 } 1928 if (FormatTok->is(tok::l_paren)) 1929 parseParens(); 1930 if (FormatTok->is(tok::l_brace)) 1931 parseChildBlock(); 1932 break; 1933 case tok::l_brace: 1934 if (NextLBracesType != TT_Unknown) 1935 FormatTok->setFinalizedType(NextLBracesType); 1936 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) { 1937 // A block outside of parentheses must be the last part of a 1938 // structural element. 1939 // FIXME: Figure out cases where this is not true, and add projections 1940 // for them (the one we know is missing are lambdas). 1941 if (Style.Language == FormatStyle::LK_Java && 1942 Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) { 1943 // If necessary, we could set the type to something different than 1944 // TT_FunctionLBrace. 1945 if (Style.BraceWrapping.AfterControlStatement == 1946 FormatStyle::BWACS_Always) { 1947 addUnwrappedLine(); 1948 } 1949 } else if (Style.BraceWrapping.AfterFunction) { 1950 addUnwrappedLine(); 1951 } 1952 FormatTok->setFinalizedType(TT_FunctionLBrace); 1953 parseBlock(); 1954 addUnwrappedLine(); 1955 return; 1956 } 1957 // Otherwise this was a braced init list, and the structural 1958 // element continues. 1959 break; 1960 case tok::kw_try: 1961 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1962 // field/method declaration. 1963 nextToken(); 1964 break; 1965 } 1966 // We arrive here when parsing function-try blocks. 1967 if (Style.BraceWrapping.AfterFunction) 1968 addUnwrappedLine(); 1969 parseTryCatch(); 1970 return; 1971 case tok::identifier: { 1972 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) && 1973 Line->MustBeDeclaration) { 1974 addUnwrappedLine(); 1975 parseCSharpGenericTypeConstraint(); 1976 break; 1977 } 1978 if (FormatTok->is(TT_MacroBlockEnd)) { 1979 addUnwrappedLine(); 1980 return; 1981 } 1982 1983 // Function declarations (as opposed to function expressions) are parsed 1984 // on their own unwrapped line by continuing this loop. Function 1985 // expressions (functions that are not on their own line) must not create 1986 // a new unwrapped line, so they are special cased below. 1987 size_t TokenCount = Line->Tokens.size(); 1988 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) && 1989 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is( 1990 Keywords.kw_async)))) { 1991 tryToParseJSFunction(); 1992 break; 1993 } 1994 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) && 1995 FormatTok->is(Keywords.kw_interface)) { 1996 if (Style.isJavaScript()) { 1997 // In JavaScript/TypeScript, "interface" can be used as a standalone 1998 // identifier, e.g. in `var interface = 1;`. If "interface" is 1999 // followed by another identifier, it is very like to be an actual 2000 // interface declaration. 2001 unsigned StoredPosition = Tokens->getPosition(); 2002 FormatToken *Next = Tokens->getNextToken(); 2003 FormatTok = Tokens->setPosition(StoredPosition); 2004 if (!mustBeJSIdent(Keywords, Next)) { 2005 nextToken(); 2006 break; 2007 } 2008 } 2009 parseRecord(); 2010 addUnwrappedLine(); 2011 return; 2012 } 2013 2014 if (Style.isVerilog()) { 2015 if (FormatTok->is(Keywords.kw_table)) { 2016 parseVerilogTable(); 2017 return; 2018 } 2019 if (Keywords.isVerilogBegin(*FormatTok) || 2020 Keywords.isVerilogHierarchy(*FormatTok)) { 2021 parseBlock(); 2022 addUnwrappedLine(); 2023 return; 2024 } 2025 } 2026 2027 if (FormatTok->is(Keywords.kw_interface)) { 2028 if (parseStructLike()) 2029 return; 2030 break; 2031 } 2032 2033 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 2034 parseStatementMacro(); 2035 return; 2036 } 2037 2038 // See if the following token should start a new unwrapped line. 2039 StringRef Text = FormatTok->TokenText; 2040 2041 FormatToken *PreviousToken = FormatTok; 2042 nextToken(); 2043 2044 // JS doesn't have macros, and within classes colons indicate fields, not 2045 // labels. 2046 if (Style.isJavaScript()) 2047 break; 2048 2049 auto OneTokenSoFar = [&]() { 2050 auto I = Line->Tokens.begin(), E = Line->Tokens.end(); 2051 while (I != E && I->Tok->is(tok::comment)) 2052 ++I; 2053 while (I != E && Style.isVerilog() && I->Tok->is(tok::hash)) 2054 ++I; 2055 return I != E && (++I == E); 2056 }; 2057 if (OneTokenSoFar()) { 2058 // In Verilog labels can be any expression, so we don't do them here. 2059 if (!Style.isVerilog() && FormatTok->is(tok::colon) && 2060 !Line->MustBeDeclaration) { 2061 Line->Tokens.begin()->Tok->MustBreakBefore = true; 2062 parseLabel(!Style.IndentGotoLabels); 2063 if (HasLabel) 2064 *HasLabel = true; 2065 return; 2066 } 2067 // Recognize function-like macro usages without trailing semicolon as 2068 // well as free-standing macros like Q_OBJECT. 2069 bool FunctionLike = FormatTok->is(tok::l_paren); 2070 if (FunctionLike) 2071 parseParens(); 2072 2073 bool FollowedByNewline = 2074 CommentsBeforeNextToken.empty() 2075 ? FormatTok->NewlinesBefore > 0 2076 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 2077 2078 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 2079 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) { 2080 if (PreviousToken->isNot(TT_UntouchableMacroFunc)) 2081 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro); 2082 addUnwrappedLine(); 2083 return; 2084 } 2085 } 2086 break; 2087 } 2088 case tok::equal: 2089 if ((Style.isJavaScript() || Style.isCSharp()) && 2090 FormatTok->is(TT_FatArrow)) { 2091 tryToParseChildBlock(); 2092 break; 2093 } 2094 2095 nextToken(); 2096 if (FormatTok->is(tok::l_brace)) { 2097 // Block kind should probably be set to BK_BracedInit for any language. 2098 // C# needs this change to ensure that array initialisers and object 2099 // initialisers are indented the same way. 2100 if (Style.isCSharp()) 2101 FormatTok->setBlockKind(BK_BracedInit); 2102 nextToken(); 2103 parseBracedList(); 2104 } else if (Style.Language == FormatStyle::LK_Proto && 2105 FormatTok->is(tok::less)) { 2106 nextToken(); 2107 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 2108 /*ClosingBraceKind=*/tok::greater); 2109 } 2110 break; 2111 case tok::l_square: 2112 parseSquare(); 2113 break; 2114 case tok::kw_new: 2115 parseNew(); 2116 break; 2117 case tok::kw_case: 2118 // Proto: there are no switch/case statements. 2119 if (Style.isProto()) { 2120 nextToken(); 2121 return; 2122 } 2123 // In Verilog switch is called case. 2124 if (Style.isVerilog()) { 2125 parseBlock(); 2126 addUnwrappedLine(); 2127 return; 2128 } 2129 if (Style.isJavaScript() && Line->MustBeDeclaration) { 2130 // 'case: string' field declaration. 2131 nextToken(); 2132 break; 2133 } 2134 parseCaseLabel(); 2135 break; 2136 case tok::kw_default: 2137 nextToken(); 2138 if (Style.isVerilog()) { 2139 if (FormatTok->is(tok::colon)) { 2140 // The label will be handled in the next iteration. 2141 break; 2142 } 2143 if (FormatTok->is(Keywords.kw_clocking)) { 2144 // A default clocking block. 2145 parseBlock(); 2146 addUnwrappedLine(); 2147 return; 2148 } 2149 parseVerilogCaseLabel(); 2150 return; 2151 } 2152 break; 2153 case tok::colon: 2154 nextToken(); 2155 if (Style.isVerilog()) { 2156 parseVerilogCaseLabel(); 2157 return; 2158 } 2159 break; 2160 default: 2161 nextToken(); 2162 break; 2163 } 2164 } while (!eof()); 2165 } 2166 2167 bool UnwrappedLineParser::tryToParsePropertyAccessor() { 2168 assert(FormatTok->is(tok::l_brace)); 2169 if (!Style.isCSharp()) 2170 return false; 2171 // See if it's a property accessor. 2172 if (FormatTok->Previous->isNot(tok::identifier)) 2173 return false; 2174 2175 // See if we are inside a property accessor. 2176 // 2177 // Record the current tokenPosition so that we can advance and 2178 // reset the current token. `Next` is not set yet so we need 2179 // another way to advance along the token stream. 2180 unsigned int StoredPosition = Tokens->getPosition(); 2181 FormatToken *Tok = Tokens->getNextToken(); 2182 2183 // A trivial property accessor is of the form: 2184 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] } 2185 // Track these as they do not require line breaks to be introduced. 2186 bool HasSpecialAccessor = false; 2187 bool IsTrivialPropertyAccessor = true; 2188 while (!eof()) { 2189 if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private, 2190 tok::kw_protected, Keywords.kw_internal, Keywords.kw_get, 2191 Keywords.kw_init, Keywords.kw_set)) { 2192 if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set)) 2193 HasSpecialAccessor = true; 2194 Tok = Tokens->getNextToken(); 2195 continue; 2196 } 2197 if (Tok->isNot(tok::r_brace)) 2198 IsTrivialPropertyAccessor = false; 2199 break; 2200 } 2201 2202 if (!HasSpecialAccessor) { 2203 Tokens->setPosition(StoredPosition); 2204 return false; 2205 } 2206 2207 // Try to parse the property accessor: 2208 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties 2209 Tokens->setPosition(StoredPosition); 2210 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction) 2211 addUnwrappedLine(); 2212 nextToken(); 2213 do { 2214 switch (FormatTok->Tok.getKind()) { 2215 case tok::r_brace: 2216 nextToken(); 2217 if (FormatTok->is(tok::equal)) { 2218 while (!eof() && FormatTok->isNot(tok::semi)) 2219 nextToken(); 2220 nextToken(); 2221 } 2222 addUnwrappedLine(); 2223 return true; 2224 case tok::l_brace: 2225 ++Line->Level; 2226 parseBlock(/*MustBeDeclaration=*/true); 2227 addUnwrappedLine(); 2228 --Line->Level; 2229 break; 2230 case tok::equal: 2231 if (FormatTok->is(TT_FatArrow)) { 2232 ++Line->Level; 2233 do { 2234 nextToken(); 2235 } while (!eof() && FormatTok->isNot(tok::semi)); 2236 nextToken(); 2237 addUnwrappedLine(); 2238 --Line->Level; 2239 break; 2240 } 2241 nextToken(); 2242 break; 2243 default: 2244 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init, 2245 Keywords.kw_set) && 2246 !IsTrivialPropertyAccessor) { 2247 // Non-trivial get/set needs to be on its own line. 2248 addUnwrappedLine(); 2249 } 2250 nextToken(); 2251 } 2252 } while (!eof()); 2253 2254 // Unreachable for well-formed code (paired '{' and '}'). 2255 return true; 2256 } 2257 2258 bool UnwrappedLineParser::tryToParseLambda() { 2259 assert(FormatTok->is(tok::l_square)); 2260 if (!Style.isCpp()) { 2261 nextToken(); 2262 return false; 2263 } 2264 FormatToken &LSquare = *FormatTok; 2265 if (!tryToParseLambdaIntroducer()) 2266 return false; 2267 2268 bool SeenArrow = false; 2269 bool InTemplateParameterList = false; 2270 2271 while (FormatTok->isNot(tok::l_brace)) { 2272 if (FormatTok->isSimpleTypeSpecifier()) { 2273 nextToken(); 2274 continue; 2275 } 2276 switch (FormatTok->Tok.getKind()) { 2277 case tok::l_brace: 2278 break; 2279 case tok::l_paren: 2280 parseParens(); 2281 break; 2282 case tok::l_square: 2283 parseSquare(); 2284 break; 2285 case tok::less: 2286 assert(FormatTok->Previous); 2287 if (FormatTok->Previous->is(tok::r_square)) 2288 InTemplateParameterList = true; 2289 nextToken(); 2290 break; 2291 case tok::kw_auto: 2292 case tok::kw_class: 2293 case tok::kw_template: 2294 case tok::kw_typename: 2295 case tok::amp: 2296 case tok::star: 2297 case tok::kw_const: 2298 case tok::kw_constexpr: 2299 case tok::kw_consteval: 2300 case tok::comma: 2301 case tok::greater: 2302 case tok::identifier: 2303 case tok::numeric_constant: 2304 case tok::coloncolon: 2305 case tok::kw_mutable: 2306 case tok::kw_noexcept: 2307 case tok::kw_static: 2308 nextToken(); 2309 break; 2310 // Specialization of a template with an integer parameter can contain 2311 // arithmetic, logical, comparison and ternary operators. 2312 // 2313 // FIXME: This also accepts sequences of operators that are not in the scope 2314 // of a template argument list. 2315 // 2316 // In a C++ lambda a template type can only occur after an arrow. We use 2317 // this as an heuristic to distinguish between Objective-C expressions 2318 // followed by an `a->b` expression, such as: 2319 // ([obj func:arg] + a->b) 2320 // Otherwise the code below would parse as a lambda. 2321 // 2322 // FIXME: This heuristic is incorrect for C++20 generic lambdas with 2323 // explicit template lists: []<bool b = true && false>(U &&u){} 2324 case tok::plus: 2325 case tok::minus: 2326 case tok::exclaim: 2327 case tok::tilde: 2328 case tok::slash: 2329 case tok::percent: 2330 case tok::lessless: 2331 case tok::pipe: 2332 case tok::pipepipe: 2333 case tok::ampamp: 2334 case tok::caret: 2335 case tok::equalequal: 2336 case tok::exclaimequal: 2337 case tok::greaterequal: 2338 case tok::lessequal: 2339 case tok::question: 2340 case tok::colon: 2341 case tok::ellipsis: 2342 case tok::kw_true: 2343 case tok::kw_false: 2344 if (SeenArrow || InTemplateParameterList) { 2345 nextToken(); 2346 break; 2347 } 2348 return true; 2349 case tok::arrow: 2350 // This might or might not actually be a lambda arrow (this could be an 2351 // ObjC method invocation followed by a dereferencing arrow). We might 2352 // reset this back to TT_Unknown in TokenAnnotator. 2353 FormatTok->setFinalizedType(TT_LambdaArrow); 2354 SeenArrow = true; 2355 nextToken(); 2356 break; 2357 default: 2358 return true; 2359 } 2360 } 2361 FormatTok->setFinalizedType(TT_LambdaLBrace); 2362 LSquare.setFinalizedType(TT_LambdaLSquare); 2363 parseChildBlock(); 2364 return true; 2365 } 2366 2367 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 2368 const FormatToken *Previous = FormatTok->Previous; 2369 const FormatToken *LeftSquare = FormatTok; 2370 nextToken(); 2371 if (Previous && 2372 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, 2373 tok::kw_delete, tok::l_square) || 2374 LeftSquare->isCppStructuredBinding(Style) || Previous->closesScope() || 2375 Previous->isSimpleTypeSpecifier())) { 2376 return false; 2377 } 2378 if (FormatTok->is(tok::l_square)) 2379 return false; 2380 if (FormatTok->is(tok::r_square)) { 2381 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true); 2382 if (Next->is(tok::greater)) 2383 return false; 2384 } 2385 parseSquare(/*LambdaIntroducer=*/true); 2386 return true; 2387 } 2388 2389 void UnwrappedLineParser::tryToParseJSFunction() { 2390 assert(FormatTok->is(Keywords.kw_function) || 2391 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 2392 if (FormatTok->is(Keywords.kw_async)) 2393 nextToken(); 2394 // Consume "function". 2395 nextToken(); 2396 2397 // Consume * (generator function). Treat it like C++'s overloaded operators. 2398 if (FormatTok->is(tok::star)) { 2399 FormatTok->setFinalizedType(TT_OverloadedOperator); 2400 nextToken(); 2401 } 2402 2403 // Consume function name. 2404 if (FormatTok->is(tok::identifier)) 2405 nextToken(); 2406 2407 if (FormatTok->isNot(tok::l_paren)) 2408 return; 2409 2410 // Parse formal parameter list. 2411 parseParens(); 2412 2413 if (FormatTok->is(tok::colon)) { 2414 // Parse a type definition. 2415 nextToken(); 2416 2417 // Eat the type declaration. For braced inline object types, balance braces, 2418 // otherwise just parse until finding an l_brace for the function body. 2419 if (FormatTok->is(tok::l_brace)) 2420 tryToParseBracedList(); 2421 else 2422 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 2423 nextToken(); 2424 } 2425 2426 if (FormatTok->is(tok::semi)) 2427 return; 2428 2429 parseChildBlock(); 2430 } 2431 2432 bool UnwrappedLineParser::tryToParseBracedList() { 2433 if (FormatTok->is(BK_Unknown)) 2434 calculateBraceTypes(); 2435 assert(FormatTok->isNot(BK_Unknown)); 2436 if (FormatTok->is(BK_Block)) 2437 return false; 2438 nextToken(); 2439 parseBracedList(); 2440 return true; 2441 } 2442 2443 bool UnwrappedLineParser::tryToParseChildBlock() { 2444 assert(Style.isJavaScript() || Style.isCSharp()); 2445 assert(FormatTok->is(TT_FatArrow)); 2446 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow. 2447 // They always start an expression or a child block if followed by a curly 2448 // brace. 2449 nextToken(); 2450 if (FormatTok->isNot(tok::l_brace)) 2451 return false; 2452 parseChildBlock(); 2453 return true; 2454 } 2455 2456 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 2457 bool IsEnum, 2458 tok::TokenKind ClosingBraceKind) { 2459 bool HasError = false; 2460 2461 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 2462 // replace this by using parseAssignmentExpression() inside. 2463 do { 2464 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) && 2465 tryToParseChildBlock()) { 2466 continue; 2467 } 2468 if (Style.isJavaScript()) { 2469 if (FormatTok->is(Keywords.kw_function) || 2470 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 2471 tryToParseJSFunction(); 2472 continue; 2473 } 2474 if (FormatTok->is(tok::l_brace)) { 2475 // Could be a method inside of a braced list `{a() { return 1; }}`. 2476 if (tryToParseBracedList()) 2477 continue; 2478 parseChildBlock(); 2479 } 2480 } 2481 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 2482 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2483 addUnwrappedLine(); 2484 nextToken(); 2485 return !HasError; 2486 } 2487 switch (FormatTok->Tok.getKind()) { 2488 case tok::l_square: 2489 if (Style.isCSharp()) 2490 parseSquare(); 2491 else 2492 tryToParseLambda(); 2493 break; 2494 case tok::l_paren: 2495 parseParens(); 2496 // JavaScript can just have free standing methods and getters/setters in 2497 // object literals. Detect them by a "{" following ")". 2498 if (Style.isJavaScript()) { 2499 if (FormatTok->is(tok::l_brace)) 2500 parseChildBlock(); 2501 break; 2502 } 2503 break; 2504 case tok::l_brace: 2505 // Assume there are no blocks inside a braced init list apart 2506 // from the ones we explicitly parse out (like lambdas). 2507 FormatTok->setBlockKind(BK_BracedInit); 2508 nextToken(); 2509 parseBracedList(); 2510 break; 2511 case tok::less: 2512 if (Style.Language == FormatStyle::LK_Proto || 2513 ClosingBraceKind == tok::greater) { 2514 nextToken(); 2515 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 2516 /*ClosingBraceKind=*/tok::greater); 2517 } else { 2518 nextToken(); 2519 } 2520 break; 2521 case tok::semi: 2522 // JavaScript (or more precisely TypeScript) can have semicolons in braced 2523 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 2524 // used for error recovery if we have otherwise determined that this is 2525 // a braced list. 2526 if (Style.isJavaScript()) { 2527 nextToken(); 2528 break; 2529 } 2530 HasError = true; 2531 if (!ContinueOnSemicolons) 2532 return !HasError; 2533 nextToken(); 2534 break; 2535 case tok::comma: 2536 nextToken(); 2537 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2538 addUnwrappedLine(); 2539 break; 2540 default: 2541 nextToken(); 2542 break; 2543 } 2544 } while (!eof()); 2545 return false; 2546 } 2547 2548 /// \brief Parses a pair of parentheses (and everything between them). 2549 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all 2550 /// double ampersands. This only counts for the current parens scope. 2551 void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { 2552 assert(FormatTok->is(tok::l_paren) && "'(' expected."); 2553 nextToken(); 2554 do { 2555 switch (FormatTok->Tok.getKind()) { 2556 case tok::l_paren: 2557 parseParens(); 2558 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 2559 parseChildBlock(); 2560 break; 2561 case tok::r_paren: 2562 nextToken(); 2563 return; 2564 case tok::r_brace: 2565 // A "}" inside parenthesis is an error if there wasn't a matching "{". 2566 return; 2567 case tok::l_square: 2568 tryToParseLambda(); 2569 break; 2570 case tok::l_brace: 2571 if (!tryToParseBracedList()) 2572 parseChildBlock(); 2573 break; 2574 case tok::at: 2575 nextToken(); 2576 if (FormatTok->is(tok::l_brace)) { 2577 nextToken(); 2578 parseBracedList(); 2579 } 2580 break; 2581 case tok::equal: 2582 if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) 2583 tryToParseChildBlock(); 2584 else 2585 nextToken(); 2586 break; 2587 case tok::kw_class: 2588 if (Style.isJavaScript()) 2589 parseRecord(/*ParseAsExpr=*/true); 2590 else 2591 nextToken(); 2592 break; 2593 case tok::identifier: 2594 if (Style.isJavaScript() && 2595 (FormatTok->is(Keywords.kw_function) || 2596 FormatTok->startsSequence(Keywords.kw_async, 2597 Keywords.kw_function))) { 2598 tryToParseJSFunction(); 2599 } else { 2600 nextToken(); 2601 } 2602 break; 2603 case tok::kw_requires: { 2604 auto RequiresToken = FormatTok; 2605 nextToken(); 2606 parseRequiresExpression(RequiresToken); 2607 break; 2608 } 2609 case tok::ampamp: 2610 if (AmpAmpTokenType != TT_Unknown) 2611 FormatTok->setFinalizedType(AmpAmpTokenType); 2612 [[fallthrough]]; 2613 default: 2614 nextToken(); 2615 break; 2616 } 2617 } while (!eof()); 2618 } 2619 2620 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 2621 if (!LambdaIntroducer) { 2622 assert(FormatTok->is(tok::l_square) && "'[' expected."); 2623 if (tryToParseLambda()) 2624 return; 2625 } 2626 do { 2627 switch (FormatTok->Tok.getKind()) { 2628 case tok::l_paren: 2629 parseParens(); 2630 break; 2631 case tok::r_square: 2632 nextToken(); 2633 return; 2634 case tok::r_brace: 2635 // A "}" inside parenthesis is an error if there wasn't a matching "{". 2636 return; 2637 case tok::l_square: 2638 parseSquare(); 2639 break; 2640 case tok::l_brace: { 2641 if (!tryToParseBracedList()) 2642 parseChildBlock(); 2643 break; 2644 } 2645 case tok::at: 2646 nextToken(); 2647 if (FormatTok->is(tok::l_brace)) { 2648 nextToken(); 2649 parseBracedList(); 2650 } 2651 break; 2652 default: 2653 nextToken(); 2654 break; 2655 } 2656 } while (!eof()); 2657 } 2658 2659 void UnwrappedLineParser::keepAncestorBraces() { 2660 if (!Style.RemoveBracesLLVM) 2661 return; 2662 2663 const int MaxNestingLevels = 2; 2664 const int Size = NestedTooDeep.size(); 2665 if (Size >= MaxNestingLevels) 2666 NestedTooDeep[Size - MaxNestingLevels] = true; 2667 NestedTooDeep.push_back(false); 2668 } 2669 2670 static FormatToken *getLastNonComment(const UnwrappedLine &Line) { 2671 for (const auto &Token : llvm::reverse(Line.Tokens)) 2672 if (Token.Tok->isNot(tok::comment)) 2673 return Token.Tok; 2674 2675 return nullptr; 2676 } 2677 2678 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) { 2679 FormatToken *Tok = nullptr; 2680 2681 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() && 2682 PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) { 2683 Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never 2684 ? getLastNonComment(*Line) 2685 : Line->Tokens.back().Tok; 2686 assert(Tok); 2687 if (Tok->BraceCount < 0) { 2688 assert(Tok->BraceCount == -1); 2689 Tok = nullptr; 2690 } else { 2691 Tok->BraceCount = -1; 2692 } 2693 } 2694 2695 addUnwrappedLine(); 2696 ++Line->Level; 2697 parseStructuralElement(); 2698 2699 if (Tok) { 2700 assert(!Line->InPPDirective); 2701 Tok = nullptr; 2702 for (const auto &L : llvm::reverse(*CurrentLines)) { 2703 if (!L.InPPDirective && getLastNonComment(L)) { 2704 Tok = L.Tokens.back().Tok; 2705 break; 2706 } 2707 } 2708 assert(Tok); 2709 ++Tok->BraceCount; 2710 } 2711 2712 if (CheckEOF && eof()) 2713 addUnwrappedLine(); 2714 2715 --Line->Level; 2716 } 2717 2718 static void markOptionalBraces(FormatToken *LeftBrace) { 2719 if (!LeftBrace) 2720 return; 2721 2722 assert(LeftBrace->is(tok::l_brace)); 2723 2724 FormatToken *RightBrace = LeftBrace->MatchingParen; 2725 if (!RightBrace) { 2726 assert(!LeftBrace->Optional); 2727 return; 2728 } 2729 2730 assert(RightBrace->is(tok::r_brace)); 2731 assert(RightBrace->MatchingParen == LeftBrace); 2732 assert(LeftBrace->Optional == RightBrace->Optional); 2733 2734 LeftBrace->Optional = true; 2735 RightBrace->Optional = true; 2736 } 2737 2738 void UnwrappedLineParser::handleAttributes() { 2739 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`. 2740 if (FormatTok->is(TT_AttributeMacro)) 2741 nextToken(); 2742 if (FormatTok->is(tok::l_square)) 2743 handleCppAttributes(); 2744 } 2745 2746 bool UnwrappedLineParser::handleCppAttributes() { 2747 // Handle [[likely]] / [[unlikely]] attributes. 2748 assert(FormatTok->is(tok::l_square)); 2749 if (!tryToParseSimpleAttribute()) 2750 return false; 2751 parseSquare(); 2752 return true; 2753 } 2754 2755 /// Returns whether \c Tok begins a block. 2756 bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const { 2757 // FIXME: rename the function or make 2758 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work. 2759 return Style.isVerilog() ? Keywords.isVerilogBegin(Tok) 2760 : Tok.is(tok::l_brace); 2761 } 2762 2763 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind, 2764 bool KeepBraces) { 2765 assert(FormatTok->is(tok::kw_if) && "'if' expected"); 2766 nextToken(); 2767 if (FormatTok->is(tok::exclaim)) 2768 nextToken(); 2769 2770 bool KeepIfBraces = true; 2771 if (FormatTok->is(tok::kw_consteval)) { 2772 nextToken(); 2773 } else { 2774 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces; 2775 if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier)) 2776 nextToken(); 2777 if (FormatTok->is(tok::l_paren)) 2778 parseParens(); 2779 } 2780 handleAttributes(); 2781 2782 bool NeedsUnwrappedLine = false; 2783 keepAncestorBraces(); 2784 2785 FormatToken *IfLeftBrace = nullptr; 2786 IfStmtKind IfBlockKind = IfStmtKind::NotIf; 2787 2788 if (isBlockBegin(*FormatTok)) { 2789 FormatTok->setFinalizedType(TT_ControlStatementLBrace); 2790 IfLeftBrace = FormatTok; 2791 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2792 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2793 /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind); 2794 if (Style.BraceWrapping.BeforeElse) 2795 addUnwrappedLine(); 2796 else 2797 NeedsUnwrappedLine = true; 2798 } else { 2799 parseUnbracedBody(); 2800 } 2801 2802 if (Style.RemoveBracesLLVM) { 2803 assert(!NestedTooDeep.empty()); 2804 KeepIfBraces = KeepIfBraces || 2805 (IfLeftBrace && !IfLeftBrace->MatchingParen) || 2806 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly || 2807 IfBlockKind == IfStmtKind::IfElseIf; 2808 } 2809 2810 bool KeepElseBraces = KeepIfBraces; 2811 FormatToken *ElseLeftBrace = nullptr; 2812 IfStmtKind Kind = IfStmtKind::IfOnly; 2813 2814 if (FormatTok->is(tok::kw_else)) { 2815 if (Style.RemoveBracesLLVM) { 2816 NestedTooDeep.back() = false; 2817 Kind = IfStmtKind::IfElse; 2818 } 2819 nextToken(); 2820 handleAttributes(); 2821 if (isBlockBegin(*FormatTok)) { 2822 const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if); 2823 FormatTok->setFinalizedType(TT_ElseLBrace); 2824 ElseLeftBrace = FormatTok; 2825 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2826 IfStmtKind ElseBlockKind = IfStmtKind::NotIf; 2827 FormatToken *IfLBrace = 2828 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2829 /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind); 2830 if (FormatTok->is(tok::kw_else)) { 2831 KeepElseBraces = KeepElseBraces || 2832 ElseBlockKind == IfStmtKind::IfOnly || 2833 ElseBlockKind == IfStmtKind::IfElseIf; 2834 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) { 2835 KeepElseBraces = true; 2836 assert(ElseLeftBrace->MatchingParen); 2837 markOptionalBraces(ElseLeftBrace); 2838 } 2839 addUnwrappedLine(); 2840 } else if (FormatTok->is(tok::kw_if)) { 2841 const FormatToken *Previous = Tokens->getPreviousToken(); 2842 assert(Previous); 2843 const bool IsPrecededByComment = Previous->is(tok::comment); 2844 if (IsPrecededByComment) { 2845 addUnwrappedLine(); 2846 ++Line->Level; 2847 } 2848 bool TooDeep = true; 2849 if (Style.RemoveBracesLLVM) { 2850 Kind = IfStmtKind::IfElseIf; 2851 TooDeep = NestedTooDeep.pop_back_val(); 2852 } 2853 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces); 2854 if (Style.RemoveBracesLLVM) 2855 NestedTooDeep.push_back(TooDeep); 2856 if (IsPrecededByComment) 2857 --Line->Level; 2858 } else { 2859 parseUnbracedBody(/*CheckEOF=*/true); 2860 } 2861 } else { 2862 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse; 2863 if (NeedsUnwrappedLine) 2864 addUnwrappedLine(); 2865 } 2866 2867 if (!Style.RemoveBracesLLVM) 2868 return nullptr; 2869 2870 assert(!NestedTooDeep.empty()); 2871 KeepElseBraces = KeepElseBraces || 2872 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || 2873 NestedTooDeep.back(); 2874 2875 NestedTooDeep.pop_back(); 2876 2877 if (!KeepIfBraces && !KeepElseBraces) { 2878 markOptionalBraces(IfLeftBrace); 2879 markOptionalBraces(ElseLeftBrace); 2880 } else if (IfLeftBrace) { 2881 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen; 2882 if (IfRightBrace) { 2883 assert(IfRightBrace->MatchingParen == IfLeftBrace); 2884 assert(!IfLeftBrace->Optional); 2885 assert(!IfRightBrace->Optional); 2886 IfLeftBrace->MatchingParen = nullptr; 2887 IfRightBrace->MatchingParen = nullptr; 2888 } 2889 } 2890 2891 if (IfKind) 2892 *IfKind = Kind; 2893 2894 return IfLeftBrace; 2895 } 2896 2897 void UnwrappedLineParser::parseTryCatch() { 2898 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 2899 nextToken(); 2900 bool NeedsUnwrappedLine = false; 2901 if (FormatTok->is(tok::colon)) { 2902 // We are in a function try block, what comes is an initializer list. 2903 nextToken(); 2904 2905 // In case identifiers were removed by clang-tidy, what might follow is 2906 // multiple commas in sequence - before the first identifier. 2907 while (FormatTok->is(tok::comma)) 2908 nextToken(); 2909 2910 while (FormatTok->is(tok::identifier)) { 2911 nextToken(); 2912 if (FormatTok->is(tok::l_paren)) 2913 parseParens(); 2914 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) && 2915 FormatTok->is(tok::l_brace)) { 2916 do { 2917 nextToken(); 2918 } while (!FormatTok->is(tok::r_brace)); 2919 nextToken(); 2920 } 2921 2922 // In case identifiers were removed by clang-tidy, what might follow is 2923 // multiple commas in sequence - after the first identifier. 2924 while (FormatTok->is(tok::comma)) 2925 nextToken(); 2926 } 2927 } 2928 // Parse try with resource. 2929 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) 2930 parseParens(); 2931 2932 keepAncestorBraces(); 2933 2934 if (FormatTok->is(tok::l_brace)) { 2935 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2936 parseBlock(); 2937 if (Style.BraceWrapping.BeforeCatch) 2938 addUnwrappedLine(); 2939 else 2940 NeedsUnwrappedLine = true; 2941 } else if (!FormatTok->is(tok::kw_catch)) { 2942 // The C++ standard requires a compound-statement after a try. 2943 // If there's none, we try to assume there's a structuralElement 2944 // and try to continue. 2945 addUnwrappedLine(); 2946 ++Line->Level; 2947 parseStructuralElement(); 2948 --Line->Level; 2949 } 2950 while (true) { 2951 if (FormatTok->is(tok::at)) 2952 nextToken(); 2953 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 2954 tok::kw___finally) || 2955 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 2956 FormatTok->is(Keywords.kw_finally)) || 2957 (FormatTok->isObjCAtKeyword(tok::objc_catch) || 2958 FormatTok->isObjCAtKeyword(tok::objc_finally)))) { 2959 break; 2960 } 2961 nextToken(); 2962 while (FormatTok->isNot(tok::l_brace)) { 2963 if (FormatTok->is(tok::l_paren)) { 2964 parseParens(); 2965 continue; 2966 } 2967 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) { 2968 if (Style.RemoveBracesLLVM) 2969 NestedTooDeep.pop_back(); 2970 return; 2971 } 2972 nextToken(); 2973 } 2974 NeedsUnwrappedLine = false; 2975 Line->MustBeDeclaration = false; 2976 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2977 parseBlock(); 2978 if (Style.BraceWrapping.BeforeCatch) 2979 addUnwrappedLine(); 2980 else 2981 NeedsUnwrappedLine = true; 2982 } 2983 2984 if (Style.RemoveBracesLLVM) 2985 NestedTooDeep.pop_back(); 2986 2987 if (NeedsUnwrappedLine) 2988 addUnwrappedLine(); 2989 } 2990 2991 void UnwrappedLineParser::parseNamespace() { 2992 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) && 2993 "'namespace' expected"); 2994 2995 const FormatToken &InitialToken = *FormatTok; 2996 nextToken(); 2997 if (InitialToken.is(TT_NamespaceMacro)) { 2998 parseParens(); 2999 } else { 3000 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline, 3001 tok::l_square, tok::period, tok::l_paren) || 3002 (Style.isCSharp() && FormatTok->is(tok::kw_union))) { 3003 if (FormatTok->is(tok::l_square)) 3004 parseSquare(); 3005 else if (FormatTok->is(tok::l_paren)) 3006 parseParens(); 3007 else 3008 nextToken(); 3009 } 3010 } 3011 if (FormatTok->is(tok::l_brace)) { 3012 if (ShouldBreakBeforeBrace(Style, InitialToken)) 3013 addUnwrappedLine(); 3014 3015 unsigned AddLevels = 3016 Style.NamespaceIndentation == FormatStyle::NI_All || 3017 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 3018 DeclarationScopeStack.size() > 1) 3019 ? 1u 3020 : 0u; 3021 bool ManageWhitesmithsBraces = 3022 AddLevels == 0u && 3023 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 3024 3025 // If we're in Whitesmiths mode, indent the brace if we're not indenting 3026 // the whole block. 3027 if (ManageWhitesmithsBraces) 3028 ++Line->Level; 3029 3030 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true, 3031 /*KeepBraces=*/true, /*IfKind=*/nullptr, 3032 ManageWhitesmithsBraces); 3033 3034 // Munch the semicolon after a namespace. This is more common than one would 3035 // think. Putting the semicolon into its own line is very ugly. 3036 if (FormatTok->is(tok::semi)) 3037 nextToken(); 3038 3039 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep); 3040 3041 if (ManageWhitesmithsBraces) 3042 --Line->Level; 3043 } 3044 // FIXME: Add error handling. 3045 } 3046 3047 void UnwrappedLineParser::parseNew() { 3048 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 3049 nextToken(); 3050 3051 if (Style.isCSharp()) { 3052 do { 3053 // Handle constructor invocation, e.g. `new(field: value)`. 3054 if (FormatTok->is(tok::l_paren)) 3055 parseParens(); 3056 3057 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`. 3058 if (FormatTok->is(tok::l_brace)) 3059 parseBracedList(); 3060 3061 if (FormatTok->isOneOf(tok::semi, tok::comma)) 3062 return; 3063 3064 nextToken(); 3065 } while (!eof()); 3066 } 3067 3068 if (Style.Language != FormatStyle::LK_Java) 3069 return; 3070 3071 // In Java, we can parse everything up to the parens, which aren't optional. 3072 do { 3073 // There should not be a ;, { or } before the new's open paren. 3074 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 3075 return; 3076 3077 // Consume the parens. 3078 if (FormatTok->is(tok::l_paren)) { 3079 parseParens(); 3080 3081 // If there is a class body of an anonymous class, consume that as child. 3082 if (FormatTok->is(tok::l_brace)) 3083 parseChildBlock(); 3084 return; 3085 } 3086 nextToken(); 3087 } while (!eof()); 3088 } 3089 3090 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) { 3091 keepAncestorBraces(); 3092 3093 if (isBlockBegin(*FormatTok)) { 3094 if (!KeepBraces) 3095 FormatTok->setFinalizedType(TT_ControlStatementLBrace); 3096 FormatToken *LeftBrace = FormatTok; 3097 CompoundStatementIndenter Indenter(this, Style, Line->Level); 3098 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 3099 /*MunchSemi=*/true, KeepBraces); 3100 if (!KeepBraces) { 3101 assert(!NestedTooDeep.empty()); 3102 if (!NestedTooDeep.back()) 3103 markOptionalBraces(LeftBrace); 3104 } 3105 if (WrapRightBrace) 3106 addUnwrappedLine(); 3107 } else { 3108 parseUnbracedBody(); 3109 } 3110 3111 if (!KeepBraces) 3112 NestedTooDeep.pop_back(); 3113 } 3114 3115 void UnwrappedLineParser::parseForOrWhileLoop() { 3116 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 3117 "'for', 'while' or foreach macro expected"); 3118 const bool KeepBraces = !Style.RemoveBracesLLVM || 3119 !FormatTok->isOneOf(tok::kw_for, tok::kw_while); 3120 3121 nextToken(); 3122 // JS' for await ( ... 3123 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await)) 3124 nextToken(); 3125 if (Style.isCpp() && FormatTok->is(tok::kw_co_await)) 3126 nextToken(); 3127 if (FormatTok->is(tok::l_paren)) 3128 parseParens(); 3129 3130 handleAttributes(); 3131 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true); 3132 } 3133 3134 void UnwrappedLineParser::parseDoWhile() { 3135 assert(FormatTok->is(tok::kw_do) && "'do' expected"); 3136 nextToken(); 3137 3138 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile); 3139 3140 // FIXME: Add error handling. 3141 if (!FormatTok->is(tok::kw_while)) { 3142 addUnwrappedLine(); 3143 return; 3144 } 3145 3146 // If in Whitesmiths mode, the line with the while() needs to be indented 3147 // to the same level as the block. 3148 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 3149 ++Line->Level; 3150 3151 nextToken(); 3152 parseStructuralElement(); 3153 } 3154 3155 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { 3156 nextToken(); 3157 unsigned OldLineLevel = Line->Level; 3158 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 3159 --Line->Level; 3160 if (LeftAlignLabel) 3161 Line->Level = 0; 3162 3163 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && 3164 FormatTok->is(tok::l_brace)) { 3165 3166 CompoundStatementIndenter Indenter(this, Line->Level, 3167 Style.BraceWrapping.AfterCaseLabel, 3168 Style.BraceWrapping.IndentBraces); 3169 parseBlock(); 3170 if (FormatTok->is(tok::kw_break)) { 3171 if (Style.BraceWrapping.AfterControlStatement == 3172 FormatStyle::BWACS_Always) { 3173 addUnwrappedLine(); 3174 if (!Style.IndentCaseBlocks && 3175 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 3176 ++Line->Level; 3177 } 3178 } 3179 parseStructuralElement(); 3180 } 3181 addUnwrappedLine(); 3182 } else { 3183 if (FormatTok->is(tok::semi)) 3184 nextToken(); 3185 addUnwrappedLine(); 3186 } 3187 Line->Level = OldLineLevel; 3188 if (FormatTok->isNot(tok::l_brace)) { 3189 parseStructuralElement(); 3190 addUnwrappedLine(); 3191 } 3192 } 3193 3194 void UnwrappedLineParser::parseCaseLabel() { 3195 assert(FormatTok->is(tok::kw_case) && "'case' expected"); 3196 3197 // FIXME: fix handling of complex expressions here. 3198 do { 3199 nextToken(); 3200 } while (!eof() && !FormatTok->is(tok::colon)); 3201 parseLabel(); 3202 } 3203 3204 void UnwrappedLineParser::parseSwitch() { 3205 assert(FormatTok->is(tok::kw_switch) && "'switch' expected"); 3206 nextToken(); 3207 if (FormatTok->is(tok::l_paren)) 3208 parseParens(); 3209 3210 keepAncestorBraces(); 3211 3212 if (FormatTok->is(tok::l_brace)) { 3213 CompoundStatementIndenter Indenter(this, Style, Line->Level); 3214 parseBlock(); 3215 addUnwrappedLine(); 3216 } else { 3217 addUnwrappedLine(); 3218 ++Line->Level; 3219 parseStructuralElement(); 3220 --Line->Level; 3221 } 3222 3223 if (Style.RemoveBracesLLVM) 3224 NestedTooDeep.pop_back(); 3225 } 3226 3227 // Operators that can follow a C variable. 3228 static bool isCOperatorFollowingVar(tok::TokenKind kind) { 3229 switch (kind) { 3230 case tok::ampamp: 3231 case tok::ampequal: 3232 case tok::arrow: 3233 case tok::caret: 3234 case tok::caretequal: 3235 case tok::comma: 3236 case tok::ellipsis: 3237 case tok::equal: 3238 case tok::equalequal: 3239 case tok::exclaim: 3240 case tok::exclaimequal: 3241 case tok::greater: 3242 case tok::greaterequal: 3243 case tok::greatergreater: 3244 case tok::greatergreaterequal: 3245 case tok::l_paren: 3246 case tok::l_square: 3247 case tok::less: 3248 case tok::lessequal: 3249 case tok::lessless: 3250 case tok::lesslessequal: 3251 case tok::minus: 3252 case tok::minusequal: 3253 case tok::minusminus: 3254 case tok::percent: 3255 case tok::percentequal: 3256 case tok::period: 3257 case tok::pipe: 3258 case tok::pipeequal: 3259 case tok::pipepipe: 3260 case tok::plus: 3261 case tok::plusequal: 3262 case tok::plusplus: 3263 case tok::question: 3264 case tok::r_brace: 3265 case tok::r_paren: 3266 case tok::r_square: 3267 case tok::semi: 3268 case tok::slash: 3269 case tok::slashequal: 3270 case tok::star: 3271 case tok::starequal: 3272 return true; 3273 default: 3274 return false; 3275 } 3276 } 3277 3278 void UnwrappedLineParser::parseAccessSpecifier() { 3279 FormatToken *AccessSpecifierCandidate = FormatTok; 3280 nextToken(); 3281 // Understand Qt's slots. 3282 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 3283 nextToken(); 3284 // Otherwise, we don't know what it is, and we'd better keep the next token. 3285 if (FormatTok->is(tok::colon)) { 3286 nextToken(); 3287 addUnwrappedLine(); 3288 } else if (!FormatTok->is(tok::coloncolon) && 3289 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) { 3290 // Not a variable name nor namespace name. 3291 addUnwrappedLine(); 3292 } else if (AccessSpecifierCandidate) { 3293 // Consider the access specifier to be a C identifier. 3294 AccessSpecifierCandidate->Tok.setKind(tok::identifier); 3295 } 3296 } 3297 3298 /// \brief Parses a requires, decides if it is a clause or an expression. 3299 /// \pre The current token has to be the requires keyword. 3300 /// \returns true if it parsed a clause. 3301 bool clang::format::UnwrappedLineParser::parseRequires() { 3302 assert(FormatTok->is(tok::kw_requires) && "'requires' expected"); 3303 auto RequiresToken = FormatTok; 3304 3305 // We try to guess if it is a requires clause, or a requires expression. For 3306 // that we first consume the keyword and check the next token. 3307 nextToken(); 3308 3309 switch (FormatTok->Tok.getKind()) { 3310 case tok::l_brace: 3311 // This can only be an expression, never a clause. 3312 parseRequiresExpression(RequiresToken); 3313 return false; 3314 case tok::l_paren: 3315 // Clauses and expression can start with a paren, it's unclear what we have. 3316 break; 3317 default: 3318 // All other tokens can only be a clause. 3319 parseRequiresClause(RequiresToken); 3320 return true; 3321 } 3322 3323 // Looking forward we would have to decide if there are function declaration 3324 // like arguments to the requires expression: 3325 // requires (T t) { 3326 // Or there is a constraint expression for the requires clause: 3327 // requires (C<T> && ... 3328 3329 // But first let's look behind. 3330 auto *PreviousNonComment = RequiresToken->getPreviousNonComment(); 3331 3332 if (!PreviousNonComment || 3333 PreviousNonComment->is(TT_RequiresExpressionLBrace)) { 3334 // If there is no token, or an expression left brace, we are a requires 3335 // clause within a requires expression. 3336 parseRequiresClause(RequiresToken); 3337 return true; 3338 } 3339 3340 switch (PreviousNonComment->Tok.getKind()) { 3341 case tok::greater: 3342 case tok::r_paren: 3343 case tok::kw_noexcept: 3344 case tok::kw_const: 3345 // This is a requires clause. 3346 parseRequiresClause(RequiresToken); 3347 return true; 3348 case tok::amp: 3349 case tok::ampamp: { 3350 // This can be either: 3351 // if (... && requires (T t) ...) 3352 // Or 3353 // void member(...) && requires (C<T> ... 3354 // We check the one token before that for a const: 3355 // void member(...) const && requires (C<T> ... 3356 auto PrevPrev = PreviousNonComment->getPreviousNonComment(); 3357 if (PrevPrev && PrevPrev->is(tok::kw_const)) { 3358 parseRequiresClause(RequiresToken); 3359 return true; 3360 } 3361 break; 3362 } 3363 default: 3364 if (PreviousNonComment->isTypeOrIdentifier()) { 3365 // This is a requires clause. 3366 parseRequiresClause(RequiresToken); 3367 return true; 3368 } 3369 // It's an expression. 3370 parseRequiresExpression(RequiresToken); 3371 return false; 3372 } 3373 3374 // Now we look forward and try to check if the paren content is a parameter 3375 // list. The parameters can be cv-qualified and contain references or 3376 // pointers. 3377 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds 3378 // of stuff: typename, const, *, &, &&, ::, identifiers. 3379 3380 unsigned StoredPosition = Tokens->getPosition(); 3381 FormatToken *NextToken = Tokens->getNextToken(); 3382 int Lookahead = 0; 3383 auto PeekNext = [&Lookahead, &NextToken, this] { 3384 ++Lookahead; 3385 NextToken = Tokens->getNextToken(); 3386 }; 3387 3388 bool FoundType = false; 3389 bool LastWasColonColon = false; 3390 int OpenAngles = 0; 3391 3392 for (; Lookahead < 50; PeekNext()) { 3393 switch (NextToken->Tok.getKind()) { 3394 case tok::kw_volatile: 3395 case tok::kw_const: 3396 case tok::comma: 3397 FormatTok = Tokens->setPosition(StoredPosition); 3398 parseRequiresExpression(RequiresToken); 3399 return false; 3400 case tok::r_paren: 3401 case tok::pipepipe: 3402 FormatTok = Tokens->setPosition(StoredPosition); 3403 parseRequiresClause(RequiresToken); 3404 return true; 3405 case tok::eof: 3406 // Break out of the loop. 3407 Lookahead = 50; 3408 break; 3409 case tok::coloncolon: 3410 LastWasColonColon = true; 3411 break; 3412 case tok::identifier: 3413 if (FoundType && !LastWasColonColon && OpenAngles == 0) { 3414 FormatTok = Tokens->setPosition(StoredPosition); 3415 parseRequiresExpression(RequiresToken); 3416 return false; 3417 } 3418 FoundType = true; 3419 LastWasColonColon = false; 3420 break; 3421 case tok::less: 3422 ++OpenAngles; 3423 break; 3424 case tok::greater: 3425 --OpenAngles; 3426 break; 3427 default: 3428 if (NextToken->isSimpleTypeSpecifier()) { 3429 FormatTok = Tokens->setPosition(StoredPosition); 3430 parseRequiresExpression(RequiresToken); 3431 return false; 3432 } 3433 break; 3434 } 3435 } 3436 // This seems to be a complicated expression, just assume it's a clause. 3437 FormatTok = Tokens->setPosition(StoredPosition); 3438 parseRequiresClause(RequiresToken); 3439 return true; 3440 } 3441 3442 /// \brief Parses a requires clause. 3443 /// \param RequiresToken The requires keyword token, which starts this clause. 3444 /// \pre We need to be on the next token after the requires keyword. 3445 /// \sa parseRequiresExpression 3446 /// 3447 /// Returns if it either has finished parsing the clause, or it detects, that 3448 /// the clause is incorrect. 3449 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) { 3450 assert(FormatTok->getPreviousNonComment() == RequiresToken); 3451 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); 3452 3453 // If there is no previous token, we are within a requires expression, 3454 // otherwise we will always have the template or function declaration in front 3455 // of it. 3456 bool InRequiresExpression = 3457 !RequiresToken->Previous || 3458 RequiresToken->Previous->is(TT_RequiresExpressionLBrace); 3459 3460 RequiresToken->setFinalizedType(InRequiresExpression 3461 ? TT_RequiresClauseInARequiresExpression 3462 : TT_RequiresClause); 3463 3464 // NOTE: parseConstraintExpression is only ever called from this function. 3465 // It could be inlined into here. 3466 parseConstraintExpression(); 3467 3468 if (!InRequiresExpression) 3469 FormatTok->Previous->ClosesRequiresClause = true; 3470 } 3471 3472 /// \brief Parses a requires expression. 3473 /// \param RequiresToken The requires keyword token, which starts this clause. 3474 /// \pre We need to be on the next token after the requires keyword. 3475 /// \sa parseRequiresClause 3476 /// 3477 /// Returns if it either has finished parsing the expression, or it detects, 3478 /// that the expression is incorrect. 3479 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) { 3480 assert(FormatTok->getPreviousNonComment() == RequiresToken); 3481 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); 3482 3483 RequiresToken->setFinalizedType(TT_RequiresExpression); 3484 3485 if (FormatTok->is(tok::l_paren)) { 3486 FormatTok->setFinalizedType(TT_RequiresExpressionLParen); 3487 parseParens(); 3488 } 3489 3490 if (FormatTok->is(tok::l_brace)) { 3491 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace); 3492 parseChildBlock(/*CanContainBracedList=*/false, 3493 /*NextLBracesType=*/TT_CompoundRequirementLBrace); 3494 } 3495 } 3496 3497 /// \brief Parses a constraint expression. 3498 /// 3499 /// This is the body of a requires clause. It returns, when the parsing is 3500 /// complete, or the expression is incorrect. 3501 void UnwrappedLineParser::parseConstraintExpression() { 3502 // The special handling for lambdas is needed since tryToParseLambda() eats a 3503 // token and if a requires expression is the last part of a requires clause 3504 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is 3505 // not set on the correct token. Thus we need to be aware if we even expect a 3506 // lambda to be possible. 3507 // template <typename T> requires requires { ... } [[nodiscard]] ...; 3508 bool LambdaNextTimeAllowed = true; 3509 do { 3510 bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false); 3511 3512 switch (FormatTok->Tok.getKind()) { 3513 case tok::kw_requires: { 3514 auto RequiresToken = FormatTok; 3515 nextToken(); 3516 parseRequiresExpression(RequiresToken); 3517 break; 3518 } 3519 3520 case tok::l_paren: 3521 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator); 3522 break; 3523 3524 case tok::l_square: 3525 if (!LambdaThisTimeAllowed || !tryToParseLambda()) 3526 return; 3527 break; 3528 3529 case tok::kw_const: 3530 case tok::semi: 3531 case tok::kw_class: 3532 case tok::kw_struct: 3533 case tok::kw_union: 3534 return; 3535 3536 case tok::l_brace: 3537 // Potential function body. 3538 return; 3539 3540 case tok::ampamp: 3541 case tok::pipepipe: 3542 FormatTok->setFinalizedType(TT_BinaryOperator); 3543 nextToken(); 3544 LambdaNextTimeAllowed = true; 3545 break; 3546 3547 case tok::comma: 3548 case tok::comment: 3549 LambdaNextTimeAllowed = LambdaThisTimeAllowed; 3550 nextToken(); 3551 break; 3552 3553 case tok::kw_sizeof: 3554 case tok::greater: 3555 case tok::greaterequal: 3556 case tok::greatergreater: 3557 case tok::less: 3558 case tok::lessequal: 3559 case tok::lessless: 3560 case tok::equalequal: 3561 case tok::exclaim: 3562 case tok::exclaimequal: 3563 case tok::plus: 3564 case tok::minus: 3565 case tok::star: 3566 case tok::slash: 3567 LambdaNextTimeAllowed = true; 3568 // Just eat them. 3569 nextToken(); 3570 break; 3571 3572 case tok::numeric_constant: 3573 case tok::coloncolon: 3574 case tok::kw_true: 3575 case tok::kw_false: 3576 // Just eat them. 3577 nextToken(); 3578 break; 3579 3580 case tok::kw_static_cast: 3581 case tok::kw_const_cast: 3582 case tok::kw_reinterpret_cast: 3583 case tok::kw_dynamic_cast: 3584 nextToken(); 3585 if (!FormatTok->is(tok::less)) 3586 return; 3587 3588 nextToken(); 3589 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 3590 /*ClosingBraceKind=*/tok::greater); 3591 break; 3592 3593 case tok::kw_bool: 3594 // bool is only allowed if it is directly followed by a paren for a cast: 3595 // concept C = bool(...); 3596 // and bool is the only type, all other types as cast must be inside a 3597 // cast to bool an thus are handled by the other cases. 3598 if (Tokens->peekNextToken()->isNot(tok::l_paren)) 3599 return; 3600 nextToken(); 3601 parseParens(); 3602 break; 3603 3604 default: 3605 if (!FormatTok->Tok.getIdentifierInfo()) { 3606 // Identifiers are part of the default case, we check for more then 3607 // tok::identifier to handle builtin type traits. 3608 return; 3609 } 3610 3611 // We need to differentiate identifiers for a template deduction guide, 3612 // variables, or function return types (the constraint expression has 3613 // ended before that), and basically all other cases. But it's easier to 3614 // check the other way around. 3615 assert(FormatTok->Previous); 3616 switch (FormatTok->Previous->Tok.getKind()) { 3617 case tok::coloncolon: // Nested identifier. 3618 case tok::ampamp: // Start of a function or variable for the 3619 case tok::pipepipe: // constraint expression. (binary) 3620 case tok::exclaim: // The same as above, but unary. 3621 case tok::kw_requires: // Initial identifier of a requires clause. 3622 case tok::equal: // Initial identifier of a concept declaration. 3623 break; 3624 default: 3625 return; 3626 } 3627 3628 // Read identifier with optional template declaration. 3629 nextToken(); 3630 if (FormatTok->is(tok::less)) { 3631 nextToken(); 3632 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 3633 /*ClosingBraceKind=*/tok::greater); 3634 } 3635 break; 3636 } 3637 } while (!eof()); 3638 } 3639 3640 bool UnwrappedLineParser::parseEnum() { 3641 const FormatToken &InitialToken = *FormatTok; 3642 3643 // Won't be 'enum' for NS_ENUMs. 3644 if (FormatTok->is(tok::kw_enum)) 3645 nextToken(); 3646 3647 // In TypeScript, "enum" can also be used as property name, e.g. in interface 3648 // declarations. An "enum" keyword followed by a colon would be a syntax 3649 // error and thus assume it is just an identifier. 3650 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question)) 3651 return false; 3652 3653 // In protobuf, "enum" can be used as a field name. 3654 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) 3655 return false; 3656 3657 // Eat up enum class ... 3658 if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct)) 3659 nextToken(); 3660 3661 while (FormatTok->Tok.getIdentifierInfo() || 3662 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 3663 tok::greater, tok::comma, tok::question, 3664 tok::l_square, tok::r_square)) { 3665 nextToken(); 3666 // We can have macros or attributes in between 'enum' and the enum name. 3667 if (FormatTok->is(tok::l_paren)) 3668 parseParens(); 3669 if (FormatTok->is(TT_AttributeSquare)) { 3670 parseSquare(); 3671 // Consume the closing TT_AttributeSquare. 3672 if (FormatTok->Next && FormatTok->is(TT_AttributeSquare)) 3673 nextToken(); 3674 } 3675 if (FormatTok->is(tok::identifier)) { 3676 nextToken(); 3677 // If there are two identifiers in a row, this is likely an elaborate 3678 // return type. In Java, this can be "implements", etc. 3679 if (Style.isCpp() && FormatTok->is(tok::identifier)) 3680 return false; 3681 } 3682 } 3683 3684 // Just a declaration or something is wrong. 3685 if (FormatTok->isNot(tok::l_brace)) 3686 return true; 3687 FormatTok->setFinalizedType(TT_EnumLBrace); 3688 FormatTok->setBlockKind(BK_Block); 3689 3690 if (Style.Language == FormatStyle::LK_Java) { 3691 // Java enums are different. 3692 parseJavaEnumBody(); 3693 return true; 3694 } 3695 if (Style.Language == FormatStyle::LK_Proto) { 3696 parseBlock(/*MustBeDeclaration=*/true); 3697 return true; 3698 } 3699 3700 if (!Style.AllowShortEnumsOnASingleLine && 3701 ShouldBreakBeforeBrace(Style, InitialToken)) { 3702 addUnwrappedLine(); 3703 } 3704 // Parse enum body. 3705 nextToken(); 3706 if (!Style.AllowShortEnumsOnASingleLine) { 3707 addUnwrappedLine(); 3708 Line->Level += 1; 3709 } 3710 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true, 3711 /*IsEnum=*/true); 3712 if (!Style.AllowShortEnumsOnASingleLine) 3713 Line->Level -= 1; 3714 if (HasError) { 3715 if (FormatTok->is(tok::semi)) 3716 nextToken(); 3717 addUnwrappedLine(); 3718 } 3719 return true; 3720 3721 // There is no addUnwrappedLine() here so that we fall through to parsing a 3722 // structural element afterwards. Thus, in "enum A {} n, m;", 3723 // "} n, m;" will end up in one unwrapped line. 3724 } 3725 3726 bool UnwrappedLineParser::parseStructLike() { 3727 // parseRecord falls through and does not yet add an unwrapped line as a 3728 // record declaration or definition can start a structural element. 3729 parseRecord(); 3730 // This does not apply to Java, JavaScript and C#. 3731 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 3732 Style.isCSharp()) { 3733 if (FormatTok->is(tok::semi)) 3734 nextToken(); 3735 addUnwrappedLine(); 3736 return true; 3737 } 3738 return false; 3739 } 3740 3741 namespace { 3742 // A class used to set and restore the Token position when peeking 3743 // ahead in the token source. 3744 class ScopedTokenPosition { 3745 unsigned StoredPosition; 3746 FormatTokenSource *Tokens; 3747 3748 public: 3749 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) { 3750 assert(Tokens && "Tokens expected to not be null"); 3751 StoredPosition = Tokens->getPosition(); 3752 } 3753 3754 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); } 3755 }; 3756 } // namespace 3757 3758 // Look to see if we have [[ by looking ahead, if 3759 // its not then rewind to the original position. 3760 bool UnwrappedLineParser::tryToParseSimpleAttribute() { 3761 ScopedTokenPosition AutoPosition(Tokens); 3762 FormatToken *Tok = Tokens->getNextToken(); 3763 // We already read the first [ check for the second. 3764 if (!Tok->is(tok::l_square)) 3765 return false; 3766 // Double check that the attribute is just something 3767 // fairly simple. 3768 while (Tok->isNot(tok::eof)) { 3769 if (Tok->is(tok::r_square)) 3770 break; 3771 Tok = Tokens->getNextToken(); 3772 } 3773 if (Tok->is(tok::eof)) 3774 return false; 3775 Tok = Tokens->getNextToken(); 3776 if (!Tok->is(tok::r_square)) 3777 return false; 3778 Tok = Tokens->getNextToken(); 3779 if (Tok->is(tok::semi)) 3780 return false; 3781 return true; 3782 } 3783 3784 void UnwrappedLineParser::parseJavaEnumBody() { 3785 assert(FormatTok->is(tok::l_brace)); 3786 const FormatToken *OpeningBrace = FormatTok; 3787 3788 // Determine whether the enum is simple, i.e. does not have a semicolon or 3789 // constants with class bodies. Simple enums can be formatted like braced 3790 // lists, contracted to a single line, etc. 3791 unsigned StoredPosition = Tokens->getPosition(); 3792 bool IsSimple = true; 3793 FormatToken *Tok = Tokens->getNextToken(); 3794 while (!Tok->is(tok::eof)) { 3795 if (Tok->is(tok::r_brace)) 3796 break; 3797 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 3798 IsSimple = false; 3799 break; 3800 } 3801 // FIXME: This will also mark enums with braces in the arguments to enum 3802 // constants as "not simple". This is probably fine in practice, though. 3803 Tok = Tokens->getNextToken(); 3804 } 3805 FormatTok = Tokens->setPosition(StoredPosition); 3806 3807 if (IsSimple) { 3808 nextToken(); 3809 parseBracedList(); 3810 addUnwrappedLine(); 3811 return; 3812 } 3813 3814 // Parse the body of a more complex enum. 3815 // First add a line for everything up to the "{". 3816 nextToken(); 3817 addUnwrappedLine(); 3818 ++Line->Level; 3819 3820 // Parse the enum constants. 3821 while (!eof()) { 3822 if (FormatTok->is(tok::l_brace)) { 3823 // Parse the constant's class body. 3824 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u, 3825 /*MunchSemi=*/false); 3826 } else if (FormatTok->is(tok::l_paren)) { 3827 parseParens(); 3828 } else if (FormatTok->is(tok::comma)) { 3829 nextToken(); 3830 addUnwrappedLine(); 3831 } else if (FormatTok->is(tok::semi)) { 3832 nextToken(); 3833 addUnwrappedLine(); 3834 break; 3835 } else if (FormatTok->is(tok::r_brace)) { 3836 addUnwrappedLine(); 3837 break; 3838 } else { 3839 nextToken(); 3840 } 3841 } 3842 3843 // Parse the class body after the enum's ";" if any. 3844 parseLevel(OpeningBrace); 3845 nextToken(); 3846 --Line->Level; 3847 addUnwrappedLine(); 3848 } 3849 3850 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 3851 const FormatToken &InitialToken = *FormatTok; 3852 nextToken(); 3853 3854 // The actual identifier can be a nested name specifier, and in macros 3855 // it is often token-pasted. 3856 // An [[attribute]] can be before the identifier. 3857 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 3858 tok::kw___attribute, tok::kw___declspec, 3859 tok::kw_alignas, tok::l_square) || 3860 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 3861 FormatTok->isOneOf(tok::period, tok::comma))) { 3862 if (Style.isJavaScript() && 3863 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 3864 // JavaScript/TypeScript supports inline object types in 3865 // extends/implements positions: 3866 // class Foo implements {bar: number} { } 3867 nextToken(); 3868 if (FormatTok->is(tok::l_brace)) { 3869 tryToParseBracedList(); 3870 continue; 3871 } 3872 } 3873 if (FormatTok->is(tok::l_square) && handleCppAttributes()) 3874 continue; 3875 bool IsNonMacroIdentifier = 3876 FormatTok->is(tok::identifier) && 3877 FormatTok->TokenText != FormatTok->TokenText.upper(); 3878 nextToken(); 3879 // We can have macros in between 'class' and the class name. 3880 if (!IsNonMacroIdentifier && FormatTok->is(tok::l_paren)) 3881 parseParens(); 3882 } 3883 3884 // Note that parsing away template declarations here leads to incorrectly 3885 // accepting function declarations as record declarations. 3886 // In general, we cannot solve this problem. Consider: 3887 // class A<int> B() {} 3888 // which can be a function definition or a class definition when B() is a 3889 // macro. If we find enough real-world cases where this is a problem, we 3890 // can parse for the 'template' keyword in the beginning of the statement, 3891 // and thus rule out the record production in case there is no template 3892 // (this would still leave us with an ambiguity between template function 3893 // and class declarations). 3894 if (FormatTok->isOneOf(tok::colon, tok::less)) { 3895 do { 3896 if (FormatTok->is(tok::l_brace)) { 3897 calculateBraceTypes(/*ExpectClassBody=*/true); 3898 if (!tryToParseBracedList()) 3899 break; 3900 } 3901 if (FormatTok->is(tok::l_square)) { 3902 FormatToken *Previous = FormatTok->Previous; 3903 if (!Previous || 3904 !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) { 3905 // Don't try parsing a lambda if we had a closing parenthesis before, 3906 // it was probably a pointer to an array: int (*)[]. 3907 if (!tryToParseLambda()) 3908 break; 3909 } else { 3910 parseSquare(); 3911 continue; 3912 } 3913 } 3914 if (FormatTok->is(tok::semi)) 3915 return; 3916 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) { 3917 addUnwrappedLine(); 3918 nextToken(); 3919 parseCSharpGenericTypeConstraint(); 3920 break; 3921 } 3922 nextToken(); 3923 } while (!eof()); 3924 } 3925 3926 auto GetBraceType = [](const FormatToken &RecordTok) { 3927 switch (RecordTok.Tok.getKind()) { 3928 case tok::kw_class: 3929 return TT_ClassLBrace; 3930 case tok::kw_struct: 3931 return TT_StructLBrace; 3932 case tok::kw_union: 3933 return TT_UnionLBrace; 3934 default: 3935 // Useful for e.g. interface. 3936 return TT_RecordLBrace; 3937 } 3938 }; 3939 if (FormatTok->is(tok::l_brace)) { 3940 FormatTok->setFinalizedType(GetBraceType(InitialToken)); 3941 if (ParseAsExpr) { 3942 parseChildBlock(); 3943 } else { 3944 if (ShouldBreakBeforeBrace(Style, InitialToken)) 3945 addUnwrappedLine(); 3946 3947 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u; 3948 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false); 3949 } 3950 } 3951 // There is no addUnwrappedLine() here so that we fall through to parsing a 3952 // structural element afterwards. Thus, in "class A {} n, m;", 3953 // "} n, m;" will end up in one unwrapped line. 3954 } 3955 3956 void UnwrappedLineParser::parseObjCMethod() { 3957 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) && 3958 "'(' or identifier expected."); 3959 do { 3960 if (FormatTok->is(tok::semi)) { 3961 nextToken(); 3962 addUnwrappedLine(); 3963 return; 3964 } else if (FormatTok->is(tok::l_brace)) { 3965 if (Style.BraceWrapping.AfterFunction) 3966 addUnwrappedLine(); 3967 parseBlock(); 3968 addUnwrappedLine(); 3969 return; 3970 } else { 3971 nextToken(); 3972 } 3973 } while (!eof()); 3974 } 3975 3976 void UnwrappedLineParser::parseObjCProtocolList() { 3977 assert(FormatTok->is(tok::less) && "'<' expected."); 3978 do { 3979 nextToken(); 3980 // Early exit in case someone forgot a close angle. 3981 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 3982 FormatTok->isObjCAtKeyword(tok::objc_end)) { 3983 return; 3984 } 3985 } while (!eof() && FormatTok->isNot(tok::greater)); 3986 nextToken(); // Skip '>'. 3987 } 3988 3989 void UnwrappedLineParser::parseObjCUntilAtEnd() { 3990 do { 3991 if (FormatTok->isObjCAtKeyword(tok::objc_end)) { 3992 nextToken(); 3993 addUnwrappedLine(); 3994 break; 3995 } 3996 if (FormatTok->is(tok::l_brace)) { 3997 parseBlock(); 3998 // In ObjC interfaces, nothing should be following the "}". 3999 addUnwrappedLine(); 4000 } else if (FormatTok->is(tok::r_brace)) { 4001 // Ignore stray "}". parseStructuralElement doesn't consume them. 4002 nextToken(); 4003 addUnwrappedLine(); 4004 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 4005 nextToken(); 4006 parseObjCMethod(); 4007 } else { 4008 parseStructuralElement(); 4009 } 4010 } while (!eof()); 4011 } 4012 4013 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 4014 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 4015 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 4016 nextToken(); 4017 nextToken(); // interface name 4018 4019 // @interface can be followed by a lightweight generic 4020 // specialization list, then either a base class or a category. 4021 if (FormatTok->is(tok::less)) 4022 parseObjCLightweightGenerics(); 4023 if (FormatTok->is(tok::colon)) { 4024 nextToken(); 4025 nextToken(); // base class name 4026 // The base class can also have lightweight generics applied to it. 4027 if (FormatTok->is(tok::less)) 4028 parseObjCLightweightGenerics(); 4029 } else if (FormatTok->is(tok::l_paren)) { 4030 // Skip category, if present. 4031 parseParens(); 4032 } 4033 4034 if (FormatTok->is(tok::less)) 4035 parseObjCProtocolList(); 4036 4037 if (FormatTok->is(tok::l_brace)) { 4038 if (Style.BraceWrapping.AfterObjCDeclaration) 4039 addUnwrappedLine(); 4040 parseBlock(/*MustBeDeclaration=*/true); 4041 } 4042 4043 // With instance variables, this puts '}' on its own line. Without instance 4044 // variables, this ends the @interface line. 4045 addUnwrappedLine(); 4046 4047 parseObjCUntilAtEnd(); 4048 } 4049 4050 void UnwrappedLineParser::parseObjCLightweightGenerics() { 4051 assert(FormatTok->is(tok::less)); 4052 // Unlike protocol lists, generic parameterizations support 4053 // nested angles: 4054 // 4055 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 4056 // NSObject <NSCopying, NSSecureCoding> 4057 // 4058 // so we need to count how many open angles we have left. 4059 unsigned NumOpenAngles = 1; 4060 do { 4061 nextToken(); 4062 // Early exit in case someone forgot a close angle. 4063 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 4064 FormatTok->isObjCAtKeyword(tok::objc_end)) { 4065 break; 4066 } 4067 if (FormatTok->is(tok::less)) { 4068 ++NumOpenAngles; 4069 } else if (FormatTok->is(tok::greater)) { 4070 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 4071 --NumOpenAngles; 4072 } 4073 } while (!eof() && NumOpenAngles != 0); 4074 nextToken(); // Skip '>'. 4075 } 4076 4077 // Returns true for the declaration/definition form of @protocol, 4078 // false for the expression form. 4079 bool UnwrappedLineParser::parseObjCProtocol() { 4080 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 4081 nextToken(); 4082 4083 if (FormatTok->is(tok::l_paren)) { 4084 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 4085 return false; 4086 } 4087 4088 // The definition/declaration form, 4089 // @protocol Foo 4090 // - (int)someMethod; 4091 // @end 4092 4093 nextToken(); // protocol name 4094 4095 if (FormatTok->is(tok::less)) 4096 parseObjCProtocolList(); 4097 4098 // Check for protocol declaration. 4099 if (FormatTok->is(tok::semi)) { 4100 nextToken(); 4101 addUnwrappedLine(); 4102 return true; 4103 } 4104 4105 addUnwrappedLine(); 4106 parseObjCUntilAtEnd(); 4107 return true; 4108 } 4109 4110 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 4111 bool IsImport = FormatTok->is(Keywords.kw_import); 4112 assert(IsImport || FormatTok->is(tok::kw_export)); 4113 nextToken(); 4114 4115 // Consume the "default" in "export default class/function". 4116 if (FormatTok->is(tok::kw_default)) 4117 nextToken(); 4118 4119 // Consume "async function", "function" and "default function", so that these 4120 // get parsed as free-standing JS functions, i.e. do not require a trailing 4121 // semicolon. 4122 if (FormatTok->is(Keywords.kw_async)) 4123 nextToken(); 4124 if (FormatTok->is(Keywords.kw_function)) { 4125 nextToken(); 4126 return; 4127 } 4128 4129 // For imports, `export *`, `export {...}`, consume the rest of the line up 4130 // to the terminating `;`. For everything else, just return and continue 4131 // parsing the structural element, i.e. the declaration or expression for 4132 // `export default`. 4133 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 4134 !FormatTok->isStringLiteral()) { 4135 return; 4136 } 4137 4138 while (!eof()) { 4139 if (FormatTok->is(tok::semi)) 4140 return; 4141 if (Line->Tokens.empty()) { 4142 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 4143 // import statement should terminate. 4144 return; 4145 } 4146 if (FormatTok->is(tok::l_brace)) { 4147 FormatTok->setBlockKind(BK_Block); 4148 nextToken(); 4149 parseBracedList(); 4150 } else { 4151 nextToken(); 4152 } 4153 } 4154 } 4155 4156 void UnwrappedLineParser::parseStatementMacro() { 4157 nextToken(); 4158 if (FormatTok->is(tok::l_paren)) 4159 parseParens(); 4160 if (FormatTok->is(tok::semi)) 4161 nextToken(); 4162 addUnwrappedLine(); 4163 } 4164 4165 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() { 4166 // consume things like a::`b.c[d:e] or a::* 4167 while (true) { 4168 if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar, 4169 tok::coloncolon, tok::hash) || 4170 Keywords.isVerilogIdentifier(*FormatTok)) { 4171 nextToken(); 4172 } else if (FormatTok->is(tok::l_square)) { 4173 parseSquare(); 4174 } else { 4175 break; 4176 } 4177 } 4178 } 4179 4180 void UnwrappedLineParser::parseVerilogSensitivityList() { 4181 if (!FormatTok->is(tok::at)) 4182 return; 4183 nextToken(); 4184 // A block event expression has 2 at signs. 4185 if (FormatTok->is(tok::at)) 4186 nextToken(); 4187 switch (FormatTok->Tok.getKind()) { 4188 case tok::star: 4189 nextToken(); 4190 break; 4191 case tok::l_paren: 4192 parseParens(); 4193 break; 4194 default: 4195 parseVerilogHierarchyIdentifier(); 4196 break; 4197 } 4198 } 4199 4200 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() { 4201 unsigned AddLevels = 0; 4202 4203 if (FormatTok->is(Keywords.kw_clocking)) { 4204 nextToken(); 4205 if (Keywords.isVerilogIdentifier(*FormatTok)) 4206 nextToken(); 4207 parseVerilogSensitivityList(); 4208 if (FormatTok->is(tok::semi)) 4209 nextToken(); 4210 } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex, 4211 Keywords.kw_casez, Keywords.kw_randcase, 4212 Keywords.kw_randsequence)) { 4213 if (Style.IndentCaseLabels) 4214 AddLevels++; 4215 nextToken(); 4216 if (FormatTok->is(tok::l_paren)) { 4217 FormatTok->setFinalizedType(TT_ConditionLParen); 4218 parseParens(); 4219 } 4220 if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches)) 4221 nextToken(); 4222 // The case header has no semicolon. 4223 } else { 4224 // "module" etc. 4225 nextToken(); 4226 // all the words like the name of the module and specifiers like 4227 // "automatic" and the width of function return type 4228 while (true) { 4229 if (FormatTok->is(tok::l_square)) { 4230 auto Prev = FormatTok->getPreviousNonComment(); 4231 if (Prev && Keywords.isVerilogIdentifier(*Prev)) 4232 Prev->setFinalizedType(TT_VerilogDimensionedTypeName); 4233 parseSquare(); 4234 } else if (Keywords.isVerilogIdentifier(*FormatTok) || 4235 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) { 4236 nextToken(); 4237 } else { 4238 break; 4239 } 4240 } 4241 4242 auto NewLine = [this]() { 4243 addUnwrappedLine(); 4244 Line->IsContinuation = true; 4245 }; 4246 4247 // package imports 4248 while (FormatTok->is(Keywords.kw_import)) { 4249 NewLine(); 4250 nextToken(); 4251 parseVerilogHierarchyIdentifier(); 4252 if (FormatTok->is(tok::semi)) 4253 nextToken(); 4254 } 4255 4256 // parameters and ports 4257 if (FormatTok->is(Keywords.kw_verilogHash)) { 4258 NewLine(); 4259 nextToken(); 4260 if (FormatTok->is(tok::l_paren)) 4261 parseParens(); 4262 } 4263 if (FormatTok->is(tok::l_paren)) { 4264 NewLine(); 4265 parseParens(); 4266 } 4267 4268 // extends and implements 4269 if (FormatTok->is(Keywords.kw_extends)) { 4270 NewLine(); 4271 nextToken(); 4272 parseVerilogHierarchyIdentifier(); 4273 if (FormatTok->is(tok::l_paren)) 4274 parseParens(); 4275 } 4276 if (FormatTok->is(Keywords.kw_implements)) { 4277 NewLine(); 4278 do { 4279 nextToken(); 4280 parseVerilogHierarchyIdentifier(); 4281 } while (FormatTok->is(tok::comma)); 4282 } 4283 4284 // Coverage event for cover groups. 4285 if (FormatTok->is(tok::at)) { 4286 NewLine(); 4287 parseVerilogSensitivityList(); 4288 } 4289 4290 if (FormatTok->is(tok::semi)) 4291 nextToken(/*LevelDifference=*/1); 4292 addUnwrappedLine(); 4293 } 4294 4295 return AddLevels; 4296 } 4297 4298 void UnwrappedLineParser::parseVerilogTable() { 4299 assert(FormatTok->is(Keywords.kw_table)); 4300 nextToken(/*LevelDifference=*/1); 4301 addUnwrappedLine(); 4302 4303 auto InitialLevel = Line->Level++; 4304 while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) { 4305 FormatToken *Tok = FormatTok; 4306 nextToken(); 4307 if (Tok->is(tok::semi)) 4308 addUnwrappedLine(); 4309 else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus)) 4310 Tok->setFinalizedType(TT_VerilogTableItem); 4311 } 4312 Line->Level = InitialLevel; 4313 nextToken(/*LevelDifference=*/-1); 4314 addUnwrappedLine(); 4315 } 4316 4317 void UnwrappedLineParser::parseVerilogCaseLabel() { 4318 // The label will get unindented in AnnotatingParser. If there are no leading 4319 // spaces, indent the rest here so that things inside the block will be 4320 // indented relative to things outside. We don't use parseLabel because we 4321 // don't know whether this colon is a label or a ternary expression at this 4322 // point. 4323 auto OrigLevel = Line->Level; 4324 auto FirstLine = CurrentLines->size(); 4325 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1)) 4326 ++Line->Level; 4327 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok)) 4328 --Line->Level; 4329 parseStructuralElement(); 4330 // Restore the indentation in both the new line and the line that has the 4331 // label. 4332 if (CurrentLines->size() > FirstLine) 4333 (*CurrentLines)[FirstLine].Level = OrigLevel; 4334 Line->Level = OrigLevel; 4335 } 4336 4337 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { 4338 if (Line->Tokens.empty()) 4339 return; 4340 LLVM_DEBUG({ 4341 if (CurrentLines == &Lines) 4342 printDebugInfo(*Line); 4343 }); 4344 4345 // If this line closes a block when in Whitesmiths mode, remember that 4346 // information so that the level can be decreased after the line is added. 4347 // This has to happen after the addition of the line since the line itself 4348 // needs to be indented. 4349 bool ClosesWhitesmithsBlock = 4350 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && 4351 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 4352 4353 CurrentLines->push_back(std::move(*Line)); 4354 Line->Tokens.clear(); 4355 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 4356 Line->FirstStartColumn = 0; 4357 Line->IsContinuation = false; 4358 4359 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) 4360 --Line->Level; 4361 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 4362 CurrentLines->append( 4363 std::make_move_iterator(PreprocessorDirectives.begin()), 4364 std::make_move_iterator(PreprocessorDirectives.end())); 4365 PreprocessorDirectives.clear(); 4366 } 4367 // Disconnect the current token from the last token on the previous line. 4368 FormatTok->Previous = nullptr; 4369 } 4370 4371 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); } 4372 4373 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 4374 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 4375 FormatTok.NewlinesBefore > 0; 4376 } 4377 4378 // Checks if \p FormatTok is a line comment that continues the line comment 4379 // section on \p Line. 4380 static bool 4381 continuesLineCommentSection(const FormatToken &FormatTok, 4382 const UnwrappedLine &Line, 4383 const llvm::Regex &CommentPragmasRegex) { 4384 if (Line.Tokens.empty()) 4385 return false; 4386 4387 StringRef IndentContent = FormatTok.TokenText; 4388 if (FormatTok.TokenText.startswith("//") || 4389 FormatTok.TokenText.startswith("/*")) { 4390 IndentContent = FormatTok.TokenText.substr(2); 4391 } 4392 if (CommentPragmasRegex.match(IndentContent)) 4393 return false; 4394 4395 // If Line starts with a line comment, then FormatTok continues the comment 4396 // section if its original column is greater or equal to the original start 4397 // column of the line. 4398 // 4399 // Define the min column token of a line as follows: if a line ends in '{' or 4400 // contains a '{' followed by a line comment, then the min column token is 4401 // that '{'. Otherwise, the min column token of the line is the first token of 4402 // the line. 4403 // 4404 // If Line starts with a token other than a line comment, then FormatTok 4405 // continues the comment section if its original column is greater than the 4406 // original start column of the min column token of the line. 4407 // 4408 // For example, the second line comment continues the first in these cases: 4409 // 4410 // // first line 4411 // // second line 4412 // 4413 // and: 4414 // 4415 // // first line 4416 // // second line 4417 // 4418 // and: 4419 // 4420 // int i; // first line 4421 // // second line 4422 // 4423 // and: 4424 // 4425 // do { // first line 4426 // // second line 4427 // int i; 4428 // } while (true); 4429 // 4430 // and: 4431 // 4432 // enum { 4433 // a, // first line 4434 // // second line 4435 // b 4436 // }; 4437 // 4438 // The second line comment doesn't continue the first in these cases: 4439 // 4440 // // first line 4441 // // second line 4442 // 4443 // and: 4444 // 4445 // int i; // first line 4446 // // second line 4447 // 4448 // and: 4449 // 4450 // do { // first line 4451 // // second line 4452 // int i; 4453 // } while (true); 4454 // 4455 // and: 4456 // 4457 // enum { 4458 // a, // first line 4459 // // second line 4460 // }; 4461 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 4462 4463 // Scan for '{//'. If found, use the column of '{' as a min column for line 4464 // comment section continuation. 4465 const FormatToken *PreviousToken = nullptr; 4466 for (const UnwrappedLineNode &Node : Line.Tokens) { 4467 if (PreviousToken && PreviousToken->is(tok::l_brace) && 4468 isLineComment(*Node.Tok)) { 4469 MinColumnToken = PreviousToken; 4470 break; 4471 } 4472 PreviousToken = Node.Tok; 4473 4474 // Grab the last newline preceding a token in this unwrapped line. 4475 if (Node.Tok->NewlinesBefore > 0) 4476 MinColumnToken = Node.Tok; 4477 } 4478 if (PreviousToken && PreviousToken->is(tok::l_brace)) 4479 MinColumnToken = PreviousToken; 4480 4481 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 4482 MinColumnToken); 4483 } 4484 4485 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 4486 bool JustComments = Line->Tokens.empty(); 4487 for (FormatToken *Tok : CommentsBeforeNextToken) { 4488 // Line comments that belong to the same line comment section are put on the 4489 // same line since later we might want to reflow content between them. 4490 // Additional fine-grained breaking of line comment sections is controlled 4491 // by the class BreakableLineCommentSection in case it is desirable to keep 4492 // several line comment sections in the same unwrapped line. 4493 // 4494 // FIXME: Consider putting separate line comment sections as children to the 4495 // unwrapped line instead. 4496 Tok->ContinuesLineCommentSection = 4497 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex); 4498 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection) 4499 addUnwrappedLine(); 4500 pushToken(Tok); 4501 } 4502 if (NewlineBeforeNext && JustComments) 4503 addUnwrappedLine(); 4504 CommentsBeforeNextToken.clear(); 4505 } 4506 4507 void UnwrappedLineParser::nextToken(int LevelDifference) { 4508 if (eof()) 4509 return; 4510 flushComments(isOnNewLine(*FormatTok)); 4511 pushToken(FormatTok); 4512 FormatToken *Previous = FormatTok; 4513 if (!Style.isJavaScript()) 4514 readToken(LevelDifference); 4515 else 4516 readTokenWithJavaScriptASI(); 4517 FormatTok->Previous = Previous; 4518 if (Style.isVerilog()) { 4519 // Blocks in Verilog can have `begin` and `end` instead of braces. For 4520 // keywords like `begin`, we can't treat them the same as left braces 4521 // because some contexts require one of them. For example structs use 4522 // braces and if blocks use keywords, and a left brace can occur in an if 4523 // statement, but it is not a block. For keywords like `end`, we simply 4524 // treat them the same as right braces. 4525 if (Keywords.isVerilogEnd(*FormatTok)) 4526 FormatTok->Tok.setKind(tok::r_brace); 4527 } 4528 } 4529 4530 void UnwrappedLineParser::distributeComments( 4531 const SmallVectorImpl<FormatToken *> &Comments, 4532 const FormatToken *NextTok) { 4533 // Whether or not a line comment token continues a line is controlled by 4534 // the method continuesLineCommentSection, with the following caveat: 4535 // 4536 // Define a trail of Comments to be a nonempty proper postfix of Comments such 4537 // that each comment line from the trail is aligned with the next token, if 4538 // the next token exists. If a trail exists, the beginning of the maximal 4539 // trail is marked as a start of a new comment section. 4540 // 4541 // For example in this code: 4542 // 4543 // int a; // line about a 4544 // // line 1 about b 4545 // // line 2 about b 4546 // int b; 4547 // 4548 // the two lines about b form a maximal trail, so there are two sections, the 4549 // first one consisting of the single comment "// line about a" and the 4550 // second one consisting of the next two comments. 4551 if (Comments.empty()) 4552 return; 4553 bool ShouldPushCommentsInCurrentLine = true; 4554 bool HasTrailAlignedWithNextToken = false; 4555 unsigned StartOfTrailAlignedWithNextToken = 0; 4556 if (NextTok) { 4557 // We are skipping the first element intentionally. 4558 for (unsigned i = Comments.size() - 1; i > 0; --i) { 4559 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 4560 HasTrailAlignedWithNextToken = true; 4561 StartOfTrailAlignedWithNextToken = i; 4562 } 4563 } 4564 } 4565 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 4566 FormatToken *FormatTok = Comments[i]; 4567 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 4568 FormatTok->ContinuesLineCommentSection = false; 4569 } else { 4570 FormatTok->ContinuesLineCommentSection = 4571 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 4572 } 4573 if (!FormatTok->ContinuesLineCommentSection && 4574 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 4575 ShouldPushCommentsInCurrentLine = false; 4576 } 4577 if (ShouldPushCommentsInCurrentLine) 4578 pushToken(FormatTok); 4579 else 4580 CommentsBeforeNextToken.push_back(FormatTok); 4581 } 4582 } 4583 4584 void UnwrappedLineParser::readToken(int LevelDifference) { 4585 SmallVector<FormatToken *, 1> Comments; 4586 bool PreviousWasComment = false; 4587 bool FirstNonCommentOnLine = false; 4588 do { 4589 FormatTok = Tokens->getNextToken(); 4590 assert(FormatTok); 4591 while (FormatTok->getType() == TT_ConflictStart || 4592 FormatTok->getType() == TT_ConflictEnd || 4593 FormatTok->getType() == TT_ConflictAlternative) { 4594 if (FormatTok->getType() == TT_ConflictStart) 4595 conditionalCompilationStart(/*Unreachable=*/false); 4596 else if (FormatTok->getType() == TT_ConflictAlternative) 4597 conditionalCompilationAlternative(); 4598 else if (FormatTok->getType() == TT_ConflictEnd) 4599 conditionalCompilationEnd(); 4600 FormatTok = Tokens->getNextToken(); 4601 FormatTok->MustBreakBefore = true; 4602 } 4603 4604 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine, 4605 const FormatToken &Tok, 4606 bool PreviousWasComment) { 4607 auto IsFirstOnLine = [](const FormatToken &Tok) { 4608 return Tok.HasUnescapedNewline || Tok.IsFirst; 4609 }; 4610 4611 // Consider preprocessor directives preceded by block comments as first 4612 // on line. 4613 if (PreviousWasComment) 4614 return FirstNonCommentOnLine || IsFirstOnLine(Tok); 4615 return IsFirstOnLine(Tok); 4616 }; 4617 4618 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4619 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4620 PreviousWasComment = FormatTok->is(tok::comment); 4621 4622 while (!Line->InPPDirective && FormatTok->is(tok::hash) && 4623 (!Style.isVerilog() || 4624 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) && 4625 FirstNonCommentOnLine) { 4626 distributeComments(Comments, FormatTok); 4627 Comments.clear(); 4628 // If there is an unfinished unwrapped line, we flush the preprocessor 4629 // directives only after that unwrapped line was finished later. 4630 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 4631 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 4632 assert((LevelDifference >= 0 || 4633 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 4634 "LevelDifference makes Line->Level negative"); 4635 Line->Level += LevelDifference; 4636 // Comments stored before the preprocessor directive need to be output 4637 // before the preprocessor directive, at the same level as the 4638 // preprocessor directive, as we consider them to apply to the directive. 4639 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 4640 PPBranchLevel > 0) { 4641 Line->Level += PPBranchLevel; 4642 } 4643 flushComments(isOnNewLine(*FormatTok)); 4644 parsePPDirective(); 4645 PreviousWasComment = FormatTok->is(tok::comment); 4646 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4647 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4648 } 4649 4650 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 4651 !Line->InPPDirective) { 4652 continue; 4653 } 4654 4655 if (!FormatTok->is(tok::comment)) { 4656 distributeComments(Comments, FormatTok); 4657 Comments.clear(); 4658 return; 4659 } 4660 4661 Comments.push_back(FormatTok); 4662 } while (!eof()); 4663 4664 distributeComments(Comments, nullptr); 4665 Comments.clear(); 4666 } 4667 4668 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 4669 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 4670 if (MustBreakBeforeNextToken) { 4671 Line->Tokens.back().Tok->MustBreakBefore = true; 4672 MustBreakBeforeNextToken = false; 4673 } 4674 } 4675 4676 } // end namespace format 4677 } // end namespace clang 4678