1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "FormatToken.h" 17 #include "TokenAnnotator.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/Support/Debug.h" 20 #include "llvm/Support/raw_ostream.h" 21 22 #include <algorithm> 23 24 #define DEBUG_TYPE "format-parser" 25 26 namespace clang { 27 namespace format { 28 29 class FormatTokenSource { 30 public: 31 virtual ~FormatTokenSource() {} 32 33 // Returns the next token in the token stream. 34 virtual FormatToken *getNextToken() = 0; 35 36 // Returns the token preceding the token returned by the last call to 37 // getNextToken() in the token stream, or nullptr if no such token exists. 38 virtual FormatToken *getPreviousToken() = 0; 39 40 // Returns the token that would be returned by the next call to 41 // getNextToken(). 42 virtual FormatToken *peekNextToken() = 0; 43 44 // Returns whether we are at the end of the file. 45 // This can be different from whether getNextToken() returned an eof token 46 // when the FormatTokenSource is a view on a part of the token stream. 47 virtual bool isEOF() = 0; 48 49 // Gets the current position in the token stream, to be used by setPosition(). 50 virtual unsigned getPosition() = 0; 51 52 // Resets the token stream to the state it was in when getPosition() returned 53 // Position, and return the token at that position in the stream. 
54 virtual FormatToken *setPosition(unsigned Position) = 0; 55 }; 56 57 namespace { 58 59 class ScopedDeclarationState { 60 public: 61 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack, 62 bool MustBeDeclaration) 63 : Line(Line), Stack(Stack) { 64 Line.MustBeDeclaration = MustBeDeclaration; 65 Stack.push_back(MustBeDeclaration); 66 } 67 ~ScopedDeclarationState() { 68 Stack.pop_back(); 69 if (!Stack.empty()) 70 Line.MustBeDeclaration = Stack.back(); 71 else 72 Line.MustBeDeclaration = true; 73 } 74 75 private: 76 UnwrappedLine &Line; 77 llvm::BitVector &Stack; 78 }; 79 80 static bool isLineComment(const FormatToken &FormatTok) { 81 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); 82 } 83 84 // Checks if \p FormatTok is a line comment that continues the line comment 85 // \p Previous. The original column of \p MinColumnToken is used to determine 86 // whether \p FormatTok is indented enough to the right to continue \p Previous. 87 static bool continuesLineComment(const FormatToken &FormatTok, 88 const FormatToken *Previous, 89 const FormatToken *MinColumnToken) { 90 if (!Previous || !MinColumnToken) 91 return false; 92 unsigned MinContinueColumn = 93 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 
0 : 1); 94 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 95 isLineComment(*Previous) && 96 FormatTok.OriginalColumn >= MinContinueColumn; 97 } 98 99 class ScopedMacroState : public FormatTokenSource { 100 public: 101 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 102 FormatToken *&ResetToken) 103 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 104 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 105 Token(nullptr), PreviousToken(nullptr) { 106 FakeEOF.Tok.startToken(); 107 FakeEOF.Tok.setKind(tok::eof); 108 TokenSource = this; 109 Line.Level = 0; 110 Line.InPPDirective = true; 111 } 112 113 ~ScopedMacroState() override { 114 TokenSource = PreviousTokenSource; 115 ResetToken = Token; 116 Line.InPPDirective = false; 117 Line.Level = PreviousLineLevel; 118 } 119 120 FormatToken *getNextToken() override { 121 // The \c UnwrappedLineParser guards against this by never calling 122 // \c getNextToken() after it has encountered the first eof token. 
123 assert(!eof()); 124 PreviousToken = Token; 125 Token = PreviousTokenSource->getNextToken(); 126 if (eof()) 127 return &FakeEOF; 128 return Token; 129 } 130 131 FormatToken *getPreviousToken() override { 132 return PreviousTokenSource->getPreviousToken(); 133 } 134 135 FormatToken *peekNextToken() override { 136 if (eof()) 137 return &FakeEOF; 138 return PreviousTokenSource->peekNextToken(); 139 } 140 141 bool isEOF() override { return PreviousTokenSource->isEOF(); } 142 143 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 144 145 FormatToken *setPosition(unsigned Position) override { 146 PreviousToken = nullptr; 147 Token = PreviousTokenSource->setPosition(Position); 148 return Token; 149 } 150 151 private: 152 bool eof() { 153 return Token && Token->HasUnescapedNewline && 154 !continuesLineComment(*Token, PreviousToken, 155 /*MinColumnToken=*/PreviousToken); 156 } 157 158 FormatToken FakeEOF; 159 UnwrappedLine &Line; 160 FormatTokenSource *&TokenSource; 161 FormatToken *&ResetToken; 162 unsigned PreviousLineLevel; 163 FormatTokenSource *PreviousTokenSource; 164 165 FormatToken *Token; 166 FormatToken *PreviousToken; 167 }; 168 169 } // end anonymous namespace 170 171 class ScopedLineState { 172 public: 173 ScopedLineState(UnwrappedLineParser &Parser, 174 bool SwitchToPreprocessorLines = false) 175 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 176 if (SwitchToPreprocessorLines) 177 Parser.CurrentLines = &Parser.PreprocessorDirectives; 178 else if (!Parser.Line->Tokens.empty()) 179 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 180 PreBlockLine = std::move(Parser.Line); 181 Parser.Line = std::make_unique<UnwrappedLine>(); 182 Parser.Line->Level = PreBlockLine->Level; 183 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 184 } 185 186 ~ScopedLineState() { 187 if (!Parser.Line->Tokens.empty()) { 188 Parser.addUnwrappedLine(); 189 } 190 assert(Parser.Line->Tokens.empty()); 191 Parser.Line = 
std::move(PreBlockLine); 192 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 193 Parser.MustBreakBeforeNextToken = true; 194 Parser.CurrentLines = OriginalLines; 195 } 196 197 private: 198 UnwrappedLineParser &Parser; 199 200 std::unique_ptr<UnwrappedLine> PreBlockLine; 201 SmallVectorImpl<UnwrappedLine> *OriginalLines; 202 }; 203 204 class CompoundStatementIndenter { 205 public: 206 CompoundStatementIndenter(UnwrappedLineParser *Parser, 207 const FormatStyle &Style, unsigned &LineLevel) 208 : CompoundStatementIndenter(Parser, LineLevel, 209 Style.BraceWrapping.AfterControlStatement, 210 Style.BraceWrapping.IndentBraces) {} 211 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, 212 bool WrapBrace, bool IndentBrace) 213 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 214 if (WrapBrace) 215 Parser->addUnwrappedLine(); 216 if (IndentBrace) 217 ++LineLevel; 218 } 219 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 220 221 private: 222 unsigned &LineLevel; 223 unsigned OldLineLevel; 224 }; 225 226 namespace { 227 228 class IndexedTokenSource : public FormatTokenSource { 229 public: 230 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 231 : Tokens(Tokens), Position(-1) {} 232 233 FormatToken *getNextToken() override { 234 if (Position >= 0 && Tokens[Position]->is(tok::eof)) { 235 LLVM_DEBUG({ 236 llvm::dbgs() << "Next "; 237 dbgToken(Position); 238 }); 239 return Tokens[Position]; 240 } 241 ++Position; 242 LLVM_DEBUG({ 243 llvm::dbgs() << "Next "; 244 dbgToken(Position); 245 }); 246 return Tokens[Position]; 247 } 248 249 FormatToken *getPreviousToken() override { 250 return Position > 0 ? 
Tokens[Position - 1] : nullptr; 251 } 252 253 FormatToken *peekNextToken() override { 254 int Next = Position + 1; 255 LLVM_DEBUG({ 256 llvm::dbgs() << "Peeking "; 257 dbgToken(Next); 258 }); 259 return Tokens[Next]; 260 } 261 262 bool isEOF() override { return Tokens[Position]->is(tok::eof); } 263 264 unsigned getPosition() override { 265 LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n"); 266 assert(Position >= 0); 267 return Position; 268 } 269 270 FormatToken *setPosition(unsigned P) override { 271 LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n"); 272 Position = P; 273 return Tokens[Position]; 274 } 275 276 void reset() { Position = -1; } 277 278 private: 279 void dbgToken(int Position, llvm::StringRef Indent = "") { 280 FormatToken *Tok = Tokens[Position]; 281 llvm::dbgs() << Indent << "[" << Position 282 << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText 283 << ", Macro: " << !!Tok->MacroCtx << "\n"; 284 } 285 286 ArrayRef<FormatToken *> Tokens; 287 int Position; 288 }; 289 290 } // end anonymous namespace 291 292 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 293 const AdditionalKeywords &Keywords, 294 unsigned FirstStartColumn, 295 ArrayRef<FormatToken *> Tokens, 296 UnwrappedLineConsumer &Callback) 297 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 298 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 299 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 300 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 301 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 302 ? IG_Rejected 303 : IG_Inited), 304 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} 305 306 void UnwrappedLineParser::reset() { 307 PPBranchLevel = -1; 308 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 309 ? 
IG_Rejected 310 : IG_Inited; 311 IncludeGuardToken = nullptr; 312 Line.reset(new UnwrappedLine); 313 CommentsBeforeNextToken.clear(); 314 FormatTok = nullptr; 315 MustBreakBeforeNextToken = false; 316 PreprocessorDirectives.clear(); 317 CurrentLines = &Lines; 318 DeclarationScopeStack.clear(); 319 NestedTooDeep.clear(); 320 PPStack.clear(); 321 Line->FirstStartColumn = FirstStartColumn; 322 } 323 324 void UnwrappedLineParser::parse() { 325 IndexedTokenSource TokenSource(AllTokens); 326 Line->FirstStartColumn = FirstStartColumn; 327 do { 328 LLVM_DEBUG(llvm::dbgs() << "----\n"); 329 reset(); 330 Tokens = &TokenSource; 331 TokenSource.reset(); 332 333 readToken(); 334 parseFile(); 335 336 // If we found an include guard then all preprocessor directives (other than 337 // the guard) are over-indented by one. 338 if (IncludeGuard == IG_Found) 339 for (auto &Line : Lines) 340 if (Line.InPPDirective && Line.Level > 0) 341 --Line.Level; 342 343 // Create line with eof token. 344 pushToken(FormatTok); 345 addUnwrappedLine(); 346 347 for (const UnwrappedLine &Line : Lines) 348 Callback.consumeUnwrappedLine(Line); 349 350 Callback.finishRun(); 351 Lines.clear(); 352 while (!PPLevelBranchIndex.empty() && 353 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 354 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 355 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 356 } 357 if (!PPLevelBranchIndex.empty()) { 358 ++PPLevelBranchIndex.back(); 359 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 360 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 361 } 362 } while (!PPLevelBranchIndex.empty()); 363 } 364 365 void UnwrappedLineParser::parseFile() { 366 // The top-level context in a file always has declarations, except for pre- 367 // processor directives and JavaScript files. 
368 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript(); 369 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 370 MustBeDeclaration); 371 if (Style.Language == FormatStyle::LK_TextProto) 372 parseBracedList(); 373 else 374 parseLevel(/*HasOpeningBrace=*/false); 375 // Make sure to format the remaining tokens. 376 // 377 // LK_TextProto is special since its top-level is parsed as the body of a 378 // braced list, which does not necessarily have natural line separators such 379 // as a semicolon. Comments after the last entry that have been determined to 380 // not belong to that line, as in: 381 // key: value 382 // // endfile comment 383 // do not have a chance to be put on a line of their own until this point. 384 // Here we add this newline before end-of-file comments. 385 if (Style.Language == FormatStyle::LK_TextProto && 386 !CommentsBeforeNextToken.empty()) 387 addUnwrappedLine(); 388 flushComments(true); 389 addUnwrappedLine(); 390 } 391 392 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() { 393 do { 394 switch (FormatTok->Tok.getKind()) { 395 case tok::l_brace: 396 return; 397 default: 398 if (FormatTok->is(Keywords.kw_where)) { 399 addUnwrappedLine(); 400 nextToken(); 401 parseCSharpGenericTypeConstraint(); 402 break; 403 } 404 nextToken(); 405 break; 406 } 407 } while (!eof()); 408 } 409 410 void UnwrappedLineParser::parseCSharpAttribute() { 411 int UnpairedSquareBrackets = 1; 412 do { 413 switch (FormatTok->Tok.getKind()) { 414 case tok::r_square: 415 nextToken(); 416 --UnpairedSquareBrackets; 417 if (UnpairedSquareBrackets == 0) { 418 addUnwrappedLine(); 419 return; 420 } 421 break; 422 case tok::l_square: 423 ++UnpairedSquareBrackets; 424 nextToken(); 425 break; 426 default: 427 nextToken(); 428 break; 429 } 430 } while (!eof()); 431 } 432 433 bool UnwrappedLineParser::precededByCommentOrPPDirective() const { 434 if (!Lines.empty() && Lines.back().InPPDirective) 435 return true; 436 437 const 
FormatToken *Previous = Tokens->getPreviousToken(); 438 return Previous && Previous->is(tok::comment) && 439 (Previous->IsMultiline || Previous->NewlinesBefore > 0); 440 } 441 442 bool UnwrappedLineParser::mightFitOnOneLine() const { 443 const auto ColumnLimit = Style.ColumnLimit; 444 if (ColumnLimit == 0) 445 return true; 446 447 if (Lines.empty()) 448 return true; 449 450 const auto &PreviousLine = Lines.back(); 451 const auto &Tokens = PreviousLine.Tokens; 452 assert(!Tokens.empty()); 453 const auto *LastToken = Tokens.back().Tok; 454 assert(LastToken); 455 if (!LastToken->isOneOf(tok::semi, tok::comment)) 456 return true; 457 458 AnnotatedLine Line(PreviousLine); 459 assert(Line.Last == LastToken); 460 461 TokenAnnotator Annotator(Style, Keywords); 462 Annotator.annotate(Line); 463 Annotator.calculateFormattingInformation(Line); 464 465 return Line.Level * Style.IndentWidth + LastToken->TotalLength <= ColumnLimit; 466 } 467 468 // Returns true if a simple block, or false otherwise. (A simple block has a 469 // single statement that fits on a single line.) 470 bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace, IfStmtKind *IfKind) { 471 const bool IsPrecededByCommentOrPPDirective = 472 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective(); 473 unsigned StatementCount = 0; 474 bool SwitchLabelEncountered = false; 475 do { 476 tok::TokenKind kind = FormatTok->Tok.getKind(); 477 if (FormatTok->getType() == TT_MacroBlockBegin) { 478 kind = tok::l_brace; 479 } else if (FormatTok->getType() == TT_MacroBlockEnd) { 480 kind = tok::r_brace; 481 } 482 483 switch (kind) { 484 case tok::comment: 485 nextToken(); 486 addUnwrappedLine(); 487 break; 488 case tok::l_brace: 489 // FIXME: Add parameter whether this can happen - if this happens, we must 490 // be in a non-declaration context. 
491 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) 492 continue; 493 parseBlock(); 494 ++StatementCount; 495 assert(StatementCount > 0 && "StatementCount overflow!"); 496 addUnwrappedLine(); 497 break; 498 case tok::r_brace: 499 if (HasOpeningBrace) { 500 if (!Style.RemoveBracesLLVM) 501 return false; 502 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || 503 IsPrecededByCommentOrPPDirective || 504 precededByCommentOrPPDirective()) { 505 return false; 506 } 507 const FormatToken *Next = Tokens->peekNextToken(); 508 if (Next->is(tok::comment) && Next->NewlinesBefore == 0) 509 return false; 510 return mightFitOnOneLine(); 511 } 512 nextToken(); 513 addUnwrappedLine(); 514 break; 515 case tok::kw_default: { 516 unsigned StoredPosition = Tokens->getPosition(); 517 FormatToken *Next; 518 do { 519 Next = Tokens->getNextToken(); 520 } while (Next->is(tok::comment)); 521 FormatTok = Tokens->setPosition(StoredPosition); 522 if (Next && Next->isNot(tok::colon)) { 523 // default not followed by ':' is not a case label; treat it like 524 // an identifier. 525 parseStructuralElement(); 526 break; 527 } 528 // Else, if it is 'default:', fall through to the case handling. 529 LLVM_FALLTHROUGH; 530 } 531 case tok::kw_case: 532 if (Style.isJavaScript() && Line->MustBeDeclaration) { 533 // A 'case: string' style field declaration. 
534 parseStructuralElement(); 535 break; 536 } 537 if (!SwitchLabelEncountered && 538 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 539 ++Line->Level; 540 SwitchLabelEncountered = true; 541 parseStructuralElement(); 542 break; 543 case tok::l_square: 544 if (Style.isCSharp()) { 545 nextToken(); 546 parseCSharpAttribute(); 547 break; 548 } 549 LLVM_FALLTHROUGH; 550 default: 551 parseStructuralElement(IfKind, !HasOpeningBrace); 552 ++StatementCount; 553 assert(StatementCount > 0 && "StatementCount overflow!"); 554 break; 555 } 556 } while (!eof()); 557 return false; 558 } 559 560 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 561 // We'll parse forward through the tokens until we hit 562 // a closing brace or eof - note that getNextToken() will 563 // parse macros, so this will magically work inside macro 564 // definitions, too. 565 unsigned StoredPosition = Tokens->getPosition(); 566 FormatToken *Tok = FormatTok; 567 const FormatToken *PrevTok = Tok->Previous; 568 // Keep a stack of positions of lbrace tokens. We will 569 // update information about whether an lbrace starts a 570 // braced init list or a different block during the loop. 571 SmallVector<FormatToken *, 8> LBraceStack; 572 assert(Tok->Tok.is(tok::l_brace)); 573 do { 574 // Get next non-comment token. 575 FormatToken *NextTok; 576 unsigned ReadTokens = 0; 577 do { 578 NextTok = Tokens->getNextToken(); 579 ++ReadTokens; 580 } while (NextTok->is(tok::comment)); 581 582 switch (Tok->Tok.getKind()) { 583 case tok::l_brace: 584 if (Style.isJavaScript() && PrevTok) { 585 if (PrevTok->isOneOf(tok::colon, tok::less)) 586 // A ':' indicates this code is in a type, or a braced list 587 // following a label in an object literal ({a: {b: 1}}). 588 // A '<' could be an object used in a comparison, but that is nonsense 589 // code (can never return true), so more likely it is a generic type 590 // argument (`X<{a: string; b: number}>`). 
591 // The code below could be confused by semicolons between the 592 // individual members in a type member list, which would normally 593 // trigger BK_Block. In both cases, this must be parsed as an inline 594 // braced init. 595 Tok->setBlockKind(BK_BracedInit); 596 else if (PrevTok->is(tok::r_paren)) 597 // `) { }` can only occur in function or method declarations in JS. 598 Tok->setBlockKind(BK_Block); 599 } else { 600 Tok->setBlockKind(BK_Unknown); 601 } 602 LBraceStack.push_back(Tok); 603 break; 604 case tok::r_brace: 605 if (LBraceStack.empty()) 606 break; 607 if (LBraceStack.back()->is(BK_Unknown)) { 608 bool ProbablyBracedList = false; 609 if (Style.Language == FormatStyle::LK_Proto) { 610 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 611 } else { 612 // Skip NextTok over preprocessor lines, otherwise we may not 613 // properly diagnose the block as a braced intializer 614 // if the comma separator appears after the pp directive. 615 while (NextTok->is(tok::hash)) { 616 ScopedMacroState MacroState(*Line, Tokens, NextTok); 617 do { 618 NextTok = Tokens->getNextToken(); 619 ++ReadTokens; 620 } while (NextTok->isNot(tok::eof)); 621 } 622 623 // Using OriginalColumn to distinguish between ObjC methods and 624 // binary operators is a bit hacky. 625 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 626 NextTok->OriginalColumn == 0; 627 628 // If there is a comma, semicolon or right paren after the closing 629 // brace, we assume this is a braced initializer list. Note that 630 // regardless how we mark inner braces here, we will overwrite the 631 // BlockKind later if we parse a braced list (where all blocks 632 // inside are by default braced lists), or when we explicitly detect 633 // blocks (for example while parsing lambdas). 634 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 635 // braced list in JS. 
636 ProbablyBracedList = 637 (Style.isJavaScript() && 638 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 639 Keywords.kw_as)) || 640 (Style.isCpp() && NextTok->is(tok::l_paren)) || 641 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 642 tok::r_paren, tok::r_square, tok::l_brace, 643 tok::ellipsis) || 644 (NextTok->is(tok::identifier) && 645 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || 646 (NextTok->is(tok::semi) && 647 (!ExpectClassBody || LBraceStack.size() != 1)) || 648 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 649 if (!Style.isCSharp() && NextTok->is(tok::l_square)) { 650 // We can have an array subscript after a braced init 651 // list, but C++11 attributes are expected after blocks. 652 NextTok = Tokens->getNextToken(); 653 ++ReadTokens; 654 ProbablyBracedList = NextTok->isNot(tok::l_square); 655 } 656 } 657 if (ProbablyBracedList) { 658 Tok->setBlockKind(BK_BracedInit); 659 LBraceStack.back()->setBlockKind(BK_BracedInit); 660 } else { 661 Tok->setBlockKind(BK_Block); 662 LBraceStack.back()->setBlockKind(BK_Block); 663 } 664 } 665 LBraceStack.pop_back(); 666 break; 667 case tok::identifier: 668 if (!Tok->is(TT_StatementMacro)) 669 break; 670 LLVM_FALLTHROUGH; 671 case tok::at: 672 case tok::semi: 673 case tok::kw_if: 674 case tok::kw_while: 675 case tok::kw_for: 676 case tok::kw_switch: 677 case tok::kw_try: 678 case tok::kw___try: 679 if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown)) 680 LBraceStack.back()->setBlockKind(BK_Block); 681 break; 682 default: 683 break; 684 } 685 PrevTok = Tok; 686 Tok = NextTok; 687 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 688 689 // Assume other blocks for all unclosed opening braces. 
690 for (FormatToken *LBrace : LBraceStack) { 691 if (LBrace->is(BK_Unknown)) 692 LBrace->setBlockKind(BK_Block); 693 } 694 695 FormatTok = Tokens->setPosition(StoredPosition); 696 } 697 698 template <class T> 699 static inline void hash_combine(std::size_t &seed, const T &v) { 700 std::hash<T> hasher; 701 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 702 } 703 704 size_t UnwrappedLineParser::computePPHash() const { 705 size_t h = 0; 706 for (const auto &i : PPStack) { 707 hash_combine(h, size_t(i.Kind)); 708 hash_combine(h, i.Line); 709 } 710 return h; 711 } 712 713 UnwrappedLineParser::IfStmtKind 714 UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels, 715 bool MunchSemi, 716 bool UnindentWhitesmithsBraces) { 717 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 718 "'{' or macro block token expected"); 719 FormatToken *Tok = FormatTok; 720 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 721 FormatTok->setBlockKind(BK_Block); 722 723 // For Whitesmiths mode, jump to the next level prior to skipping over the 724 // braces. 725 if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 726 ++Line->Level; 727 728 size_t PPStartHash = computePPHash(); 729 730 unsigned InitialLevel = Line->Level; 731 nextToken(/*LevelDifference=*/AddLevels); 732 733 if (MacroBlock && FormatTok->is(tok::l_paren)) 734 parseParens(); 735 736 size_t NbPreprocessorDirectives = 737 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; 738 addUnwrappedLine(); 739 size_t OpeningLineIndex = 740 CurrentLines->empty() 741 ? (UnwrappedLine::kInvalidIndex) 742 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 743 744 // Whitesmiths is weird here. The brace needs to be indented for the namespace 745 // block, but the block itself may not be indented depending on the style 746 // settings. This allows the format to back up one level in those cases. 
747 if (UnindentWhitesmithsBraces) 748 --Line->Level; 749 750 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 751 MustBeDeclaration); 752 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths) 753 Line->Level += AddLevels; 754 755 IfStmtKind IfKind = IfStmtKind::NotIf; 756 const bool SimpleBlock = parseLevel(/*HasOpeningBrace=*/true, &IfKind); 757 758 if (eof()) 759 return IfKind; 760 761 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 762 : !FormatTok->is(tok::r_brace)) { 763 Line->Level = InitialLevel; 764 FormatTok->setBlockKind(BK_Block); 765 return IfKind; 766 } 767 768 if (SimpleBlock && Tok->is(tok::l_brace)) { 769 assert(FormatTok->is(tok::r_brace)); 770 const FormatToken *Previous = Tokens->getPreviousToken(); 771 assert(Previous); 772 if (Previous->isNot(tok::r_brace) || Previous->Optional) { 773 Tok->MatchingParen = FormatTok; 774 FormatTok->MatchingParen = Tok; 775 } 776 } 777 778 size_t PPEndHash = computePPHash(); 779 780 // Munch the closing brace. 781 nextToken(/*LevelDifference=*/-AddLevels); 782 783 if (MacroBlock && FormatTok->is(tok::l_paren)) 784 parseParens(); 785 786 if (FormatTok->is(tok::arrow)) { 787 // Following the } we can find a trailing return type arrow 788 // as part of an implicit conversion constraint. 789 nextToken(); 790 parseStructuralElement(); 791 } 792 793 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 794 nextToken(); 795 796 Line->Level = InitialLevel; 797 798 if (PPStartHash == PPEndHash) { 799 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 800 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 801 // Update the opening line to add the forward reference as well 802 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 803 CurrentLines->size() - 1; 804 } 805 } 806 807 return IfKind; 808 } 809 810 static bool isGoogScope(const UnwrappedLine &Line) { 811 // FIXME: Closure-library specific stuff should not be hard-coded but be 812 // configurable. 
813 if (Line.Tokens.size() < 4) 814 return false; 815 auto I = Line.Tokens.begin(); 816 if (I->Tok->TokenText != "goog") 817 return false; 818 ++I; 819 if (I->Tok->isNot(tok::period)) 820 return false; 821 ++I; 822 if (I->Tok->TokenText != "scope") 823 return false; 824 ++I; 825 return I->Tok->is(tok::l_paren); 826 } 827 828 static bool isIIFE(const UnwrappedLine &Line, 829 const AdditionalKeywords &Keywords) { 830 // Look for the start of an immediately invoked anonymous function. 831 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 832 // This is commonly done in JavaScript to create a new, anonymous scope. 833 // Example: (function() { ... })() 834 if (Line.Tokens.size() < 3) 835 return false; 836 auto I = Line.Tokens.begin(); 837 if (I->Tok->isNot(tok::l_paren)) 838 return false; 839 ++I; 840 if (I->Tok->isNot(Keywords.kw_function)) 841 return false; 842 ++I; 843 return I->Tok->is(tok::l_paren); 844 } 845 846 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 847 const FormatToken &InitialToken) { 848 if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro)) 849 return Style.BraceWrapping.AfterNamespace; 850 if (InitialToken.is(tok::kw_class)) 851 return Style.BraceWrapping.AfterClass; 852 if (InitialToken.is(tok::kw_union)) 853 return Style.BraceWrapping.AfterUnion; 854 if (InitialToken.is(tok::kw_struct)) 855 return Style.BraceWrapping.AfterStruct; 856 if (InitialToken.is(tok::kw_enum)) 857 return Style.BraceWrapping.AfterEnum; 858 return false; 859 } 860 861 void UnwrappedLineParser::parseChildBlock() { 862 FormatTok->setBlockKind(BK_Block); 863 nextToken(); 864 { 865 bool SkipIndent = (Style.isJavaScript() && 866 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 867 ScopedLineState LineState(*this); 868 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 869 /*MustBeDeclaration=*/false); 870 Line->Level += SkipIndent ? 
0 : 1; 871 parseLevel(/*HasOpeningBrace=*/true); 872 flushComments(isOnNewLine(*FormatTok)); 873 Line->Level -= SkipIndent ? 0 : 1; 874 } 875 nextToken(); 876 } 877 878 void UnwrappedLineParser::parsePPDirective() { 879 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 880 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 881 882 nextToken(); 883 884 if (!FormatTok->Tok.getIdentifierInfo()) { 885 parsePPUnknown(); 886 return; 887 } 888 889 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 890 case tok::pp_define: 891 parsePPDefine(); 892 return; 893 case tok::pp_if: 894 parsePPIf(/*IfDef=*/false); 895 break; 896 case tok::pp_ifdef: 897 case tok::pp_ifndef: 898 parsePPIf(/*IfDef=*/true); 899 break; 900 case tok::pp_else: 901 parsePPElse(); 902 break; 903 case tok::pp_elifdef: 904 case tok::pp_elifndef: 905 case tok::pp_elif: 906 parsePPElIf(); 907 break; 908 case tok::pp_endif: 909 parsePPEndIf(); 910 break; 911 default: 912 parsePPUnknown(); 913 break; 914 } 915 } 916 917 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 918 size_t Line = CurrentLines->size(); 919 if (CurrentLines == &PreprocessorDirectives) 920 Line += Lines.size(); 921 922 if (Unreachable || 923 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) 924 PPStack.push_back({PP_Unreachable, Line}); 925 else 926 PPStack.push_back({PP_Conditional, Line}); 927 } 928 929 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 930 ++PPBranchLevel; 931 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 932 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 933 PPLevelBranchIndex.push_back(0); 934 PPLevelBranchCount.push_back(0); 935 } 936 PPChainBranchIndex.push(0); 937 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 938 conditionalCompilationCondition(Unreachable || Skip); 939 } 940 941 void UnwrappedLineParser::conditionalCompilationAlternative() { 942 if (!PPStack.empty()) 943 
PPStack.pop_back(); 944 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 945 if (!PPChainBranchIndex.empty()) 946 ++PPChainBranchIndex.top(); 947 conditionalCompilationCondition( 948 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 949 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 950 } 951 952 void UnwrappedLineParser::conditionalCompilationEnd() { 953 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 954 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 955 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 956 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 957 } 958 } 959 // Guard against #endif's without #if. 960 if (PPBranchLevel > -1) 961 --PPBranchLevel; 962 if (!PPChainBranchIndex.empty()) 963 PPChainBranchIndex.pop(); 964 if (!PPStack.empty()) 965 PPStack.pop_back(); 966 } 967 968 void UnwrappedLineParser::parsePPIf(bool IfDef) { 969 bool IfNDef = FormatTok->is(tok::pp_ifndef); 970 nextToken(); 971 bool Unreachable = false; 972 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 973 Unreachable = true; 974 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 975 Unreachable = true; 976 conditionalCompilationStart(Unreachable); 977 FormatToken *IfCondition = FormatTok; 978 // If there's a #ifndef on the first line, and the only lines before it are 979 // comments, it could be an include guard. 
980 bool MaybeIncludeGuard = IfNDef; 981 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) 982 for (auto &Line : Lines) { 983 if (!Line.Tokens.front().Tok->is(tok::comment)) { 984 MaybeIncludeGuard = false; 985 IncludeGuard = IG_Rejected; 986 break; 987 } 988 } 989 --PPBranchLevel; 990 parsePPUnknown(); 991 ++PPBranchLevel; 992 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 993 IncludeGuard = IG_IfNdefed; 994 IncludeGuardToken = IfCondition; 995 } 996 } 997 998 void UnwrappedLineParser::parsePPElse() { 999 // If a potential include guard has an #else, it's not an include guard. 1000 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 1001 IncludeGuard = IG_Rejected; 1002 conditionalCompilationAlternative(); 1003 if (PPBranchLevel > -1) 1004 --PPBranchLevel; 1005 parsePPUnknown(); 1006 ++PPBranchLevel; 1007 } 1008 1009 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 1010 1011 void UnwrappedLineParser::parsePPEndIf() { 1012 conditionalCompilationEnd(); 1013 parsePPUnknown(); 1014 // If the #endif of a potential include guard is the last thing in the file, 1015 // then we found an include guard. 
1016 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && 1017 Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1018 IncludeGuard = IG_Found; 1019 } 1020 1021 void UnwrappedLineParser::parsePPDefine() { 1022 nextToken(); 1023 1024 if (!FormatTok->Tok.getIdentifierInfo()) { 1025 IncludeGuard = IG_Rejected; 1026 IncludeGuardToken = nullptr; 1027 parsePPUnknown(); 1028 return; 1029 } 1030 1031 if (IncludeGuard == IG_IfNdefed && 1032 IncludeGuardToken->TokenText == FormatTok->TokenText) { 1033 IncludeGuard = IG_Defined; 1034 IncludeGuardToken = nullptr; 1035 for (auto &Line : Lines) { 1036 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 1037 IncludeGuard = IG_Rejected; 1038 break; 1039 } 1040 } 1041 } 1042 1043 nextToken(); 1044 if (FormatTok->Tok.getKind() == tok::l_paren && 1045 !FormatTok->hasWhitespaceBefore()) { 1046 parseParens(); 1047 } 1048 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1049 Line->Level += PPBranchLevel + 1; 1050 addUnwrappedLine(); 1051 ++Line->Level; 1052 1053 // Errors during a preprocessor directive can only affect the layout of the 1054 // preprocessor directive, and thus we ignore them. An alternative approach 1055 // would be to use the same approach we use on the file level (no 1056 // re-indentation if there was a structural error) within the macro 1057 // definition. 1058 parseFile(); 1059 } 1060 1061 void UnwrappedLineParser::parsePPUnknown() { 1062 do { 1063 nextToken(); 1064 } while (!eof()); 1065 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1066 Line->Level += PPBranchLevel + 1; 1067 addUnwrappedLine(); 1068 } 1069 1070 // Here we exclude certain tokens that are not usually the first token in an 1071 // unwrapped line. This is used in attempt to distinguish macro calls without 1072 // trailing semicolons from other constructs split to several lines. 
1073 static bool tokenCanStartNewLine(const FormatToken &Tok) { 1074 // Semicolon can be a null-statement, l_square can be a start of a macro or 1075 // a C++11 attribute, but this doesn't seem to be common. 1076 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 1077 Tok.isNot(TT_AttributeSquare) && 1078 // Tokens that can only be used as binary operators and a part of 1079 // overloaded operator names. 1080 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 1081 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 1082 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 1083 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 1084 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 1085 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 1086 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 1087 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 1088 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 1089 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 1090 Tok.isNot(tok::lesslessequal) && 1091 // Colon is used in labels, base class lists, initializer lists, 1092 // range-based for loops, ternary operator, but should never be the 1093 // first token in an unwrapped line. 1094 Tok.isNot(tok::colon) && 1095 // 'noexcept' is a trailing annotation. 1096 Tok.isNot(tok::kw_noexcept); 1097 } 1098 1099 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 1100 const FormatToken *FormatTok) { 1101 // FIXME: This returns true for C/C++ keywords like 'struct'. 
1102 return FormatTok->is(tok::identifier) && 1103 (FormatTok->Tok.getIdentifierInfo() == nullptr || 1104 !FormatTok->isOneOf( 1105 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 1106 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 1107 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 1108 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 1109 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 1110 Keywords.kw_instanceof, Keywords.kw_interface, 1111 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from)); 1112 } 1113 1114 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 1115 const FormatToken *FormatTok) { 1116 return FormatTok->Tok.isLiteral() || 1117 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 1118 mustBeJSIdent(Keywords, FormatTok); 1119 } 1120 1121 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 1122 // when encountered after a value (see mustBeJSIdentOrValue). 1123 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 1124 const FormatToken *FormatTok) { 1125 return FormatTok->isOneOf( 1126 tok::kw_return, Keywords.kw_yield, 1127 // conditionals 1128 tok::kw_if, tok::kw_else, 1129 // loops 1130 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 1131 // switch/case 1132 tok::kw_switch, tok::kw_case, 1133 // exceptions 1134 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 1135 // declaration 1136 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 1137 Keywords.kw_async, Keywords.kw_function, 1138 // import/export 1139 Keywords.kw_import, tok::kw_export); 1140 } 1141 1142 // Checks whether a token is a type in K&R C (aka C78). 
1143 static bool isC78Type(const FormatToken &Tok) { 1144 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long, 1145 tok::kw_unsigned, tok::kw_float, tok::kw_double, 1146 tok::identifier); 1147 } 1148 1149 // This function checks whether a token starts the first parameter declaration 1150 // in a K&R C (aka C78) function definition, e.g.: 1151 // int f(a, b) 1152 // short a, b; 1153 // { 1154 // return a + b; 1155 // } 1156 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, 1157 const FormatToken *FuncName) { 1158 assert(Tok); 1159 assert(Next); 1160 assert(FuncName); 1161 1162 if (FuncName->isNot(tok::identifier)) 1163 return false; 1164 1165 const FormatToken *Prev = FuncName->Previous; 1166 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev))) 1167 return false; 1168 1169 if (!isC78Type(*Tok) && 1170 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) 1171 return false; 1172 1173 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo()) 1174 return false; 1175 1176 Tok = Tok->Previous; 1177 if (!Tok || Tok->isNot(tok::r_paren)) 1178 return false; 1179 1180 Tok = Tok->Previous; 1181 if (!Tok || Tok->isNot(tok::identifier)) 1182 return false; 1183 1184 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma); 1185 } 1186 1187 void UnwrappedLineParser::parseModuleImport() { 1188 nextToken(); 1189 while (!eof()) { 1190 if (FormatTok->is(tok::colon)) { 1191 FormatTok->setType(TT_ModulePartitionColon); 1192 } 1193 // Handle import <foo/bar.h> as we would an include statement. 1194 else if (FormatTok->is(tok::less)) { 1195 nextToken(); 1196 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) { 1197 // Mark tokens up to the trailing line comments as implicit string 1198 // literals. 
1199 if (FormatTok->isNot(tok::comment) && 1200 !FormatTok->TokenText.startswith("//")) 1201 FormatTok->setType(TT_ImplicitStringLiteral); 1202 nextToken(); 1203 } 1204 } 1205 if (FormatTok->is(tok::semi)) { 1206 nextToken(); 1207 break; 1208 } 1209 nextToken(); 1210 } 1211 1212 addUnwrappedLine(); 1213 } 1214 1215 // readTokenWithJavaScriptASI reads the next token and terminates the current 1216 // line if JavaScript Automatic Semicolon Insertion must 1217 // happen between the current token and the next token. 1218 // 1219 // This method is conservative - it cannot cover all edge cases of JavaScript, 1220 // but only aims to correctly handle certain well known cases. It *must not* 1221 // return true in speculative cases. 1222 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 1223 FormatToken *Previous = FormatTok; 1224 readToken(); 1225 FormatToken *Next = FormatTok; 1226 1227 bool IsOnSameLine = 1228 CommentsBeforeNextToken.empty() 1229 ? Next->NewlinesBefore == 0 1230 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 1231 if (IsOnSameLine) 1232 return; 1233 1234 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 1235 bool PreviousStartsTemplateExpr = 1236 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); 1237 if (PreviousMustBeValue || Previous->is(tok::r_paren)) { 1238 // If the line contains an '@' sign, the previous token might be an 1239 // annotation, which can precede another identifier/value. 
bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
      return LineNode.Tok->is(tok::at);
    });
    if (HasAt)
      return;
  }
  // "value\n!" - the '!' on the new line starts a new statement.
  if (Next->is(tok::exclaim) && PreviousMustBeValue)
    return addUnwrappedLine();
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  bool NextEndsTemplateExpr =
      Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
  // A value on the new line that follows a value, ']', ')', '++' or '--'
  // (and is not inside a template string expression) starts a new statement.
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
      (PreviousMustBeValue ||
       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
                         tok::minusminus)))
    return addUnwrappedLine();
  // A declaration/statement keyword after a value or ')' starts a new
  // statement as well.
  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
      isJSDeclOrStmt(Keywords, Next))
    return addUnwrappedLine();
}

/// Parses one structural element (statement, declaration, label, ...)
/// starting at the current token.
///
/// A first switch dispatches on the leading token to dedicated parsers
/// (namespaces, control statements, access specifiers, extern blocks, module
/// imports, macros, ...). Anything that "break"s out of that switch is then
/// consumed token by token by the loop below until a construct that ends the
/// element (';', '}', a function body, ...) is found or the token stream
/// ends.
///
/// \param IfKind forwarded unchanged to parseIfThenElse() for 'if'.
/// \param IsTopLevel enables the K&R C parameter-declaration check after a
///        parenthesized group.
void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
                                                 bool IsTopLevel) {
  // TableGen: 'include "file"' is a complete line of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setType(TT_InlineASMBrace);
      nextToken();
      // Every token inside the asm braces is marked Finalized; only the
      // closing brace is typed and ends the line.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# these are plain modifiers, not "label:"
    // style access specifiers.
    if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
        Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse(IfKind);
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // Only 'extern "..." {' is handled here; other uses of 'extern' fall
    // through to the generic loop.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock)
          addUnwrappedLine();
        // Either we indent or for backwards compatibility we follow the
        // AfterExternBlock style.
        unsigned AddLevels =
            (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
                    (Style.BraceWrapping.AfterExternBlock &&
                     Style.IndentExternBlock ==
                         FormatStyle::IEBS_AfterExternBlock)
                ? 1u
                : 0u;
        parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.isJavaScript()) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.isJavaScript()) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      // Proto: import [public] "file";
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      if (Style.isCpp()) {
        parseModuleImport();
        return;
      }
    }
    // "signals:", "Q_SIGNALS:", "slots:", "Q_SLOTS:" form a line of their
    // own when followed by a colon.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Generic path: consume tokens until something terminates the element.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_concept:
      parseConcept();
      return;
    case tok::kw_requires:
      parseRequires();
      return;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // "typedef NS_ENUM(...)" and friends are parsed as enums.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      if (parseStructLike()) {
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.isJavaScript() && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The closing brace is not consumed here; it only ends the line.
      addUnwrappedLine();
      return;
    case tok::l_paren: {
      parseParens();
      // Break the unwrapped line if a K&R C function definition has a parameter
      // declaration.
      if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
        break;
      if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
        addUnwrappedLine();
        return;
      }
      break;
    }
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^' followed by an optional type, optional parameter list and a
      // braced body (presumably a block literal).
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.Language == FormatStyle::LK_Java &&
            Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
          // If necessary, we could set the type to something different than
          // TT_FunctionLBrace.
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
        } else if (Style.BraceWrapping.AfterFunction) {
          addUnwrappedLine();
        }
        FormatTok->setType(TT_FunctionLBrace);
        parseBlock();
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.isJavaScript()) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the following token, then rewind the token source.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (!mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (FormatTok->is(Keywords.kw_interface)) {
        if (parseStructLike()) {
          return;
        }
        break;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;

      FormatToken *PreviousToken = FormatTok;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.isJavaScript())
        break;

      // Only consider identifiers that are the first token of the line
      // (optionally preceded by a single comment).
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: an all-uppercase name that is either function-like or
        // at least 5 characters long, followed by a line break and a token
        // that may start a line, is treated as a macro invocation.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          PreviousToken->setType(TT_FunctionLikeOrFreestandingMacro);
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      if ((Style.isJavaScript() || Style.isCSharp()) &&
          FormatTok->is(TT_FatArrow)) {
        tryToParseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->setBlockKind(BK_BracedInit);
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

/// Tries to parse a C# property accessor block starting at '{'.
///
/// Returns false without consuming anything when the style is not C#, the
/// brace does not follow an identifier, or no get/set is found by the
/// lookahead.
bool UnwrappedLineParser::tryToParsePropertyAccessor() {
  assert(FormatTok->is(tok::l_brace));
  if (!Style.isCSharp())
    return false;
  // See if it's a property accessor.
  if (FormatTok->Previous->isNot(tok::identifier))
    return false;

  // See if we are inside a property accessor.
  //
  // Record the current token position so that we can advance and
  // reset the current token. `Next` is not set yet so we need
  // another way to advance along the token stream.
  unsigned int StoredPosition = Tokens->getPosition();
  FormatToken *Tok = Tokens->getNextToken();

  // A trivial property accessor is of the form:
  // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
  // Track these as they do not require line breaks to be introduced.
bool HasGetOrSet = false;
  bool IsTrivialPropertyAccessor = true;
  // Lookahead only: walks |Tok| forward through the token source while
  // FormatTok stays on the opening brace.
  while (!eof()) {
    if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
                     tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
                     Keywords.kw_set)) {
      if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
        HasGetOrSet = true;
      Tok = Tokens->getNextToken();
      continue;
    }
    // Anything other than the closing brace means the accessor has a body.
    if (Tok->isNot(tok::r_brace))
      IsTrivialPropertyAccessor = false;
    break;
  }

  if (!HasGetOrSet) {
    Tokens->setPosition(StoredPosition);
    return false;
  }

  // Try to parse the property accessor:
  // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
  Tokens->setPosition(StoredPosition);
  if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
    addUnwrappedLine();
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::r_brace:
      nextToken();
      // "{ get; set; } = value;" - consume the initializer through ';'.
      if (FormatTok->is(tok::equal)) {
        while (!eof() && FormatTok->isNot(tok::semi))
          nextToken();
        nextToken();
      }
      addUnwrappedLine();
      return true;
    case tok::l_brace:
      // Accessor with a block body, parsed one level deeper.
      ++Line->Level;
      parseBlock(/*MustBeDeclaration=*/true);
      addUnwrappedLine();
      --Line->Level;
      break;
    case tok::equal:
      // Expression-bodied accessor: "get => expr;".
      if (FormatTok->is(TT_FatArrow)) {
        ++Line->Level;
        do {
          nextToken();
        } while (!eof() && FormatTok->isNot(tok::semi));
        nextToken();
        addUnwrappedLine();
        --Line->Level;
        break;
      }
      nextToken();
      break;
    default:
      if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
          !IsTrivialPropertyAccessor) {
        // Non-trivial get/set needs to be on its own line.
        addUnwrappedLine();
      }
      nextToken();
    }
  } while (!eof());

  // Unreachable for well-formed code (paired '{' and '}').
  return true;
}

/// Tries to parse a C++ lambda starting at '['.
///
/// Returns false when no lambda introducer could be parsed (always the case
/// for non-C++ styles, where only the '[' is consumed). Returns true once an
/// introducer was consumed, whether or not a lambda body followed; only when
/// a '{' is reached are the '[' and '{' typed as lambda tokens and the body
/// parsed as a child block.
bool UnwrappedLineParser::tryToParseLambda() {
  if (!Style.isCpp()) {
    nextToken();
    return false;
  }
  assert(FormatTok->is(tok::l_square));
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  bool SeenArrow = false;
  bool InTemplateParameterList = false;

  // Skip over everything that may appear between the introducer and the
  // body; any token not handled below aborts the attempt.
  while (FormatTok->isNot(tok::l_brace)) {
    if (FormatTok->isSimpleTypeSpecifier()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      break;
    case tok::l_paren:
      parseParens();
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_class:
    case tok::kw_template:
    case tok::kw_typename:
      assert(FormatTok->Previous);
      if (FormatTok->Previous->is(tok::less))
        InTemplateParameterList = true;
      nextToken();
      break;
    case tok::amp:
    case tok::star:
    case tok::kw_const:
    case tok::comma:
    case tok::less:
    case tok::greater:
    case tok::identifier:
    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_mutable:
    case tok::kw_noexcept:
      nextToken();
      break;
    // Specialization of a template with an integer parameter can contain
    // arithmetic, logical, comparison and ternary operators.
    //
    // FIXME: This also accepts sequences of operators that are not in the scope
    // of a template argument list.
    //
    // In a C++ lambda a template type can only occur after an arrow. We use
    // this as a heuristic to distinguish between Objective-C expressions
    // followed by an `a->b` expression, such as:
    // ([obj func:arg] + a->b)
    // Otherwise the code below would parse as a lambda.
    //
    // FIXME: This heuristic is incorrect for C++20 generic lambdas with
    // explicit template lists: []<bool b = true && false>(U &&u){}
    case tok::plus:
    case tok::minus:
    case tok::exclaim:
    case tok::tilde:
    case tok::slash:
    case tok::percent:
    case tok::lessless:
    case tok::pipe:
    case tok::pipepipe:
    case tok::ampamp:
    case tok::caret:
    case tok::equalequal:
    case tok::exclaimequal:
    case tok::greaterequal:
    case tok::lessequal:
    case tok::question:
    case tok::colon:
    case tok::ellipsis:
    case tok::kw_true:
    case tok::kw_false:
      if (SeenArrow || InTemplateParameterList) {
        nextToken();
        break;
      }
      return true;
    case tok::arrow:
      // This might or might not actually be a lambda arrow (this could be an
      // ObjC method invocation followed by a dereferencing arrow). We might
      // reset this back to TT_Unknown in TokenAnnotator.
      FormatTok->setType(TT_LambdaArrow);
      SeenArrow = true;
      nextToken();
      break;
    default:
      return true;
    }
  }
  FormatTok->setType(TT_LambdaLBrace);
  LSquare.setType(TT_LambdaLSquare);
  parseChildBlock();
  return true;
}

/// Consumes the '[' at the current token and, if the context allows a
/// lambda, everything through the matching ']'.
///
/// Returns false (having consumed only the '[') when the previous token is
/// an identifier, 'operator', 'new', 'delete', '[', a scope-closing token or
/// a simple type specifier, when the '[' starts a structured binding, or
/// when the '[' is immediately followed by another '['.
bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
  const FormatToken *Previous = FormatTok->Previous;
  if (Previous &&
      (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
                         tok::kw_delete, tok::l_square) ||
       FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
       Previous->isSimpleTypeSpecifier())) {
    nextToken();
    return false;
  }
  nextToken();
  // "[[" - presumably an attribute rather than a lambda.
  if (FormatTok->is(tok::l_square)) {
    return false;
  }
  parseSquare(/*LambdaIntroducer=*/true);
  return true;
}

/// Parses a JavaScript function expression: optional 'async', 'function',
/// optional '*', optional name, parameter list, optional TypeScript return
/// type, and the body as a child block. Stops early if no parameter list
/// follows or the declaration ends in ';'.
void UnwrappedLineParser::tryToParseJSFunction() {
  assert(FormatTok->is(Keywords.kw_function) ||
         FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
  if (FormatTok->is(Keywords.kw_async))
    nextToken();
  // Consume "function".
  nextToken();

  // Consume * (generator function). Treat it like C++'s overloaded operators.
  if (FormatTok->is(tok::star)) {
    FormatTok->setType(TT_OverloadedOperator);
    nextToken();
  }

  // Consume function name.
  if (FormatTok->is(tok::identifier))
    nextToken();

  if (FormatTok->isNot(tok::l_paren))
    return;

  // Parse formal parameter list.
  parseParens();

  if (FormatTok->is(tok::colon)) {
    // Parse a type definition.
    nextToken();

    // Eat the type declaration. For braced inline object types, balance braces,
    // otherwise just parse until finding an l_brace for the function body.
    if (FormatTok->is(tok::l_brace))
      tryToParseBracedList();
    else
      while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
        nextToken();
  }

  if (FormatTok->is(tok::semi))
    return;

  parseChildBlock();
}

/// If the brace at the current token is not classified as a block, parses it
/// as a braced list and returns true; returns false (consuming nothing) for
/// a block. Brace kinds are computed on demand when still unknown.
bool UnwrappedLineParser::tryToParseBracedList() {
  if (FormatTok->is(BK_Unknown))
    calculateBraceTypes();
  assert(FormatTok->isNot(BK_Unknown));
  if (FormatTok->is(BK_Block))
    return false;
  nextToken();
  parseBracedList();
  return true;
}

/// Consumes a fat arrow ("=>") and, if a '{' follows, parses it as a child
/// block. Returns true only when a child block was parsed.
bool UnwrappedLineParser::tryToParseChildBlock() {
  assert(Style.isJavaScript() || Style.isCSharp());
  assert(FormatTok->is(TT_FatArrow));
  // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
  // They always start an expression or a child block if followed by a curly
  // brace.
nextToken();
  if (FormatTok->isNot(tok::l_brace))
    return false;
  parseChildBlock();
  return true;
}

/// Parses the contents of a braced list; the opening brace must already have
/// been consumed.
///
/// \param ContinueOnSemicolons keep parsing after a ';' (which is still
///        recorded as an error) instead of returning immediately.
/// \param IsEnum the list is an enum body; when short enums may not stay on
///        a single line, a line break is added after each ',' and before the
///        closing brace.
/// \param ClosingBraceKind token kind that terminates the list.
/// \returns true if the closing token was found and no ';' error occurred;
///          false on error or when the token stream ends first.
bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
                                          bool IsEnum,
                                          tok::TokenKind ClosingBraceKind) {
  bool HasError = false;

  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
  // replace this by using parseAssignmentExpression() inside.
  do {
    if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
        tryToParseChildBlock())
      continue;
    if (Style.isJavaScript()) {
      if (FormatTok->is(Keywords.kw_function) ||
          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
        tryToParseJSFunction();
        continue;
      }
      if (FormatTok->is(tok::l_brace)) {
        // Could be a method inside of a braced list `{a() { return 1; }}`.
        if (tryToParseBracedList())
          continue;
        parseChildBlock();
      }
    }
    if (FormatTok->Tok.getKind() == ClosingBraceKind) {
      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
        addUnwrappedLine();
      nextToken();
      return !HasError;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_square:
      if (Style.isCSharp())
        parseSquare();
      else
        tryToParseLambda();
      break;
    case tok::l_paren:
      parseParens();
      // JavaScript can just have free standing methods and getters/setters in
      // object literals. Detect them by a "{" following ")".
      if (Style.isJavaScript()) {
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }
      break;
    case tok::l_brace:
      // Assume there are no blocks inside a braced init list apart
      // from the ones we explicitly parse out (like lambdas).
      FormatTok->setBlockKind(BK_BracedInit);
      nextToken();
      parseBracedList();
      break;
    case tok::less:
      // In proto files '<' ... '>' delimits a nested list.
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      } else {
        nextToken();
      }
      break;
    case tok::semi:
      // JavaScript (or more precisely TypeScript) can have semicolons in braced
      // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
      // used for error recovery if we have otherwise determined that this is
      // a braced list.
      if (Style.isJavaScript()) {
        nextToken();
        break;
      }
      HasError = true;
      if (!ContinueOnSemicolons)
        return !HasError;
      nextToken();
      break;
    case tok::comma:
      nextToken();
      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
        addUnwrappedLine();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
  return false;
}

/// Parses a parenthesized group starting at '(' through the matching ')',
/// descending into nested parentheses, brackets, braced lists, child blocks
/// and (for JavaScript) class and function expressions.
///
/// Returns early, without consuming it, when a '}' is encountered, and also
/// returns when the token stream ends.
void UnwrappedLineParser::parseParens() {
  assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_paren:
      parseParens();
      // Java: "(...) { ... }" inside parentheses is parsed as a child block.
      if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::r_paren:
      nextToken();
      return;
    case tok::r_brace:
      // A "}" inside parenthesis is an error if there wasn't a matching "{".
      return;
    case tok::l_square:
      tryToParseLambda();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList())
        parseChildBlock();
      break;
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }
      break;
    case tok::equal:
      if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
        tryToParseChildBlock();
      else
        nextToken();
      break;
    case tok::kw_class:
      if (Style.isJavaScript())
        parseRecord(/*ParseAsExpr=*/true);
      else
        nextToken();
      break;
    case tok::identifier:
      if (Style.isJavaScript() &&
          (FormatTok->is(Keywords.kw_function) ||
           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
        tryToParseJSFunction();
      else
        nextToken();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
  if (!LambdaIntroducer) {
    assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
    if (tryToParseLambda())
      return;
  }
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_paren:
      parseParens();
      break;
    case tok::r_square:
      nextToken();
      return;
    case tok::r_brace:
      // A "}" inside parenthesis is an error if there wasn't a matching "{".
2201 return; 2202 case tok::l_square: 2203 parseSquare(); 2204 break; 2205 case tok::l_brace: { 2206 if (!tryToParseBracedList()) 2207 parseChildBlock(); 2208 break; 2209 } 2210 case tok::at: 2211 nextToken(); 2212 if (FormatTok->Tok.is(tok::l_brace)) { 2213 nextToken(); 2214 parseBracedList(); 2215 } 2216 break; 2217 default: 2218 nextToken(); 2219 break; 2220 } 2221 } while (!eof()); 2222 } 2223 2224 void UnwrappedLineParser::keepAncestorBraces() { 2225 if (!Style.RemoveBracesLLVM) 2226 return; 2227 2228 const int MaxNestingLevels = 2; 2229 const int Size = NestedTooDeep.size(); 2230 if (Size >= MaxNestingLevels) 2231 NestedTooDeep[Size - MaxNestingLevels] = true; 2232 NestedTooDeep.push_back(false); 2233 } 2234 2235 static void markOptionalBraces(FormatToken *LeftBrace) { 2236 if (!LeftBrace) 2237 return; 2238 2239 assert(LeftBrace->is(tok::l_brace)); 2240 2241 FormatToken *RightBrace = LeftBrace->MatchingParen; 2242 if (!RightBrace) { 2243 assert(!LeftBrace->Optional); 2244 return; 2245 } 2246 2247 assert(RightBrace->is(tok::r_brace)); 2248 assert(RightBrace->MatchingParen == LeftBrace); 2249 assert(LeftBrace->Optional == RightBrace->Optional); 2250 2251 LeftBrace->Optional = true; 2252 RightBrace->Optional = true; 2253 } 2254 2255 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind, 2256 bool KeepBraces) { 2257 auto HandleAttributes = [this]() { 2258 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`. 2259 if (FormatTok->is(TT_AttributeMacro)) 2260 nextToken(); 2261 // Handle [[likely]] / [[unlikely]] attributes. 
2262 if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) 2263 parseSquare(); 2264 }; 2265 2266 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 2267 nextToken(); 2268 if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier)) 2269 nextToken(); 2270 if (FormatTok->Tok.is(tok::l_paren)) 2271 parseParens(); 2272 HandleAttributes(); 2273 2274 bool NeedsUnwrappedLine = false; 2275 keepAncestorBraces(); 2276 2277 FormatToken *IfLeftBrace = nullptr; 2278 IfStmtKind IfBlockKind = IfStmtKind::NotIf; 2279 2280 if (FormatTok->Tok.is(tok::l_brace)) { 2281 IfLeftBrace = FormatTok; 2282 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2283 IfBlockKind = parseBlock(); 2284 if (Style.BraceWrapping.BeforeElse) 2285 addUnwrappedLine(); 2286 else 2287 NeedsUnwrappedLine = true; 2288 } else { 2289 addUnwrappedLine(); 2290 ++Line->Level; 2291 parseStructuralElement(); 2292 --Line->Level; 2293 } 2294 2295 bool KeepIfBraces = false; 2296 if (Style.RemoveBracesLLVM) { 2297 assert(!NestedTooDeep.empty()); 2298 KeepIfBraces = (IfLeftBrace && !IfLeftBrace->MatchingParen) || 2299 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly || 2300 IfBlockKind == IfStmtKind::IfElseIf; 2301 } 2302 2303 FormatToken *ElseLeftBrace = nullptr; 2304 IfStmtKind Kind = IfStmtKind::IfOnly; 2305 2306 if (FormatTok->Tok.is(tok::kw_else)) { 2307 if (Style.RemoveBracesLLVM) { 2308 NestedTooDeep.back() = false; 2309 Kind = IfStmtKind::IfElse; 2310 } 2311 nextToken(); 2312 HandleAttributes(); 2313 if (FormatTok->Tok.is(tok::l_brace)) { 2314 ElseLeftBrace = FormatTok; 2315 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2316 if (parseBlock() == IfStmtKind::IfOnly) 2317 Kind = IfStmtKind::IfElseIf; 2318 addUnwrappedLine(); 2319 } else if (FormatTok->Tok.is(tok::kw_if)) { 2320 FormatToken *Previous = Tokens->getPreviousToken(); 2321 const bool IsPrecededByComment = Previous && Previous->is(tok::comment); 2322 if (IsPrecededByComment) { 2323 addUnwrappedLine(); 
2324 ++Line->Level; 2325 } 2326 bool TooDeep = true; 2327 if (Style.RemoveBracesLLVM) { 2328 Kind = IfStmtKind::IfElseIf; 2329 TooDeep = NestedTooDeep.pop_back_val(); 2330 } 2331 ElseLeftBrace = 2332 parseIfThenElse(/*IfKind=*/nullptr, KeepBraces || KeepIfBraces); 2333 if (Style.RemoveBracesLLVM) 2334 NestedTooDeep.push_back(TooDeep); 2335 if (IsPrecededByComment) 2336 --Line->Level; 2337 } else { 2338 addUnwrappedLine(); 2339 ++Line->Level; 2340 parseStructuralElement(); 2341 if (FormatTok->is(tok::eof)) 2342 addUnwrappedLine(); 2343 --Line->Level; 2344 } 2345 } else { 2346 if (Style.RemoveBracesLLVM) 2347 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse; 2348 if (NeedsUnwrappedLine) 2349 addUnwrappedLine(); 2350 } 2351 2352 if (!Style.RemoveBracesLLVM) 2353 return nullptr; 2354 2355 assert(!NestedTooDeep.empty()); 2356 const bool KeepElseBraces = 2357 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || NestedTooDeep.back(); 2358 2359 NestedTooDeep.pop_back(); 2360 2361 if (!KeepBraces && !KeepIfBraces && !KeepElseBraces) { 2362 markOptionalBraces(IfLeftBrace); 2363 markOptionalBraces(ElseLeftBrace); 2364 } else if (IfLeftBrace) { 2365 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen; 2366 if (IfRightBrace) { 2367 assert(IfRightBrace->MatchingParen == IfLeftBrace); 2368 assert(!IfLeftBrace->Optional); 2369 assert(!IfRightBrace->Optional); 2370 IfLeftBrace->MatchingParen = nullptr; 2371 IfRightBrace->MatchingParen = nullptr; 2372 } 2373 } 2374 2375 if (IfKind) 2376 *IfKind = Kind; 2377 2378 return IfLeftBrace; 2379 } 2380 2381 void UnwrappedLineParser::parseTryCatch() { 2382 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 2383 nextToken(); 2384 bool NeedsUnwrappedLine = false; 2385 if (FormatTok->is(tok::colon)) { 2386 // We are in a function try block, what comes is an initializer list. 
2387 nextToken(); 2388 2389 // In case identifiers were removed by clang-tidy, what might follow is 2390 // multiple commas in sequence - before the first identifier. 2391 while (FormatTok->is(tok::comma)) 2392 nextToken(); 2393 2394 while (FormatTok->is(tok::identifier)) { 2395 nextToken(); 2396 if (FormatTok->is(tok::l_paren)) 2397 parseParens(); 2398 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) && 2399 FormatTok->is(tok::l_brace)) { 2400 do { 2401 nextToken(); 2402 } while (!FormatTok->is(tok::r_brace)); 2403 nextToken(); 2404 } 2405 2406 // In case identifiers were removed by clang-tidy, what might follow is 2407 // multiple commas in sequence - after the first identifier. 2408 while (FormatTok->is(tok::comma)) 2409 nextToken(); 2410 } 2411 } 2412 // Parse try with resource. 2413 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { 2414 parseParens(); 2415 } 2416 2417 keepAncestorBraces(); 2418 2419 if (FormatTok->is(tok::l_brace)) { 2420 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2421 parseBlock(); 2422 if (Style.BraceWrapping.BeforeCatch) { 2423 addUnwrappedLine(); 2424 } else { 2425 NeedsUnwrappedLine = true; 2426 } 2427 } else if (!FormatTok->is(tok::kw_catch)) { 2428 // The C++ standard requires a compound-statement after a try. 2429 // If there's none, we try to assume there's a structuralElement 2430 // and try to continue. 
2431 addUnwrappedLine(); 2432 ++Line->Level; 2433 parseStructuralElement(); 2434 --Line->Level; 2435 } 2436 while (true) { 2437 if (FormatTok->is(tok::at)) 2438 nextToken(); 2439 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 2440 tok::kw___finally) || 2441 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 2442 FormatTok->is(Keywords.kw_finally)) || 2443 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || 2444 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) 2445 break; 2446 nextToken(); 2447 while (FormatTok->isNot(tok::l_brace)) { 2448 if (FormatTok->is(tok::l_paren)) { 2449 parseParens(); 2450 continue; 2451 } 2452 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) { 2453 if (Style.RemoveBracesLLVM) 2454 NestedTooDeep.pop_back(); 2455 return; 2456 } 2457 nextToken(); 2458 } 2459 NeedsUnwrappedLine = false; 2460 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2461 parseBlock(); 2462 if (Style.BraceWrapping.BeforeCatch) 2463 addUnwrappedLine(); 2464 else 2465 NeedsUnwrappedLine = true; 2466 } 2467 2468 if (Style.RemoveBracesLLVM) 2469 NestedTooDeep.pop_back(); 2470 2471 if (NeedsUnwrappedLine) 2472 addUnwrappedLine(); 2473 } 2474 2475 void UnwrappedLineParser::parseNamespace() { 2476 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) && 2477 "'namespace' expected"); 2478 2479 const FormatToken &InitialToken = *FormatTok; 2480 nextToken(); 2481 if (InitialToken.is(TT_NamespaceMacro)) { 2482 parseParens(); 2483 } else { 2484 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline, 2485 tok::l_square, tok::period) || 2486 (Style.isCSharp() && FormatTok->is(tok::kw_union))) { 2487 if (FormatTok->is(tok::l_square)) 2488 parseSquare(); 2489 else 2490 nextToken(); 2491 } 2492 } 2493 if (FormatTok->Tok.is(tok::l_brace)) { 2494 if (ShouldBreakBeforeBrace(Style, InitialToken)) 2495 addUnwrappedLine(); 2496 2497 unsigned AddLevels = 2498 Style.NamespaceIndentation == 
FormatStyle::NI_All || 2499 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 2500 DeclarationScopeStack.size() > 1) 2501 ? 1u 2502 : 0u; 2503 bool ManageWhitesmithsBraces = 2504 AddLevels == 0u && 2505 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 2506 2507 // If we're in Whitesmiths mode, indent the brace if we're not indenting 2508 // the whole block. 2509 if (ManageWhitesmithsBraces) 2510 ++Line->Level; 2511 2512 parseBlock(/*MustBeDeclaration=*/true, AddLevels, 2513 /*MunchSemi=*/true, 2514 /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces); 2515 2516 // Munch the semicolon after a namespace. This is more common than one would 2517 // think. Putting the semicolon into its own line is very ugly. 2518 if (FormatTok->Tok.is(tok::semi)) 2519 nextToken(); 2520 2521 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep); 2522 2523 if (ManageWhitesmithsBraces) 2524 --Line->Level; 2525 } 2526 // FIXME: Add error handling. 2527 } 2528 2529 void UnwrappedLineParser::parseNew() { 2530 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 2531 nextToken(); 2532 2533 if (Style.isCSharp()) { 2534 do { 2535 if (FormatTok->is(tok::l_brace)) 2536 parseBracedList(); 2537 2538 if (FormatTok->isOneOf(tok::semi, tok::comma)) 2539 return; 2540 2541 nextToken(); 2542 } while (!eof()); 2543 } 2544 2545 if (Style.Language != FormatStyle::LK_Java) 2546 return; 2547 2548 // In Java, we can parse everything up to the parens, which aren't optional. 2549 do { 2550 // There should not be a ;, { or } before the new's open paren. 2551 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 2552 return; 2553 2554 // Consume the parens. 2555 if (FormatTok->is(tok::l_paren)) { 2556 parseParens(); 2557 2558 // If there is a class body of an anonymous class, consume that as child. 
2559 if (FormatTok->is(tok::l_brace)) 2560 parseChildBlock(); 2561 return; 2562 } 2563 nextToken(); 2564 } while (!eof()); 2565 } 2566 2567 void UnwrappedLineParser::parseForOrWhileLoop() { 2568 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 2569 "'for', 'while' or foreach macro expected"); 2570 nextToken(); 2571 // JS' for await ( ... 2572 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await)) 2573 nextToken(); 2574 if (Style.isCpp() && FormatTok->is(tok::kw_co_await)) 2575 nextToken(); 2576 if (FormatTok->Tok.is(tok::l_paren)) 2577 parseParens(); 2578 2579 keepAncestorBraces(); 2580 2581 if (FormatTok->Tok.is(tok::l_brace)) { 2582 FormatToken *LeftBrace = FormatTok; 2583 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2584 parseBlock(); 2585 if (Style.RemoveBracesLLVM) { 2586 assert(!NestedTooDeep.empty()); 2587 if (!NestedTooDeep.back()) 2588 markOptionalBraces(LeftBrace); 2589 } 2590 addUnwrappedLine(); 2591 } else { 2592 addUnwrappedLine(); 2593 ++Line->Level; 2594 parseStructuralElement(); 2595 --Line->Level; 2596 } 2597 2598 if (Style.RemoveBracesLLVM) 2599 NestedTooDeep.pop_back(); 2600 } 2601 2602 void UnwrappedLineParser::parseDoWhile() { 2603 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 2604 nextToken(); 2605 2606 keepAncestorBraces(); 2607 2608 if (FormatTok->Tok.is(tok::l_brace)) { 2609 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2610 parseBlock(); 2611 if (Style.BraceWrapping.BeforeWhile) 2612 addUnwrappedLine(); 2613 } else { 2614 addUnwrappedLine(); 2615 ++Line->Level; 2616 parseStructuralElement(); 2617 --Line->Level; 2618 } 2619 2620 if (Style.RemoveBracesLLVM) 2621 NestedTooDeep.pop_back(); 2622 2623 // FIXME: Add error handling. 2624 if (!FormatTok->Tok.is(tok::kw_while)) { 2625 addUnwrappedLine(); 2626 return; 2627 } 2628 2629 // If in Whitesmiths mode, the line with the while() needs to be indented 2630 // to the same level as the block. 
2631 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 2632 ++Line->Level; 2633 2634 nextToken(); 2635 parseStructuralElement(); 2636 } 2637 2638 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { 2639 nextToken(); 2640 unsigned OldLineLevel = Line->Level; 2641 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 2642 --Line->Level; 2643 if (LeftAlignLabel) 2644 Line->Level = 0; 2645 2646 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && 2647 FormatTok->Tok.is(tok::l_brace)) { 2648 2649 CompoundStatementIndenter Indenter(this, Line->Level, 2650 Style.BraceWrapping.AfterCaseLabel, 2651 Style.BraceWrapping.IndentBraces); 2652 parseBlock(); 2653 if (FormatTok->Tok.is(tok::kw_break)) { 2654 if (Style.BraceWrapping.AfterControlStatement == 2655 FormatStyle::BWACS_Always) { 2656 addUnwrappedLine(); 2657 if (!Style.IndentCaseBlocks && 2658 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 2659 ++Line->Level; 2660 } 2661 } 2662 parseStructuralElement(); 2663 } 2664 addUnwrappedLine(); 2665 } else { 2666 if (FormatTok->is(tok::semi)) 2667 nextToken(); 2668 addUnwrappedLine(); 2669 } 2670 Line->Level = OldLineLevel; 2671 if (FormatTok->isNot(tok::l_brace)) { 2672 parseStructuralElement(); 2673 addUnwrappedLine(); 2674 } 2675 } 2676 2677 void UnwrappedLineParser::parseCaseLabel() { 2678 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 2679 2680 // FIXME: fix handling of complex expressions here. 
2681 do { 2682 nextToken(); 2683 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 2684 parseLabel(); 2685 } 2686 2687 void UnwrappedLineParser::parseSwitch() { 2688 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 2689 nextToken(); 2690 if (FormatTok->Tok.is(tok::l_paren)) 2691 parseParens(); 2692 2693 keepAncestorBraces(); 2694 2695 if (FormatTok->Tok.is(tok::l_brace)) { 2696 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2697 parseBlock(); 2698 addUnwrappedLine(); 2699 } else { 2700 addUnwrappedLine(); 2701 ++Line->Level; 2702 parseStructuralElement(); 2703 --Line->Level; 2704 } 2705 2706 if (Style.RemoveBracesLLVM) 2707 NestedTooDeep.pop_back(); 2708 } 2709 2710 void UnwrappedLineParser::parseAccessSpecifier() { 2711 FormatToken *AccessSpecifierCandidate = FormatTok; 2712 nextToken(); 2713 // Understand Qt's slots. 2714 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 2715 nextToken(); 2716 // Otherwise, we don't know what it is, and we'd better keep the next token. 2717 if (FormatTok->Tok.is(tok::colon)) { 2718 nextToken(); 2719 addUnwrappedLine(); 2720 } else if (!FormatTok->Tok.is(tok::coloncolon) && 2721 !std::binary_search(COperatorsFollowingVar.begin(), 2722 COperatorsFollowingVar.end(), 2723 FormatTok->Tok.getKind())) { 2724 // Not a variable name nor namespace name. 2725 addUnwrappedLine(); 2726 } else if (AccessSpecifierCandidate) { 2727 // Consider the access specifier to be a C identifier. 
2728 AccessSpecifierCandidate->Tok.setKind(tok::identifier); 2729 } 2730 } 2731 2732 void UnwrappedLineParser::parseConcept() { 2733 assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected"); 2734 nextToken(); 2735 if (!FormatTok->Tok.is(tok::identifier)) 2736 return; 2737 nextToken(); 2738 if (!FormatTok->Tok.is(tok::equal)) 2739 return; 2740 nextToken(); 2741 if (FormatTok->Tok.is(tok::kw_requires)) { 2742 nextToken(); 2743 parseRequiresExpression(Line->Level); 2744 } else { 2745 parseConstraintExpression(Line->Level); 2746 } 2747 } 2748 2749 void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) { 2750 // requires (R range) 2751 if (FormatTok->Tok.is(tok::l_paren)) { 2752 parseParens(); 2753 if (Style.IndentRequires && OriginalLevel != Line->Level) { 2754 addUnwrappedLine(); 2755 --Line->Level; 2756 } 2757 } 2758 2759 if (FormatTok->Tok.is(tok::l_brace)) { 2760 if (Style.BraceWrapping.AfterFunction) 2761 addUnwrappedLine(); 2762 FormatTok->setType(TT_FunctionLBrace); 2763 parseBlock(); 2764 addUnwrappedLine(); 2765 } else { 2766 parseConstraintExpression(OriginalLevel); 2767 } 2768 } 2769 2770 void UnwrappedLineParser::parseConstraintExpression( 2771 unsigned int OriginalLevel) { 2772 // requires Id<T> && Id<T> || Id<T> 2773 while ( 2774 FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) { 2775 nextToken(); 2776 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less, 2777 tok::greater, tok::comma, tok::ellipsis)) { 2778 if (FormatTok->Tok.is(tok::less)) { 2779 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 2780 /*ClosingBraceKind=*/tok::greater); 2781 continue; 2782 } 2783 nextToken(); 2784 } 2785 if (FormatTok->Tok.is(tok::kw_requires)) { 2786 parseRequiresExpression(OriginalLevel); 2787 } 2788 if (FormatTok->Tok.is(tok::less)) { 2789 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 2790 /*ClosingBraceKind=*/tok::greater); 2791 } 2792 2793 if 
(FormatTok->Tok.is(tok::l_paren)) { 2794 parseParens(); 2795 } 2796 if (FormatTok->Tok.is(tok::l_brace)) { 2797 if (Style.BraceWrapping.AfterFunction) 2798 addUnwrappedLine(); 2799 FormatTok->setType(TT_FunctionLBrace); 2800 parseBlock(); 2801 } 2802 if (FormatTok->Tok.is(tok::semi)) { 2803 // Eat any trailing semi. 2804 nextToken(); 2805 addUnwrappedLine(); 2806 } 2807 if (FormatTok->Tok.is(tok::colon)) { 2808 return; 2809 } 2810 if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) { 2811 if (FormatTok->Previous && 2812 !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires, 2813 tok::coloncolon)) { 2814 addUnwrappedLine(); 2815 } 2816 if (Style.IndentRequires && OriginalLevel != Line->Level) { 2817 --Line->Level; 2818 } 2819 break; 2820 } else { 2821 FormatTok->setType(TT_ConstraintJunctions); 2822 } 2823 2824 nextToken(); 2825 } 2826 } 2827 2828 void UnwrappedLineParser::parseRequires() { 2829 assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected"); 2830 2831 unsigned OriginalLevel = Line->Level; 2832 if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) { 2833 addUnwrappedLine(); 2834 if (Style.IndentRequires) { 2835 ++Line->Level; 2836 } 2837 } 2838 nextToken(); 2839 2840 parseRequiresExpression(OriginalLevel); 2841 } 2842 2843 bool UnwrappedLineParser::parseEnum() { 2844 const FormatToken &InitialToken = *FormatTok; 2845 2846 // Won't be 'enum' for NS_ENUMs. 2847 if (FormatTok->Tok.is(tok::kw_enum)) 2848 nextToken(); 2849 2850 // In TypeScript, "enum" can also be used as property name, e.g. in interface 2851 // declarations. An "enum" keyword followed by a colon would be a syntax 2852 // error and thus assume it is just an identifier. 2853 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question)) 2854 return false; 2855 2856 // In protobuf, "enum" can be used as a field name. 
2857 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) 2858 return false; 2859 2860 // Eat up enum class ... 2861 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 2862 nextToken(); 2863 2864 while (FormatTok->Tok.getIdentifierInfo() || 2865 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 2866 tok::greater, tok::comma, tok::question)) { 2867 nextToken(); 2868 // We can have macros or attributes in between 'enum' and the enum name. 2869 if (FormatTok->is(tok::l_paren)) 2870 parseParens(); 2871 if (FormatTok->is(tok::identifier)) { 2872 nextToken(); 2873 // If there are two identifiers in a row, this is likely an elaborate 2874 // return type. In Java, this can be "implements", etc. 2875 if (Style.isCpp() && FormatTok->is(tok::identifier)) 2876 return false; 2877 } 2878 } 2879 2880 // Just a declaration or something is wrong. 2881 if (FormatTok->isNot(tok::l_brace)) 2882 return true; 2883 FormatTok->setType(TT_RecordLBrace); 2884 FormatTok->setBlockKind(BK_Block); 2885 2886 if (Style.Language == FormatStyle::LK_Java) { 2887 // Java enums are different. 2888 parseJavaEnumBody(); 2889 return true; 2890 } 2891 if (Style.Language == FormatStyle::LK_Proto) { 2892 parseBlock(/*MustBeDeclaration=*/true); 2893 return true; 2894 } 2895 2896 if (!Style.AllowShortEnumsOnASingleLine && 2897 ShouldBreakBeforeBrace(Style, InitialToken)) 2898 addUnwrappedLine(); 2899 // Parse enum body. 2900 nextToken(); 2901 if (!Style.AllowShortEnumsOnASingleLine) { 2902 addUnwrappedLine(); 2903 Line->Level += 1; 2904 } 2905 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true, 2906 /*IsEnum=*/true); 2907 if (!Style.AllowShortEnumsOnASingleLine) 2908 Line->Level -= 1; 2909 if (HasError) { 2910 if (FormatTok->is(tok::semi)) 2911 nextToken(); 2912 addUnwrappedLine(); 2913 } 2914 return true; 2915 2916 // There is no addUnwrappedLine() here so that we fall through to parsing a 2917 // structural element afterwards. 
Thus, in "enum A {} n, m;", 2918 // "} n, m;" will end up in one unwrapped line. 2919 } 2920 2921 bool UnwrappedLineParser::parseStructLike() { 2922 // parseRecord falls through and does not yet add an unwrapped line as a 2923 // record declaration or definition can start a structural element. 2924 parseRecord(); 2925 // This does not apply to Java, JavaScript and C#. 2926 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 2927 Style.isCSharp()) { 2928 if (FormatTok->is(tok::semi)) 2929 nextToken(); 2930 addUnwrappedLine(); 2931 return true; 2932 } 2933 return false; 2934 } 2935 2936 namespace { 2937 // A class used to set and restore the Token position when peeking 2938 // ahead in the token source. 2939 class ScopedTokenPosition { 2940 unsigned StoredPosition; 2941 FormatTokenSource *Tokens; 2942 2943 public: 2944 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) { 2945 assert(Tokens && "Tokens expected to not be null"); 2946 StoredPosition = Tokens->getPosition(); 2947 } 2948 2949 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); } 2950 }; 2951 } // namespace 2952 2953 // Look to see if we have [[ by looking ahead, if 2954 // its not then rewind to the original position. 2955 bool UnwrappedLineParser::tryToParseSimpleAttribute() { 2956 ScopedTokenPosition AutoPosition(Tokens); 2957 FormatToken *Tok = Tokens->getNextToken(); 2958 // We already read the first [ check for the second. 2959 if (!Tok->is(tok::l_square)) { 2960 return false; 2961 } 2962 // Double check that the attribute is just something 2963 // fairly simple. 
2964 while (Tok->isNot(tok::eof)) { 2965 if (Tok->is(tok::r_square)) { 2966 break; 2967 } 2968 Tok = Tokens->getNextToken(); 2969 } 2970 if (Tok->is(tok::eof)) 2971 return false; 2972 Tok = Tokens->getNextToken(); 2973 if (!Tok->is(tok::r_square)) { 2974 return false; 2975 } 2976 Tok = Tokens->getNextToken(); 2977 if (Tok->is(tok::semi)) { 2978 return false; 2979 } 2980 return true; 2981 } 2982 2983 void UnwrappedLineParser::parseJavaEnumBody() { 2984 // Determine whether the enum is simple, i.e. does not have a semicolon or 2985 // constants with class bodies. Simple enums can be formatted like braced 2986 // lists, contracted to a single line, etc. 2987 unsigned StoredPosition = Tokens->getPosition(); 2988 bool IsSimple = true; 2989 FormatToken *Tok = Tokens->getNextToken(); 2990 while (!Tok->is(tok::eof)) { 2991 if (Tok->is(tok::r_brace)) 2992 break; 2993 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 2994 IsSimple = false; 2995 break; 2996 } 2997 // FIXME: This will also mark enums with braces in the arguments to enum 2998 // constants as "not simple". This is probably fine in practice, though. 2999 Tok = Tokens->getNextToken(); 3000 } 3001 FormatTok = Tokens->setPosition(StoredPosition); 3002 3003 if (IsSimple) { 3004 nextToken(); 3005 parseBracedList(); 3006 addUnwrappedLine(); 3007 return; 3008 } 3009 3010 // Parse the body of a more complex enum. 3011 // First add a line for everything up to the "{". 3012 nextToken(); 3013 addUnwrappedLine(); 3014 ++Line->Level; 3015 3016 // Parse the enum constants. 3017 while (FormatTok) { 3018 if (FormatTok->is(tok::l_brace)) { 3019 // Parse the constant's class body. 
3020 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u, 3021 /*MunchSemi=*/false); 3022 } else if (FormatTok->is(tok::l_paren)) { 3023 parseParens(); 3024 } else if (FormatTok->is(tok::comma)) { 3025 nextToken(); 3026 addUnwrappedLine(); 3027 } else if (FormatTok->is(tok::semi)) { 3028 nextToken(); 3029 addUnwrappedLine(); 3030 break; 3031 } else if (FormatTok->is(tok::r_brace)) { 3032 addUnwrappedLine(); 3033 break; 3034 } else { 3035 nextToken(); 3036 } 3037 } 3038 3039 // Parse the class body after the enum's ";" if any. 3040 parseLevel(/*HasOpeningBrace=*/true); 3041 nextToken(); 3042 --Line->Level; 3043 addUnwrappedLine(); 3044 } 3045 3046 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 3047 const FormatToken &InitialToken = *FormatTok; 3048 nextToken(); 3049 3050 // The actual identifier can be a nested name specifier, and in macros 3051 // it is often token-pasted. 3052 // An [[attribute]] can be before the identifier. 3053 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 3054 tok::kw___attribute, tok::kw___declspec, 3055 tok::kw_alignas, tok::l_square, tok::r_square) || 3056 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 3057 FormatTok->isOneOf(tok::period, tok::comma))) { 3058 if (Style.isJavaScript() && 3059 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 3060 // JavaScript/TypeScript supports inline object types in 3061 // extends/implements positions: 3062 // class Foo implements {bar: number} { } 3063 nextToken(); 3064 if (FormatTok->is(tok::l_brace)) { 3065 tryToParseBracedList(); 3066 continue; 3067 } 3068 } 3069 bool IsNonMacroIdentifier = 3070 FormatTok->is(tok::identifier) && 3071 FormatTok->TokenText != FormatTok->TokenText.upper(); 3072 nextToken(); 3073 // We can have macros or attributes in between 'class' and the class name. 
3074 if (!IsNonMacroIdentifier) { 3075 if (FormatTok->Tok.is(tok::l_paren)) { 3076 parseParens(); 3077 } else if (FormatTok->is(TT_AttributeSquare)) { 3078 parseSquare(); 3079 // Consume the closing TT_AttributeSquare. 3080 if (FormatTok->Next && FormatTok->is(TT_AttributeSquare)) 3081 nextToken(); 3082 } 3083 } 3084 } 3085 3086 // Note that parsing away template declarations here leads to incorrectly 3087 // accepting function declarations as record declarations. 3088 // In general, we cannot solve this problem. Consider: 3089 // class A<int> B() {} 3090 // which can be a function definition or a class definition when B() is a 3091 // macro. If we find enough real-world cases where this is a problem, we 3092 // can parse for the 'template' keyword in the beginning of the statement, 3093 // and thus rule out the record production in case there is no template 3094 // (this would still leave us with an ambiguity between template function 3095 // and class declarations). 3096 if (FormatTok->isOneOf(tok::colon, tok::less)) { 3097 while (!eof()) { 3098 if (FormatTok->is(tok::l_brace)) { 3099 calculateBraceTypes(/*ExpectClassBody=*/true); 3100 if (!tryToParseBracedList()) 3101 break; 3102 } 3103 if (FormatTok->is(tok::l_square)) { 3104 FormatToken *Previous = FormatTok->Previous; 3105 if (!Previous || 3106 !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) { 3107 // Don't try parsing a lambda if we had a closing parenthesis before, 3108 // it was probably a pointer to an array: int (*)[]. 
3109 if (!tryToParseLambda()) 3110 break; 3111 } 3112 } 3113 if (FormatTok->Tok.is(tok::semi)) 3114 return; 3115 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) { 3116 addUnwrappedLine(); 3117 nextToken(); 3118 parseCSharpGenericTypeConstraint(); 3119 break; 3120 } 3121 nextToken(); 3122 } 3123 } 3124 if (FormatTok->Tok.is(tok::l_brace)) { 3125 FormatTok->setType(TT_RecordLBrace); 3126 if (ParseAsExpr) { 3127 parseChildBlock(); 3128 } else { 3129 if (ShouldBreakBeforeBrace(Style, InitialToken)) 3130 addUnwrappedLine(); 3131 3132 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u; 3133 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false); 3134 } 3135 } 3136 // There is no addUnwrappedLine() here so that we fall through to parsing a 3137 // structural element afterwards. Thus, in "class A {} n, m;", 3138 // "} n, m;" will end up in one unwrapped line. 3139 } 3140 3141 void UnwrappedLineParser::parseObjCMethod() { 3142 assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) && 3143 "'(' or identifier expected."); 3144 do { 3145 if (FormatTok->Tok.is(tok::semi)) { 3146 nextToken(); 3147 addUnwrappedLine(); 3148 return; 3149 } else if (FormatTok->Tok.is(tok::l_brace)) { 3150 if (Style.BraceWrapping.AfterFunction) 3151 addUnwrappedLine(); 3152 parseBlock(); 3153 addUnwrappedLine(); 3154 return; 3155 } else { 3156 nextToken(); 3157 } 3158 } while (!eof()); 3159 } 3160 3161 void UnwrappedLineParser::parseObjCProtocolList() { 3162 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 3163 do { 3164 nextToken(); 3165 // Early exit in case someone forgot a close angle. 3166 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 3167 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 3168 return; 3169 } while (!eof() && FormatTok->Tok.isNot(tok::greater)); 3170 nextToken(); // Skip '>'. 
3171 } 3172 3173 void UnwrappedLineParser::parseObjCUntilAtEnd() { 3174 do { 3175 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 3176 nextToken(); 3177 addUnwrappedLine(); 3178 break; 3179 } 3180 if (FormatTok->is(tok::l_brace)) { 3181 parseBlock(); 3182 // In ObjC interfaces, nothing should be following the "}". 3183 addUnwrappedLine(); 3184 } else if (FormatTok->is(tok::r_brace)) { 3185 // Ignore stray "}". parseStructuralElement doesn't consume them. 3186 nextToken(); 3187 addUnwrappedLine(); 3188 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 3189 nextToken(); 3190 parseObjCMethod(); 3191 } else { 3192 parseStructuralElement(); 3193 } 3194 } while (!eof()); 3195 } 3196 3197 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 3198 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 3199 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 3200 nextToken(); 3201 nextToken(); // interface name 3202 3203 // @interface can be followed by a lightweight generic 3204 // specialization list, then either a base class or a category. 3205 if (FormatTok->Tok.is(tok::less)) { 3206 parseObjCLightweightGenerics(); 3207 } 3208 if (FormatTok->Tok.is(tok::colon)) { 3209 nextToken(); 3210 nextToken(); // base class name 3211 // The base class can also have lightweight generics applied to it. 3212 if (FormatTok->Tok.is(tok::less)) { 3213 parseObjCLightweightGenerics(); 3214 } 3215 } else if (FormatTok->Tok.is(tok::l_paren)) 3216 // Skip category, if present. 3217 parseParens(); 3218 3219 if (FormatTok->Tok.is(tok::less)) 3220 parseObjCProtocolList(); 3221 3222 if (FormatTok->Tok.is(tok::l_brace)) { 3223 if (Style.BraceWrapping.AfterObjCDeclaration) 3224 addUnwrappedLine(); 3225 parseBlock(/*MustBeDeclaration=*/true); 3226 } 3227 3228 // With instance variables, this puts '}' on its own line. Without instance 3229 // variables, this ends the @interface line. 
3230 addUnwrappedLine(); 3231 3232 parseObjCUntilAtEnd(); 3233 } 3234 3235 void UnwrappedLineParser::parseObjCLightweightGenerics() { 3236 assert(FormatTok->Tok.is(tok::less)); 3237 // Unlike protocol lists, generic parameterizations support 3238 // nested angles: 3239 // 3240 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 3241 // NSObject <NSCopying, NSSecureCoding> 3242 // 3243 // so we need to count how many open angles we have left. 3244 unsigned NumOpenAngles = 1; 3245 do { 3246 nextToken(); 3247 // Early exit in case someone forgot a close angle. 3248 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 3249 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 3250 break; 3251 if (FormatTok->Tok.is(tok::less)) 3252 ++NumOpenAngles; 3253 else if (FormatTok->Tok.is(tok::greater)) { 3254 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 3255 --NumOpenAngles; 3256 } 3257 } while (!eof() && NumOpenAngles != 0); 3258 nextToken(); // Skip '>'. 3259 } 3260 3261 // Returns true for the declaration/definition form of @protocol, 3262 // false for the expression form. 3263 bool UnwrappedLineParser::parseObjCProtocol() { 3264 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 3265 nextToken(); 3266 3267 if (FormatTok->is(tok::l_paren)) 3268 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 3269 return false; 3270 3271 // The definition/declaration form, 3272 // @protocol Foo 3273 // - (int)someMethod; 3274 // @end 3275 3276 nextToken(); // protocol name 3277 3278 if (FormatTok->Tok.is(tok::less)) 3279 parseObjCProtocolList(); 3280 3281 // Check for protocol declaration. 
3282 if (FormatTok->Tok.is(tok::semi)) { 3283 nextToken(); 3284 addUnwrappedLine(); 3285 return true; 3286 } 3287 3288 addUnwrappedLine(); 3289 parseObjCUntilAtEnd(); 3290 return true; 3291 } 3292 3293 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 3294 bool IsImport = FormatTok->is(Keywords.kw_import); 3295 assert(IsImport || FormatTok->is(tok::kw_export)); 3296 nextToken(); 3297 3298 // Consume the "default" in "export default class/function". 3299 if (FormatTok->is(tok::kw_default)) 3300 nextToken(); 3301 3302 // Consume "async function", "function" and "default function", so that these 3303 // get parsed as free-standing JS functions, i.e. do not require a trailing 3304 // semicolon. 3305 if (FormatTok->is(Keywords.kw_async)) 3306 nextToken(); 3307 if (FormatTok->is(Keywords.kw_function)) { 3308 nextToken(); 3309 return; 3310 } 3311 3312 // For imports, `export *`, `export {...}`, consume the rest of the line up 3313 // to the terminating `;`. For everything else, just return and continue 3314 // parsing the structural element, i.e. the declaration or expression for 3315 // `export default`. 3316 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 3317 !FormatTok->isStringLiteral()) 3318 return; 3319 3320 while (!eof()) { 3321 if (FormatTok->is(tok::semi)) 3322 return; 3323 if (Line->Tokens.empty()) { 3324 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 3325 // import statement should terminate. 
3326 return; 3327 } 3328 if (FormatTok->is(tok::l_brace)) { 3329 FormatTok->setBlockKind(BK_Block); 3330 nextToken(); 3331 parseBracedList(); 3332 } else { 3333 nextToken(); 3334 } 3335 } 3336 } 3337 3338 void UnwrappedLineParser::parseStatementMacro() { 3339 nextToken(); 3340 if (FormatTok->is(tok::l_paren)) 3341 parseParens(); 3342 if (FormatTok->is(tok::semi)) 3343 nextToken(); 3344 addUnwrappedLine(); 3345 } 3346 3347 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 3348 StringRef Prefix = "") { 3349 llvm::dbgs() << Prefix << "Line(" << Line.Level 3350 << ", FSC=" << Line.FirstStartColumn << ")" 3351 << (Line.InPPDirective ? " MACRO" : "") << ": "; 3352 for (const auto &Node : Line.Tokens) { 3353 llvm::dbgs() << Node.Tok->Tok.getName() << "[" 3354 << "T=" << static_cast<unsigned>(Node.Tok->getType()) 3355 << ", OC=" << Node.Tok->OriginalColumn << "] "; 3356 } 3357 for (const auto &Node : Line.Tokens) 3358 for (const auto &ChildNode : Node.Children) 3359 printDebugInfo(ChildNode, "\nChild: "); 3360 3361 llvm::dbgs() << "\n"; 3362 } 3363 3364 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { 3365 if (Line->Tokens.empty()) 3366 return; 3367 LLVM_DEBUG({ 3368 if (CurrentLines == &Lines) 3369 printDebugInfo(*Line); 3370 }); 3371 3372 // If this line closes a block when in Whitesmiths mode, remember that 3373 // information so that the level can be decreased after the line is added. 3374 // This has to happen after the addition of the line since the line itself 3375 // needs to be indented. 
3376 bool ClosesWhitesmithsBlock = 3377 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && 3378 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 3379 3380 CurrentLines->push_back(std::move(*Line)); 3381 Line->Tokens.clear(); 3382 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 3383 Line->FirstStartColumn = 0; 3384 3385 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) 3386 --Line->Level; 3387 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 3388 CurrentLines->append( 3389 std::make_move_iterator(PreprocessorDirectives.begin()), 3390 std::make_move_iterator(PreprocessorDirectives.end())); 3391 PreprocessorDirectives.clear(); 3392 } 3393 // Disconnect the current token from the last token on the previous line. 3394 FormatTok->Previous = nullptr; 3395 } 3396 3397 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 3398 3399 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 3400 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 3401 FormatTok.NewlinesBefore > 0; 3402 } 3403 3404 // Checks if \p FormatTok is a line comment that continues the line comment 3405 // section on \p Line. 3406 static bool 3407 continuesLineCommentSection(const FormatToken &FormatTok, 3408 const UnwrappedLine &Line, 3409 const llvm::Regex &CommentPragmasRegex) { 3410 if (Line.Tokens.empty()) 3411 return false; 3412 3413 StringRef IndentContent = FormatTok.TokenText; 3414 if (FormatTok.TokenText.startswith("//") || 3415 FormatTok.TokenText.startswith("/*")) 3416 IndentContent = FormatTok.TokenText.substr(2); 3417 if (CommentPragmasRegex.match(IndentContent)) 3418 return false; 3419 3420 // If Line starts with a line comment, then FormatTok continues the comment 3421 // section if its original column is greater or equal to the original start 3422 // column of the line. 
3423 // 3424 // Define the min column token of a line as follows: if a line ends in '{' or 3425 // contains a '{' followed by a line comment, then the min column token is 3426 // that '{'. Otherwise, the min column token of the line is the first token of 3427 // the line. 3428 // 3429 // If Line starts with a token other than a line comment, then FormatTok 3430 // continues the comment section if its original column is greater than the 3431 // original start column of the min column token of the line. 3432 // 3433 // For example, the second line comment continues the first in these cases: 3434 // 3435 // // first line 3436 // // second line 3437 // 3438 // and: 3439 // 3440 // // first line 3441 // // second line 3442 // 3443 // and: 3444 // 3445 // int i; // first line 3446 // // second line 3447 // 3448 // and: 3449 // 3450 // do { // first line 3451 // // second line 3452 // int i; 3453 // } while (true); 3454 // 3455 // and: 3456 // 3457 // enum { 3458 // a, // first line 3459 // // second line 3460 // b 3461 // }; 3462 // 3463 // The second line comment doesn't continue the first in these cases: 3464 // 3465 // // first line 3466 // // second line 3467 // 3468 // and: 3469 // 3470 // int i; // first line 3471 // // second line 3472 // 3473 // and: 3474 // 3475 // do { // first line 3476 // // second line 3477 // int i; 3478 // } while (true); 3479 // 3480 // and: 3481 // 3482 // enum { 3483 // a, // first line 3484 // // second line 3485 // }; 3486 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 3487 3488 // Scan for '{//'. If found, use the column of '{' as a min column for line 3489 // comment section continuation. 
3490 const FormatToken *PreviousToken = nullptr; 3491 for (const UnwrappedLineNode &Node : Line.Tokens) { 3492 if (PreviousToken && PreviousToken->is(tok::l_brace) && 3493 isLineComment(*Node.Tok)) { 3494 MinColumnToken = PreviousToken; 3495 break; 3496 } 3497 PreviousToken = Node.Tok; 3498 3499 // Grab the last newline preceding a token in this unwrapped line. 3500 if (Node.Tok->NewlinesBefore > 0) { 3501 MinColumnToken = Node.Tok; 3502 } 3503 } 3504 if (PreviousToken && PreviousToken->is(tok::l_brace)) { 3505 MinColumnToken = PreviousToken; 3506 } 3507 3508 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 3509 MinColumnToken); 3510 } 3511 3512 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 3513 bool JustComments = Line->Tokens.empty(); 3514 for (FormatToken *Tok : CommentsBeforeNextToken) { 3515 // Line comments that belong to the same line comment section are put on the 3516 // same line since later we might want to reflow content between them. 3517 // Additional fine-grained breaking of line comment sections is controlled 3518 // by the class BreakableLineCommentSection in case it is desirable to keep 3519 // several line comment sections in the same unwrapped line. 3520 // 3521 // FIXME: Consider putting separate line comment sections as children to the 3522 // unwrapped line instead. 
3523 Tok->ContinuesLineCommentSection = 3524 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex); 3525 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection) 3526 addUnwrappedLine(); 3527 pushToken(Tok); 3528 } 3529 if (NewlineBeforeNext && JustComments) 3530 addUnwrappedLine(); 3531 CommentsBeforeNextToken.clear(); 3532 } 3533 3534 void UnwrappedLineParser::nextToken(int LevelDifference) { 3535 if (eof()) 3536 return; 3537 flushComments(isOnNewLine(*FormatTok)); 3538 pushToken(FormatTok); 3539 FormatToken *Previous = FormatTok; 3540 if (!Style.isJavaScript()) 3541 readToken(LevelDifference); 3542 else 3543 readTokenWithJavaScriptASI(); 3544 FormatTok->Previous = Previous; 3545 } 3546 3547 void UnwrappedLineParser::distributeComments( 3548 const SmallVectorImpl<FormatToken *> &Comments, 3549 const FormatToken *NextTok) { 3550 // Whether or not a line comment token continues a line is controlled by 3551 // the method continuesLineCommentSection, with the following caveat: 3552 // 3553 // Define a trail of Comments to be a nonempty proper postfix of Comments such 3554 // that each comment line from the trail is aligned with the next token, if 3555 // the next token exists. If a trail exists, the beginning of the maximal 3556 // trail is marked as a start of a new comment section. 3557 // 3558 // For example in this code: 3559 // 3560 // int a; // line about a 3561 // // line 1 about b 3562 // // line 2 about b 3563 // int b; 3564 // 3565 // the two lines about b form a maximal trail, so there are two sections, the 3566 // first one consisting of the single comment "// line about a" and the 3567 // second one consisting of the next two comments. 3568 if (Comments.empty()) 3569 return; 3570 bool ShouldPushCommentsInCurrentLine = true; 3571 bool HasTrailAlignedWithNextToken = false; 3572 unsigned StartOfTrailAlignedWithNextToken = 0; 3573 if (NextTok) { 3574 // We are skipping the first element intentionally. 
3575 for (unsigned i = Comments.size() - 1; i > 0; --i) { 3576 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 3577 HasTrailAlignedWithNextToken = true; 3578 StartOfTrailAlignedWithNextToken = i; 3579 } 3580 } 3581 } 3582 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 3583 FormatToken *FormatTok = Comments[i]; 3584 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 3585 FormatTok->ContinuesLineCommentSection = false; 3586 } else { 3587 FormatTok->ContinuesLineCommentSection = 3588 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 3589 } 3590 if (!FormatTok->ContinuesLineCommentSection && 3591 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 3592 ShouldPushCommentsInCurrentLine = false; 3593 } 3594 if (ShouldPushCommentsInCurrentLine) { 3595 pushToken(FormatTok); 3596 } else { 3597 CommentsBeforeNextToken.push_back(FormatTok); 3598 } 3599 } 3600 } 3601 3602 void UnwrappedLineParser::readToken(int LevelDifference) { 3603 SmallVector<FormatToken *, 1> Comments; 3604 do { 3605 FormatTok = Tokens->getNextToken(); 3606 assert(FormatTok); 3607 while (FormatTok->getType() == TT_ConflictStart || 3608 FormatTok->getType() == TT_ConflictEnd || 3609 FormatTok->getType() == TT_ConflictAlternative) { 3610 if (FormatTok->getType() == TT_ConflictStart) { 3611 conditionalCompilationStart(/*Unreachable=*/false); 3612 } else if (FormatTok->getType() == TT_ConflictAlternative) { 3613 conditionalCompilationAlternative(); 3614 } else if (FormatTok->getType() == TT_ConflictEnd) { 3615 conditionalCompilationEnd(); 3616 } 3617 FormatTok = Tokens->getNextToken(); 3618 FormatTok->MustBreakBefore = true; 3619 } 3620 3621 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 3622 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 3623 distributeComments(Comments, FormatTok); 3624 Comments.clear(); 3625 // If there is an unfinished unwrapped line, we flush the preprocessor 3626 // directives only after that 
unwrapped line was finished later. 3627 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 3628 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 3629 assert((LevelDifference >= 0 || 3630 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 3631 "LevelDifference makes Line->Level negative"); 3632 Line->Level += LevelDifference; 3633 // Comments stored before the preprocessor directive need to be output 3634 // before the preprocessor directive, at the same level as the 3635 // preprocessor directive, as we consider them to apply to the directive. 3636 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 3637 PPBranchLevel > 0) 3638 Line->Level += PPBranchLevel; 3639 flushComments(isOnNewLine(*FormatTok)); 3640 parsePPDirective(); 3641 } 3642 3643 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 3644 !Line->InPPDirective) { 3645 continue; 3646 } 3647 3648 if (!FormatTok->Tok.is(tok::comment)) { 3649 distributeComments(Comments, FormatTok); 3650 Comments.clear(); 3651 return; 3652 } 3653 3654 Comments.push_back(FormatTok); 3655 } while (!eof()); 3656 3657 distributeComments(Comments, nullptr); 3658 Comments.clear(); 3659 } 3660 3661 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 3662 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 3663 if (MustBreakBeforeNextToken) { 3664 Line->Tokens.back().Tok->MustBreakBefore = true; 3665 MustBreakBeforeNextToken = false; 3666 } 3667 } 3668 3669 } // end namespace format 3670 } // end namespace clang 3671