1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "llvm/ADT/STLExtras.h" 17 #include "llvm/Support/Debug.h" 18 #include "llvm/Support/raw_ostream.h" 19 20 #include <algorithm> 21 22 #define DEBUG_TYPE "format-parser" 23 24 namespace clang { 25 namespace format { 26 27 class FormatTokenSource { 28 public: 29 virtual ~FormatTokenSource() {} 30 virtual FormatToken *getNextToken() = 0; 31 32 virtual unsigned getPosition() = 0; 33 virtual FormatToken *setPosition(unsigned Position) = 0; 34 }; 35 36 namespace { 37 38 class ScopedDeclarationState { 39 public: 40 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 41 bool MustBeDeclaration) 42 : Line(Line), Stack(Stack) { 43 Line.MustBeDeclaration = MustBeDeclaration; 44 Stack.push_back(MustBeDeclaration); 45 } 46 ~ScopedDeclarationState() { 47 Stack.pop_back(); 48 if (!Stack.empty()) 49 Line.MustBeDeclaration = Stack.back(); 50 else 51 Line.MustBeDeclaration = true; 52 } 53 54 private: 55 UnwrappedLine &Line; 56 std::vector<bool> &Stack; 57 }; 58 59 static bool isLineComment(const FormatToken &FormatTok) { 60 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); 61 } 62 63 // Checks if \p FormatTok is a line comment that continues the line comment 64 // \p Previous. 
The original column of \p MinColumnToken is used to determine 65 // whether \p FormatTok is indented enough to the right to continue \p Previous. 66 static bool continuesLineComment(const FormatToken &FormatTok, 67 const FormatToken *Previous, 68 const FormatToken *MinColumnToken) { 69 if (!Previous || !MinColumnToken) 70 return false; 71 unsigned MinContinueColumn = 72 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1); 73 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 74 isLineComment(*Previous) && 75 FormatTok.OriginalColumn >= MinContinueColumn; 76 } 77 78 class ScopedMacroState : public FormatTokenSource { 79 public: 80 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 81 FormatToken *&ResetToken) 82 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 83 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 84 Token(nullptr), PreviousToken(nullptr) { 85 FakeEOF.Tok.startToken(); 86 FakeEOF.Tok.setKind(tok::eof); 87 TokenSource = this; 88 Line.Level = 0; 89 Line.InPPDirective = true; 90 } 91 92 ~ScopedMacroState() override { 93 TokenSource = PreviousTokenSource; 94 ResetToken = Token; 95 Line.InPPDirective = false; 96 Line.Level = PreviousLineLevel; 97 } 98 99 FormatToken *getNextToken() override { 100 // The \c UnwrappedLineParser guards against this by never calling 101 // \c getNextToken() after it has encountered the first eof token. 
102 assert(!eof()); 103 PreviousToken = Token; 104 Token = PreviousTokenSource->getNextToken(); 105 if (eof()) 106 return &FakeEOF; 107 return Token; 108 } 109 110 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 111 112 FormatToken *setPosition(unsigned Position) override { 113 PreviousToken = nullptr; 114 Token = PreviousTokenSource->setPosition(Position); 115 return Token; 116 } 117 118 private: 119 bool eof() { 120 return Token && Token->HasUnescapedNewline && 121 !continuesLineComment(*Token, PreviousToken, 122 /*MinColumnToken=*/PreviousToken); 123 } 124 125 FormatToken FakeEOF; 126 UnwrappedLine &Line; 127 FormatTokenSource *&TokenSource; 128 FormatToken *&ResetToken; 129 unsigned PreviousLineLevel; 130 FormatTokenSource *PreviousTokenSource; 131 132 FormatToken *Token; 133 FormatToken *PreviousToken; 134 }; 135 136 } // end anonymous namespace 137 138 class ScopedLineState { 139 public: 140 ScopedLineState(UnwrappedLineParser &Parser, 141 bool SwitchToPreprocessorLines = false) 142 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 143 if (SwitchToPreprocessorLines) 144 Parser.CurrentLines = &Parser.PreprocessorDirectives; 145 else if (!Parser.Line->Tokens.empty()) 146 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 147 PreBlockLine = std::move(Parser.Line); 148 Parser.Line = llvm::make_unique<UnwrappedLine>(); 149 Parser.Line->Level = PreBlockLine->Level; 150 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 151 } 152 153 ~ScopedLineState() { 154 if (!Parser.Line->Tokens.empty()) { 155 Parser.addUnwrappedLine(); 156 } 157 assert(Parser.Line->Tokens.empty()); 158 Parser.Line = std::move(PreBlockLine); 159 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 160 Parser.MustBreakBeforeNextToken = true; 161 Parser.CurrentLines = OriginalLines; 162 } 163 164 private: 165 UnwrappedLineParser &Parser; 166 167 std::unique_ptr<UnwrappedLine> PreBlockLine; 168 SmallVectorImpl<UnwrappedLine> *OriginalLines; 
169 }; 170 171 class CompoundStatementIndenter { 172 public: 173 CompoundStatementIndenter(UnwrappedLineParser *Parser, 174 const FormatStyle &Style, unsigned &LineLevel) 175 : CompoundStatementIndenter(Parser, LineLevel, 176 Style.BraceWrapping.AfterControlStatement, 177 Style.BraceWrapping.IndentBraces) { 178 } 179 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, 180 bool WrapBrace, bool IndentBrace) 181 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 182 if (WrapBrace) 183 Parser->addUnwrappedLine(); 184 if (IndentBrace) 185 ++LineLevel; 186 } 187 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 188 189 private: 190 unsigned &LineLevel; 191 unsigned OldLineLevel; 192 }; 193 194 namespace { 195 196 class IndexedTokenSource : public FormatTokenSource { 197 public: 198 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 199 : Tokens(Tokens), Position(-1) {} 200 201 FormatToken *getNextToken() override { 202 ++Position; 203 return Tokens[Position]; 204 } 205 206 unsigned getPosition() override { 207 assert(Position >= 0); 208 return Position; 209 } 210 211 FormatToken *setPosition(unsigned P) override { 212 Position = P; 213 return Tokens[Position]; 214 } 215 216 void reset() { Position = -1; } 217 218 private: 219 ArrayRef<FormatToken *> Tokens; 220 int Position; 221 }; 222 223 } // end anonymous namespace 224 225 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 226 const AdditionalKeywords &Keywords, 227 unsigned FirstStartColumn, 228 ArrayRef<FormatToken *> Tokens, 229 UnwrappedLineConsumer &Callback) 230 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 231 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 232 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 233 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 234 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 235 ? 
IG_Rejected 236 : IG_Inited), 237 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} 238 239 void UnwrappedLineParser::reset() { 240 PPBranchLevel = -1; 241 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 242 ? IG_Rejected 243 : IG_Inited; 244 IncludeGuardToken = nullptr; 245 Line.reset(new UnwrappedLine); 246 CommentsBeforeNextToken.clear(); 247 FormatTok = nullptr; 248 MustBreakBeforeNextToken = false; 249 PreprocessorDirectives.clear(); 250 CurrentLines = &Lines; 251 DeclarationScopeStack.clear(); 252 PPStack.clear(); 253 Line->FirstStartColumn = FirstStartColumn; 254 } 255 256 void UnwrappedLineParser::parse() { 257 IndexedTokenSource TokenSource(AllTokens); 258 Line->FirstStartColumn = FirstStartColumn; 259 do { 260 LLVM_DEBUG(llvm::dbgs() << "----\n"); 261 reset(); 262 Tokens = &TokenSource; 263 TokenSource.reset(); 264 265 readToken(); 266 parseFile(); 267 268 // If we found an include guard then all preprocessor directives (other than 269 // the guard) are over-indented by one. 270 if (IncludeGuard == IG_Found) 271 for (auto &Line : Lines) 272 if (Line.InPPDirective && Line.Level > 0) 273 --Line.Level; 274 275 // Create line with eof token. 
276 pushToken(FormatTok); 277 addUnwrappedLine(); 278 279 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), 280 E = Lines.end(); 281 I != E; ++I) { 282 Callback.consumeUnwrappedLine(*I); 283 } 284 Callback.finishRun(); 285 Lines.clear(); 286 while (!PPLevelBranchIndex.empty() && 287 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 288 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 289 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 290 } 291 if (!PPLevelBranchIndex.empty()) { 292 ++PPLevelBranchIndex.back(); 293 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 294 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 295 } 296 } while (!PPLevelBranchIndex.empty()); 297 } 298 299 void UnwrappedLineParser::parseFile() { 300 // The top-level context in a file always has declarations, except for pre- 301 // processor directives and JavaScript files. 302 bool MustBeDeclaration = 303 !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; 304 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 305 MustBeDeclaration); 306 if (Style.Language == FormatStyle::LK_TextProto) 307 parseBracedList(); 308 else 309 parseLevel(/*HasOpeningBrace=*/false); 310 // Make sure to format the remaining tokens. 311 // 312 // LK_TextProto is special since its top-level is parsed as the body of a 313 // braced list, which does not necessarily have natural line separators such 314 // as a semicolon. Comments after the last entry that have been determined to 315 // not belong to that line, as in: 316 // key: value 317 // // endfile comment 318 // do not have a chance to be put on a line of their own until this point. 319 // Here we add this newline before end-of-file comments. 
320 if (Style.Language == FormatStyle::LK_TextProto && 321 !CommentsBeforeNextToken.empty()) 322 addUnwrappedLine(); 323 flushComments(true); 324 addUnwrappedLine(); 325 } 326 327 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 328 bool SwitchLabelEncountered = false; 329 do { 330 tok::TokenKind kind = FormatTok->Tok.getKind(); 331 if (FormatTok->Type == TT_MacroBlockBegin) { 332 kind = tok::l_brace; 333 } else if (FormatTok->Type == TT_MacroBlockEnd) { 334 kind = tok::r_brace; 335 } 336 337 switch (kind) { 338 case tok::comment: 339 nextToken(); 340 addUnwrappedLine(); 341 break; 342 case tok::l_brace: 343 // FIXME: Add parameter whether this can happen - if this happens, we must 344 // be in a non-declaration context. 345 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) 346 continue; 347 parseBlock(/*MustBeDeclaration=*/false); 348 addUnwrappedLine(); 349 break; 350 case tok::r_brace: 351 if (HasOpeningBrace) 352 return; 353 nextToken(); 354 addUnwrappedLine(); 355 break; 356 case tok::kw_default: { 357 unsigned StoredPosition = Tokens->getPosition(); 358 FormatToken *Next; 359 do { 360 Next = Tokens->getNextToken(); 361 } while (Next && Next->is(tok::comment)); 362 FormatTok = Tokens->setPosition(StoredPosition); 363 if (Next && Next->isNot(tok::colon)) { 364 // default not followed by ':' is not a case label; treat it like 365 // an identifier. 366 parseStructuralElement(); 367 break; 368 } 369 // Else, if it is 'default:', fall through to the case handling. 370 LLVM_FALLTHROUGH; 371 } 372 case tok::kw_case: 373 if (Style.Language == FormatStyle::LK_JavaScript && 374 Line->MustBeDeclaration) { 375 // A 'case: string' style field declaration. 
376 parseStructuralElement(); 377 break; 378 } 379 if (!SwitchLabelEncountered && 380 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 381 ++Line->Level; 382 SwitchLabelEncountered = true; 383 parseStructuralElement(); 384 break; 385 default: 386 parseStructuralElement(); 387 break; 388 } 389 } while (!eof()); 390 } 391 392 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 393 // We'll parse forward through the tokens until we hit 394 // a closing brace or eof - note that getNextToken() will 395 // parse macros, so this will magically work inside macro 396 // definitions, too. 397 unsigned StoredPosition = Tokens->getPosition(); 398 FormatToken *Tok = FormatTok; 399 const FormatToken *PrevTok = Tok->Previous; 400 // Keep a stack of positions of lbrace tokens. We will 401 // update information about whether an lbrace starts a 402 // braced init list or a different block during the loop. 403 SmallVector<FormatToken *, 8> LBraceStack; 404 assert(Tok->Tok.is(tok::l_brace)); 405 do { 406 // Get next non-comment token. 407 FormatToken *NextTok; 408 unsigned ReadTokens = 0; 409 do { 410 NextTok = Tokens->getNextToken(); 411 ++ReadTokens; 412 } while (NextTok->is(tok::comment)); 413 414 switch (Tok->Tok.getKind()) { 415 case tok::l_brace: 416 if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) { 417 if (PrevTok->isOneOf(tok::colon, tok::less)) 418 // A ':' indicates this code is in a type, or a braced list 419 // following a label in an object literal ({a: {b: 1}}). 420 // A '<' could be an object used in a comparison, but that is nonsense 421 // code (can never return true), so more likely it is a generic type 422 // argument (`X<{a: string; b: number}>`). 423 // The code below could be confused by semicolons between the 424 // individual members in a type member list, which would normally 425 // trigger BK_Block. In both cases, this must be parsed as an inline 426 // braced init. 
427 Tok->BlockKind = BK_BracedInit; 428 else if (PrevTok->is(tok::r_paren)) 429 // `) { }` can only occur in function or method declarations in JS. 430 Tok->BlockKind = BK_Block; 431 } else { 432 Tok->BlockKind = BK_Unknown; 433 } 434 LBraceStack.push_back(Tok); 435 break; 436 case tok::r_brace: 437 if (LBraceStack.empty()) 438 break; 439 if (LBraceStack.back()->BlockKind == BK_Unknown) { 440 bool ProbablyBracedList = false; 441 if (Style.Language == FormatStyle::LK_Proto) { 442 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 443 } else { 444 // Using OriginalColumn to distinguish between ObjC methods and 445 // binary operators is a bit hacky. 446 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 447 NextTok->OriginalColumn == 0; 448 449 // If there is a comma, semicolon or right paren after the closing 450 // brace, we assume this is a braced initializer list. Note that 451 // regardless how we mark inner braces here, we will overwrite the 452 // BlockKind later if we parse a braced list (where all blocks 453 // inside are by default braced lists), or when we explicitly detect 454 // blocks (for example while parsing lambdas). 455 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 456 // braced list in JS. 
457 ProbablyBracedList = 458 (Style.Language == FormatStyle::LK_JavaScript && 459 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 460 Keywords.kw_as)) || 461 (Style.isCpp() && NextTok->is(tok::l_paren)) || 462 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 463 tok::r_paren, tok::r_square, tok::l_brace, 464 tok::ellipsis) || 465 (NextTok->is(tok::identifier) && 466 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || 467 (NextTok->is(tok::semi) && 468 (!ExpectClassBody || LBraceStack.size() != 1)) || 469 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 470 if (NextTok->is(tok::l_square)) { 471 // We can have an array subscript after a braced init 472 // list, but C++11 attributes are expected after blocks. 473 NextTok = Tokens->getNextToken(); 474 ++ReadTokens; 475 ProbablyBracedList = NextTok->isNot(tok::l_square); 476 } 477 } 478 if (ProbablyBracedList) { 479 Tok->BlockKind = BK_BracedInit; 480 LBraceStack.back()->BlockKind = BK_BracedInit; 481 } else { 482 Tok->BlockKind = BK_Block; 483 LBraceStack.back()->BlockKind = BK_Block; 484 } 485 } 486 LBraceStack.pop_back(); 487 break; 488 case tok::identifier: 489 if (!Tok->is(TT_StatementMacro)) 490 break; 491 LLVM_FALLTHROUGH; 492 case tok::at: 493 case tok::semi: 494 case tok::kw_if: 495 case tok::kw_while: 496 case tok::kw_for: 497 case tok::kw_switch: 498 case tok::kw_try: 499 case tok::kw___try: 500 if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown) 501 LBraceStack.back()->BlockKind = BK_Block; 502 break; 503 default: 504 break; 505 } 506 PrevTok = Tok; 507 Tok = NextTok; 508 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 509 510 // Assume other blocks for all unclosed opening braces. 
511 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 512 if (LBraceStack[i]->BlockKind == BK_Unknown) 513 LBraceStack[i]->BlockKind = BK_Block; 514 } 515 516 FormatTok = Tokens->setPosition(StoredPosition); 517 } 518 519 template <class T> 520 static inline void hash_combine(std::size_t &seed, const T &v) { 521 std::hash<T> hasher; 522 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 523 } 524 525 size_t UnwrappedLineParser::computePPHash() const { 526 size_t h = 0; 527 for (const auto &i : PPStack) { 528 hash_combine(h, size_t(i.Kind)); 529 hash_combine(h, i.Line); 530 } 531 return h; 532 } 533 534 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, 535 bool MunchSemi) { 536 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 537 "'{' or macro block token expected"); 538 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 539 FormatTok->BlockKind = BK_Block; 540 541 size_t PPStartHash = computePPHash(); 542 543 unsigned InitialLevel = Line->Level; 544 nextToken(/*LevelDifference=*/AddLevel ? 1 : 0); 545 546 if (MacroBlock && FormatTok->is(tok::l_paren)) 547 parseParens(); 548 549 size_t NbPreprocessorDirectives = 550 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; 551 addUnwrappedLine(); 552 size_t OpeningLineIndex = 553 CurrentLines->empty() 554 ? (UnwrappedLine::kInvalidIndex) 555 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 556 557 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 558 MustBeDeclaration); 559 if (AddLevel) 560 ++Line->Level; 561 parseLevel(/*HasOpeningBrace=*/true); 562 563 if (eof()) 564 return; 565 566 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 567 : !FormatTok->is(tok::r_brace)) { 568 Line->Level = InitialLevel; 569 FormatTok->BlockKind = BK_Block; 570 return; 571 } 572 573 size_t PPEndHash = computePPHash(); 574 575 // Munch the closing brace. 576 nextToken(/*LevelDifference=*/AddLevel ? 
-1 : 0); 577 578 if (MacroBlock && FormatTok->is(tok::l_paren)) 579 parseParens(); 580 581 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 582 nextToken(); 583 Line->Level = InitialLevel; 584 585 if (PPStartHash == PPEndHash) { 586 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 587 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 588 // Update the opening line to add the forward reference as well 589 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 590 CurrentLines->size() - 1; 591 } 592 } 593 } 594 595 static bool isGoogScope(const UnwrappedLine &Line) { 596 // FIXME: Closure-library specific stuff should not be hard-coded but be 597 // configurable. 598 if (Line.Tokens.size() < 4) 599 return false; 600 auto I = Line.Tokens.begin(); 601 if (I->Tok->TokenText != "goog") 602 return false; 603 ++I; 604 if (I->Tok->isNot(tok::period)) 605 return false; 606 ++I; 607 if (I->Tok->TokenText != "scope") 608 return false; 609 ++I; 610 return I->Tok->is(tok::l_paren); 611 } 612 613 static bool isIIFE(const UnwrappedLine &Line, 614 const AdditionalKeywords &Keywords) { 615 // Look for the start of an immediately invoked anonymous function. 616 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 617 // This is commonly done in JavaScript to create a new, anonymous scope. 618 // Example: (function() { ... 
})() 619 if (Line.Tokens.size() < 3) 620 return false; 621 auto I = Line.Tokens.begin(); 622 if (I->Tok->isNot(tok::l_paren)) 623 return false; 624 ++I; 625 if (I->Tok->isNot(Keywords.kw_function)) 626 return false; 627 ++I; 628 return I->Tok->is(tok::l_paren); 629 } 630 631 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 632 const FormatToken &InitialToken) { 633 if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro)) 634 return Style.BraceWrapping.AfterNamespace; 635 if (InitialToken.is(tok::kw_class)) 636 return Style.BraceWrapping.AfterClass; 637 if (InitialToken.is(tok::kw_union)) 638 return Style.BraceWrapping.AfterUnion; 639 if (InitialToken.is(tok::kw_struct)) 640 return Style.BraceWrapping.AfterStruct; 641 return false; 642 } 643 644 void UnwrappedLineParser::parseChildBlock() { 645 FormatTok->BlockKind = BK_Block; 646 nextToken(); 647 { 648 bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript && 649 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 650 ScopedLineState LineState(*this); 651 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 652 /*MustBeDeclaration=*/false); 653 Line->Level += SkipIndent ? 0 : 1; 654 parseLevel(/*HasOpeningBrace=*/true); 655 flushComments(isOnNewLine(*FormatTok)); 656 Line->Level -= SkipIndent ? 
0 : 1; 657 } 658 nextToken(); 659 } 660 661 void UnwrappedLineParser::parsePPDirective() { 662 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 663 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 664 665 nextToken(); 666 667 if (!FormatTok->Tok.getIdentifierInfo()) { 668 parsePPUnknown(); 669 return; 670 } 671 672 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 673 case tok::pp_define: 674 parsePPDefine(); 675 return; 676 case tok::pp_if: 677 parsePPIf(/*IfDef=*/false); 678 break; 679 case tok::pp_ifdef: 680 case tok::pp_ifndef: 681 parsePPIf(/*IfDef=*/true); 682 break; 683 case tok::pp_else: 684 parsePPElse(); 685 break; 686 case tok::pp_elif: 687 parsePPElIf(); 688 break; 689 case tok::pp_endif: 690 parsePPEndIf(); 691 break; 692 default: 693 parsePPUnknown(); 694 break; 695 } 696 } 697 698 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 699 size_t Line = CurrentLines->size(); 700 if (CurrentLines == &PreprocessorDirectives) 701 Line += Lines.size(); 702 703 if (Unreachable || 704 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) 705 PPStack.push_back({PP_Unreachable, Line}); 706 else 707 PPStack.push_back({PP_Conditional, Line}); 708 } 709 710 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 711 ++PPBranchLevel; 712 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 713 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 714 PPLevelBranchIndex.push_back(0); 715 PPLevelBranchCount.push_back(0); 716 } 717 PPChainBranchIndex.push(0); 718 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 719 conditionalCompilationCondition(Unreachable || Skip); 720 } 721 722 void UnwrappedLineParser::conditionalCompilationAlternative() { 723 if (!PPStack.empty()) 724 PPStack.pop_back(); 725 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 726 if (!PPChainBranchIndex.empty()) 727 ++PPChainBranchIndex.top(); 728 conditionalCompilationCondition( 729 
PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 730 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 731 } 732 733 void UnwrappedLineParser::conditionalCompilationEnd() { 734 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 735 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 736 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 737 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 738 } 739 } 740 // Guard against #endif's without #if. 741 if (PPBranchLevel > -1) 742 --PPBranchLevel; 743 if (!PPChainBranchIndex.empty()) 744 PPChainBranchIndex.pop(); 745 if (!PPStack.empty()) 746 PPStack.pop_back(); 747 } 748 749 void UnwrappedLineParser::parsePPIf(bool IfDef) { 750 bool IfNDef = FormatTok->is(tok::pp_ifndef); 751 nextToken(); 752 bool Unreachable = false; 753 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 754 Unreachable = true; 755 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 756 Unreachable = true; 757 conditionalCompilationStart(Unreachable); 758 FormatToken *IfCondition = FormatTok; 759 // If there's a #ifndef on the first line, and the only lines before it are 760 // comments, it could be an include guard. 761 bool MaybeIncludeGuard = IfNDef; 762 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) 763 for (auto &Line : Lines) { 764 if (!Line.Tokens.front().Tok->is(tok::comment)) { 765 MaybeIncludeGuard = false; 766 IncludeGuard = IG_Rejected; 767 break; 768 } 769 } 770 --PPBranchLevel; 771 parsePPUnknown(); 772 ++PPBranchLevel; 773 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 774 IncludeGuard = IG_IfNdefed; 775 IncludeGuardToken = IfCondition; 776 } 777 } 778 779 void UnwrappedLineParser::parsePPElse() { 780 // If a potential include guard has an #else, it's not an include guard. 
781 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 782 IncludeGuard = IG_Rejected; 783 conditionalCompilationAlternative(); 784 if (PPBranchLevel > -1) 785 --PPBranchLevel; 786 parsePPUnknown(); 787 ++PPBranchLevel; 788 } 789 790 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 791 792 void UnwrappedLineParser::parsePPEndIf() { 793 conditionalCompilationEnd(); 794 parsePPUnknown(); 795 // If the #endif of a potential include guard is the last thing in the file, 796 // then we found an include guard. 797 unsigned TokenPosition = Tokens->getPosition(); 798 FormatToken *PeekNext = AllTokens[TokenPosition]; 799 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && 800 PeekNext->is(tok::eof) && 801 Style.IndentPPDirectives != FormatStyle::PPDIS_None) 802 IncludeGuard = IG_Found; 803 } 804 805 void UnwrappedLineParser::parsePPDefine() { 806 nextToken(); 807 808 if (!FormatTok->Tok.getIdentifierInfo()) { 809 IncludeGuard = IG_Rejected; 810 IncludeGuardToken = nullptr; 811 parsePPUnknown(); 812 return; 813 } 814 815 if (IncludeGuard == IG_IfNdefed && 816 IncludeGuardToken->TokenText == FormatTok->TokenText) { 817 IncludeGuard = IG_Defined; 818 IncludeGuardToken = nullptr; 819 for (auto &Line : Lines) { 820 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 821 IncludeGuard = IG_Rejected; 822 break; 823 } 824 } 825 } 826 827 nextToken(); 828 if (FormatTok->Tok.getKind() == tok::l_paren && 829 FormatTok->WhitespaceRange.getBegin() == 830 FormatTok->WhitespaceRange.getEnd()) { 831 parseParens(); 832 } 833 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 834 Line->Level += PPBranchLevel + 1; 835 addUnwrappedLine(); 836 ++Line->Level; 837 838 // Errors during a preprocessor directive can only affect the layout of the 839 // preprocessor directive, and thus we ignore them. 
An alternative approach 840 // would be to use the same approach we use on the file level (no 841 // re-indentation if there was a structural error) within the macro 842 // definition. 843 parseFile(); 844 } 845 846 void UnwrappedLineParser::parsePPUnknown() { 847 do { 848 nextToken(); 849 } while (!eof()); 850 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 851 Line->Level += PPBranchLevel + 1; 852 addUnwrappedLine(); 853 } 854 855 // Here we blacklist certain tokens that are not usually the first token in an 856 // unwrapped line. This is used in attempt to distinguish macro calls without 857 // trailing semicolons from other constructs split to several lines. 858 static bool tokenCanStartNewLine(const clang::Token &Tok) { 859 // Semicolon can be a null-statement, l_square can be a start of a macro or 860 // a C++11 attribute, but this doesn't seem to be common. 861 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 862 Tok.isNot(tok::l_square) && 863 // Tokens that can only be used as binary operators and a part of 864 // overloaded operator names. 865 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 866 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 867 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 868 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 869 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 870 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 871 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 872 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 873 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 874 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 875 Tok.isNot(tok::lesslessequal) && 876 // Colon is used in labels, base class lists, initializer lists, 877 // range-based for loops, ternary operator, but should never be the 878 // first token in an unwrapped line. 879 Tok.isNot(tok::colon) && 880 // 'noexcept' is a trailing annotation. 
881 Tok.isNot(tok::kw_noexcept); 882 } 883 884 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 885 const FormatToken *FormatTok) { 886 // FIXME: This returns true for C/C++ keywords like 'struct'. 887 return FormatTok->is(tok::identifier) && 888 (FormatTok->Tok.getIdentifierInfo() == nullptr || 889 !FormatTok->isOneOf( 890 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 891 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 892 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 893 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 894 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 895 Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws, 896 Keywords.kw_from)); 897 } 898 899 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 900 const FormatToken *FormatTok) { 901 return FormatTok->Tok.isLiteral() || 902 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 903 mustBeJSIdent(Keywords, FormatTok); 904 } 905 906 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 907 // when encountered after a value (see mustBeJSIdentOrValue). 
908 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 909 const FormatToken *FormatTok) { 910 return FormatTok->isOneOf( 911 tok::kw_return, Keywords.kw_yield, 912 // conditionals 913 tok::kw_if, tok::kw_else, 914 // loops 915 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 916 // switch/case 917 tok::kw_switch, tok::kw_case, 918 // exceptions 919 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 920 // declaration 921 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 922 Keywords.kw_async, Keywords.kw_function, 923 // import/export 924 Keywords.kw_import, tok::kw_export); 925 } 926 927 // readTokenWithJavaScriptASI reads the next token and terminates the current 928 // line if JavaScript Automatic Semicolon Insertion must 929 // happen between the current token and the next token. 930 // 931 // This method is conservative - it cannot cover all edge cases of JavaScript, 932 // but only aims to correctly handle certain well known cases. It *must not* 933 // return true in speculative cases. 934 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 935 FormatToken *Previous = FormatTok; 936 readToken(); 937 FormatToken *Next = FormatTok; 938 939 bool IsOnSameLine = 940 CommentsBeforeNextToken.empty() 941 ? Next->NewlinesBefore == 0 942 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 943 if (IsOnSameLine) 944 return; 945 946 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 947 bool PreviousStartsTemplateExpr = 948 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); 949 if (PreviousMustBeValue || Previous->is(tok::r_paren)) { 950 // If the line contains an '@' sign, the previous token might be an 951 // annotation, which can precede another identifier/value. 
bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
                              [](UnwrappedLineNode &LineNode) {
                                return LineNode.Tok->is(tok::at);
                              }) != Line->Tokens.end();
    if (HasAt)
      return;
  }
  if (Next->is(tok::exclaim) && PreviousMustBeValue)
    return addUnwrappedLine();
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  bool NextEndsTemplateExpr =
      Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
      (PreviousMustBeValue ||
       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
                         tok::minusminus)))
    return addUnwrappedLine();
  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
      isJSDeclOrStmt(Keywords, Next))
    return addUnwrappedLine();
}

// Parses one structural element starting at the current token, which must not
// be a "{".
//
// The first switch hands the element to a dedicated parser when the leading
// token identifies it (namespace, access specifier, if, for/while, do, switch,
// case/default labels, try, extern "..." blocks, import/export, known macros).
// Everything else falls into the loop below, which consumes tokens until
// something ends the element (e.g. ";", "}", a parsed block) or the input is
// exhausted.
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      // Flag every token up to the closing brace as Finalized.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# these are consumed as a single token; in the
    // other languages they are parsed as access specifiers.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // Only `extern "..." {` is handled here; any other use of 'extern' falls
    // through to the generic loop below.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock) {
          addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/true);
        } else {
          parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        // import [public] "file" [;]
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // Qt-style "signals:" / "slots:" sections (and their Q_ variants).
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // No dedicated parser took over: consume tokens one construct at a time
  // until something terminates the structural element or eof is reached.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above). But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java, JavaScript and C#.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The "}" itself is not consumed here.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // "^" optionally followed by a type/identifier, a parameter list and a
      // braced body, each parsed only if present.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->Type = TT_FunctionLBrace;
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the following token, then restore the stream position.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.Language == FormatStyle::LK_JavaScript)
        break;

      // Only an identifier that is the first token of the line (optionally
      // preceded by a single comment) is considered as a label or macro.
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel();
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: an all-uppercase name that is either function-like or at
        // least five characters long, followed by a line break.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

// Tries to parse a C++ lambda starting at the current "[".
//
// Returns false if the language is not C++ or the introducer could not be
// parsed. Returns true otherwise; note that true is also returned when the scan
// towards the body stops at an unexpected token, in which case no token is
// marked as belonging to a lambda.
bool UnwrappedLineParser::tryToParseLambda() {
  if (!Style.isCpp()) {
    nextToken();
    return false;
  }
  assert(FormatTok->is(tok::l_square));
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  bool SeenArrow = false;

  while (FormatTok->isNot(tok::l_brace)) {
    if (FormatTok->isSimpleTypeSpecifier()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      // The loop condition already excludes "{", so this is a no-op.
      break;
    case tok::l_paren:
      parseParens();
      break;
    case tok::amp:
    case tok::star:
    case tok::kw_const:
    case
tok::comma: 1437 case tok::less: 1438 case tok::greater: 1439 case tok::identifier: 1440 case tok::numeric_constant: 1441 case tok::coloncolon: 1442 case tok::kw_mutable: 1443 case tok::kw_noexcept: 1444 nextToken(); 1445 break; 1446 // Specialization of a template with an integer parameter can contain 1447 // arithmetic, logical, comparison and ternary operators. 1448 // 1449 // FIXME: This also accepts sequences of operators that are not in the scope 1450 // of a template argument list. 1451 // 1452 // In a C++ lambda a template type can only occur after an arrow. We use 1453 // this as an heuristic to distinguish between Objective-C expressions 1454 // followed by an `a->b` expression, such as: 1455 // ([obj func:arg] + a->b) 1456 // Otherwise the code below would parse as a lambda. 1457 case tok::plus: 1458 case tok::minus: 1459 case tok::exclaim: 1460 case tok::tilde: 1461 case tok::slash: 1462 case tok::percent: 1463 case tok::lessless: 1464 case tok::pipe: 1465 case tok::pipepipe: 1466 case tok::ampamp: 1467 case tok::caret: 1468 case tok::equalequal: 1469 case tok::exclaimequal: 1470 case tok::greaterequal: 1471 case tok::lessequal: 1472 case tok::question: 1473 case tok::colon: 1474 case tok::kw_true: 1475 case tok::kw_false: 1476 if (SeenArrow) { 1477 nextToken(); 1478 break; 1479 } 1480 return true; 1481 case tok::arrow: 1482 // This might or might not actually be a lambda arrow (this could be an 1483 // ObjC method invocation followed by a dereferencing arrow). We might 1484 // reset this back to TT_Unknown in TokenAnnotator. 
1485 FormatTok->Type = TT_LambdaArrow; 1486 SeenArrow = true; 1487 nextToken(); 1488 break; 1489 default: 1490 return true; 1491 } 1492 } 1493 FormatTok->Type = TT_LambdaLBrace; 1494 LSquare.Type = TT_LambdaLSquare; 1495 parseChildBlock(); 1496 return true; 1497 } 1498 1499 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 1500 const FormatToken *Previous = FormatTok->Previous; 1501 if (Previous && 1502 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, 1503 tok::kw_delete, tok::l_square) || 1504 FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() || 1505 Previous->isSimpleTypeSpecifier())) { 1506 nextToken(); 1507 return false; 1508 } 1509 nextToken(); 1510 if (FormatTok->is(tok::l_square)) { 1511 return false; 1512 } 1513 parseSquare(/*LambdaIntroducer=*/true); 1514 return true; 1515 } 1516 1517 void UnwrappedLineParser::tryToParseJSFunction() { 1518 assert(FormatTok->is(Keywords.kw_function) || 1519 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 1520 if (FormatTok->is(Keywords.kw_async)) 1521 nextToken(); 1522 // Consume "function". 1523 nextToken(); 1524 1525 // Consume * (generator function). Treat it like C++'s overloaded operators. 1526 if (FormatTok->is(tok::star)) { 1527 FormatTok->Type = TT_OverloadedOperator; 1528 nextToken(); 1529 } 1530 1531 // Consume function name. 1532 if (FormatTok->is(tok::identifier)) 1533 nextToken(); 1534 1535 if (FormatTok->isNot(tok::l_paren)) 1536 return; 1537 1538 // Parse formal parameter list. 1539 parseParens(); 1540 1541 if (FormatTok->is(tok::colon)) { 1542 // Parse a type definition. 1543 nextToken(); 1544 1545 // Eat the type declaration. For braced inline object types, balance braces, 1546 // otherwise just parse until finding an l_brace for the function body. 
1547 if (FormatTok->is(tok::l_brace)) 1548 tryToParseBracedList(); 1549 else 1550 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 1551 nextToken(); 1552 } 1553 1554 if (FormatTok->is(tok::semi)) 1555 return; 1556 1557 parseChildBlock(); 1558 } 1559 1560 bool UnwrappedLineParser::tryToParseBracedList() { 1561 if (FormatTok->BlockKind == BK_Unknown) 1562 calculateBraceTypes(); 1563 assert(FormatTok->BlockKind != BK_Unknown); 1564 if (FormatTok->BlockKind == BK_Block) 1565 return false; 1566 nextToken(); 1567 parseBracedList(); 1568 return true; 1569 } 1570 1571 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 1572 tok::TokenKind ClosingBraceKind) { 1573 bool HasError = false; 1574 1575 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 1576 // replace this by using parseAssigmentExpression() inside. 1577 do { 1578 if (Style.Language == FormatStyle::LK_JavaScript) { 1579 if (FormatTok->is(Keywords.kw_function) || 1580 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 1581 tryToParseJSFunction(); 1582 continue; 1583 } 1584 if (FormatTok->is(TT_JsFatArrow)) { 1585 nextToken(); 1586 // Fat arrows can be followed by simple expressions or by child blocks 1587 // in curly braces. 1588 if (FormatTok->is(tok::l_brace)) { 1589 parseChildBlock(); 1590 continue; 1591 } 1592 } 1593 if (FormatTok->is(tok::l_brace)) { 1594 // Could be a method inside of a braced list `{a() { return 1; }}`. 
1595 if (tryToParseBracedList()) 1596 continue; 1597 parseChildBlock(); 1598 } 1599 } 1600 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 1601 nextToken(); 1602 return !HasError; 1603 } 1604 switch (FormatTok->Tok.getKind()) { 1605 case tok::caret: 1606 nextToken(); 1607 if (FormatTok->is(tok::l_brace)) { 1608 parseChildBlock(); 1609 } 1610 break; 1611 case tok::l_square: 1612 tryToParseLambda(); 1613 break; 1614 case tok::l_paren: 1615 parseParens(); 1616 // JavaScript can just have free standing methods and getters/setters in 1617 // object literals. Detect them by a "{" following ")". 1618 if (Style.Language == FormatStyle::LK_JavaScript) { 1619 if (FormatTok->is(tok::l_brace)) 1620 parseChildBlock(); 1621 break; 1622 } 1623 break; 1624 case tok::l_brace: 1625 // Assume there are no blocks inside a braced init list apart 1626 // from the ones we explicitly parse out (like lambdas). 1627 FormatTok->BlockKind = BK_BracedInit; 1628 nextToken(); 1629 parseBracedList(); 1630 break; 1631 case tok::less: 1632 if (Style.Language == FormatStyle::LK_Proto) { 1633 nextToken(); 1634 parseBracedList(/*ContinueOnSemicolons=*/false, 1635 /*ClosingBraceKind=*/tok::greater); 1636 } else { 1637 nextToken(); 1638 } 1639 break; 1640 case tok::semi: 1641 // JavaScript (or more precisely TypeScript) can have semicolons in braced 1642 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 1643 // used for error recovery if we have otherwise determined that this is 1644 // a braced list. 
1645 if (Style.Language == FormatStyle::LK_JavaScript) { 1646 nextToken(); 1647 break; 1648 } 1649 HasError = true; 1650 if (!ContinueOnSemicolons) 1651 return !HasError; 1652 nextToken(); 1653 break; 1654 case tok::comma: 1655 nextToken(); 1656 break; 1657 default: 1658 nextToken(); 1659 break; 1660 } 1661 } while (!eof()); 1662 return false; 1663 } 1664 1665 void UnwrappedLineParser::parseParens() { 1666 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 1667 nextToken(); 1668 do { 1669 switch (FormatTok->Tok.getKind()) { 1670 case tok::l_paren: 1671 parseParens(); 1672 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 1673 parseChildBlock(); 1674 break; 1675 case tok::r_paren: 1676 nextToken(); 1677 return; 1678 case tok::r_brace: 1679 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1680 return; 1681 case tok::l_square: 1682 tryToParseLambda(); 1683 break; 1684 case tok::l_brace: 1685 if (!tryToParseBracedList()) 1686 parseChildBlock(); 1687 break; 1688 case tok::at: 1689 nextToken(); 1690 if (FormatTok->Tok.is(tok::l_brace)) { 1691 nextToken(); 1692 parseBracedList(); 1693 } 1694 break; 1695 case tok::kw_class: 1696 if (Style.Language == FormatStyle::LK_JavaScript) 1697 parseRecord(/*ParseAsExpr=*/true); 1698 else 1699 nextToken(); 1700 break; 1701 case tok::identifier: 1702 if (Style.Language == FormatStyle::LK_JavaScript && 1703 (FormatTok->is(Keywords.kw_function) || 1704 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) 1705 tryToParseJSFunction(); 1706 else 1707 nextToken(); 1708 break; 1709 default: 1710 nextToken(); 1711 break; 1712 } 1713 } while (!eof()); 1714 } 1715 1716 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 1717 if (!LambdaIntroducer) { 1718 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 1719 if (tryToParseLambda()) 1720 return; 1721 } 1722 do { 1723 switch (FormatTok->Tok.getKind()) { 1724 case tok::l_paren: 1725 parseParens(); 1726 
break; 1727 case tok::r_square: 1728 nextToken(); 1729 return; 1730 case tok::r_brace: 1731 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1732 return; 1733 case tok::l_square: 1734 parseSquare(); 1735 break; 1736 case tok::l_brace: { 1737 if (!tryToParseBracedList()) 1738 parseChildBlock(); 1739 break; 1740 } 1741 case tok::at: 1742 nextToken(); 1743 if (FormatTok->Tok.is(tok::l_brace)) { 1744 nextToken(); 1745 parseBracedList(); 1746 } 1747 break; 1748 default: 1749 nextToken(); 1750 break; 1751 } 1752 } while (!eof()); 1753 } 1754 1755 void UnwrappedLineParser::parseIfThenElse() { 1756 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 1757 nextToken(); 1758 if (FormatTok->Tok.is(tok::kw_constexpr)) 1759 nextToken(); 1760 if (FormatTok->Tok.is(tok::l_paren)) 1761 parseParens(); 1762 bool NeedsUnwrappedLine = false; 1763 if (FormatTok->Tok.is(tok::l_brace)) { 1764 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1765 parseBlock(/*MustBeDeclaration=*/false); 1766 if (Style.BraceWrapping.BeforeElse) 1767 addUnwrappedLine(); 1768 else 1769 NeedsUnwrappedLine = true; 1770 } else { 1771 addUnwrappedLine(); 1772 ++Line->Level; 1773 parseStructuralElement(); 1774 --Line->Level; 1775 } 1776 if (FormatTok->Tok.is(tok::kw_else)) { 1777 nextToken(); 1778 if (FormatTok->Tok.is(tok::l_brace)) { 1779 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1780 parseBlock(/*MustBeDeclaration=*/false); 1781 addUnwrappedLine(); 1782 } else if (FormatTok->Tok.is(tok::kw_if)) { 1783 parseIfThenElse(); 1784 } else { 1785 addUnwrappedLine(); 1786 ++Line->Level; 1787 parseStructuralElement(); 1788 if (FormatTok->is(tok::eof)) 1789 addUnwrappedLine(); 1790 --Line->Level; 1791 } 1792 } else if (NeedsUnwrappedLine) { 1793 addUnwrappedLine(); 1794 } 1795 } 1796 1797 void UnwrappedLineParser::parseTryCatch() { 1798 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 1799 nextToken(); 1800 bool NeedsUnwrappedLine = 
false; 1801 if (FormatTok->is(tok::colon)) { 1802 // We are in a function try block, what comes is an initializer list. 1803 nextToken(); 1804 while (FormatTok->is(tok::identifier)) { 1805 nextToken(); 1806 if (FormatTok->is(tok::l_paren)) 1807 parseParens(); 1808 if (FormatTok->is(tok::comma)) 1809 nextToken(); 1810 } 1811 } 1812 // Parse try with resource. 1813 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { 1814 parseParens(); 1815 } 1816 if (FormatTok->is(tok::l_brace)) { 1817 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1818 parseBlock(/*MustBeDeclaration=*/false); 1819 if (Style.BraceWrapping.BeforeCatch) { 1820 addUnwrappedLine(); 1821 } else { 1822 NeedsUnwrappedLine = true; 1823 } 1824 } else if (!FormatTok->is(tok::kw_catch)) { 1825 // The C++ standard requires a compound-statement after a try. 1826 // If there's none, we try to assume there's a structuralElement 1827 // and try to continue. 1828 addUnwrappedLine(); 1829 ++Line->Level; 1830 parseStructuralElement(); 1831 --Line->Level; 1832 } 1833 while (1) { 1834 if (FormatTok->is(tok::at)) 1835 nextToken(); 1836 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 1837 tok::kw___finally) || 1838 ((Style.Language == FormatStyle::LK_Java || 1839 Style.Language == FormatStyle::LK_JavaScript) && 1840 FormatTok->is(Keywords.kw_finally)) || 1841 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || 1842 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) 1843 break; 1844 nextToken(); 1845 while (FormatTok->isNot(tok::l_brace)) { 1846 if (FormatTok->is(tok::l_paren)) { 1847 parseParens(); 1848 continue; 1849 } 1850 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) 1851 return; 1852 nextToken(); 1853 } 1854 NeedsUnwrappedLine = false; 1855 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1856 parseBlock(/*MustBeDeclaration=*/false); 1857 if (Style.BraceWrapping.BeforeCatch) 1858 addUnwrappedLine(); 1859 else 1860 NeedsUnwrappedLine = 
true; 1861 } 1862 if (NeedsUnwrappedLine) 1863 addUnwrappedLine(); 1864 } 1865 1866 void UnwrappedLineParser::parseNamespace() { 1867 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) && 1868 "'namespace' expected"); 1869 1870 const FormatToken &InitialToken = *FormatTok; 1871 nextToken(); 1872 if (InitialToken.is(TT_NamespaceMacro)) { 1873 parseParens(); 1874 } else { 1875 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon)) 1876 nextToken(); 1877 } 1878 if (FormatTok->Tok.is(tok::l_brace)) { 1879 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1880 addUnwrappedLine(); 1881 1882 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || 1883 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 1884 DeclarationScopeStack.size() > 1); 1885 parseBlock(/*MustBeDeclaration=*/true, AddLevel); 1886 // Munch the semicolon after a namespace. This is more common than one would 1887 // think. Puttin the semicolon into its own line is very ugly. 1888 if (FormatTok->Tok.is(tok::semi)) 1889 nextToken(); 1890 addUnwrappedLine(); 1891 } 1892 // FIXME: Add error handling. 1893 } 1894 1895 void UnwrappedLineParser::parseNew() { 1896 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 1897 nextToken(); 1898 if (Style.Language != FormatStyle::LK_Java) 1899 return; 1900 1901 // In Java, we can parse everything up to the parens, which aren't optional. 1902 do { 1903 // There should not be a ;, { or } before the new's open paren. 1904 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 1905 return; 1906 1907 // Consume the parens. 1908 if (FormatTok->is(tok::l_paren)) { 1909 parseParens(); 1910 1911 // If there is a class body of an anonymous class, consume that as child. 
1912 if (FormatTok->is(tok::l_brace)) 1913 parseChildBlock(); 1914 return; 1915 } 1916 nextToken(); 1917 } while (!eof()); 1918 } 1919 1920 void UnwrappedLineParser::parseForOrWhileLoop() { 1921 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 1922 "'for', 'while' or foreach macro expected"); 1923 nextToken(); 1924 // JS' for await ( ... 1925 if (Style.Language == FormatStyle::LK_JavaScript && 1926 FormatTok->is(Keywords.kw_await)) 1927 nextToken(); 1928 if (FormatTok->Tok.is(tok::l_paren)) 1929 parseParens(); 1930 if (FormatTok->Tok.is(tok::l_brace)) { 1931 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1932 parseBlock(/*MustBeDeclaration=*/false); 1933 addUnwrappedLine(); 1934 } else { 1935 addUnwrappedLine(); 1936 ++Line->Level; 1937 parseStructuralElement(); 1938 --Line->Level; 1939 } 1940 } 1941 1942 void UnwrappedLineParser::parseDoWhile() { 1943 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 1944 nextToken(); 1945 if (FormatTok->Tok.is(tok::l_brace)) { 1946 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1947 parseBlock(/*MustBeDeclaration=*/false); 1948 if (Style.BraceWrapping.IndentBraces) 1949 addUnwrappedLine(); 1950 } else { 1951 addUnwrappedLine(); 1952 ++Line->Level; 1953 parseStructuralElement(); 1954 --Line->Level; 1955 } 1956 1957 // FIXME: Add error handling. 
1958 if (!FormatTok->Tok.is(tok::kw_while)) { 1959 addUnwrappedLine(); 1960 return; 1961 } 1962 1963 nextToken(); 1964 parseStructuralElement(); 1965 } 1966 1967 void UnwrappedLineParser::parseLabel() { 1968 nextToken(); 1969 unsigned OldLineLevel = Line->Level; 1970 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 1971 --Line->Level; 1972 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { 1973 CompoundStatementIndenter Indenter(this, Line->Level, 1974 Style.BraceWrapping.AfterCaseLabel, 1975 Style.BraceWrapping.IndentBraces); 1976 parseBlock(/*MustBeDeclaration=*/false); 1977 if (FormatTok->Tok.is(tok::kw_break)) { 1978 if (Style.BraceWrapping.AfterControlStatement) 1979 addUnwrappedLine(); 1980 parseStructuralElement(); 1981 } 1982 addUnwrappedLine(); 1983 } else { 1984 if (FormatTok->is(tok::semi)) 1985 nextToken(); 1986 addUnwrappedLine(); 1987 } 1988 Line->Level = OldLineLevel; 1989 if (FormatTok->isNot(tok::l_brace)) { 1990 parseStructuralElement(); 1991 addUnwrappedLine(); 1992 } 1993 } 1994 1995 void UnwrappedLineParser::parseCaseLabel() { 1996 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 1997 // FIXME: fix handling of complex expressions here. 1998 do { 1999 nextToken(); 2000 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 2001 parseLabel(); 2002 } 2003 2004 void UnwrappedLineParser::parseSwitch() { 2005 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 2006 nextToken(); 2007 if (FormatTok->Tok.is(tok::l_paren)) 2008 parseParens(); 2009 if (FormatTok->Tok.is(tok::l_brace)) { 2010 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2011 parseBlock(/*MustBeDeclaration=*/false); 2012 addUnwrappedLine(); 2013 } else { 2014 addUnwrappedLine(); 2015 ++Line->Level; 2016 parseStructuralElement(); 2017 --Line->Level; 2018 } 2019 } 2020 2021 void UnwrappedLineParser::parseAccessSpecifier() { 2022 nextToken(); 2023 // Understand Qt's slots. 
2024 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 2025 nextToken(); 2026 // Otherwise, we don't know what it is, and we'd better keep the next token. 2027 if (FormatTok->Tok.is(tok::colon)) 2028 nextToken(); 2029 addUnwrappedLine(); 2030 } 2031 2032 bool UnwrappedLineParser::parseEnum() { 2033 // Won't be 'enum' for NS_ENUMs. 2034 if (FormatTok->Tok.is(tok::kw_enum)) 2035 nextToken(); 2036 2037 // In TypeScript, "enum" can also be used as property name, e.g. in interface 2038 // declarations. An "enum" keyword followed by a colon would be a syntax 2039 // error and thus assume it is just an identifier. 2040 if (Style.Language == FormatStyle::LK_JavaScript && 2041 FormatTok->isOneOf(tok::colon, tok::question)) 2042 return false; 2043 2044 // In protobuf, "enum" can be used as a field name. 2045 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) 2046 return false; 2047 2048 // Eat up enum class ... 2049 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 2050 nextToken(); 2051 2052 while (FormatTok->Tok.getIdentifierInfo() || 2053 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 2054 tok::greater, tok::comma, tok::question)) { 2055 nextToken(); 2056 // We can have macros or attributes in between 'enum' and the enum name. 2057 if (FormatTok->is(tok::l_paren)) 2058 parseParens(); 2059 if (FormatTok->is(tok::identifier)) { 2060 nextToken(); 2061 // If there are two identifiers in a row, this is likely an elaborate 2062 // return type. In Java, this can be "implements", etc. 2063 if (Style.isCpp() && FormatTok->is(tok::identifier)) 2064 return false; 2065 } 2066 } 2067 2068 // Just a declaration or something is wrong. 2069 if (FormatTok->isNot(tok::l_brace)) 2070 return true; 2071 FormatTok->BlockKind = BK_Block; 2072 2073 if (Style.Language == FormatStyle::LK_Java) { 2074 // Java enums are different. 
2075 parseJavaEnumBody(); 2076 return true; 2077 } 2078 if (Style.Language == FormatStyle::LK_Proto) { 2079 parseBlock(/*MustBeDeclaration=*/true); 2080 return true; 2081 } 2082 2083 // Parse enum body. 2084 nextToken(); 2085 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); 2086 if (HasError) { 2087 if (FormatTok->is(tok::semi)) 2088 nextToken(); 2089 addUnwrappedLine(); 2090 } 2091 return true; 2092 2093 // There is no addUnwrappedLine() here so that we fall through to parsing a 2094 // structural element afterwards. Thus, in "enum A {} n, m;", 2095 // "} n, m;" will end up in one unwrapped line. 2096 } 2097 2098 void UnwrappedLineParser::parseJavaEnumBody() { 2099 // Determine whether the enum is simple, i.e. does not have a semicolon or 2100 // constants with class bodies. Simple enums can be formatted like braced 2101 // lists, contracted to a single line, etc. 2102 unsigned StoredPosition = Tokens->getPosition(); 2103 bool IsSimple = true; 2104 FormatToken *Tok = Tokens->getNextToken(); 2105 while (Tok) { 2106 if (Tok->is(tok::r_brace)) 2107 break; 2108 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 2109 IsSimple = false; 2110 break; 2111 } 2112 // FIXME: This will also mark enums with braces in the arguments to enum 2113 // constants as "not simple". This is probably fine in practice, though. 2114 Tok = Tokens->getNextToken(); 2115 } 2116 FormatTok = Tokens->setPosition(StoredPosition); 2117 2118 if (IsSimple) { 2119 nextToken(); 2120 parseBracedList(); 2121 addUnwrappedLine(); 2122 return; 2123 } 2124 2125 // Parse the body of a more complex enum. 2126 // First add a line for everything up to the "{". 2127 nextToken(); 2128 addUnwrappedLine(); 2129 ++Line->Level; 2130 2131 // Parse the enum constants. 2132 while (FormatTok) { 2133 if (FormatTok->is(tok::l_brace)) { 2134 // Parse the constant's class body. 
2135 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 2136 /*MunchSemi=*/false); 2137 } else if (FormatTok->is(tok::l_paren)) { 2138 parseParens(); 2139 } else if (FormatTok->is(tok::comma)) { 2140 nextToken(); 2141 addUnwrappedLine(); 2142 } else if (FormatTok->is(tok::semi)) { 2143 nextToken(); 2144 addUnwrappedLine(); 2145 break; 2146 } else if (FormatTok->is(tok::r_brace)) { 2147 addUnwrappedLine(); 2148 break; 2149 } else { 2150 nextToken(); 2151 } 2152 } 2153 2154 // Parse the class body after the enum's ";" if any. 2155 parseLevel(/*HasOpeningBrace=*/true); 2156 nextToken(); 2157 --Line->Level; 2158 addUnwrappedLine(); 2159 } 2160 2161 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 2162 const FormatToken &InitialToken = *FormatTok; 2163 nextToken(); 2164 2165 // The actual identifier can be a nested name specifier, and in macros 2166 // it is often token-pasted. 2167 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 2168 tok::kw___attribute, tok::kw___declspec, 2169 tok::kw_alignas) || 2170 ((Style.Language == FormatStyle::LK_Java || 2171 Style.Language == FormatStyle::LK_JavaScript) && 2172 FormatTok->isOneOf(tok::period, tok::comma))) { 2173 if (Style.Language == FormatStyle::LK_JavaScript && 2174 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 2175 // JavaScript/TypeScript supports inline object types in 2176 // extends/implements positions: 2177 // class Foo implements {bar: number} { } 2178 nextToken(); 2179 if (FormatTok->is(tok::l_brace)) { 2180 tryToParseBracedList(); 2181 continue; 2182 } 2183 } 2184 bool IsNonMacroIdentifier = 2185 FormatTok->is(tok::identifier) && 2186 FormatTok->TokenText != FormatTok->TokenText.upper(); 2187 nextToken(); 2188 // We can have macros or attributes in between 'class' and the class name. 
2189 if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren)) 2190 parseParens(); 2191 } 2192 2193 // Note that parsing away template declarations here leads to incorrectly 2194 // accepting function declarations as record declarations. 2195 // In general, we cannot solve this problem. Consider: 2196 // class A<int> B() {} 2197 // which can be a function definition or a class definition when B() is a 2198 // macro. If we find enough real-world cases where this is a problem, we 2199 // can parse for the 'template' keyword in the beginning of the statement, 2200 // and thus rule out the record production in case there is no template 2201 // (this would still leave us with an ambiguity between template function 2202 // and class declarations). 2203 if (FormatTok->isOneOf(tok::colon, tok::less)) { 2204 while (!eof()) { 2205 if (FormatTok->is(tok::l_brace)) { 2206 calculateBraceTypes(/*ExpectClassBody=*/true); 2207 if (!tryToParseBracedList()) 2208 break; 2209 } 2210 if (FormatTok->Tok.is(tok::semi)) 2211 return; 2212 nextToken(); 2213 } 2214 } 2215 if (FormatTok->Tok.is(tok::l_brace)) { 2216 if (ParseAsExpr) { 2217 parseChildBlock(); 2218 } else { 2219 if (ShouldBreakBeforeBrace(Style, InitialToken)) 2220 addUnwrappedLine(); 2221 2222 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 2223 /*MunchSemi=*/false); 2224 } 2225 } 2226 // There is no addUnwrappedLine() here so that we fall through to parsing a 2227 // structural element afterwards. Thus, in "class A {} n, m;", 2228 // "} n, m;" will end up in one unwrapped line. 
2229 } 2230 2231 void UnwrappedLineParser::parseObjCMethod() { 2232 assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) && 2233 "'(' or identifier expected."); 2234 do { 2235 if (FormatTok->Tok.is(tok::semi)) { 2236 nextToken(); 2237 addUnwrappedLine(); 2238 return; 2239 } else if (FormatTok->Tok.is(tok::l_brace)) { 2240 if (Style.BraceWrapping.AfterFunction) 2241 addUnwrappedLine(); 2242 parseBlock(/*MustBeDeclaration=*/false); 2243 addUnwrappedLine(); 2244 return; 2245 } else { 2246 nextToken(); 2247 } 2248 } while (!eof()); 2249 } 2250 2251 void UnwrappedLineParser::parseObjCProtocolList() { 2252 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 2253 do { 2254 nextToken(); 2255 // Early exit in case someone forgot a close angle. 2256 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 2257 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 2258 return; 2259 } while (!eof() && FormatTok->Tok.isNot(tok::greater)); 2260 nextToken(); // Skip '>'. 2261 } 2262 2263 void UnwrappedLineParser::parseObjCUntilAtEnd() { 2264 do { 2265 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 2266 nextToken(); 2267 addUnwrappedLine(); 2268 break; 2269 } 2270 if (FormatTok->is(tok::l_brace)) { 2271 parseBlock(/*MustBeDeclaration=*/false); 2272 // In ObjC interfaces, nothing should be following the "}". 2273 addUnwrappedLine(); 2274 } else if (FormatTok->is(tok::r_brace)) { 2275 // Ignore stray "}". parseStructuralElement doesn't consume them. 
2276 nextToken(); 2277 addUnwrappedLine(); 2278 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 2279 nextToken(); 2280 parseObjCMethod(); 2281 } else { 2282 parseStructuralElement(); 2283 } 2284 } while (!eof()); 2285 } 2286 2287 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 2288 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 2289 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 2290 nextToken(); 2291 nextToken(); // interface name 2292 2293 // @interface can be followed by a lightweight generic 2294 // specialization list, then either a base class or a category. 2295 if (FormatTok->Tok.is(tok::less)) { 2296 // Unlike protocol lists, generic parameterizations support 2297 // nested angles: 2298 // 2299 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 2300 // NSObject <NSCopying, NSSecureCoding> 2301 // 2302 // so we need to count how many open angles we have left. 2303 unsigned NumOpenAngles = 1; 2304 do { 2305 nextToken(); 2306 // Early exit in case someone forgot a close angle. 2307 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 2308 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 2309 break; 2310 if (FormatTok->Tok.is(tok::less)) 2311 ++NumOpenAngles; 2312 else if (FormatTok->Tok.is(tok::greater)) { 2313 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 2314 --NumOpenAngles; 2315 } 2316 } while (!eof() && NumOpenAngles != 0); 2317 nextToken(); // Skip '>'. 2318 } 2319 if (FormatTok->Tok.is(tok::colon)) { 2320 nextToken(); 2321 nextToken(); // base class name 2322 } else if (FormatTok->Tok.is(tok::l_paren)) 2323 // Skip category, if present. 
2324 parseParens(); 2325 2326 if (FormatTok->Tok.is(tok::less)) 2327 parseObjCProtocolList(); 2328 2329 if (FormatTok->Tok.is(tok::l_brace)) { 2330 if (Style.BraceWrapping.AfterObjCDeclaration) 2331 addUnwrappedLine(); 2332 parseBlock(/*MustBeDeclaration=*/true); 2333 } 2334 2335 // With instance variables, this puts '}' on its own line. Without instance 2336 // variables, this ends the @interface line. 2337 addUnwrappedLine(); 2338 2339 parseObjCUntilAtEnd(); 2340 } 2341 2342 // Returns true for the declaration/definition form of @protocol, 2343 // false for the expression form. 2344 bool UnwrappedLineParser::parseObjCProtocol() { 2345 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 2346 nextToken(); 2347 2348 if (FormatTok->is(tok::l_paren)) 2349 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 2350 return false; 2351 2352 // The definition/declaration form, 2353 // @protocol Foo 2354 // - (int)someMethod; 2355 // @end 2356 2357 nextToken(); // protocol name 2358 2359 if (FormatTok->Tok.is(tok::less)) 2360 parseObjCProtocolList(); 2361 2362 // Check for protocol declaration. 2363 if (FormatTok->Tok.is(tok::semi)) { 2364 nextToken(); 2365 addUnwrappedLine(); 2366 return true; 2367 } 2368 2369 addUnwrappedLine(); 2370 parseObjCUntilAtEnd(); 2371 return true; 2372 } 2373 2374 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 2375 bool IsImport = FormatTok->is(Keywords.kw_import); 2376 assert(IsImport || FormatTok->is(tok::kw_export)); 2377 nextToken(); 2378 2379 // Consume the "default" in "export default class/function". 2380 if (FormatTok->is(tok::kw_default)) 2381 nextToken(); 2382 2383 // Consume "async function", "function" and "default function", so that these 2384 // get parsed as free-standing JS functions, i.e. do not require a trailing 2385 // semicolon. 
2386 if (FormatTok->is(Keywords.kw_async)) 2387 nextToken(); 2388 if (FormatTok->is(Keywords.kw_function)) { 2389 nextToken(); 2390 return; 2391 } 2392 2393 // For imports, `export *`, `export {...}`, consume the rest of the line up 2394 // to the terminating `;`. For everything else, just return and continue 2395 // parsing the structural element, i.e. the declaration or expression for 2396 // `export default`. 2397 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 2398 !FormatTok->isStringLiteral()) 2399 return; 2400 2401 while (!eof()) { 2402 if (FormatTok->is(tok::semi)) 2403 return; 2404 if (Line->Tokens.empty()) { 2405 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 2406 // import statement should terminate. 2407 return; 2408 } 2409 if (FormatTok->is(tok::l_brace)) { 2410 FormatTok->BlockKind = BK_Block; 2411 nextToken(); 2412 parseBracedList(); 2413 } else { 2414 nextToken(); 2415 } 2416 } 2417 } 2418 2419 void UnwrappedLineParser::parseStatementMacro() { 2420 nextToken(); 2421 if (FormatTok->is(tok::l_paren)) 2422 parseParens(); 2423 if (FormatTok->is(tok::semi)) 2424 nextToken(); 2425 addUnwrappedLine(); 2426 } 2427 2428 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 2429 StringRef Prefix = "") { 2430 llvm::dbgs() << Prefix << "Line(" << Line.Level 2431 << ", FSC=" << Line.FirstStartColumn << ")" 2432 << (Line.InPPDirective ? 
" MACRO" : "") << ": "; 2433 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2434 E = Line.Tokens.end(); 2435 I != E; ++I) { 2436 llvm::dbgs() << I->Tok->Tok.getName() << "[" 2437 << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn 2438 << "] "; 2439 } 2440 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2441 E = Line.Tokens.end(); 2442 I != E; ++I) { 2443 const UnwrappedLineNode &Node = *I; 2444 for (SmallVectorImpl<UnwrappedLine>::const_iterator 2445 I = Node.Children.begin(), 2446 E = Node.Children.end(); 2447 I != E; ++I) { 2448 printDebugInfo(*I, "\nChild: "); 2449 } 2450 } 2451 llvm::dbgs() << "\n"; 2452 } 2453 2454 void UnwrappedLineParser::addUnwrappedLine() { 2455 if (Line->Tokens.empty()) 2456 return; 2457 LLVM_DEBUG({ 2458 if (CurrentLines == &Lines) 2459 printDebugInfo(*Line); 2460 }); 2461 CurrentLines->push_back(std::move(*Line)); 2462 Line->Tokens.clear(); 2463 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 2464 Line->FirstStartColumn = 0; 2465 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 2466 CurrentLines->append( 2467 std::make_move_iterator(PreprocessorDirectives.begin()), 2468 std::make_move_iterator(PreprocessorDirectives.end())); 2469 PreprocessorDirectives.clear(); 2470 } 2471 // Disconnect the current token from the last token on the previous line. 2472 FormatTok->Previous = nullptr; 2473 } 2474 2475 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 2476 2477 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 2478 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 2479 FormatTok.NewlinesBefore > 0; 2480 } 2481 2482 // Checks if \p FormatTok is a line comment that continues the line comment 2483 // section on \p Line. 
2484 static bool continuesLineCommentSection(const FormatToken &FormatTok, 2485 const UnwrappedLine &Line, 2486 llvm::Regex &CommentPragmasRegex) { 2487 if (Line.Tokens.empty()) 2488 return false; 2489 2490 StringRef IndentContent = FormatTok.TokenText; 2491 if (FormatTok.TokenText.startswith("//") || 2492 FormatTok.TokenText.startswith("/*")) 2493 IndentContent = FormatTok.TokenText.substr(2); 2494 if (CommentPragmasRegex.match(IndentContent)) 2495 return false; 2496 2497 // If Line starts with a line comment, then FormatTok continues the comment 2498 // section if its original column is greater or equal to the original start 2499 // column of the line. 2500 // 2501 // Define the min column token of a line as follows: if a line ends in '{' or 2502 // contains a '{' followed by a line comment, then the min column token is 2503 // that '{'. Otherwise, the min column token of the line is the first token of 2504 // the line. 2505 // 2506 // If Line starts with a token other than a line comment, then FormatTok 2507 // continues the comment section if its original column is greater than the 2508 // original start column of the min column token of the line. 
2509 // 2510 // For example, the second line comment continues the first in these cases: 2511 // 2512 // // first line 2513 // // second line 2514 // 2515 // and: 2516 // 2517 // // first line 2518 // // second line 2519 // 2520 // and: 2521 // 2522 // int i; // first line 2523 // // second line 2524 // 2525 // and: 2526 // 2527 // do { // first line 2528 // // second line 2529 // int i; 2530 // } while (true); 2531 // 2532 // and: 2533 // 2534 // enum { 2535 // a, // first line 2536 // // second line 2537 // b 2538 // }; 2539 // 2540 // The second line comment doesn't continue the first in these cases: 2541 // 2542 // // first line 2543 // // second line 2544 // 2545 // and: 2546 // 2547 // int i; // first line 2548 // // second line 2549 // 2550 // and: 2551 // 2552 // do { // first line 2553 // // second line 2554 // int i; 2555 // } while (true); 2556 // 2557 // and: 2558 // 2559 // enum { 2560 // a, // first line 2561 // // second line 2562 // }; 2563 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 2564 2565 // Scan for '{//'. If found, use the column of '{' as a min column for line 2566 // comment section continuation. 2567 const FormatToken *PreviousToken = nullptr; 2568 for (const UnwrappedLineNode &Node : Line.Tokens) { 2569 if (PreviousToken && PreviousToken->is(tok::l_brace) && 2570 isLineComment(*Node.Tok)) { 2571 MinColumnToken = PreviousToken; 2572 break; 2573 } 2574 PreviousToken = Node.Tok; 2575 2576 // Grab the last newline preceding a token in this unwrapped line. 
2577 if (Node.Tok->NewlinesBefore > 0) { 2578 MinColumnToken = Node.Tok; 2579 } 2580 } 2581 if (PreviousToken && PreviousToken->is(tok::l_brace)) { 2582 MinColumnToken = PreviousToken; 2583 } 2584 2585 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 2586 MinColumnToken); 2587 } 2588 2589 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 2590 bool JustComments = Line->Tokens.empty(); 2591 for (SmallVectorImpl<FormatToken *>::const_iterator 2592 I = CommentsBeforeNextToken.begin(), 2593 E = CommentsBeforeNextToken.end(); 2594 I != E; ++I) { 2595 // Line comments that belong to the same line comment section are put on the 2596 // same line since later we might want to reflow content between them. 2597 // Additional fine-grained breaking of line comment sections is controlled 2598 // by the class BreakableLineCommentSection in case it is desirable to keep 2599 // several line comment sections in the same unwrapped line. 2600 // 2601 // FIXME: Consider putting separate line comment sections as children to the 2602 // unwrapped line instead. 
2603 (*I)->ContinuesLineCommentSection = 2604 continuesLineCommentSection(**I, *Line, CommentPragmasRegex); 2605 if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) 2606 addUnwrappedLine(); 2607 pushToken(*I); 2608 } 2609 if (NewlineBeforeNext && JustComments) 2610 addUnwrappedLine(); 2611 CommentsBeforeNextToken.clear(); 2612 } 2613 2614 void UnwrappedLineParser::nextToken(int LevelDifference) { 2615 if (eof()) 2616 return; 2617 flushComments(isOnNewLine(*FormatTok)); 2618 pushToken(FormatTok); 2619 FormatToken *Previous = FormatTok; 2620 if (Style.Language != FormatStyle::LK_JavaScript) 2621 readToken(LevelDifference); 2622 else 2623 readTokenWithJavaScriptASI(); 2624 FormatTok->Previous = Previous; 2625 } 2626 2627 void UnwrappedLineParser::distributeComments( 2628 const SmallVectorImpl<FormatToken *> &Comments, 2629 const FormatToken *NextTok) { 2630 // Whether or not a line comment token continues a line is controlled by 2631 // the method continuesLineCommentSection, with the following caveat: 2632 // 2633 // Define a trail of Comments to be a nonempty proper postfix of Comments such 2634 // that each comment line from the trail is aligned with the next token, if 2635 // the next token exists. If a trail exists, the beginning of the maximal 2636 // trail is marked as a start of a new comment section. 2637 // 2638 // For example in this code: 2639 // 2640 // int a; // line about a 2641 // // line 1 about b 2642 // // line 2 about b 2643 // int b; 2644 // 2645 // the two lines about b form a maximal trail, so there are two sections, the 2646 // first one consisting of the single comment "// line about a" and the 2647 // second one consisting of the next two comments. 2648 if (Comments.empty()) 2649 return; 2650 bool ShouldPushCommentsInCurrentLine = true; 2651 bool HasTrailAlignedWithNextToken = false; 2652 unsigned StartOfTrailAlignedWithNextToken = 0; 2653 if (NextTok) { 2654 // We are skipping the first element intentionally. 
2655 for (unsigned i = Comments.size() - 1; i > 0; --i) { 2656 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 2657 HasTrailAlignedWithNextToken = true; 2658 StartOfTrailAlignedWithNextToken = i; 2659 } 2660 } 2661 } 2662 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 2663 FormatToken *FormatTok = Comments[i]; 2664 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 2665 FormatTok->ContinuesLineCommentSection = false; 2666 } else { 2667 FormatTok->ContinuesLineCommentSection = 2668 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 2669 } 2670 if (!FormatTok->ContinuesLineCommentSection && 2671 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 2672 ShouldPushCommentsInCurrentLine = false; 2673 } 2674 if (ShouldPushCommentsInCurrentLine) { 2675 pushToken(FormatTok); 2676 } else { 2677 CommentsBeforeNextToken.push_back(FormatTok); 2678 } 2679 } 2680 } 2681 2682 void UnwrappedLineParser::readToken(int LevelDifference) { 2683 SmallVector<FormatToken *, 1> Comments; 2684 do { 2685 FormatTok = Tokens->getNextToken(); 2686 assert(FormatTok); 2687 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 2688 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 2689 distributeComments(Comments, FormatTok); 2690 Comments.clear(); 2691 // If there is an unfinished unwrapped line, we flush the preprocessor 2692 // directives only after that unwrapped line was finished later. 
2693 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 2694 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 2695 assert((LevelDifference >= 0 || 2696 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 2697 "LevelDifference makes Line->Level negative"); 2698 Line->Level += LevelDifference; 2699 // Comments stored before the preprocessor directive need to be output 2700 // before the preprocessor directive, at the same level as the 2701 // preprocessor directive, as we consider them to apply to the directive. 2702 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 2703 PPBranchLevel > 0) 2704 Line->Level += PPBranchLevel; 2705 flushComments(isOnNewLine(*FormatTok)); 2706 parsePPDirective(); 2707 } 2708 while (FormatTok->Type == TT_ConflictStart || 2709 FormatTok->Type == TT_ConflictEnd || 2710 FormatTok->Type == TT_ConflictAlternative) { 2711 if (FormatTok->Type == TT_ConflictStart) { 2712 conditionalCompilationStart(/*Unreachable=*/false); 2713 } else if (FormatTok->Type == TT_ConflictAlternative) { 2714 conditionalCompilationAlternative(); 2715 } else if (FormatTok->Type == TT_ConflictEnd) { 2716 conditionalCompilationEnd(); 2717 } 2718 FormatTok = Tokens->getNextToken(); 2719 FormatTok->MustBreakBefore = true; 2720 } 2721 2722 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 2723 !Line->InPPDirective) { 2724 continue; 2725 } 2726 2727 if (!FormatTok->Tok.is(tok::comment)) { 2728 distributeComments(Comments, FormatTok); 2729 Comments.clear(); 2730 return; 2731 } 2732 2733 Comments.push_back(FormatTok); 2734 } while (!eof()); 2735 2736 distributeComments(Comments, nullptr); 2737 Comments.clear(); 2738 } 2739 2740 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 2741 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 2742 if (MustBreakBeforeNextToken) { 2743 Line->Tokens.back().Tok->MustBreakBefore = true; 2744 MustBreakBeforeNextToken = false; 2745 } 2746 } 2747 2748 } // end namespace format 2749 } // 
end namespace clang 2750